#!/usr/bin/env python3
"""
Convert SillyTavern jsonl chats to TXT files using AI.

HOW TO USE:
1. `pip install tiktoken openai`
2. Find the chat file you want to convert. It's the `jsonl` file located in
   `SillyTavern/public/chats/<character name>/`
3. Run this script with `python3 sillytavern-chat-to-txt.py --key <your API key>`

This uses a temperature of 0 so don't re-run this expecting something different.

If your chat is larger than the context window it will be sent in batches.
After each batch, the response is written to your output file.
"""
import argparse
import re
from pathlib import Path
import sys
import json
import openai
import tiktoken
import threading
import traceback


class TimerThread(threading.Thread):
    """Background thread that prints an elapsed-seconds spinner until stopped.

    Used to give the user feedback while we wait on a (potentially slow)
    OpenAI API response.
    """

    def __init__(self, prompt: str = 'Waiting for response...'):
        super().__init__()
        self._stop_event = threading.Event()
        self.prompt = prompt

    def run(self):
        seconds = 0
        while not self._stop_event.is_set():
            # \r rewrites the same console line instead of scrolling.
            print(f"\r{self.prompt} {seconds}s", end="")
            seconds += 1
            self._stop_event.wait(1)

    def stop(self):
        """Signal the thread to exit and terminate the in-place console line."""
        self._stop_event.set()
        print('')


def count_tokens(string: str, encoding_name: str = 'cl100k_base', encoding_for_model: str = None) -> int:
    """Return the number of tokens in `string`.

    :param string: text to tokenize.
    :param encoding_name: tiktoken encoding to use when no model name is given.
    :param encoding_for_model: if set, look up the encoding for this model
        instead of using `encoding_name`.
    :return: token count.
    """
    if encoding_for_model:
        enc = tiktoken.encoding_for_model(encoding_for_model)
    else:
        enc = tiktoken.get_encoding(encoding_name)
    return len(enc.encode(string))


def array_of_dicts_to_jsonl(array_of_dicts):
    """Serialize a list of dicts to a jsonl string (one JSON object per line)."""
    return "\n".join(json.dumps(d) for d in array_of_dicts)


def send_to_openai(msg, model):
    """Send one batch of jsonl chat lines to the OpenAI chat API.

    Exits the process on any API error. Returns the raw API response object.

    :param msg: jsonl string of chat messages to summarize.
    :param model: OpenAI model name.
    """
    user_msg = (
        "I have a jsonl transcript of an internet roleplay session. "
        "I need you to strip everything that isn't important to the story and write a summary of each message.\n"
        "For each message, use the format:\n\n```\n<name>: <message summary>\n```\n\n"
        "Some messages include things that other characters say so please organize it accordingly.\n\n"
        f"{msg}"
    )
    timer_thread = TimerThread(prompt=f'Sending {count_tokens(user_msg)} tokens to the AI...')
    timer_thread.start()
    try:
        response = openai.ChatCompletion.create(
            model=model,
            messages=[
                {"role": "user", "content": user_msg}
            ],
            # Deterministic output: re-running the script produces the same text.
            temperature=0,
        )
    except Exception as e:
        print('Exception:', e)
        sys.exit(1)
    finally:
        # Always stop the timer thread; it is non-daemonic, so leaving it
        # running would prevent the interpreter from exiting on error.
        timer_thread.stop()
    return response


def main():
    parser = argparse.ArgumentParser(description='Convert SillyTavern jsonl files to TXT files using AI for importing into the infinite context server.')
    parser.add_argument('filepath', help='The path to the jsonl file to parse')
    parser.add_argument('output_txt', help='The output TXT file to create.')
    parser.add_argument('--key', required=True, help='Your OpenAI API key')
    parser.add_argument('--model', default='gpt-4', help='Name of the OpenAI model to use. GPT-4 seems to work the best for this. Default: gpt-4')
    args = parser.parse_args()

    openai.api_key = args.key

    input_jsonl = Path(args.filepath).expanduser().absolute().resolve()
    output_txt = Path(args.output_txt).expanduser().absolute().resolve()

    # Validate paths BEFORE touching the output file so a bad invocation
    # doesn't wipe an existing output.
    if not input_jsonl.exists():
        print('Input file does not exist:', input_jsonl)
        sys.exit(1)
    if not output_txt.parent.exists():
        print('Output parent directory does not exist:', output_txt.parent)
        sys.exit(1)

    # Empty the file since we append to it. missing_ok avoids a crash on the
    # first run when the output file doesn't exist yet.
    output_txt.unlink(missing_ok=True)
    output_txt.touch()

    print('Converting chat:', input_jsonl)
    print('Using model:', args.model)

    # Halve the context window since it must hold both input and output tokens.
    if args.model == "gpt-3.5-turbo" or args.model == "text-davinci-003":
        max_tokens = 3050 / 2
    elif args.model == "gpt-4":
        max_tokens = 8050 / 2
    else:
        print('Unknown model:', args.model)
        sys.exit(1)

    chatlines = []
    total_tokens = 0
    raw = input_jsonl.read_text().splitlines()
    for i, line in enumerate(raw):
        try:
            tmp = json.loads(line)
            # We don't want metadata messages
            if 'mes' not in tmp.keys():
                continue
            # Trim the message down to save tokens
            msg = json.dumps({'name': tmp['name'], 'mes': tmp['mes']})
            # We can't split messages so if one is larger than the context limit we have to quit
            token_count = count_tokens(msg)
            total_tokens += token_count
            if token_count > max_tokens:
                # Bug fix: print the actual count, not the literal string 'token_count'.
                print('Message on line', i + 1, 'is too long at', token_count, 'tokens. Max tokens is', max_tokens, 'You need to decide how to handle this.')
                sys.exit(1)
            chatlines.append(json.loads(msg))
        except json.decoder.JSONDecodeError as e:
            print(f'JSON decode error on line {i + 1}:', e)
            sys.exit(1)

    num_chat_messages = len(chatlines)
    print('Total tokens:', total_tokens)

    # Greedily pack messages into batches that fit the model's context window,
    # sending each batch as soon as it is full (or the input is exhausted).
    while chatlines:
        ai_input_data = []
        output_data = []
        while True:
            # Tentatively add the next message and check the batch still fits.
            ai_input_data.append(chatlines[0])
            ai_input = array_of_dicts_to_jsonl(ai_input_data)
            if count_tokens(ai_input) <= max_tokens:
                # Only remove a message from the queue once it fits in the context.
                del chatlines[0]
            else:
                # Batch overflowed: drop the message we just added (it stays in
                # chatlines and will start the next batch). Bug fix: the
                # original removed ai_input_data[0] (the oldest fitted message)
                # and sent the stale payload that still contained the overflow.
                ai_input_data.pop()
                ai_input = array_of_dicts_to_jsonl(ai_input_data)
                output_data.append(send_to_openai(ai_input, args.model).choices[0].message.content.strip())
                break
            # If there aren't any more messages to process, they all fit in the context.
            if not chatlines:
                output_data.append(send_to_openai(ai_input, args.model).choices[0].message.content.strip())
                break
        if output_data:
            with open(output_txt, 'a') as f:
                for msg in output_data:
                    f.write(f"{msg}\n\n\n")

    print(f'Converted {num_chat_messages} lines.')
    print('Saved to:', output_txt)


if __name__ == "__main__":
    main()