Last active 1686796540

Convert SillyTavern jsonl chats to TXT files.

sillytavern-chat-to-txt.py Raw
1#!/usr/bin/env python3
2import argparse
3import re
4from pathlib import Path
5import sys
6import json
7
8"""
9Convert SillyTavern jsonl chats to TXT files.
10
11HOW TO USE:
1. Find the chat file you want to convert. It's the `jsonl` file located in `SillyTavern/public/chats/<character name>/`.
2. Run this script with `python3 sillytavern-chat-to-txt.py <path to the jsonl file> <path to where you want to save the TXT file>`.
14
15These things are stripped from the chat:
16- OOC chat like this: (OOC: bla bla)
17- Text between brackets: [bla bla bla]
18
19This script isn't going to produce a perfect transcript, but it's close enough.
20
21"""
22
# Matches the text that should be stripped from a chat message:
#   1. An out-of-character aside such as "(OOC: bla bla)" or "[OOC: bla bla]",
#      anywhere inside a line.
#   2. A line that consists entirely of bracketed text, e.g. "[bla bla bla]".
#
# The line anchors are zero-width (re.MULTILINE) on purpose: consuming the
# surrounding newlines would glue the neighbouring lines together
# ("a\n[x]\nb" -> "ab") and would leave the second of two consecutive
# bracketed lines untouched, because its leading newline was already eaten
# by the previous match.
cleaner_re = re.compile(
    r'[(\[]OOC:.*?[\])]'  # inline OOC aside, closed by either bracket kind
    r'|^\[.*?\]$',        # a whole line wrapped in square brackets
    re.MULTILINE,
)
24
def _load_chat(input_jsonl):
    """Parse a jsonl file into a list of decoded JSON values, one per non-blank line.

    Prints a message and exits with status 1 on the first line that is not
    valid JSON.
    """
    chatlines = []
    # Decode explicitly as UTF-8: the platform default encoding (for example
    # cp1252 on Windows) would mangle or reject non-ASCII chat text.
    raw = input_jsonl.read_text(encoding='utf-8').splitlines()
    for lineno, line in enumerate(raw, start=1):
        # A blank line is not a JSON document; skip it instead of aborting.
        if not line.strip():
            continue
        try:
            chatlines.append(json.loads(line))
        except json.JSONDecodeError as err:
            print(f'JSON decode error on line {lineno}: {err}')
            sys.exit(1)
    return chatlines


def _format_messages(chatlines):
    """Return ``{'name', 'msg'}`` dicts for every record with non-empty cleaned text."""
    formatted = []
    for msg in chatlines:
        # Only JSON objects can carry a message; skip arrays, numbers, etc.
        if not isinstance(msg, dict):
            continue
        text = msg.get('mes')
        # Records without a textual 'mes' field are not chat messages.
        if not isinstance(text, str):
            continue
        clean_str = cleaner_re.sub('', text).strip()
        # Collapse every run of newlines, not just pairs, so that removed
        # lines do not leave blank lines behind.
        clean_str = re.sub(r'\n{2,}', '\n', clean_str)
        if not clean_str:
            continue
        # Fall back to a placeholder speaker rather than crash on a record
        # that has text but no 'name' field.
        formatted.append({'name': msg.get('name', 'Unknown'), 'msg': clean_str})
    return formatted


def main():
    """Convert a jsonl chat file into a plain-text transcript.

    Reads two positional command-line arguments: the input jsonl path and
    the output TXT path. Every JSON object with a string ``mes`` field is
    cleaned with ``cleaner_re`` and written as ``<name>:`` followed by the
    message text and a blank line.

    Exits with status 1 when the input file is missing, when the output
    directory does not exist, or when a line of the input is not valid JSON.
    """
    parser = argparse.ArgumentParser(description='Convert SillyTavern jsonl files to TXT files for importing into the infinite context server.')
    parser.add_argument('filepath', help='The path to the jsonl file to parse')
    parser.add_argument('output_txt', help='The output TXT file to create.')
    args = parser.parse_args()

    input_jsonl = Path(args.filepath).expanduser().absolute().resolve()
    output_txt = Path(args.output_txt).expanduser().absolute().resolve()

    print('Converting chat:', input_jsonl)

    # is_file() also rejects a directory, which would otherwise fail later
    # with a traceback when read.
    if not input_jsonl.is_file():
        print('Input file does not exist:', input_jsonl)
        sys.exit(1)
    if not output_txt.parent.exists():
        print('Output parent directory does not exist:', output_txt.parent)
        sys.exit(1)

    formatted_chat = _format_messages(_load_chat(input_jsonl))

    # Write UTF-8 explicitly so the transcript does not depend on the locale.
    with open(output_txt, 'w', encoding='utf-8') as f:
        f.writelines(
            f"{entry['name']}:\n{entry['msg']}\n\n" for entry in formatted_chat
        )

    print(f'Converted {len(formatted_chat)} lines.')
    print('Saved to:', output_txt)
67
# Run the converter only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()