Last active 1686796540

Convert SillyTavern jsonl chats to TXT files.

cyberes's Avatar cyberes revised this gist 1686796540. Go to revision

1 file changed, 69 insertions(+)

@@ -0,0 +1,69 @@
1 + #!/usr/bin/env python3
2 + import argparse
3 + import re
4 + from pathlib import Path
5 + import sys
6 + import json
7 +
8 + """
9 + Convert SillyTavern jsonl chats to TXT files.
10 +
11 + HOW TO USE:
12 + 1. Find the chat file you want to convert. It's the `jsonl` file located in `SillyTavern/public/chats/<character name>/`.
13 + 2. Run this script with `python3 <path to this script> <path to the jsonl file> <path to where you want to save the TXT file>`.
14 +
15 + These things are stripped from the chat:
16 + - OOC chat like this: (OOC: bla bla)
17 + - Text between brackets: [bla bla bla]
18 +
19 + This script isn't going to produce a perfect transcript, but it's close enough.
20 +
21 + """
22 +
# Pattern for the non-story text that is stripped from every message:
#   * inline out-of-character remarks such as "(OOC: ...)" or "[OOC: ...]"
#   * any whole line wrapped in square brackets, e.g. "[System note: ...]"
# MULTILINE lets ^/$ anchor on every line, so the surrounding newlines are left
# in place. Consuming them (as an explicit "\n" in the pattern would) glues the
# neighbouring lines together and makes the second of two consecutive bracketed
# lines impossible to match.
cleaner_re = re.compile(r'[(\[]OOC:.*?[\])]|^\[.*?\]$', re.MULTILINE)

# Two or more consecutive newlines, i.e. at least one empty line.
_blank_lines_re = re.compile(r'\n{2,}')

# Speaker label used when a chat record carries a message but no "name" field.
_UNKNOWN_SPEAKER = 'Unknown'


def _clean_message(text):
    """Return *text* with OOC/bracketed content removed and blank lines collapsed."""
    stripped = cleaner_re.sub('', text).strip()
    # The transcript uses an empty line as the separator between messages, so
    # no empty line may survive inside a single message.
    return _blank_lines_re.sub('\n', stripped)


def _load_records(input_jsonl):
    """Parse every non-blank line of *input_jsonl* as JSON and return the results.

    Prints a diagnostic and exits with status 1 on the first line that is not
    valid JSON.
    """
    # split('\n') instead of splitlines(): splitlines() also breaks on
    # U+2028, U+2029 and U+0085, which may appear unescaped inside a JSON
    # string and would cut a record in half.
    raw_lines = input_jsonl.read_text(encoding='utf-8').split('\n')

    records = []
    for line_no, line in enumerate(raw_lines, start=1):
        if not line.strip():
            # Tolerate blank lines (including the empty tail after the final
            # newline) while keeping line numbers accurate for error reports.
            continue
        try:
            records.append(json.loads(line))
        except json.JSONDecodeError as err:
            print(f'JSON decode error on line {line_no}: {err}')
            sys.exit(1)
    return records


def _format_chat(records):
    """Turn parsed chat records into a list of ``{'name': ..., 'msg': ...}`` dicts.

    Records without a string ``mes`` field (such as a metadata header) and
    messages that are empty after cleaning are skipped.
    """
    formatted = []
    for record in records:
        if not isinstance(record, dict):
            continue
        text = record.get('mes')
        if not isinstance(text, str):
            continue
        clean_str = _clean_message(text)
        if not clean_str:
            continue
        formatted.append({
            'name': record.get('name', _UNKNOWN_SPEAKER),
            'msg': clean_str,
        })
    return formatted


def main():
    """Convert a SillyTavern jsonl chat into a plain-text transcript.

    Reads two positional command-line arguments: the path of the jsonl chat
    and the path of the TXT file to create. Each kept message is written as
    ``<name>:`` on one line, the cleaned message text below it, and an empty
    line as separator.

    Exits with status 1 when the input file is missing, the output's parent
    directory does not exist, or a line of the input is not valid JSON.
    """
    parser = argparse.ArgumentParser(description='Convert SillyTavern jsonl files to TXT files for importing into the infinite context server.')
    parser.add_argument('filepath', help='The path to the jsonl file to parse')
    parser.add_argument('output_txt', help='The output TXT file to create.')
    args = parser.parse_args()

    input_jsonl = Path(args.filepath).expanduser().absolute().resolve()
    output_txt = Path(args.output_txt).expanduser().absolute().resolve()

    print('Converting chat:', input_jsonl)

    if not input_jsonl.exists():
        print('Input file does not exist:', input_jsonl)
        sys.exit(1)
    if not output_txt.parent.exists():
        print('Output parent directory does not exist:', output_txt.parent)
        sys.exit(1)

    formatted_chat = _format_chat(_load_records(input_jsonl))

    # Explicit UTF-8 so the transcript does not depend on the platform's
    # default encoding.
    with open(output_txt, 'w', encoding='utf-8') as f:
        f.writelines(
            f"{entry['name']}:\n{entry['msg']}\n\n" for entry in formatted_chat
        )

    print(f'Converted {len(formatted_chat)} lines.')
    print('Saved to:', output_txt)


if __name__ == "__main__":
    main()
Newer Older