cyberes revised this gist . Go to revision
1 file changed, 69 insertions
sillytavern-chat-to-txt.py(file created)
@@ -0,0 +1,69 @@ | |||
1 | + | #!/usr/bin/env python3 | |
2 | + | import argparse | |
3 | + | import re | |
4 | + | from pathlib import Path | |
5 | + | import sys | |
6 | + | import json | |
7 | + | ||
8 | + | """ | |
9 | + | Convert SillyTavern jsonl chats to TXT files. | |
10 | + | ||
11 | + | HOW TO USE: | |
12 | + | 1. Find the chat file you want to convert. It's the `jsonl` file located in `SillyTavern/public/chats/<character name>/`. | |
13 | + | 2. Run this script with `python3 sillytavern-chat-to-txt.py <path to the jsonl file> <path to where you want to save the TXT file>`. | |
14 | + | ||
15 | + | These things are stripped from the chat: | |
16 | + | - OOC chat like this: (OOC: bla bla) | |
17 | + | - Text between brackets: [bla bla bla] | |
18 | + | ||
19 | + | This script isn't going to produce a perfect transcript, but it's close enough. | |
20 | + | ||
21 | + | """ | |
22 | + | ||
# Text that must not appear in the transcript:
#   * inline OOC remarks such as "(OOC: ...)" or "[OOC: ...]";
#   * whole lines that start with "[" and end with "]".
# MULTILINE anchors are used instead of consuming the surrounding newlines:
# consuming them glued the previous and next lines together and caused the
# second of two consecutive bracket lines to be missed.
cleaner_re = re.compile(r'[(\[]OOC:.*?[\])]|^\[.*?\]$', re.MULTILINE)

# Runs of newlines left behind once the patterns above have been removed.
_blank_lines_re = re.compile(r'\n{2,}')


def _clean_message(text):
    """Return *text* without OOC remarks, bracket-only lines and blank lines."""
    stripped = cleaner_re.sub('', text).strip()
    return _blank_lines_re.sub('\n', stripped)


def _load_chat_lines(input_jsonl):
    """Parse each non-blank line of *input_jsonl* as JSON and return the list.

    Prints a message and exits with status 1 on the first line that is not
    valid JSON.
    """
    chatlines = []
    # Iterate over the file object rather than using str.splitlines():
    # splitlines() also breaks on characters such as U+2028, which may appear
    # unescaped inside a JSON string and would cut a valid record in half.
    with input_jsonl.open(encoding='utf-8') as fh:
        for line_no, line in enumerate(fh, start=1):
            if not line.strip():
                continue  # tolerate blank lines instead of failing on them
            try:
                chatlines.append(json.loads(line))
            except json.JSONDecodeError as err:
                print(f'JSON decode error on line {line_no}: {err}')
                sys.exit(1)
    return chatlines


def _format_chat(chatlines):
    """Build the list of ``{'name', 'msg'}`` entries that will be written out.

    Records that are not JSON objects, that have no string ``mes`` field, or
    whose message is empty after cleaning are skipped.
    """
    formatted_chat = []
    for msg in chatlines:
        if not isinstance(msg, dict):
            continue
        text = msg.get('mes')
        if not isinstance(text, str):
            continue
        clean_str = _clean_message(text)
        if not clean_str:
            continue
        # A record without a speaker name is still kept, under a placeholder.
        formatted_chat.append({'name': msg.get('name', 'Unknown'), 'msg': clean_str})
    return formatted_chat


def main(argv=None):
    """Convert a SillyTavern jsonl chat into a plain-text transcript.

    Args:
        argv: Optional list of command-line arguments (input path, output
            path). When ``None``, ``sys.argv[1:]`` is used.

    Exits with status 1 if the input file or the output's parent directory
    does not exist, or if a line of the input is not valid JSON.
    """
    parser = argparse.ArgumentParser(description='Convert SillyTavern jsonl files to TXT files for importing into the infinite context server.')
    parser.add_argument('filepath', help='The path to the jsonl file to parse')
    parser.add_argument('output_txt', help='The output TXT file to create.')
    args = parser.parse_args(argv)

    input_jsonl = Path(args.filepath).expanduser().resolve()
    output_txt = Path(args.output_txt).expanduser().resolve()

    print('Converting chat:', input_jsonl)

    if not input_jsonl.exists():
        print('Input file does not exist:', input_jsonl)
        sys.exit(1)
    if not output_txt.parent.exists():
        print('Output parent directory does not exist:', output_txt.parent)
        sys.exit(1)

    formatted_chat = _format_chat(_load_chat_lines(input_jsonl))

    # Explicit UTF-8 so the result does not depend on the platform locale.
    with open(output_txt, 'w', encoding='utf-8') as f:
        f.writelines(
            f"{entry['name']}:\n{entry['msg']}\n\n" for entry in formatted_chat
        )

    print(f'Converted {len(formatted_chat)} lines.')
    print('Saved to:', output_txt)


if __name__ == "__main__":
    main()
Newer
Older