sillytavern-chat-to-txt.py
Eredeti
#!/usr/bin/env python3
import argparse
import re
from pathlib import Path
import sys
import json
"""
Convert SillyTavern jsonl chats to TXT files.
HOW TO USE:
1. Find the chat file you want to convert. It's the `jsonl` file located in `SillyTavern/public/chats/<character name>/`
2. Run this script with `python3 sillytavern-chat-to-txt.py <path to the jsonl file> <path to where you want to save the TXT file>`
These things are stripped from the chat:
- OOC chat like this: (OOC: bla bla)
- Text between brackets: [bla bla bla]
This script isn't going to produce a perfect transcript, but it's close enough.
"""
# Matches text that must not appear in the transcript:
#   1. out-of-character asides such as "(OOC: ...)" or "[OOC: ...]";
#   2. a span that starts with "[" at the very start of the message or right
#      after a newline, and ends with "]" at the end of the message or right
#      before a newline. The adjacent newline characters are part of the
#      match, so they are removed together with the bracketed text.
# "." does not match a newline here, so the bracketed/OOC content itself
# never spans several lines.
cleaner_re = re.compile(r'((\(|\[)OOC:.*?(\]|\)))|((^|\n)\[.*?\]($|\n))')


def _clean_message(text):
    """Return *text* with everything matched by ``cleaner_re`` removed.

    After the regex pass the result is stripped of surrounding whitespace and
    each pair of consecutive newlines is collapsed into a single newline.
    """
    return cleaner_re.sub('', text).strip().replace('\n\n', '\n').strip('\n')


def _load_chat_lines(path):
    """Decode every non-blank line of the JSONL file at *path*.

    Returns the decoded JSON values in file order. On the first malformed
    line, prints the 1-based line number with the decoder's message and exits
    the process with status 1.
    """
    entries = []
    # JSON text is UTF-8 by specification; do not depend on the locale default.
    text = path.read_text(encoding='utf-8')
    for lineno, line in enumerate(text.splitlines(), start=1):
        if not line.strip():
            # A stray blank line is not an error, just nothing to decode.
            continue
        try:
            entries.append(json.loads(line))
        except json.JSONDecodeError as err:
            print(f'JSON decode error on line {lineno}: {err}')
            sys.exit(1)
    return entries


def main():
    """Command-line entry point: convert one JSONL chat file to a TXT file.

    Reads two positional arguments from ``sys.argv``: the input JSONL path and
    the output TXT path. Every JSON object with a string ``mes`` field is
    cleaned with ``cleaner_re``; messages that end up empty are dropped. Each
    remaining message is written as ``<name>:\\n<text>\\n\\n``.

    Exits with status 1 if the input is not an existing file, if the output's
    parent directory does not exist, or if a line is not valid JSON.
    """
    parser = argparse.ArgumentParser(description='Convert SillyTavern jsonl files to TXT files for importing into the infinite context server.')
    parser.add_argument('filepath', help='The path to the jsonl file to parse')
    parser.add_argument('output_txt', help='The output TXT file to create.')
    args = parser.parse_args()

    input_jsonl = Path(args.filepath).expanduser().absolute().resolve()
    output_txt = Path(args.output_txt).expanduser().absolute().resolve()

    print('Converting chat:', input_jsonl)

    # is_file()/is_dir() also reject a directory given as input or a regular
    # file standing where the output directory should be.
    if not input_jsonl.is_file():
        print('Input file does not exist:', input_jsonl)
        sys.exit(1)
    if not output_txt.parent.is_dir():
        print('Output parent directory does not exist:', output_txt.parent)
        sys.exit(1)

    formatted_chat = []
    for msg in _load_chat_lines(input_jsonl):
        # Entries without a string 'mes' field carry no message text to export.
        if not isinstance(msg, dict) or not isinstance(msg.get('mes'), str):
            continue
        clean_str = _clean_message(msg['mes'])
        if not clean_str:
            continue
        # Fall back to a placeholder speaker instead of crashing on a
        # message that lacks a 'name' field.
        formatted_chat.append({'name': msg.get('name', 'Unknown'), 'msg': clean_str})

    # Write UTF-8 explicitly so non-ASCII chat text survives on any platform.
    with open(output_txt, 'w', encoding='utf-8') as f:
        f.writelines(
            f"{entry['name']}:\n{entry['msg']}\n\n" for entry in formatted_chat
        )

    print(f'Converted {len(formatted_chat)} lines.')
    print('Saved to:', output_txt)
# Run the converter only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()
1 | #!/usr/bin/env python3 |
2 | import argparse |
3 | import re |
4 | from pathlib import Path |
5 | import sys |
6 | import json |
7 | |
8 | """ |
9 | Convert SillyTavern jsonl chats to TXT files. |
10 | |
11 | HOW TO USE: |
12 | 1. Find the chat file you want to convert. It's the `jsonl` file located in `SillyTavern/public/chats/<character name>/` |
13 | 2. Run this script with `python3 sillytavern-chat-to-txt.py <path to the jsonl file> <path to where you want to save the TXT file>` |
14 | |
15 | These things are stripped from the chat: |
16 | - OOC chat like this: (OOC: bla bla) |
17 | - Text between brackets: [bla bla bla] |
18 | |
19 | This script isn't going to produce a perfect transcript, but it's close enough. |
20 | |
21 | """ |
22 | |
23 | cleaner_re = re.compile(r'((\(|\[)OOC:.*?(\]|\)))|((^|\n)\[.*?\]($|\n))') |
24 | |
25 | def main(): |
26 | parser = argparse.ArgumentParser(description='Convert SillyTavern jsonl files to TXT files for importing into the infinite context server.') |
27 | parser.add_argument('filepath', help='The path to the jsonl file to parse') |
28 | parser.add_argument('output_txt', help='The output TXT file to create.') |
29 | args = parser.parse_args() |
30 | |
31 | input_jsonl = Path(args.filepath).expanduser().absolute().resolve() |
32 | output_txt = Path(args.output_txt).expanduser().absolute().resolve() |
33 | |
34 | print('Converting chat:', input_jsonl) |
35 | |
36 | if not input_jsonl.exists(): |
37 | print('Input file does not exist:', input_jsonl) |
38 | sys.exit(1) |
39 | if not output_txt.parent.exists(): |
40 | print('Output parent directory does not exist:', output_txt.parent) |
41 | sys.exit(1) |
42 | |
43 | chatlines = [] |
44 | formatted_chat = [] |
45 | raw = input_jsonl.read_text().splitlines() |
46 | for i in range(len(raw)): |
47 | try: |
48 | chatlines.append(json.loads(raw[i])) |
49 | except json.decoder.JSONDecodeError: |
50 | print(f'JSON decode error on line {i + 1}:') |
51 | sys.exit(1) |
52 | |
53 | for msg in chatlines: |
54 | if 'mes' in msg.keys(): |
55 | clean_str = re.sub(cleaner_re, '', msg['mes']).strip().replace('\n\n', '\n').strip('\n') |
56 | if not len(clean_str): |
57 | continue |
58 | formatted_chat.append({'name': msg['name'], 'msg': clean_str}) |
59 | |
60 | with open(output_txt, 'w') as f: |
61 | for msg in formatted_chat: |
62 | # content = json.loads('"' + msg['msg'] + '"') |
63 | f.write(f"{msg['name']}:\n{msg['msg']}\n\n") |
64 | |
65 | print(f'Converted {len(formatted_chat)} lines.') |
66 | print('Saved to:', output_txt) |
67 | |
68 | if __name__ == "__main__": |
69 | main() |