"""Convert a JSON instruction dataset into one-record-per-line prompt text.

Usage: python script.py <json filename>

The input file must contain a JSON array of objects with the string fields
"instruction" and "output", and optionally "input".  Each record with a
non-blank "output" is rendered through one of the prompt templates below and
written as a single line to ``dolly-oasst1-ja.txt`` in the current directory.
"""

import json
import sys

# These are raw strings on purpose: every "\n" below is a literal backslash
# followed by "n", not a newline.  Together with the escaping done in
# ``_escape`` this keeps each rendered record on exactly one physical line.
INPUT_PROMPT = r'<s>\n以下は、タスクを説明する指示と、文脈のある入力の組み合わせです。要求を適切に満たす応答を書きなさい。\n[SEP]\n指示:\n{instruction}\n[SEP]\n入力:\n{input}\n[SEP]\n応答:\n{output}\n</s>'
NO_INPUT_PROMPT = r'<s>\n以下は、タスクを説明する指示です。要求を適切に満たす応答を書きなさい。\n[SEP]\n指示:\n{instruction}\n[SEP]\n応答:\n{output}\n</s>'

OUTPUT_FILENAME = 'dolly-oasst1-ja.txt'


def _escape(text):
    """Trim surrounding whitespace and turn newlines into a literal ``\\n``."""
    return text.strip().replace("\n", "\\n")


def format_record(record):
    """Render one dataset record as a single-line prompt string.

    Args:
        record: Mapping with string values under "instruction" and "output",
            and optionally "input" (a missing or null "input" is treated as
            empty).

    Returns:
        The formatted prompt, or ``None`` when the record's "output" is blank
        and the record should be skipped.

    Raises:
        KeyError: If "instruction" or "output" is missing.
    """
    output = _escape(record["output"])
    if not output:
        return None

    instruction = _escape(record["instruction"])
    # Decide on the *stripped* input so a whitespace-only value does not
    # produce a prompt with an empty input section.
    input_text = _escape(record.get("input") or "")

    if input_text:
        return INPUT_PROMPT.format(
            instruction=instruction,
            input=input_text,
            output=output,
        )
    return NO_INPUT_PROMPT.format(instruction=instruction, output=output)


def main(argv=None):
    """Read the JSON file named on the command line and write the prompt file.

    Args:
        argv: Full argument vector including the program name; defaults to
            ``sys.argv``.

    Exits with status 1 (after printing a usage line to stderr) unless exactly
    one filename argument is given.
    """
    argv = sys.argv if argv is None else argv
    if len(argv) != 2:
        print("Usage: python script.py <json filename>", file=sys.stderr)
        sys.exit(1)

    filename = argv[1]
    with open(filename, 'r', encoding='utf-8') as f:
        data = json.load(f)

    # Report how many records were loaded (including ones skipped below).
    print(len(data))

    with open(OUTPUT_FILENAME, 'w', encoding='utf-8') as output_file:
        for record in data:
            text = format_record(record)
            if text is not None:
                output_file.write(text + '\n')


if __name__ == "__main__":
    main()
|