File size: 1,348 Bytes
ce24f5e 8d959a7 ce24f5e |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 |
import json
import sys
class FileReader:
def read(self, file_path):
with open(file_path, "r") as file:
return file.read()
class FileWriter:
def __init__(self, file_path):
self.file_path = file_path
def write(self, content):
with open(self.file_path, "w") as file:
file.write(content)
class StdoutWriter:
def write(self, content):
sys.stdout.write(content)
sys.stdout.write("\n")
class JsonParser:
def parse(self, content):
return json.loads(content)
class JsonlSerializer:
def serialize(self, data):
lines = [json.dumps(item) for item in data]
return "\n".join(lines)
class JsonToJsonlConverter:
def __init__(self, file_reader, file_writer, json_parser, jsonl_serializer):
self.file_reader = file_reader
self.file_writer = file_writer
self.json_parser = json_parser
self.jsonl_serializer = jsonl_serializer
def convert(self, input_file_path, output_file_path):
content = self.file_reader.read(input_file_path)
data = self.json_parser.parse(content)
# data = [r for r in data if r["conversations"]] # vicuna cleaned has rows with empty conversations
jsonl_content = self.jsonl_serializer.serialize(data)
self.file_writer.write(jsonl_content)
|