# File: junyi_bot_external/utils/work_flow_controller.py
# Commit f807e7d by Chenyu: "Add prod app" (1.03 kB)
import json
from .pdf_processor import PDFProcessor
from .gpt_processor import Translator, EmbeddingGenerator, KeywordsGenerator, TopicsGenerator
# Dispatch table: file extension -> processor class able to read that format.
processors = {'pdf': PDFProcessor}
class WorkFlowController():
    """Load a file through its format-specific processor and post-process it.

    The processor class is looked up in the module-level ``processors``
    mapping by the extension of the file path, and the ``file_info``
    attribute of the resulting processor instance is kept on the controller.
    """

    def __init__(self, file_path: str, file_name: str) -> None:
        """Read the raw content of ``file_path``.

        Args:
            file_path: Path of the file to load; the text after its last
                ``.`` selects the processor.
            file_name: Path prefix used by ``dump_to_json``; ``.json`` is
                appended to it.

        Raises:
            KeyError: If no processor is registered for the file extension.
        """
        # get file raw content
        self.file_name = file_name
        # Lower-case the extension so 'report.PDF' resolves like 'report.pdf'.
        file_format = file_path.split('.')[-1].lower()
        try:
            self.file_processor = processors[file_format]
        except KeyError:
            # Same exception type as the bare lookup, but with a readable
            # message instead of just the missing key.
            raise KeyError(
                f'unsupported file format: {file_format!r}'
            ) from None
        self.file_info = self.file_processor(file_path).file_info

    def process_file(self):
        """Translate the file content to Chinese when it is not already.

        Returns:
            The ``file_info`` object held by this controller, after any
            translation has been applied in place.
        """
        # process file content
        if not self.file_info['is_chinese']:
            translator = Translator()
            # NOTE(review): file_info is read with the string key
            # 'is_chinese' above but with the integer key 1 here -- confirm
            # this matches the layout produced by the file processor.
            self.file_info[1]['file_content'] = translator.translate_to_chinese(self.file_info[1]['file_content'])
        # return processed data
        return self.file_info

    def dump_to_json(self) -> None:
        """Save ``file_info`` to ``<file_name>.json`` as UTF-8 JSON."""
        with open(f'{self.file_name}.json', 'w', encoding='utf-8') as f:
            # ensure_ascii=False keeps non-ASCII text readable in the file.
            json.dump(self.file_info, f, indent=4, ensure_ascii=False)