import json

from .pdf_processor import PDFProcessor
from .gpt_processor import Translator, EmbeddingGenerator, KeywordsGenerator, TopicsGenerator

# Registry mapping a lower-case file extension (without the dot) to the
# processor class that handles that format.
processors = {
    'pdf': PDFProcessor,
}


class WorkFlowController:
    """Load a file through the matching processor and post-process its content.

    The processor is selected from ``processors`` by the file's extension, and
    the processor's ``file_info`` is stored on the controller for later steps.
    """

    def __init__(self, file_path: str, file_name: str) -> None:
        """Read the raw file content via the processor matching the extension.

        Args:
            file_path: Path of the file to load; its extension selects the
                processor (matched case-insensitively).
            file_name: Base name used for the JSON file written by
                ``dump_to_json``.

        Raises:
            KeyError: If the extension has no entry in ``processors``.
        """
        self.file_name = file_name

        # Normalise case so that e.g. "report.PDF" resolves like "report.pdf".
        file_format = file_path.rsplit('.', 1)[-1].lower()
        try:
            self.file_processor = processors[file_format]
        except KeyError:
            supported = ', '.join(sorted(processors))
            # Still a KeyError (as before), but with an actionable message.
            raise KeyError(
                f'unsupported file format {file_format!r}; '
                f'supported formats: {supported}'
            ) from None

        self.file_info = self.file_processor(file_path).file_info

    def process_file(self) -> None:
        """Translate the stored content to Chinese when it is not already Chinese.

        Mutates ``self.file_info`` in place and returns ``None``.
        """
        # NOTE(review): the original comment mentioned returning the processed
        # data, but nothing is returned -- confirm what callers expect.
        if not self.file_info['is_chinese']:
            translator = Translator()
            # NOTE(review): only the entry under key ``1`` is translated, and
            # ``file_info`` is indexed with both ``'is_chinese'`` and ``1``.
            # Presumably ``1`` is a page number -- verify against the schema
            # produced by the processor.
            self.file_info[1]['file_content'] = translator.translate_to_chinese(
                self.file_info[1]['file_content']
            )

    def dump_to_json(self) -> None:
        """Write ``self.file_info`` to ``<file_name>.json`` as UTF-8 JSON."""
        with open(f'{self.file_name}.json', 'w', encoding='utf-8') as f:
            # ensure_ascii=False keeps non-ASCII text readable in the output.
            json.dump(self.file_info, f, indent=4, ensure_ascii=False)