"""Gradio chatbot that answers questions over uploaded PDF documents.

Workflow: uploaded PDFs are processed by WorkFlowController into a CSV
(page contents + embeddings) and a JSON (per-file summaries) knowledge
base.  The first question of each conversation is embedded and matched
against page embeddings by cosine distance; the best-matching page is
then used as context for every follow-up answer until the user clears
the conversation.
"""
import ast
import json
import os
import random
import time

import gradio as gr
import numpy as np
import openai
import pandas as pd
from openai.embeddings_utils import distances_from_embeddings

from utils.gpt_processor import QuestionAnswerer
from utils.work_flow_controller import WorkFlowController

qa_processor = QuestionAnswerer()

# Module-level state shared across Gradio callbacks.
CSV_FILE_PATHS = ''        # path to the CSV knowledge base (pages + embeddings)
JSON_FILE_PATHS = ''       # path to the JSON knowledge base (summaries)
KNOWLEDGE_BASE = None      # pandas DataFrame loaded from the CSV
CONTEXT = None             # page content chosen for the current conversation
CONTEXT_PAGE_NUM = None    # page number of the chosen context
CONTEXT_FILE_NAME = None   # file name of the chosen context


def build_knowledge_base(files):
    """Run the processing pipeline on *files* and load the knowledge base.

    Stores the result paths and the parsed DataFrame in module globals so
    the other Gradio callbacks can reach them.
    """
    global CSV_FILE_PATHS, JSON_FILE_PATHS, KNOWLEDGE_BASE

    work_flow_controller = WorkFlowController(files)
    CSV_FILE_PATHS = work_flow_controller.csv_result_path
    JSON_FILE_PATHS = work_flow_controller.result_path

    with open(CSV_FILE_PATHS, 'r', encoding='UTF-8') as fp:
        knowledge_base = pd.read_csv(fp)
    # Embeddings are stored as stringified Python lists; parse them with
    # ast.literal_eval (safe, unlike eval) and convert to numpy arrays.
    knowledge_base['page_embedding'] = (
        knowledge_base['page_embedding']
        .apply(ast.literal_eval)
        .apply(np.array)
    )
    KNOWLEDGE_BASE = knowledge_base


def construct_summary():
    """Build a Markdown digest of every file in the JSON knowledge base."""
    with open(JSON_FILE_PATHS, 'r', encoding='UTF-8') as fp:
        knowledge_base = json.load(fp)

    context = ""
    for key in knowledge_base:
        file_name = knowledge_base[key]['file_name']
        total_page = knowledge_base[key]['total_pages']
        summary = knowledge_base[key]['summarized_content']
        context += f""" ### 文件摘要 {file_name} (共 {total_page} 頁)

{summary}

"""
    return context


def change_md():
    """Refresh the summary Markdown panel after processing finishes."""
    content = construct_summary()
    return gr.Markdown.update(content, visible=True)


def user(message, history):
    """Append the user's message to the chat history and clear the textbox."""
    return "", history + [[message, None]]


def system_notification(action):
    """Return a canned chatbot exchange describing upload progress."""
    if action == 'upload':
        return [['已上傳文件', '文件處理中(摘要、翻譯等),結束後將自動回覆']]
    else:
        return [['已上傳文件', '文件處理完成,請開始提問']]


def get_index_file(user_message):
    """Select the knowledge-base page most relevant to *user_message*.

    Embeds the question, ranks pages by cosine distance and stores the
    best page's content / page number / file name in the CONTEXT_*
    globals.  CONTEXT is left as None when no page is close enough.
    """
    global CONTEXT, CONTEXT_PAGE_NUM, CONTEXT_FILE_NAME

    user_message_embedding = openai.Embedding.create(
        input=user_message,
        engine='text-embedding-ada-002',
    )['data'][0]['embedding']

    KNOWLEDGE_BASE['distance'] = distances_from_embeddings(
        user_message_embedding,
        KNOWLEDGE_BASE['page_embedding'].values,
        distance_metric='cosine',
    )
    # BUG FIX: keep the best match in a local instead of reassigning the
    # global — the original truncated KNOWLEDGE_BASE to a single row,
    # which silently broke retrieval for every later conversation.
    best_match = KNOWLEDGE_BASE.sort_values(by='distance', ascending=True).head(1)

    if best_match['distance'].values[0] > 0.2:
        # Anything farther than the 0.2 cosine-distance threshold counts
        # as "no relevant document".
        CONTEXT = None
    else:
        CONTEXT = best_match['page_content'].values[0]
        CONTEXT_PAGE_NUM = best_match['page_num'].values[0]
        CONTEXT_FILE_NAME = best_match['file_name'].values[0]


def bot(history):
    """Produce the bot reply for the newest user message in *history*."""
    global CONTEXT
    user_message = history[-1][0]
    print(f'user_message: {user_message}')

    if KNOWLEDGE_BASE is None:
        # Nothing uploaded yet — replace the exchange with a hint.
        return [[user_message, "請先上傳文件"]]
    elif CONTEXT is None:
        # First question of a conversation: pick the document to answer from.
        get_index_file(user_message)
        print(f'CONTEXT: {CONTEXT}')
        if CONTEXT is None:
            return [[user_message, "無法找到相關文件,請重新提問"]]

    if CONTEXT is not None:
        bot_message = qa_processor.answer_question(
            CONTEXT, CONTEXT_PAGE_NUM, CONTEXT_FILE_NAME, history
        )
        print(f'bot_message: {bot_message}')
        history[-1] = [user_message, bot_message]
    return history


def clear_state():
    """Reset the per-conversation retrieval context."""
    global CONTEXT, CONTEXT_PAGE_NUM, CONTEXT_FILE_NAME
    CONTEXT = None
    CONTEXT_PAGE_NUM = None
    CONTEXT_FILE_NAME = None


with gr.Blocks() as demo:
    history = gr.State([])
    upload_state = gr.State("upload")
    finished = gr.State("finished")
    user_question = gr.State("")

    with gr.Row():
        gr.HTML('Junyi Academy Chatbot')
        # status_display = gr.Markdown("Success", elem_id="status_display")

    with gr.Row(equal_height=True):
        with gr.Column(scale=5):
            with gr.Row():
                chatbot = gr.Chatbot()
            with gr.Row():
                with gr.Column(scale=12):
                    user_input = gr.Textbox(
                        show_label=False,
                        placeholder="Enter text",
                        container=False,
                    )
                # with gr.Column(min_width=70, scale=1):
                #     submit_btn = gr.Button("Send")
                with gr.Column(min_width=70, scale=1):
                    clear_btn = gr.Button("清除")
                with gr.Column(min_width=70, scale=1):
                    submit_btn = gr.Button("傳送")

    response = user_input.submit(
        user,
        [user_input, chatbot],
        [user_input, chatbot],
        queue=False,
    ).then(bot, chatbot, chatbot)
    response.then(
        lambda: gr.update(interactive=True), None, [user_input], queue=False
    )

    clear_btn.click(lambda: None, None, chatbot, queue=False)
    # BUG FIX: the original passed `chatbot` as a stray fourth positional
    # argument to .click(), which lands on an unrelated keyword parameter
    # of the event listener instead of acting as an output.
    submit_btn.click(
        user,
        [user_input, chatbot],
        [user_input, chatbot],
        queue=False,
    ).then(bot, chatbot, chatbot).then(
        lambda: gr.update(interactive=True), None, [user_input], queue=False
    )
    clear_btn.click(clear_state, None, None, queue=False)

    with gr.Row():
        index_file = gr.File(
            file_count="multiple", file_types=["pdf"], label="Upload PDF file"
        )

    with gr.Row():
        instruction = gr.Markdown("""
        ## 使用說明
        1. 上傳一個或多個 PDF 檔案,系統將自動進行摘要、翻譯等處理後建立知識庫
        2. 在上方輸入欄輸入問題,系統將自動回覆
        3. 可以根據下方的摘要內容來提問
        4. 每次對話會根據第一個問題的內容來檢索所有文件,並挑選最能回答問題的文件來回覆
        5. 要切換檢索的文件,請點選「清除對話記錄」按鈕後再重新提問
        """)

    with gr.Row():
        describe = gr.Markdown('', visible=True)

    # Upload pipeline: notify -> build knowledge base -> notify -> show summary.
    index_file.upload(system_notification, [upload_state], chatbot) \
        .then(lambda: gr.update(interactive=True), None, None, queue=False) \
        .then(build_knowledge_base, [index_file]) \
        .then(system_notification, [finished], chatbot) \
        .then(lambda: gr.update(interactive=True), None, None, queue=False) \
        .then(change_md, None, describe)


if __name__ == "__main__":
    demo.launch()