"""Gradio app that generates practice questions from an uploaded .docx file.

The uploaded document is converted to text, the lines between an "Outline"
marker and the next "Summary Learning Points" / "Learning Activity" marker
are collected, and a question-generation pipeline produces question/answer
pairs that are shown in two text boxes.
"""
import functools
import os

# Runtime dependencies are installed at startup, before the third-party
# imports below; keep this ordering.
os.system("pip install -U transformers==3.0.0")
os.system("pip install nltk torch docx2txt")
os.system("python -m nltk.downloader punkt")

import gradio as gr
import pandas as pd
from question_generation.pipelines import pipeline
import docx2txt

# Question/answer dicts produced by the most recent call to process_file().
qa_list = []

# Lines containing these markers toggle content collection on and off.
_SECTION_START = "Outline"
_SECTION_ENDS = ("Summary Learning Points", "Learning Activity")


@functools.lru_cache(maxsize=1)
def _get_pipeline():
    """Build the question-generation pipeline once and reuse it afterwards."""
    return pipeline(
        "question-generation",
        model="valhalla/t5-small-qg-prepend",
        qg_format="prepend",
    )


def _extract_content(lines):
    """Return the left-stripped lines that fall inside collected sections.

    Collection starts after a line containing "Outline" and stops at a line
    containing "Summary Learning Points" or "Learning Activity". The marker
    lines themselves are never included.
    """
    collected = []
    collecting = False
    for line in lines:
        if _SECTION_START in line:
            collecting = True
        elif any(marker in line for marker in _SECTION_ENDS):
            collecting = False
        elif collecting:
            collected.append(line.lstrip())
    return collected


def _format_numbered(items, key):
    """Render items[i][key] as a "1. ...", "2. ..." newline-separated list."""
    return "\n".join(
        f"{number}. {item[key]}" for number, item in enumerate(items, start=1)
    )


def process_file(Notes):
    """Generate questions and answers from an uploaded Word document.

    Args:
        Notes: Uploaded file object; only its ``name`` attribute (the path
            to the document on disk) is read.

    Returns:
        A two-element list ``[questions, answers]`` where each element is a
        numbered, newline-separated string. Both strings are empty when the
        document has no content inside the collected sections.
    """
    raw_text = docx2txt.process(Notes.name)

    # Drop empty lines, then keep only the lines inside the wanted sections.
    non_empty_lines = [line for line in raw_text.splitlines() if line != ""]
    content_lines = _extract_content(non_empty_lines)

    # Skip the model entirely when nothing was collected.
    results = _get_pipeline()(" ".join(content_lines)) if content_lines else []

    # Replace (rather than extend) the shared list so that results from
    # earlier uploads do not leak into this one.
    qa_list[:] = results

    return [
        _format_numbered(results, "question"),
        _format_numbered(results, "answer"),
    ]


def reveal_answer():
    """Return the numbered answers of the last processed file and reset state.

    Returns:
        The newline-separated, numbered answers currently held in
        ``qa_list``; the list is emptied afterwards.
    """
    answers = _format_numbered(qa_list, "answer")
    qa_list.clear()
    return answers


io = gr.Interface(
    process_file,
    "file",
    outputs=[
        gr.Textbox(lines=1, label="Questions"),
        gr.Textbox(lines=1, label="Answers"),
    ],
)
io.launch()