flynn-chen committed on
Commit
1c371ff
1 Parent(s): 6fc1649

Add application file

Browse files
Files changed (2) hide show
  1. app.py +50 -0
  2. question_generation +1 -0
app.py ADDED
@@ -0,0 +1,50 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import gradio as gr
3
+ import pandas as pd
4
+ from question_generation.pipelines import pipeline
5
+ import docx2txt
6
+
7
+ # Module-level store of generated question/answer items (each is indexed by
+ # "question" and "answer"); written by process_file and reset by reveal_answer.
+ qa_list = []
8
# Lazily-built question-generation pipeline, shared by every request.
_qg_pipeline = None


def _load_pipeline():
    """Return the question-generation pipeline, setting it up on first use only."""
    global _qg_pipeline
    if _qg_pipeline is None:
        # Runtime setup kept from the original implementation, but executed
        # once per process instead of once per uploaded file.
        os.system("pip install -U transformers==3.0.0")
        os.system("python -m nltk.downloader punkt")
        _qg_pipeline = pipeline(
            "question-generation",
            model="valhalla/t5-small-qg-prepend",
            qg_format="prepend",
        )
    return _qg_pipeline


def _extract_outline_lines(text):
    """Return the left-stripped, non-empty lines that follow an "Outline" marker.

    Collection starts after a line containing "Outline" and stops at a line
    containing "Summary Learning Points" or "Learning Activity". Marker lines
    themselves are never collected; "Outline" takes precedence when a line
    contains more than one marker.
    """
    kept = []
    in_content = False
    for line in text.splitlines():
        if line == "":
            continue
        if "Outline" in line:
            in_content = True
        elif "Summary Learning Points" in line or "Learning Activity" in line:
            in_content = False
        elif in_content:
            kept.append(line.lstrip())
    return kept


def process_file(Notes):
    """Generate numbered questions and answers from an uploaded Word document.

    Args:
        Notes: Uploaded file object; only its ``name`` attribute (the path
            passed to ``docx2txt.process``) is used.

    Returns:
        A two-element list ``[formatted_questions, formatted_answers]``, each a
        newline-separated string of ``"<n>. <text>"`` entries.
    """
    nlp = _load_pipeline()

    raw_word_file = docx2txt.process(Notes.name)
    content_lines = _extract_outline_lines(raw_word_file)

    # Start from a clean slate so results from a previous upload are not
    # repeated in this one; mutate in place so the module-level list is reused.
    qa_list.clear()

    joined_content = " ".join(content_lines)
    # Skip generation entirely when no outline text was found.
    if joined_content.strip():
        qa_list.extend(nlp(joined_content))

    formatted_questions = "\n".join(
        f"{number}. {item['question']}" for number, item in enumerate(qa_list, start=1)
    )
    formatted_answers = "\n".join(
        f"{number}. {item['answer']}" for number, item in enumerate(qa_list, start=1)
    )
    return [formatted_questions, formatted_answers]
40
+
41
def reveal_answer():
    """Return the numbered answers currently stored in ``qa_list`` and reset it.

    Returns:
        A newline-separated string of ``"<n>. <answer>"`` entries, or an empty
        string when no questions have been generated.
    """
    global qa_list

    # Build the answer text before clearing the store; the original returned a
    # name that was never defined in this scope.
    formatted_answers = "\n".join(
        f"{number}. {item['answer']}" for number, item in enumerate(qa_list, start=1)
    )
    qa_list = []
    return formatted_answers
46
+
47
# Build the Gradio UI: a single file upload feeding process_file, whose two
# returned strings are shown in the "Questions" and "Answers" text boxes.
io = gr.Interface(
    fn=process_file,
    inputs="file",
    outputs=[
        gr.Textbox(lines=1, label="Questions"),
        gr.Textbox(lines=1, label="Answers"),
    ],
)
io.launch()
question_generation ADDED
@@ -0,0 +1 @@
 
 
1
+ Subproject commit c30e2976d65c4ef6200c7504097e8e07545fb240