Spaces:

samarthagarwal23
/

QuestionAnswering_on_annual_reports

Runtime error

App Files Files Community

samarthagarwal23 committed on Oct 17, 2022

Commit

d7f0548

•

1 Parent(s): b37de3c

Update app.py

Browse files

Files changed (1) hide show

app.py +7 -7

app.py CHANGED Viewed

@@ -17,11 +17,6 @@ overlap = 15
 param_top_k_retriver = 15
 param_top_k_ranker = 3
-qa_model = pipeline("question-answering",
-                    #model = "deepset/minilm-uncased-squad2")
-                    model = "deepset/roberta-base-squad2")
 def read_pdf(file):
   text = extract_text(file.name)
   # Split text into smaller docs
@@ -80,7 +75,7 @@ def print_colored(text, start_idx, end_idx, confidence):
                         cstr_break(conf_str, color='grey')]), color='black')
     return a
-def final_qa_pipeline(file, query):
     docs = read_pdf(file)
     tokenized_corpus = []
     for doc in docs:
@@ -91,6 +86,10 @@ def final_qa_pipeline(file, query):
     top_k_retriver, top_k_ranker = param_top_k_retriver, param_top_k_ranker
     lvl1 = retrieval(query, top_k_retriver, docs, bm25)
     if len(lvl1) > 0:
         fnl_rank = qa_ranker(query, [l["docs"] for l in lvl1], top_k_ranker)
         top1 = print_colored(fnl_rank[0]['doc'], fnl_rank[0]['start'], fnl_rank[0]['end'], str(np.round(100*fnl_rank[0]["score"],1))+"%")
@@ -111,11 +110,12 @@ examples = [
     [os.path.abspath("NASDAQ_AAPL_2020.pdf"), "how much are the outstanding shares ?"],
     [os.path.abspath("NASDAQ_AAPL_2020.pdf"), "what is competitors strategy ?"],
     [os.path.abspath("NASDAQ_AAPL_2020.pdf"), "who is the chief executive officer ?"],
 ]
 iface = gr.Interface(
    fn = final_qa_pipeline,
-   inputs = [gr.inputs.File(label="input pdf file"), gr.inputs.Textbox(label="Question:")],
    outputs = [gr.outputs.HTML(label="Top 1 answer"), gr.outputs.HTML(label="Top 2 answer")],
    examples=examples,
    theme = "grass",

 param_top_k_retriver = 15
 param_top_k_ranker = 3
 def read_pdf(file):
   text = extract_text(file.name)
   # Split text into smaller docs
                         cstr_break(conf_str, color='grey')]), color='black')
     return a
+def final_qa_pipeline(file, query, model_nm):
     docs = read_pdf(file)
     tokenized_corpus = []
     for doc in docs:
     top_k_retriver, top_k_ranker = param_top_k_retriver, param_top_k_ranker
     lvl1 = retrieval(query, top_k_retriver, docs, bm25)
+    qa_model = pipeline("question-answering",
+                    #model = "deepset/minilm-uncased-squad2")
+                    model = "deepset/"+model_nm)
     if len(lvl1) > 0:
         fnl_rank = qa_ranker(query, [l["docs"] for l in lvl1], top_k_ranker)
         top1 = print_colored(fnl_rank[0]['doc'], fnl_rank[0]['start'], fnl_rank[0]['end'], str(np.round(100*fnl_rank[0]["score"],1))+"%")
     [os.path.abspath("NASDAQ_AAPL_2020.pdf"), "how much are the outstanding shares ?"],
     [os.path.abspath("NASDAQ_AAPL_2020.pdf"), "what is competitors strategy ?"],
     [os.path.abspath("NASDAQ_AAPL_2020.pdf"), "who is the chief executive officer ?"],
+    [os.path.abspath("NASDAQ_MSFT_2020.pdf"), "How much is the guided revenue for next quarter?"],
 ]
 iface = gr.Interface(
    fn = final_qa_pipeline,
+   inputs = [gr.inputs.File(label="input pdf file"), gr.inputs.Textbox(label="Question:"), gr.inputs.Dropdown(["minilm-uncased-squad2","roberta-base-squad2"], "minilm-uncased-squad2", label="Model")],
    outputs = [gr.outputs.HTML(label="Top 1 answer"), gr.outputs.HTML(label="Top 2 answer")],
    examples=examples,
    theme = "grass",