Spaces:
Runtime error
Runtime error
samarthagarwal23
committed on
Commit
•
0e90d70
1
Parent(s):
de8c106
Update app.py
Browse files
app.py
CHANGED
@@ -1,7 +1,7 @@
|
|
1 |
import gradio as gr
|
2 |
import os
|
3 |
import numpy as np
|
4 |
-
os.system("pip install pdfminer.six rank_bm25 torch transformers")
|
5 |
|
6 |
from gradio.mix import Series
|
7 |
import re
|
@@ -11,7 +11,7 @@ import torch
|
|
11 |
from transformers import pipeline
|
12 |
import pdfminer
|
13 |
from pdfminer.high_level import extract_text
|
14 |
-
|
15 |
|
16 |
def read_pdf(file):
|
17 |
text = extract_text(file.name)
|
@@ -60,7 +60,13 @@ def qa_ranker(query, docs_, top_k_ranker):
|
|
60 |
answer['doc'] = doc
|
61 |
ans.append(answer)
|
62 |
return sorted(ans, key=lambda x: x['score'], reverse=True)[:top_k_ranker]
|
63 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
64 |
def final_qa_pipeline(file, query):
|
65 |
docs = read_pdf(file)
|
66 |
tokenized_corpus = []
|
@@ -74,7 +80,7 @@ def final_qa_pipeline(file, query):
|
|
74 |
|
75 |
if len(lvl1) > 0:
|
76 |
fnl_rank = qa_ranker(query, [l["docs"] for l in lvl1], top_k_ranker)
|
77 |
-
return (fnl_rank[0]["answer"], np.round(fnl_rank[0]["score"],3), fnl_rank[0][
|
78 |
#for fnl_ in fnl_rank:
|
79 |
# print("\n")
|
80 |
# print_colored(fnl_['doc'], fnl_['start'], fnl_['end'])
|
@@ -93,7 +99,7 @@ examples = [
|
|
93 |
iface = gr.Interface(
|
94 |
fn = final_qa_pipeline,
|
95 |
inputs = [gr.inputs.File(label="input pdf file"), gr.inputs.Textbox(label="Question:")],
|
96 |
-
outputs = [gr.outputs.
|
97 |
examples=examples,
|
98 |
)
|
99 |
iface.launch()
|
|
|
1 |
import gradio as gr
|
2 |
import os
|
3 |
import numpy as np
|
4 |
+
os.system("pip install pdfminer.six rank_bm25 torch transformers termcolor")
|
5 |
|
6 |
from gradio.mix import Series
|
7 |
import re
|
|
|
11 |
from transformers import pipeline
|
12 |
import pdfminer
|
13 |
from pdfminer.high_level import extract_text
|
14 |
+
from termcolor import colored
|
15 |
|
16 |
def read_pdf(file):
|
17 |
text = extract_text(file.name)
|
|
|
60 |
answer['doc'] = doc
|
61 |
ans.append(answer)
|
62 |
return sorted(ans, key=lambda x: x['score'], reverse=True)[:top_k_ranker]
|
63 |
+
|
64 |
+
def print_colored(text, start_idx, end_idx):
    """Return ``text`` with the slice ``[start_idx:end_idx]`` highlighted.

    The text is split into three pieces (before, inside and after the
    span); each piece is passed through termcolor's ``colored`` and the
    results are concatenated. Only the middle piece is given colours
    (red foreground on a yellow background).

    Args:
        text: The full passage.
        start_idx: Index where the highlighted span starts.
        end_idx: Index where the highlighted span ends (exclusive).

    Returns:
        The concatenated string; nothing is printed despite the name.
    """
    # NOTE(review): termcolor marks colours with terminal escape sequences,
    # while the caller routes this value to an HTML output component --
    # confirm the highlight actually renders there.
    # Parenthesised expression instead of backslash continuations; the
    # previous version had a stray closing parenthesis on the last operand,
    # which made the whole module fail to parse.
    return (
        colored(text[:start_idx])
        + colored(text[start_idx:end_idx], 'red', 'on_yellow')
        + colored(text[end_idx:])
    )
|
69 |
+
|
70 |
def final_qa_pipeline(file, query):
|
71 |
docs = read_pdf(file)
|
72 |
tokenized_corpus = []
|
|
|
80 |
|
81 |
if len(lvl1) > 0:
|
82 |
fnl_rank = qa_ranker(query, [l["docs"] for l in lvl1], top_k_ranker)
|
83 |
+
return (fnl_rank[0]["answer"], np.round(fnl_rank[0]["score"],3), print_colored(fnl_rank[0]['doc'], fnl_rank[0]['start'], fnl_rank[0]['end']))
|
84 |
#for fnl_ in fnl_rank:
|
85 |
# print("\n")
|
86 |
# print_colored(fnl_['doc'], fnl_['start'], fnl_['end'])
|
|
|
99 |
# Build the web UI: a PDF upload plus a question box in, and the best answer,
# its score and the highlighted source passage out.
iface = gr.Interface(
    fn=final_qa_pipeline,
    inputs=[
        gr.inputs.File(label="input pdf file"),
        gr.inputs.Textbox(label="Question:"),
    ],
    # The text component is spelled `Textbox` (as on the inputs side);
    # `gr.outputs.TextBox` would raise AttributeError at startup.
    outputs=[
        gr.outputs.Textbox(label="Answer"),
        gr.outputs.Textbox(label="Score"),
        gr.outputs.HTML(label="Reference text"),
    ],
    examples=examples,
)
iface.launch()
|