Spaces:
Runtime error
Runtime error
Commit
·
0e90d70
1
Parent(s):
de8c106
Update app.py
Browse files
app.py
CHANGED
|
@@ -1,7 +1,7 @@
|
|
| 1 |
import gradio as gr
|
| 2 |
import os
|
| 3 |
import numpy as np
|
| 4 |
-
os.system("pip install pdfminer.six rank_bm25 torch transformers")
|
| 5 |
|
| 6 |
from gradio.mix import Series
|
| 7 |
import re
|
|
@@ -11,7 +11,7 @@ import torch
|
|
| 11 |
from transformers import pipeline
|
| 12 |
import pdfminer
|
| 13 |
from pdfminer.high_level import extract_text
|
| 14 |
-
|
| 15 |
|
| 16 |
def read_pdf(file):
|
| 17 |
text = extract_text(file.name)
|
|
@@ -60,7 +60,13 @@ def qa_ranker(query, docs_, top_k_ranker):
|
|
| 60 |
answer['doc'] = doc
|
| 61 |
ans.append(answer)
|
| 62 |
return sorted(ans, key=lambda x: x['score'], reverse=True)[:top_k_ranker]
|
| 63 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 64 |
def final_qa_pipeline(file, query):
|
| 65 |
docs = read_pdf(file)
|
| 66 |
tokenized_corpus = []
|
|
@@ -74,7 +80,7 @@ def final_qa_pipeline(file, query):
|
|
| 74 |
|
| 75 |
if len(lvl1) > 0:
|
| 76 |
fnl_rank = qa_ranker(query, [l["docs"] for l in lvl1], top_k_ranker)
|
| 77 |
-
return (fnl_rank[0]["answer"], np.round(fnl_rank[0]["score"],3), fnl_rank[0][
|
| 78 |
#for fnl_ in fnl_rank:
|
| 79 |
# print("\n")
|
| 80 |
# print_colored(fnl_['doc'], fnl_['start'], fnl_['end'])
|
|
@@ -93,7 +99,7 @@ examples = [
|
|
| 93 |
iface = gr.Interface(
|
| 94 |
fn = final_qa_pipeline,
|
| 95 |
inputs = [gr.inputs.File(label="input pdf file"), gr.inputs.Textbox(label="Question:")],
|
| 96 |
-
outputs = [gr.outputs.
|
| 97 |
examples=examples,
|
| 98 |
)
|
| 99 |
iface.launch()
|
|
|
|
| 1 |
import gradio as gr
|
| 2 |
import os
|
| 3 |
import numpy as np
|
| 4 |
+
os.system("pip install pdfminer.six rank_bm25 torch transformers termcolor")
|
| 5 |
|
| 6 |
from gradio.mix import Series
|
| 7 |
import re
|
|
|
|
| 11 |
from transformers import pipeline
|
| 12 |
import pdfminer
|
| 13 |
from pdfminer.high_level import extract_text
|
| 14 |
+
from termcolor import colored
|
| 15 |
|
| 16 |
def read_pdf(file):
|
| 17 |
text = extract_text(file.name)
|
|
|
|
| 60 |
answer['doc'] = doc
|
| 61 |
ans.append(answer)
|
| 62 |
return sorted(ans, key=lambda x: x['score'], reverse=True)[:top_k_ranker]
|
| 63 |
+
|
| 64 |
+
def print_colored(text, start_idx, end_idx):
    """Return ``text`` with the span ``[start_idx, end_idx)`` highlighted.

    The passage is split into three slices: everything before ``start_idx``,
    the span itself, and everything from ``end_idx`` onward. Each slice is
    passed through ``colored``; only the middle slice is given colour
    arguments (red text on a yellow background).

    Args:
        text: The full passage containing the answer.
        start_idx: Index of the first character to highlight.
        end_idx: Index one past the last character to highlight.

    Returns:
        The concatenation of the three ``colored`` results.
    """
    # NOTE(review): the caller forwards this value to a gr.outputs.HTML
    # component; termcolor presumably emits terminal escape sequences rather
    # than HTML markup -- confirm the highlight renders as intended there.
    return (
        colored(text[:start_idx])
        + colored(text[start_idx:end_idx], 'red', 'on_yellow')
        + colored(text[end_idx:])
    )
|
| 69 |
+
|
| 70 |
def final_qa_pipeline(file, query):
|
| 71 |
docs = read_pdf(file)
|
| 72 |
tokenized_corpus = []
|
|
|
|
| 80 |
|
| 81 |
if len(lvl1) > 0:
|
| 82 |
fnl_rank = qa_ranker(query, [l["docs"] for l in lvl1], top_k_ranker)
|
| 83 |
+
return (fnl_rank[0]["answer"], np.round(fnl_rank[0]["score"],3), print_colored(fnl_rank[0]['doc'], fnl_rank[0]['start'], fnl_rank[0]['end']))
|
| 84 |
#for fnl_ in fnl_rank:
|
| 85 |
# print("\n")
|
| 86 |
# print_colored(fnl_['doc'], fnl_['start'], fnl_['end'])
|
|
|
|
| 99 |
# Gradio UI wiring: a PDF file and a question go in; the pipeline's three
# return values (answer, score, reference text) are shown as outputs.
iface = gr.Interface(
    fn=final_qa_pipeline,
    inputs=[gr.inputs.File(label="input pdf file"), gr.inputs.Textbox(label="Question:")],
    # The output component class is spelled ``Textbox`` (matching the input
    # component above); ``gr.outputs.TextBox`` is not a defined attribute.
    outputs=[
        gr.outputs.Textbox(label="Answer"),
        gr.outputs.Textbox(label="Score"),
        gr.outputs.HTML(label="Reference text"),
    ],
    examples=examples,
)
|
| 105 |
iface.launch()
|