Spaces:
Runtime error
Runtime error
samarthagarwal23
committed on
Commit
•
ed1b00c
1
Parent(s):
328b676
Update app.py
Browse files
app.py
CHANGED
@@ -13,11 +13,12 @@ import pdfminer
|
|
13 |
from pdfminer.high_level import extract_text
|
14 |
#from termcolor import colored
|
15 |
|
|
|
|
|
|
|
16 |
def read_pdf(file):
|
17 |
text = extract_text(file.name)
|
18 |
# Split text into smaller docs
|
19 |
-
len_doc = 400
|
20 |
-
overlap = 50
|
21 |
docs = []
|
22 |
|
23 |
i = 0
|
@@ -50,7 +51,8 @@ def retrieval(query, top_k_retriver, docs, bm25_):
|
|
50 |
return bm25_hits
|
51 |
|
52 |
qa_model = pipeline("question-answering",
|
53 |
-
model = "deepset/
|
|
|
54 |
|
55 |
def qa_ranker(query, docs_, top_k_ranker):
|
56 |
ans = []
|
@@ -84,7 +86,7 @@ def final_qa_pipeline(file, query):
|
|
84 |
|
85 |
bm25 = BM25Okapi(tokenized_corpus)
|
86 |
|
87 |
-
top_k_retriver, top_k_ranker =
|
88 |
lvl1 = retrieval(query, top_k_retriver, docs, bm25)
|
89 |
|
90 |
if len(lvl1) > 0:
|
|
|
13 |
from pdfminer.high_level import extract_text
|
14 |
#from termcolor import colored
|
15 |
|
16 |
+
len_doc = 400
|
17 |
+
overlap = 50
|
18 |
+
|
19 |
def read_pdf(file):
|
20 |
text = extract_text(file.name)
|
21 |
# Split text into smaller docs
|
|
|
|
|
22 |
docs = []
|
23 |
|
24 |
i = 0
|
|
|
51 |
return bm25_hits
|
52 |
|
53 |
qa_model = pipeline("question-answering",
|
54 |
+
model = "deepset/minilm-uncased-squad2")
|
55 |
+
#model = "deepset/roberta-base-squad2")
|
56 |
|
57 |
def qa_ranker(query, docs_, top_k_ranker):
|
58 |
ans = []
|
|
|
86 |
|
87 |
bm25 = BM25Okapi(tokenized_corpus)
|
88 |
|
89 |
+
top_k_retriver, top_k_ranker = 30,3
|
90 |
lvl1 = retrieval(query, top_k_retriver, docs, bm25)
|
91 |
|
92 |
if len(lvl1) > 0:
|