samarthagarwal23 committed on
Commit
ed1b00c
1 Parent(s): 328b676

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +6 -4
app.py CHANGED
@@ -13,11 +13,12 @@ import pdfminer
13
  from pdfminer.high_level import extract_text
14
  #from termcolor import colored
15
 
 
 
 
16
  def read_pdf(file):
17
  text = extract_text(file.name)
18
  # Split text into smaller docs
19
- len_doc = 400
20
- overlap = 50
21
  docs = []
22
 
23
  i = 0
@@ -50,7 +51,8 @@ def retrieval(query, top_k_retriver, docs, bm25_):
50
  return bm25_hits
51
 
52
  qa_model = pipeline("question-answering",
53
- model = "deepset/roberta-base-squad2")
 
54
 
55
  def qa_ranker(query, docs_, top_k_ranker):
56
  ans = []
@@ -84,7 +86,7 @@ def final_qa_pipeline(file, query):
84
 
85
  bm25 = BM25Okapi(tokenized_corpus)
86
 
87
- top_k_retriver, top_k_ranker = 20,3
88
  lvl1 = retrieval(query, top_k_retriver, docs, bm25)
89
 
90
  if len(lvl1) > 0:
 
13
  from pdfminer.high_level import extract_text
14
  #from termcolor import colored
15
 
16
+ len_doc = 400
17
+ overlap = 50
18
+
19
  def read_pdf(file):
20
  text = extract_text(file.name)
21
  # Split text into smaller docs
 
 
22
  docs = []
23
 
24
  i = 0
 
51
  return bm25_hits
52
 
53
  qa_model = pipeline("question-answering",
54
+ model = "deepset/minilm-uncased-squad2")
55
+ #model = "deepset/roberta-base-squad2")
56
 
57
  def qa_ranker(query, docs_, top_k_ranker):
58
  ans = []
 
86
 
87
  bm25 = BM25Okapi(tokenized_corpus)
88
 
89
+ top_k_retriver, top_k_ranker = 30,3
90
  lvl1 = retrieval(query, top_k_retriver, docs, bm25)
91
 
92
  if len(lvl1) > 0: