domenicrosati committed
Commit a91b925 • 1 Parent(s): e15c8b9

strict and then lenient

Files changed (1)
  1. app.py +10 -11
app.py CHANGED
@@ -151,18 +151,11 @@ st.markdown("""
 
 with st.expander("Settings (strictness, context limit, top hits)"):
     confidence_threshold = st.slider('Confidence threshold for answering questions? This number represents how confident the model should be in the answers it gives. The number is out of 100%', 0, 100, 1)
-    strict_mode = st.radio(
-        "Query mode? Strict means all words must match in source snippet. Lenient means only some words must match.",
-        ('lenient', 'strict'))
     use_reranking = st.radio(
         "Use Reranking? Reranking will rerank the top hits using semantic similarity of document and query.",
         ('yes', 'no'))
-    top_hits_limit = st.slider('Top hits? How many documents to use for reranking. Larger is slower but higher quality', 10, 300, 200)
+    top_hits_limit = st.slider('Top hits? How many documents to use for reranking. Larger is slower but higher quality', 10, 300, 100)
     context_lim = st.slider('Context limit? How many documents to use for answering from. Larger is slower but higher quality', 10, 300, 25)
-    use_query_exp = st.radio(
-        "(Experimental) use query expansion? Right now it just recommends queries",
-        ('yes', 'no'))
-    suggested_queries = st.slider('Number of suggested queries to use', 0, 10, 5)
 
 # def paraphrase(text, max_length=128):
 #     input_ids = queryexp_tokenizer.encode(text, return_tensors="pt", add_special_tokens=True)
@@ -180,7 +173,14 @@ def run_query(query):
 #     """)
     limit = top_hits_limit or 100
     context_limit = context_lim or 10
-    contexts, orig_docs = search(query, limit=limit, strict=strict_mode == 'strict')
+    contexts_strict, orig_docs_strict = search(query, limit=limit, strict=True)
+    contexts_lenient, orig_docs_lenient = search(query, limit=limit, strict=False)
+
+    contexts = list(
+        set(contexts_strict + contexts_lenient)
+    )
+    orig_docs = orig_docs_strict + orig_docs_lenient
+
     if len(contexts) == 0 or not ''.join(contexts).strip():
         return st.markdown("""
         <div class="container-fluid">
@@ -197,8 +197,7 @@ def run_query(query):
         hits = {contexts[idx]: scores[idx] for idx in range(len(scores))}
         sorted_contexts = [k for k,v in sorted(hits.items(), key=lambda x: x[0], reverse=True)]
         context = '\n'.join(sorted_contexts[:context_limit])
-    else:
-        context = '\n'.join(contexts[:context_limit])
+
     results = []
     model_results = qa_model(question=query, context=context, top_k=10)
     for result in model_results:
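
Net effect of the commit: the strict/lenient radio button is removed and run_query now always performs a strict pass followed by a lenient pass, unioning the snippets. A minimal sketch of that merge step is below; the search helper is stubbed purely for illustration (in app.py it queries the real index), and note that list(set(...)) deduplicates snippets but does not preserve the order in which they were retrieved.

    # Sketch of the strict-then-lenient merge introduced by this commit.
    # `search` here is a stand-in for the app's retrieval helper, which
    # returns (snippets, source_documents) for a query.
    def search(query, limit, strict):
        if strict:
            return ["exact match snippet"], [{"id": "doc-1"}]
        return ["exact match snippet", "looser match snippet"], [{"id": "doc-2"}]

    def merged_search(query, limit=100):
        contexts_strict, docs_strict = search(query, limit=limit, strict=True)
        contexts_lenient, docs_lenient = search(query, limit=limit, strict=False)
        # Union of both passes; set() drops duplicate snippets but loses order.
        contexts = list(set(contexts_strict + contexts_lenient))
        orig_docs = docs_strict + docs_lenient
        return contexts, orig_docs

    print(merged_search("query expansion"))
    # -> 2 unique snippets, plus both passes' source documents concatenated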