Spaces:
Sleeping
Sleeping
Stefano Fiorucci
committed on
Commit
·
8314602
1
Parent(s):
5bbc60d
refactoring
Browse files
- app.py +15 -32
- haystack_utils.py → backend_utils.py +12 -26
- config.py +1 -1
- data/questions/generated_questions.txt +0 -0
- data/{questions.txt → questions/selected_questions.txt} +0 -0
- frontend_utils.py +14 -0
app.py
CHANGED
@@ -9,20 +9,9 @@ from typing import List, Dict, Any, Tuple, Optional
|
|
9 |
from annotated_text import annotation
|
10 |
from urllib.parse import unquote
|
11 |
|
12 |
-
from
|
13 |
-
|
14 |
-
|
15 |
-
INDEX_DIR = 'data/index'
|
16 |
-
QUESTIONS_PATH = 'data/questions.txt'
|
17 |
-
RETRIEVER_MODEL = "sentence-transformers/multi-qa-mpnet-base-dot-v1"
|
18 |
-
RETRIEVER_MODEL_FORMAT = "sentence_transformers"
|
19 |
-
READER_MODEL = "deepset/roberta-base-squad2"
|
20 |
-
READER_CONFIG_THRESHOLD = 0.15
|
21 |
-
RETRIEVER_TOP_K = 10
|
22 |
-
READER_TOP_K = 5
|
23 |
-
|
24 |
-
|
25 |
-
|
26 |
|
27 |
def main():
|
28 |
|
@@ -35,13 +24,19 @@ def main():
|
|
35 |
set_state_if_absent('raw_json', None)
|
36 |
set_state_if_absent('random_question_requested', False)
|
37 |
|
38 |
-
# Small callback to reset the interface in case the text of the question changes
|
39 |
-
def reset_results(*args):
|
40 |
-
st.session_state.answer = None
|
41 |
-
st.session_state.results = None
|
42 |
-
st.session_state.raw_json = None
|
43 |
|
44 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
45 |
st.markdown(
|
46 |
"""
|
47 |
<style>
|
@@ -55,18 +50,6 @@ def main():
|
|
55 |
""",
|
56 |
unsafe_allow_html=True,
|
57 |
)
|
58 |
-
# Title
|
59 |
-
st.write("# Who killed Laura Palmer?")
|
60 |
-
st.write("### The first Twin Peaks Question Answering system!")
|
61 |
-
|
62 |
-
st.markdown("""
|
63 |
-
Ask any question about [Twin Peaks] (https://twinpeaks.fandom.com/wiki/Twin_Peaks)
|
64 |
-
and see if the AI can find an answer...
|
65 |
-
|
66 |
-
*Note: do not use keywords, but full-fledged questions.*
|
67 |
-
""")
|
68 |
-
|
69 |
-
# Sidebar
|
70 |
st.sidebar.header("Who killed Laura Palmer?")
|
71 |
st.sidebar.image(
|
72 |
"https://upload.wikimedia.org/wikipedia/it/3/39/Twin-peaks-1990.jpg")
|
|
|
9 |
from annotated_text import annotation
|
10 |
from urllib.parse import unquote
|
11 |
|
12 |
+
from backend_utils import load_questions, query
|
13 |
+
from frontend_utils import set_state_if_absent, reset_results
|
14 |
+
from config import RETRIEVER_TOP_K, READER_TOP_K
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
15 |
|
16 |
def main():
|
17 |
|
|
|
24 |
set_state_if_absent('raw_json', None)
|
25 |
set_state_if_absent('random_question_requested', False)
|
26 |
|
|
|
|
|
|
|
|
|
|
|
27 |
|
28 |
+
# Header
|
29 |
+
st.write("# Who killed Laura Palmer?")
|
30 |
+
st.write("### The first Twin Peaks Question Answering system!")
|
31 |
+
st.markdown("""
|
32 |
+
Ask any question about [Twin Peaks] (https://twinpeaks.fandom.com/wiki/Twin_Peaks)
|
33 |
+
and see if the AI can find an answer...
|
34 |
+
|
35 |
+
*Note: do not use keywords, but full-fledged questions.*
|
36 |
+
""")
|
37 |
+
|
38 |
+
# Sidebar
|
39 |
+
# sidebar style
|
40 |
st.markdown(
|
41 |
"""
|
42 |
<style>
|
|
|
50 |
""",
|
51 |
unsafe_allow_html=True,
|
52 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
53 |
st.sidebar.header("Who killed Laura Palmer?")
|
54 |
st.sidebar.image(
|
55 |
"https://upload.wikimedia.org/wikipedia/it/3/39/Twin-peaks-1990.jpg")
|
haystack_utils.py → backend_utils.py
RENAMED
@@ -8,6 +8,7 @@ import streamlit as st
|
|
8 |
from config import (INDEX_DIR, RETRIEVER_MODEL, RETRIEVER_MODEL_FORMAT,
|
9 |
READER_MODEL, READER_CONFIG_THRESHOLD, QUESTIONS_PATH)
|
10 |
|
|
|
11 |
@st.cache(hash_funcs={"builtins.SwigPyObject": lambda _: None},
|
12 |
allow_output_mutation=True)
|
13 |
def start_haystack():
|
@@ -33,32 +34,7 @@ def start_haystack():
|
|
33 |
pipe = ExtractiveQAPipeline(reader, retriever)
|
34 |
return pipe
|
35 |
|
36 |
-
def set_state_if_absent(key, value):
|
37 |
-
if key not in st.session_state:
|
38 |
-
st.session_state[key] = value
|
39 |
-
|
40 |
-
@st.cache()
|
41 |
-
def load_questions():
|
42 |
-
with open(QUESTIONS_PATH) as fin:
|
43 |
-
questions = [line.strip() for line in fin.readlines()
|
44 |
-
if not line.startswith('#')]
|
45 |
-
return questions
|
46 |
-
|
47 |
-
# # the following function is a wrapper for start_haystack,
|
48 |
-
# # which loads document store, retriever, reader and creates pipeline.
|
49 |
-
# # cached to make index and models load only at start
|
50 |
-
# @st.cache(hash_funcs={"builtins.SwigPyObject": lambda _: None},
|
51 |
-
# allow_output_mutation=True)
|
52 |
-
# def start_app():
|
53 |
-
# return start_haystack()
|
54 |
-
|
55 |
-
|
56 |
-
# @st.cache()
|
57 |
-
# def load_questions_wrapper():
|
58 |
-
# return load_questions()
|
59 |
-
|
60 |
pipe = start_haystack()
|
61 |
-
|
62 |
# the pipeline is not included as parameter of the following function,
|
63 |
# because it is difficult to cache
|
64 |
@st.cache(persist=True, allow_output_mutation=True)
|
@@ -67,4 +43,14 @@ def query(question: str, retriever_top_k: int = 10, reader_top_k: int = 5):
|
|
67 |
params = {"Retriever": {"top_k": retriever_top_k},
|
68 |
"Reader": {"top_k": reader_top_k}}
|
69 |
results = pipe.run(question, params=params)
|
70 |
-
return results
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
8 |
from config import (INDEX_DIR, RETRIEVER_MODEL, RETRIEVER_MODEL_FORMAT,
|
9 |
READER_MODEL, READER_CONFIG_THRESHOLD, QUESTIONS_PATH)
|
10 |
|
11 |
+
# cached to make index and models load only at start
|
12 |
@st.cache(hash_funcs={"builtins.SwigPyObject": lambda _: None},
|
13 |
allow_output_mutation=True)
|
14 |
def start_haystack():
|
|
|
34 |
pipe = ExtractiveQAPipeline(reader, retriever)
|
35 |
return pipe
|
36 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
37 |
pipe = start_haystack()
|
|
|
38 |
# the pipeline is not included as parameter of the following function,
|
39 |
# because it is difficult to cache
|
40 |
@st.cache(persist=True, allow_output_mutation=True)
|
|
|
43 |
params = {"Retriever": {"top_k": retriever_top_k},
|
44 |
"Reader": {"top_k": reader_top_k}}
|
45 |
results = pipe.run(question, params=params)
|
46 |
+
return results
|
47 |
+
|
48 |
+
@st.cache()
|
49 |
+
def load_questions():
|
50 |
+
"""Load selected questions from file"""
|
51 |
+
with open(QUESTIONS_PATH) as fin:
|
52 |
+
questions = [line.strip() for line in fin.readlines()
|
53 |
+
if not line.startswith('#')]
|
54 |
+
return questions
|
55 |
+
|
56 |
+
|
config.py
CHANGED
@@ -1,6 +1,6 @@
|
|
1 |
|
2 |
INDEX_DIR = 'data/index'
|
3 |
-
QUESTIONS_PATH = 'data/questions.txt'
|
4 |
RETRIEVER_MODEL = "sentence-transformers/multi-qa-mpnet-base-dot-v1"
|
5 |
RETRIEVER_MODEL_FORMAT = "sentence_transformers"
|
6 |
READER_MODEL = "deepset/roberta-base-squad2"
|
|
|
1 |
|
2 |
INDEX_DIR = 'data/index'
|
3 |
+
QUESTIONS_PATH = 'data/questions/selected_questions.txt'
|
4 |
RETRIEVER_MODEL = "sentence-transformers/multi-qa-mpnet-base-dot-v1"
|
5 |
RETRIEVER_MODEL_FORMAT = "sentence_transformers"
|
6 |
READER_MODEL = "deepset/roberta-base-squad2"
|
data/questions/generated_questions.txt
ADDED
The diff for this file is too large to render.
See raw diff
|
|
data/{questions.txt → questions/selected_questions.txt}
RENAMED
File without changes
|
frontend_utils.py
ADDED
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
|
3 |
+
def set_state_if_absent(key, value):
|
4 |
+
if key not in st.session_state:
|
5 |
+
st.session_state[key] = value
|
6 |
+
|
7 |
+
# Small callback to reset the interface in case the text of the question changes
|
8 |
+
def reset_results(*args):
|
9 |
+
st.session_state.answer = None
|
10 |
+
st.session_state.results = None
|
11 |
+
st.session_state.raw_json = None
|
12 |
+
|
13 |
+
|
14 |
+
|