Henryk Borzymowski committed on
Commit
b6208a3
•
1 Parent(s): 8329090

support for switching between tasks and ui

Browse files
.env ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ OPENAI_KEY=<REDACTED - revoke the committed key; supply a new one via an untracked .env or a secret store>
2
+ EMBEDDING_MODEL=sentence-transformers/all-MiniLM-L12-v2
3
+ GENERATIVE_MODEL=text-davinci-003
.streamlit/config.toml ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ [theme]
2
+ primaryColor = "#E694FF"
3
+ backgroundColor = "#FFFFFF"
4
+ secondaryBackgroundColor = "#F0F0F0"
5
+ textColor = "#262730"
6
+ font = "sans-serif"
.vscode/launch.json ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "version": "0.2.0",
3
+ "configurations": [
4
+ // {
5
+ // "name": "Python: Streamlit",
6
+ // "type": "python",
7
+ // "request": "launch",
8
+ // "program": "${workspaceFolder}/app.py",
9
+ // "args": ["--name", "My Opensearch Documentation Search"],
10
+ // "cwd": "${workspaceFolder}",
11
+ // "env": {
12
+ // "STREAMLIT_SERVER_ON": "1"
13
+ // },
14
+ // }
15
+ {
16
+ "name": "Python:Streamlit",
17
+ "type": "python",
18
+ "request": "launch",
19
+ "module": "streamlit",
20
+ "args": [
21
+ "run",
22
+ "${workspaceFolder}/app.py",
23
+ "--",
24
+ "--name",
25
+ "Document Insights: Extractive & Generative Methods",
26
+ ]
27
+ }
28
+ ]
29
+ }
.vscode/settings.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ {
2
+ "python.pythonPath": "/Users/h.borzymowski/opt/anaconda3/envs/haystag_rag/bin/python3"
3
+ }
app.py CHANGED
@@ -1,4 +1,4 @@
1
-
2
  import streamlit as st
3
  import logging
4
  import os
@@ -7,79 +7,131 @@ from annotated_text import annotation
7
  from json import JSONDecodeError
8
  from markdown import markdown
9
  from utils.config import parser
10
- from utils.haystack import start_document_store, start_haystack_extractive, start_haystack_rag, query
11
  from utils.ui import reset_results, set_initial_state
 
12
 
13
  try:
14
  args = parser.parse_args()
15
- document_store = start_document_store(type = args.store)
16
- if args.task == 'extractive':
17
- pipeline = start_haystack_extractive(document_store)
18
- else:
19
- pipeline = start_haystack_rag(document_store)
 
 
 
 
 
 
 
 
 
 
 
 
 
20
 
21
  set_initial_state()
22
 
23
- st.write('# '+args.name)
24
 
 
 
25
  # Search bar
26
- question = st.text_input("Ask a question", value=st.session_state.question, max_chars=100, on_change=reset_results)
27
- #question = "what is Pi?"
28
 
29
  run_pressed = st.button("Run")
30
- #run_pressed = True
31
 
32
  run_query = (
33
- run_pressed or question != st.session_state.question
34
  )
35
 
36
  # Get results for query
37
  if run_query and question:
38
- reset_results()
39
- st.session_state.question = question
40
- with st.spinner("πŸ”Ž    Running your pipeline"):
41
- try:
42
- st.session_state.results = query(pipeline, question)
43
- except JSONDecodeError as je:
44
- st.error(
45
- "πŸ‘“    An error occurred reading the results. Is the document store working?"
46
- )
47
- except Exception as e:
48
- logging.exception(e)
49
- st.error("🐞    An error occurred during the request.")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
50
 
51
-
52
 
53
- if st.session_state.results:
54
- results = st.session_state.results
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
55
 
56
- if args.task == 'extractive':
57
- answers = results['answers']
58
- for count, answer in enumerate(answers):
59
- if answer.answer:
60
- text, context = answer.answer, answer.context
61
- start_idx = context.find(text)
62
- end_idx = start_idx + len(text)
63
- st.write(
64
- f" Answer: {markdown(context[:start_idx] + str(annotation(body=text, label='ANSWER', background='#964448', color='#ffffff')) + context[end_idx:])}",
65
- unsafe_allow_html=True,
66
- )
67
- else:
68
- st.info(
69
- "πŸ€”    Haystack is unsure whether any of the documents contain an answer to your question. Try to reformulate it!"
70
- )
71
- elif args.task == 'rag':
72
- st.write(f" Answer: {results['results'][0]}")
73
-
74
- # Extract and display information from the 'documents' list
75
- retrieved_documents = results['documents']
76
- st.subheader("Retriever Results:")
77
- for document in retrieved_documents:
78
- st.write(f"Document Name: {document.meta['name']}")
79
- st.write(f"Score: {document.score}")
80
- st.write(f"Text: {document.content}")
81
  except SystemExit as e:
82
- # This exception will be raised if --help or invalid command line arguments
83
- # are used. Currently streamlit prevents the program from exiting normally
84
- # so we have to do a hard exit.
85
- os._exit(e.code)
 
1
+ from operator import index
2
  import streamlit as st
3
  import logging
4
  import os
 
7
  from json import JSONDecodeError
8
  from markdown import markdown
9
  from utils.config import parser
10
+ from utils.haystack import start_document_store, query, initialize_pipeline
11
  from utils.ui import reset_results, set_initial_state
12
+ import pandas as pd
13
 
14
try:
    args = parser.parse_args()

    # set_page_config must be the first Streamlit command executed on the
    # page, so it runs before any helper that might emit Streamlit elements.
    st.set_page_config(
        page_title="test",
        layout="centered",
        page_icon=":shark:",
        menu_items={
            'Get Help': 'https://www.extremelycoolapp.com/help',
            'Report a bug': "https://www.extremelycoolapp.com/bug",
            'About': "# This is a header. This is an *extremely* cool app!",
        },
    )

    document_store = start_document_store(type=args.store)
    st.sidebar.image("ml_logo.png", use_column_width=True)

    # Sidebar for Task Selection
    st.sidebar.header('Options:')
    task_selection = st.sidebar.radio('Select the task:', ['Extractive', 'Generative'])

    pipeline_rag = initialize_pipeline("rag", document_store)
    pipeline_extractive = initialize_pipeline("extractive", document_store)

    # Sidebar label -> (pipeline to run, session-state key holding its results).
    # Keeps the query and display code below free of per-task duplication.
    task_config = {
        'Extractive': (pipeline_extractive, 'results_extractive'),
        'Generative': (pipeline_rag, 'results_generative'),
    }
    pipeline, results_key = task_config[task_selection]

    set_initial_state()

    st.write('# ' + args.name)

    if "question" not in st.session_state:
        st.session_state.question = ""
    # Search bar
    question = st.text_input("", value=st.session_state.question, max_chars=100, on_change=reset_results)

    run_pressed = st.button("Run")

    # Run when the button was pressed or the text differs from the last
    # question that was actually executed.
    run_query = run_pressed or question != st.session_state.question

    # Get results for query
    if run_query and question:
        reset_results()
        st.session_state.question = question
        with st.spinner("🔎 &nbsp;&nbsp; Running your pipeline"):
            try:
                st.session_state[results_key] = query(pipeline, question)
                st.session_state.task = task_selection
            except JSONDecodeError:
                st.error(
                    "👓 &nbsp;&nbsp; An error occurred reading the results. Is the document store working?"
                )
            except Exception as e:
                logging.exception(e)
                st.error("🐞 &nbsp;&nbsp; An error occurred during the request.")

    # Display results (only on the run that produced them)
    results = st.session_state[results_key]
    if results and run_query:

        # Handle Extractive Answers
        if task_selection == 'Extractive':
            st.subheader("Extracted Answers:")

            if 'answers' in results:
                answers = results['answers']
                threshold = 0.2
                if not any(ans.score > threshold for ans in answers):
                    # Scale before rounding: int(0.2) * 100 would always print 0%.
                    st.markdown(f"<span style='color:red'>Please note none of the answers achieved a score higher then {round(threshold * 100)}%. Which probably means that the desired answer is not in the searched documents.</span>", unsafe_allow_html=True)
                for count, answer in enumerate(answers):
                    if answer.answer:
                        text, context = answer.answer, answer.context
                        score = round(answer.score, 3)
                        highlighted = str(annotation(body=text, label=f'SCORE {score}', background='#964448', color='#ffffff'))
                        start_idx = context.find(text)
                        if start_idx == -1:
                            # Answer text is not a verbatim substring of the
                            # context; slicing with -1 would corrupt the output,
                            # so show the highlight followed by the context.
                            rendered = f"{highlighted} {context}"
                        else:
                            end_idx = start_idx + len(text)
                            rendered = context[:start_idx] + highlighted + context[end_idx:]
                        st.markdown(f"**Answer {count + 1}:**")
                        st.markdown(rendered, unsafe_allow_html=True)
                    else:
                        st.info(
                            "🤔 &nbsp;&nbsp; Haystack is unsure whether any of the documents contain an answer to your question. Try to reformulate it!"
                        )

        # Handle Generative Answers
        elif task_selection == 'Generative':
            st.subheader("Generated Answer:")
            # Guard against a missing or empty list before indexing [0].
            if 'results' in results and results['results']:
                st.markdown("**Answer:**")
                st.write(results['results'][0])

        # Handle Retrieved Documents
        # NOTE(review): placed at the same level as the task branches so it
        # applies to whichever task ran - confirm this nesting is intended.
        if 'documents' in results:
            retrieved_documents = results['documents']
            st.subheader("Retriever Results:")

            data = []
            for rank, document in enumerate(retrieved_documents, start=1):
                # Truncate the content
                content = document.content
                truncated_content = (content[:150] + '...') if len(content) > 150 else content
                data.append([rank, document.meta['name'], truncated_content])

            # Convert data to DataFrame and display using Streamlit
            df = pd.DataFrame(data, columns=['Ranked Context', 'Document Name', 'Content'])
            st.table(df)

except SystemExit as e:
    # Raised by argparse on --help or invalid command line arguments.
    # Streamlit prevents the program from exiting normally, so do a hard exit.
    os._exit(e.code)
 
 
 
ml_logo.png ADDED
utils/__pycache__/config.cpython-310.pyc CHANGED
Binary files a/utils/__pycache__/config.cpython-310.pyc and b/utils/__pycache__/config.cpython-310.pyc differ
 
utils/__pycache__/haystack.cpython-310.pyc CHANGED
Binary files a/utils/__pycache__/haystack.cpython-310.pyc and b/utils/__pycache__/haystack.cpython-310.pyc differ
 
utils/__pycache__/ui.cpython-310.pyc CHANGED
Binary files a/utils/__pycache__/ui.cpython-310.pyc and b/utils/__pycache__/ui.cpython-310.pyc differ
 
utils/config.py CHANGED
@@ -9,7 +9,7 @@ parser = argparse.ArgumentParser(description='This app lists animals')
9
  document_store_choices = ('inmemory', 'weaviate', 'milvus', 'opensearch')
10
  task_choices = ('extractive', 'rag')
11
  parser.add_argument('--store', choices=document_store_choices, default='inmemory', help='DocumentStore selection (default: %(default)s)')
12
- parser.add_argument('--task', choices=task_choices, default='rag', help='Task selection (default: %(default)s)')
13
  parser.add_argument('--name', default="My Search App")
14
 
15
  model_configs = {
 
9
  document_store_choices = ('inmemory', 'weaviate', 'milvus', 'opensearch')
10
  task_choices = ('extractive', 'rag')
11
  parser.add_argument('--store', choices=document_store_choices, default='inmemory', help='DocumentStore selection (default: %(default)s)')
12
+ #parser.add_argument('--task', choices=task_choices, default='rag', help='Task selection (default: %(default)s)')
13
  parser.add_argument('--name', default="My Search App")
14
 
15
  model_configs = {
utils/haystack.py CHANGED
@@ -77,8 +77,14 @@ def start_haystack_rag(_document_store: BaseDocumentStore):
77
 
78
  return pipe
79
 
80
- @st.cache_data(show_spinner=True)
81
  def query(_pipeline, question):
82
  params = {}
83
  results = _pipeline.run(question, params=params)
84
- return results
 
 
 
 
 
 
 
77
 
78
  return pipe
79
 
80
+ #@st.cache_data(show_spinner=True)
81
def query(_pipeline, question):
    """Run ``question`` through ``_pipeline`` and return its result unchanged.

    The pipeline is invoked with an empty ``params`` dict, i.e. no per-node
    overrides are supplied.
    """
    return _pipeline.run(question, params={})
85
+
86
def initialize_pipeline(task, document_store):
    """Build the pipeline for ``task`` on top of ``document_store``.

    Args:
        task: Either ``'extractive'`` or ``'rag'``.
        document_store: Document store handed to the pipeline factory.

    Returns:
        Whatever ``start_haystack_extractive`` or ``start_haystack_rag``
        returns for the given store.

    Raises:
        ValueError: If ``task`` is not one of the supported names. Previously
            this fell through and returned ``None``, which only surfaced later
            as an attribute error when the "pipeline" was run.
    """
    if task == 'extractive':
        return start_haystack_extractive(document_store)
    if task == 'rag':
        return start_haystack_rag(document_store)
    raise ValueError(
        f"Unknown task {task!r}; expected 'extractive' or 'rag'"
    )
utils/ui.py CHANGED
@@ -6,7 +6,11 @@ def set_state_if_absent(key, value):
6
 
7
  def set_initial_state():
8
  set_state_if_absent("question", "Ask something here?")
9
- set_state_if_absent("results", None)
 
 
10
 
11
  def reset_results(*args):
12
- st.session_state.results = None
 
 
 
6
 
7
def set_initial_state():
    """Register the app's session-state defaults via ``set_state_if_absent``.

    Keys are registered in this order: the question text, the two per-task
    result slots, and the task that produced the current results.
    """
    defaults = (
        ("question", "Ask something here?"),
        ("results_extractive", None),
        ("results_generative", None),
        ("task", None),
    )
    for key, value in defaults:
        set_state_if_absent(key, value)
12
 
13
def reset_results(*args):
    """Set both per-task result slots and the stored task to ``None``.

    Any positional arguments are accepted and ignored.
    """
    for key in ("results_extractive", "results_generative", "task"):
        st.session_state[key] = None