Sourabh Zanwar sourabhzanwar committed on
Commit
3ccc981
Β·
unverified Β·
1 Parent(s): 60fc52c

added login, upload floater options(#8)

Browse files

Co-authored-by: Sourabh Zanwar <s.zanwar@reply.de>

.DS_Store CHANGED
Binary files a/.DS_Store and b/.DS_Store differ
 
.gitignore CHANGED
@@ -1,4 +1,5 @@
1
  .env
2
  .vscode
3
  .idea
4
- *.pyc
 
 
1
  .env
2
  .vscode
3
  .idea
4
+ *.pyc
5
+ **/.DS_Store
README.md CHANGED
@@ -1,5 +1,5 @@
1
  ---
2
- title: Haystack Search Pipeline with Streamlit
3
  emoji: πŸ‘‘
4
  colorFrom: indigo
5
  colorTo: indigo
 
1
  ---
2
+ title: Document Insights - Extractive & Generative Methods
3
  emoji: πŸ‘‘
4
  colorFrom: indigo
5
  colorTo: indigo
app.py CHANGED
@@ -1,3 +1,7 @@
 
 
 
 
1
  from operator import index
2
  import streamlit as st
3
  import logging
@@ -12,17 +16,45 @@ from utils.ui import reset_results, set_initial_state
12
  import pandas as pd
13
  import haystack
14
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
15
  # Whether the file upload should be enabled or not
16
  DISABLE_FILE_UPLOAD = bool(os.getenv("DISABLE_FILE_UPLOAD"))
 
 
 
 
 
 
 
 
 
 
 
17
  # Define a function to handle file uploads
18
  def upload_files():
19
- uploaded_files = st.sidebar.file_uploader(
20
- "upload", type=["pdf", "txt", "docx"], accept_multiple_files=True, label_visibility="hidden"
21
  )
22
  return uploaded_files
23
 
24
- # Define a function to process a single file
25
 
 
26
  def process_file(data_file, preprocesor, document_store):
27
  # read file and add content
28
  file_contents = data_file.read().decode("utf-8")
@@ -47,10 +79,34 @@ def process_file(data_file, preprocesor, document_store):
47
  except Exception as e:
48
  print(e)
49
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
50
  try:
51
  args = parser.parse_args()
52
  preprocesor = start_preprocessor_node()
53
  document_store = start_document_store(type=args.store)
 
54
  retriever = start_retriever(document_store)
55
  reader = start_reader()
56
  st.set_page_config(
@@ -65,151 +121,164 @@ try:
65
  )
66
  st.sidebar.image("ml_logo.png", use_column_width=True)
67
 
68
- # Sidebar for Task Selection
69
- st.sidebar.header('Options:')
70
 
71
- # OpenAI Key Input
72
- openai_key = st.sidebar.text_input("Enter OpenAI Key:", type="password")
73
 
74
- if openai_key:
75
- task_options = ['Extractive', 'Generative']
76
- else:
77
- task_options = ['Extractive']
78
 
79
- task_selection = st.sidebar.radio('Select the task:', task_options)
 
80
 
81
- # Check the task and initialize pipeline accordingly
82
- if task_selection == 'Extractive':
83
- pipeline_extractive = initialize_pipeline("extractive", document_store, retriever, reader)
84
- elif task_selection == 'Generative' and openai_key: # Check for openai_key to ensure user has entered it
85
- pipeline_rag = initialize_pipeline("rag", document_store, retriever, reader, openai_key=openai_key)
86
 
 
 
87
 
88
- set_initial_state()
 
89
 
90
- st.write('# ' + args.name)
 
 
 
91
 
 
92
 
93
- # File upload block
94
- if not DISABLE_FILE_UPLOAD:
95
- st.sidebar.write("## File Upload:")
96
- #data_files = st.sidebar.file_uploader(
97
- # "upload", type=["pdf", "txt", "docx"], accept_multiple_files=True, label_visibility="hidden"
98
- #)
99
- data_files = upload_files()
100
- if data_files is not None:
101
- for data_file in data_files:
102
- # Upload file
103
- if data_file:
104
- try:
105
- #raw_json = upload_doc(data_file)
106
- # Call the process_file function for each uploaded file
107
- if args.store == 'inmemory':
108
- processed_data = process_file(data_file, preprocesor, document_store)
109
- st.sidebar.write(str(data_file.name) + " &nbsp;&nbsp; βœ… ")
110
- except Exception as e:
111
- st.sidebar.write(str(data_file.name) + " &nbsp;&nbsp; ❌ ")
112
- st.sidebar.write("_This file could not be parsed, see the logs for more information._")
113
 
114
- if "question" not in st.session_state:
115
- st.session_state.question = ""
116
- # Search bar
117
- question = st.text_input("", value=st.session_state.question, max_chars=100, on_change=reset_results)
118
 
119
- run_pressed = st.button("Run")
120
 
121
- run_query = (
122
- run_pressed or question != st.session_state.question #or task_selection != st.session_state.task
123
- )
 
124
 
125
- # Get results for query
126
- if run_query and question:
127
- if task_selection == 'Extractive':
128
- reset_results()
129
- st.session_state.question = question
130
- with st.spinner("πŸ”Ž &nbsp;&nbsp; Running your pipeline"):
131
- try:
132
- st.session_state.results_extractive = query(pipeline_extractive, question)
133
- st.session_state.task = task_selection
134
- except JSONDecodeError as je:
135
- st.error(
136
- "πŸ‘“ &nbsp;&nbsp; An error occurred reading the results. Is the document store working?"
137
- )
138
- except Exception as e:
139
- logging.exception(e)
140
- st.error("🐞 &nbsp;&nbsp; An error occurred during the request.")
 
141
 
142
- elif task_selection == 'Generative':
143
- reset_results()
144
- st.session_state.question = question
145
- with st.spinner("πŸ”Ž &nbsp;&nbsp; Running your pipeline"):
146
- try:
147
- st.session_state.results_generative = query(pipeline_rag, question)
148
- st.session_state.task = task_selection
149
- except JSONDecodeError as je:
150
- st.error(
151
- "πŸ‘“ &nbsp;&nbsp; An error occurred reading the results. Is the document store working?"
152
- )
153
- except Exception as e:
154
- if "API key is invalid" in str(e):
155
- logging.exception(e)
156
- st.error("🐞 &nbsp;&nbsp; incorrect API key provided. You can find your API key at https://platform.openai.com/account/api-keys.")
157
- else:
 
 
 
 
 
 
 
 
 
 
 
 
158
  logging.exception(e)
159
  st.error("🐞 &nbsp;&nbsp; An error occurred during the request.")
160
- # Display results
161
- if (st.session_state.results_extractive or st.session_state.results_generative) and run_query:
162
 
163
- # Handle Extractive Answers
164
- if task_selection == 'Extractive':
165
- results = st.session_state.results_extractive
166
-
167
- st.subheader("Extracted Answers:")
168
-
169
- if 'answers' in results:
170
- answers = results['answers']
171
- treshold = 0.2
172
- higher_then_treshold = any(ans.score > treshold for ans in answers)
173
- if not higher_then_treshold:
174
- st.markdown(f"<span style='color:red'>Please note none of the answers achieved a score higher then {int(treshold) * 100}%. Which probably means that the desired answer is not in the searched documents.</span>", unsafe_allow_html=True)
175
- for count, answer in enumerate(answers):
176
- if answer.answer:
177
- text, context = answer.answer, answer.context
178
- start_idx = context.find(text)
179
- end_idx = start_idx + len(text)
180
- score = round(answer.score, 3)
181
- st.markdown(f"**Answer {count + 1}:**")
182
- st.markdown(
183
- context[:start_idx] + str(annotation(body=text, label=f'SCORE {score}', background='#964448', color='#ffffff')) + context[end_idx:],
184
- unsafe_allow_html=True,
185
- )
186
- else:
187
- st.info(
188
- "πŸ€” &nbsp;&nbsp; Haystack is unsure whether any of the documents contain an answer to your question. Try to reformulate it!"
189
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
190
 
191
- # Handle Generative Answers
192
- elif task_selection == 'Generative':
193
- results = st.session_state.results_generative
194
- st.subheader("Generated Answer:")
195
- if 'results' in results:
196
- st.markdown("**Answer:**")
197
- st.write(results['results'][0])
198
-
199
- # Handle Retrieved Documents
200
- if 'documents' in results:
201
- retrieved_documents = results['documents']
202
- st.subheader("Retriever Results:")
203
-
204
- data = []
205
- for i, document in enumerate(retrieved_documents):
206
- # Truncate the content
207
- truncated_content = (document.content[:150] + '...') if len(document.content) > 150 else document.content
208
- data.append([i + 1, document.meta['name'], truncated_content])
209
-
210
- # Convert data to DataFrame and display using Streamlit
211
- df = pd.DataFrame(data, columns=['Ranked Context', 'Document Name', 'Content'])
212
- st.table(df)
213
 
 
 
 
214
  except SystemExit as e:
215
- os._exit(e.code)
 
1
+ from utils.check_pydantic_version import use_pydantic_v1
2
+ use_pydantic_v1() #This function has to be run before importing haystack. as haystack requires pydantic v1 to run
3
+
4
+
5
  from operator import index
6
  import streamlit as st
7
  import logging
 
16
  import pandas as pd
17
  import haystack
18
 
19
+ from datetime import datetime
20
+ import streamlit.components.v1 as components
21
+ import streamlit_authenticator as stauth
22
+ import pickle
23
+
24
+ from streamlit_modal import Modal
25
+ import numpy as np
26
+
27
+
28
+
29
+ names = ['mlreply']
30
+ usernames = ['mlreply']
31
+ with open('hashed_password.pkl','rb') as f:
32
+ hashed_passwords = pickle.load(f)
33
+
34
+
35
+
36
  # Whether the file upload should be enabled or not
37
  DISABLE_FILE_UPLOAD = bool(os.getenv("DISABLE_FILE_UPLOAD"))
38
+
39
+
40
def show_documents_list(retrieved_documents):
    """Build a display table of the unique uploaded document names.

    Parameters
    ----------
    retrieved_documents : iterable
        Haystack Document objects; only ``meta['name']`` is read.

    Returns
    -------
    pd.DataFrame
        One column, 'Uploaded Document Name', with duplicates dropped
        and a 1-based index for human-friendly display in st.table.
    """
    # The original looped with enumerate() but never used the index;
    # a comprehension expresses the collection step directly.
    data = [[document.meta['name']] for document in retrieved_documents]
    df = pd.DataFrame(data, columns=['Uploaded Document Name'])
    df.drop_duplicates(subset=['Uploaded Document Name'], inplace=True)
    # Re-number rows 1..n after the de-duplication gaps.
    df.index = np.arange(1, len(df) + 1)
    return df
48
+
49
  # Define a function to handle file uploads
50
def upload_files():
    """Render the file-upload widget inside the shared upload container.

    Returns whatever ``st.file_uploader`` yields: a list of uploaded
    files, or None while the user has not selected anything.
    NOTE(review): relies on the module-level ``upload_container``
    being created before this is called.
    """
    return upload_container.file_uploader(
        "upload",
        type=["pdf", "txt", "docx"],
        accept_multiple_files=True,
        label_visibility="hidden",
        key=1,
    )
55
 
 
56
 
57
+ # Define a function to process a single file
58
  def process_file(data_file, preprocesor, document_store):
59
  # read file and add content
60
  file_contents = data_file.read().decode("utf-8")
 
79
  except Exception as e:
80
  print(e)
81
 
82
+
83
+ # Define a function to upload the documents to haystack document store
84
def upload_document():
    """Feed every currently selected upload into the document store.

    Reads the module-level ``data_files`` produced by ``upload_files``.
    Only the 'inmemory' store path is wired up; parse failures are
    reported in the upload container rather than raised.
    """
    if data_files is None:
        return
    for data_file in data_files:
        # Skip empty slots the uploader may hand back.
        if not data_file:
            continue
        try:
            #raw_json = upload_doc(data_file)
            # Process each uploaded file through the preprocessing node.
            if args.store == 'inmemory':
                processed_data = process_file(data_file, preprocesor, document_store)
                #upload_container.write(str(data_file.name) + " &nbsp;&nbsp; ✅ ")
        except Exception as e:
            upload_container.write(str(data_file.name) + " &nbsp;&nbsp; ❌ ")
            upload_container.write("_This file could not be parsed, see the logs for more information._")
98
+
99
+ # Define a function to reset the documents in haystack document store
100
def reset_documents():
    """Clear the session's upload state and delete every stored document.

    Registered as the on_click handler for the sidebar 'Reset
    documents' button.
    """
    # Fix: log line previously read "Reseting" (typo).
    print('\nResetting documents list at ' + str(datetime.now()) + '\n')
    st.session_state.data_files = None
    document_store.delete_documents()
104
+
105
  try:
106
  args = parser.parse_args()
107
  preprocesor = start_preprocessor_node()
108
  document_store = start_document_store(type=args.store)
109
+ document_store.get_all_documents()
110
  retriever = start_retriever(document_store)
111
  reader = start_reader()
112
  st.set_page_config(
 
121
  )
122
  st.sidebar.image("ml_logo.png", use_column_width=True)
123
 
124
+ authenticator = stauth.Authenticate(names, usernames, hashed_passwords, "document_search", "random_text", cookie_expiry_days=1)
 
125
 
126
+ name, authentication_status, username = authenticator.login("Login", "main")
 
127
 
128
+ if authentication_status == False:
129
+ st.error("Username/Password is incorrect")
 
 
130
 
131
+ if authentication_status == None:
132
+ st.warning("Please enter your username and password")
133
 
134
+ if authentication_status:
 
 
 
 
135
 
136
+ # Sidebar for Task Selection
137
+ st.sidebar.header('Options:')
138
 
139
+ # OpenAI Key Input
140
+ openai_key = st.sidebar.text_input("Enter LLM-authorization Key:", type="password")
141
 
142
+ if openai_key:
143
+ task_options = ['Extractive', 'Generative']
144
+ else:
145
+ task_options = ['Extractive']
146
 
147
+ task_selection = st.sidebar.radio('Select the task:', task_options)
148
 
149
+ # Check the task and initialize pipeline accordingly
150
+ if task_selection == 'Extractive':
151
+ pipeline_extractive = initialize_pipeline("extractive", document_store, retriever, reader)
152
+ elif task_selection == 'Generative' and openai_key: # Check for openai_key to ensure user has entered it
153
+ pipeline_rag = initialize_pipeline("rag", document_store, retriever, reader, openai_key=openai_key)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
154
 
 
 
 
 
155
 
156
+ set_initial_state()
157
 
158
+ modal = Modal("Manage Files", key="demo-modal")
159
+ open_modal = st.sidebar.button("Manage Files", use_container_width=True)
160
+ if open_modal:
161
+ modal.open()
162
 
163
+ st.write('# ' + args.name)
164
+ if modal.is_open():
165
+ with modal.container():
166
+ if not DISABLE_FILE_UPLOAD:
167
+ upload_container = st.container()
168
+ data_files = upload_files()
169
+ upload_document()
170
+ st.session_state.sidebar_state = 'collapsed'
171
+ st.table(show_documents_list(document_store.get_all_documents()))
172
+
173
+ # File upload block
174
+ # if not DISABLE_FILE_UPLOAD:
175
+ # upload_container = st.sidebar.container()
176
+ # upload_container.write("## File Upload:")
177
+ # data_files = upload_files()
178
+ # Button to update files in the documentStore
179
+ # upload_container.button('Upload Files', on_click=upload_document, args=())
180
 
181
+ # Button to reset the documents in DocumentStore
182
+ st.sidebar.button("Reset documents", on_click=reset_documents, args=(), use_container_width=True)
183
+
184
+ if "question" not in st.session_state:
185
+ st.session_state.question = ""
186
+ # Search bar
187
+ question = st.text_input("Question", value=st.session_state.question, max_chars=100, on_change=reset_results, label_visibility="hidden")
188
+
189
+ run_pressed = st.button("Run")
190
+
191
+ run_query = (
192
+ run_pressed or question != st.session_state.question #or task_selection != st.session_state.task
193
+ )
194
+
195
+ # Get results for query
196
+ if run_query and question:
197
+ if task_selection == 'Extractive':
198
+ reset_results()
199
+ st.session_state.question = question
200
+ with st.spinner("πŸ”Ž &nbsp;&nbsp; Running your pipeline"):
201
+ try:
202
+ st.session_state.results_extractive = query(pipeline_extractive, question)
203
+ st.session_state.task = task_selection
204
+ except JSONDecodeError as je:
205
+ st.error(
206
+ "πŸ‘“ &nbsp;&nbsp; An error occurred reading the results. Is the document store working?"
207
+ )
208
+ except Exception as e:
209
  logging.exception(e)
210
  st.error("🐞 &nbsp;&nbsp; An error occurred during the request.")
 
 
211
 
212
+ elif task_selection == 'Generative':
213
+ reset_results()
214
+ st.session_state.question = question
215
+ with st.spinner("πŸ”Ž &nbsp;&nbsp; Running your pipeline"):
216
+ try:
217
+ st.session_state.results_generative = query(pipeline_rag, question)
218
+ st.session_state.task = task_selection
219
+ except JSONDecodeError as je:
220
+ st.error(
221
+ "πŸ‘“ &nbsp;&nbsp; An error occurred reading the results. Is the document store working?"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
222
  )
223
+ except Exception as e:
224
+ if "API key is invalid" in str(e):
225
+ logging.exception(e)
226
+ st.error("🐞 &nbsp;&nbsp; incorrect API key provided. You can find your API key at https://platform.openai.com/account/api-keys.")
227
+ else:
228
+ logging.exception(e)
229
+ st.error("🐞 &nbsp;&nbsp; An error occurred during the request.")
230
+ # Display results
231
+ if (st.session_state.results_extractive or st.session_state.results_generative) and run_query:
232
+
233
+ # Handle Extractive Answers
234
+ if task_selection == 'Extractive':
235
+ results = st.session_state.results_extractive
236
+
237
+ st.subheader("Extracted Answers:")
238
+
239
+ if 'answers' in results:
240
+ answers = results['answers']
241
+ treshold = 0.2
242
+ higher_then_treshold = any(ans.score > treshold for ans in answers)
243
+ if not higher_then_treshold:
244
+ st.markdown(f"<span style='color:red'>Please note none of the answers achieved a score higher then {int(treshold) * 100}%. Which probably means that the desired answer is not in the searched documents.</span>", unsafe_allow_html=True)
245
+ for count, answer in enumerate(answers):
246
+ if answer.answer:
247
+ text, context = answer.answer, answer.context
248
+ start_idx = context.find(text)
249
+ end_idx = start_idx + len(text)
250
+ score = round(answer.score, 3)
251
+ st.markdown(f"**Answer {count + 1}:**")
252
+ st.markdown(
253
+ context[:start_idx] + str(annotation(body=text, label=f'SCORE {score}', background='#964448', color='#ffffff')) + context[end_idx:],
254
+ unsafe_allow_html=True,
255
+ )
256
+ else:
257
+ st.info(
258
+ "πŸ€” &nbsp;&nbsp; Haystack is unsure whether any of the documents contain an answer to your question. Try to reformulate it!"
259
+ )
260
+
261
+ # Handle Generative Answers
262
+ elif task_selection == 'Generative':
263
+ results = st.session_state.results_generative
264
+ st.subheader("Generated Answer:")
265
+ if 'results' in results:
266
+ st.markdown("**Answer:**")
267
+ st.write(results['results'][0])
268
+
269
+ # Handle Retrieved Documents
270
+ if 'documents' in results:
271
+ retrieved_documents = results['documents']
272
+ st.subheader("Retriever Results:")
273
 
274
+ data = []
275
+ for i, document in enumerate(retrieved_documents):
276
+ # Truncate the content
277
+ truncated_content = (document.content[:150] + '...') if len(document.content) > 150 else document.content
278
+ data.append([i + 1, document.meta['name'], truncated_content])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
279
 
280
+ # Convert data to DataFrame and display using Streamlit
281
+ df = pd.DataFrame(data, columns=['Ranked Context', 'Document Name', 'Content'])
282
+ st.table(df)
283
  except SystemExit as e:
284
+ os._exit(e.code)
generate_keys.py ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# -*- coding: utf-8 -*-

"""One-off helper: hash the demo credentials and persist them for app.py."""

import pickle
from pathlib import Path

import streamlit_authenticator as stauth

# Single demo account; extend all three lists in lockstep to add users.
names = ['mlreply']
usernames = ['mlreply']
passwords = ['mlreply1']

hashed_passwords = stauth.Hasher(passwords).generate()

# Persist only the hashes so the app never touches the plaintext list.
with Path('hashed_password.pkl').open('wb') as f:
    pickle.dump(hashed_passwords, f)
hashed_password.pkl ADDED
Binary file (78 Bytes). View file
 
requirements.txt CHANGED
@@ -1,7 +1,10 @@
 
1
  safetensors==0.3.3.post1
2
- farm-haystack[inference,weaviate,opensearch]==1.20.0
3
  milvus-haystack
4
  streamlit==1.23.0
 
 
5
  markdown
6
  st-annotated-text
7
- datasets
 
1
+ scikit-learn==1.3.2
2
  safetensors==0.3.3.post1
3
+ farm-haystack[inference,weaviate,opensearch,file-conversion,pdf]==1.20.0
4
  milvus-haystack
5
  streamlit==1.23.0
6
+ streamlit-authenticator==0.1.5
7
+ streamlit_modal
8
  markdown
9
  st-annotated-text
10
+ datasets
utils/check_pydantic_version.py ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pydantic
2
+ import os
3
+ import fileinput
4
+
5
def replace_string_in_files(folder_path, old_str, new_str):
    """Replace ``old_str`` with ``new_str`` in every .txt/.py file under a tree.

    Parameters
    ----------
    folder_path : str
        Root directory; walked recursively.
    old_str, new_str : str
        Literal (non-regex) search and replacement strings.

    Files are rewritten in place line by line.
    """
    for subdir, dirs, files in os.walk(folder_path):
        for file in files:
            # Only touch text-like sources; rewriting binaries would corrupt them.
            # endswith accepts a tuple, replacing the original 'or' chain.
            if file.endswith((".txt", ".py")):
                file_path = os.path.join(subdir, file)
                # fileinput with inplace=True redirects stdout into the file,
                # so print() writes the (possibly replaced) line back.
                with fileinput.FileInput(file_path, inplace=True) as f:
                    for line in f:
                        print(line.replace(old_str, new_str), end='')
17
+
18
+
19
def use_pydantic_v1():
    """Patch the installed haystack package to import pydantic's v1 shim.

    Must run before ``import haystack``: rewrites every
    ``from pydantic`` import inside the haystack install to
    ``from pydantic.v1`` (the compatibility layer shipped with
    pydantic 2.x). Idempotent — skips the rewrite when schema.py
    already references pydantic.v1.
    """
    # Locate haystack as a sibling package of pydantic inside
    # site-packages. Two dirname() calls are robust even when the
    # install path itself contains the substring 'pydantic'
    # (the original split('pydantic')[0] was not).
    site_packages = os.path.dirname(os.path.dirname(pydantic.__file__))
    haystack_path = os.path.join(site_packages, 'haystack')
    with open(os.path.join(haystack_path, 'schema.py'), 'r') as f:
        haystack_schema_file = f.read()

    if 'from pydantic.v1' not in haystack_schema_file:
        replace_string_in_files(haystack_path, 'from pydantic', 'from pydantic.v1')
utils/config.py CHANGED
@@ -8,12 +8,14 @@ parser = argparse.ArgumentParser(description='This app lists animals')
8
 
9
  document_store_choices = ('inmemory', 'weaviate', 'milvus', 'opensearch')
10
  parser.add_argument('--store', choices=document_store_choices, default='inmemory', help='DocumentStore selection (default: %(default)s)')
11
- parser.add_argument('--name', default="My Search App")
12
 
13
  model_configs = {
14
  'EMBEDDING_MODEL': os.getenv("EMBEDDING_MODEL", "sentence-transformers/all-MiniLM-L12-v2"),
15
  'GENERATIVE_MODEL': os.getenv("GENERATIVE_MODEL", "gpt-4"),
16
- 'EXTRACTIVE_MODEL': os.getenv("EXTRACTIVE_MODEL", "deepset/roberta-base-squad2"),
 
 
17
  'OPENAI_KEY': os.getenv("OPENAI_KEY"),
18
  'COHERE_KEY': os.getenv("COHERE_KEY"),
19
  }
 
8
 
9
  document_store_choices = ('inmemory', 'weaviate', 'milvus', 'opensearch')
10
  parser.add_argument('--store', choices=document_store_choices, default='inmemory', help='DocumentStore selection (default: %(default)s)')
11
+ parser.add_argument('--name', default="Document Insights: Extractive & Generative Methods")
12
 
13
  model_configs = {
14
  'EMBEDDING_MODEL': os.getenv("EMBEDDING_MODEL", "sentence-transformers/all-MiniLM-L12-v2"),
15
  'GENERATIVE_MODEL': os.getenv("GENERATIVE_MODEL", "gpt-4"),
16
+ #'EXTRACTIVE_MODEL': os.getenv("EXTRACTIVE_MODEL", "deepset/roberta-base-squad2"),
17
+ 'EXTRACTIVE_MODEL': os.getenv("EXTRACTIVE_MODEL", "deepset/gelectra-large-germanquad"),
18
+ #'EXTRACTIVE_MODEL': os.getenv("EXTRACTIVE_MODEL", "MachineLearningReply/bert-base-german-legal-qa"),
19
  'OPENAI_KEY': os.getenv("OPENAI_KEY"),
20
  'COHERE_KEY': os.getenv("COHERE_KEY"),
21
  }
utils/haystack.py CHANGED
@@ -6,6 +6,7 @@ from haystack.schema import Answer
6
  from haystack.document_stores import BaseDocumentStore
7
  from haystack.document_stores import InMemoryDocumentStore, OpenSearchDocumentStore, WeaviateDocumentStore
8
  from haystack.nodes import EmbeddingRetriever, FARMReader, PromptNode, PreProcessor
 
9
  from milvus_haystack import MilvusDocumentStore
10
  #Use this file to set up your Haystack pipeline and querying
11
 
@@ -99,7 +100,8 @@ def start_haystack_extractive(_document_store: BaseDocumentStore, _retriever: Em
99
  def start_haystack_rag(_document_store: BaseDocumentStore, _retriever: EmbeddingRetriever, openai_key):
100
  prompt_node = PromptNode(default_prompt_template="deepset/question-answering",
101
  model_name_or_path=model_configs['GENERATIVE_MODEL'],
102
- api_key=openai_key)
 
103
  pipe = Pipeline()
104
 
105
  pipe.add_node(component=_retriever, name="Retriever", inputs=["Query"])
@@ -118,3 +120,5 @@ def initialize_pipeline(task, document_store, retriever, reader, openai_key = ""
118
  return start_haystack_extractive(document_store, retriever, reader)
119
  elif task == 'rag':
120
  return start_haystack_rag(document_store, retriever, openai_key)
 
 
 
6
  from haystack.document_stores import BaseDocumentStore
7
  from haystack.document_stores import InMemoryDocumentStore, OpenSearchDocumentStore, WeaviateDocumentStore
8
  from haystack.nodes import EmbeddingRetriever, FARMReader, PromptNode, PreProcessor
9
+ #from haystack.nodes import TextConverter, FileTypeClassifier, PDFToTextConverter
10
  from milvus_haystack import MilvusDocumentStore
11
  #Use this file to set up your Haystack pipeline and querying
12
 
 
100
  def start_haystack_rag(_document_store: BaseDocumentStore, _retriever: EmbeddingRetriever, openai_key):
101
  prompt_node = PromptNode(default_prompt_template="deepset/question-answering",
102
  model_name_or_path=model_configs['GENERATIVE_MODEL'],
103
+ api_key=openai_key,
104
+ max_length=500)
105
  pipe = Pipeline()
106
 
107
  pipe.add_node(component=_retriever, name="Retriever", inputs=["Query"])
 
120
  return start_haystack_extractive(document_store, retriever, reader)
121
  elif task == 'rag':
122
  return start_haystack_rag(document_store, retriever, openai_key)
123
+
124
+