Abhilashvj commited on
Commit
26add68
1 Parent(s): 90b5d0f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +6 -6
app.py CHANGED
@@ -48,7 +48,7 @@ preprocessor = PreProcessor(
48
  clean_whitespace=True,
49
  clean_header_footer=False,
50
  split_by="word",
51
- split_length=100,
52
  split_respect_sentence_boundary=True
53
  )
54
  file_type_classifier = FileTypeClassifier()
@@ -129,7 +129,7 @@ def complete(prompt):
129
  )
130
  return res['choices'][0]['text'].strip()
131
 
132
- def query(pipe, question, top_k_reader, top_k_retriever):
133
  # first we retrieve relevant items from Pinecone
134
  query_with_contexts, contexts = retrieve(question)
135
  return complete(query_with_contexts), contexts
@@ -216,7 +216,7 @@ if len(ALL_FILES) > 0:
216
  docs = indexing_pipeline_with_classification.run(file_paths=ALL_FILES, meta=META_DATA)["documents"]
217
  index_name = "qa_demo"
218
  # we will use batches of 64
219
- batch_size = 64
220
  # docs = docs['documents']
221
  with st.spinner(
222
  "🧠    Performing indexing of uplaoded documents... \n "
@@ -228,13 +228,13 @@ if len(ALL_FILES) > 0:
228
  batch = [doc.content for doc in docs[i:i_end]]
229
  # generate embeddings for batch
230
  try:
231
- res = openai.Embedding.create(input=texts, engine=embed_model)
232
  except:
233
  done = False
234
  while not done:
235
  sleep(5)
236
  try:
237
- res = openai.Embedding.create(input=texts, engine=embed_model)
238
  done = True
239
  except:
240
  pass
@@ -300,7 +300,7 @@ if run_pressed:
300
  ):
301
  try:
302
  st.session_state.results = query(
303
- pipe, question, top_k_reader=None, top_k_retriever=None
304
  )
305
  except JSONDecodeError as je:
306
  st.error("👓    An error occurred reading the results. Is the document store working?")
 
48
  clean_whitespace=True,
49
  clean_header_footer=False,
50
  split_by="word",
51
+ split_length=200,
52
  split_respect_sentence_boundary=True
53
  )
54
  file_type_classifier = FileTypeClassifier()
 
129
  )
130
  return res['choices'][0]['text'].strip()
131
 
132
+ def query(question, top_k_reader, top_k_retriever):
133
  # first we retrieve relevant items from Pinecone
134
  query_with_contexts, contexts = retrieve(question)
135
  return complete(query_with_contexts), contexts
 
216
  docs = indexing_pipeline_with_classification.run(file_paths=ALL_FILES, meta=META_DATA)["documents"]
217
  index_name = "qa_demo"
218
  # we will use batches of 200
219
+ batch_size = 200
220
  # docs = docs['documents']
221
  with st.spinner(
222
  "🧠    Performing indexing of uplaoded documents... \n "
 
228
  batch = [doc.content for doc in docs[i:i_end]]
229
  # generate embeddings for batch
230
  try:
231
+ res = openai.Embedding.create(input=batch, engine=embed_model)
232
  except:
233
  done = False
234
  while not done:
235
  sleep(5)
236
  try:
237
+ res = openai.Embedding.create(input=batch, engine=embed_model)
238
  done = True
239
  except:
240
  pass
 
300
  ):
301
  try:
302
  st.session_state.results = query(
303
+ question, top_k_reader=None, top_k_retriever=None
304
  )
305
  except JSONDecodeError as je:
306
  st.error("👓    An error occurred reading the results. Is the document store working?")