dinhquangson committed on
Commit
67cd4b7
1 Parent(s): cea66bb

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +88 -13
app.py CHANGED
@@ -22,6 +22,13 @@ from hybrid_searcher import HybridSearcher
22
 
23
  app = FastAPI()
24
 
 
 
 
 
 
 
 
25
 
26
  FILEPATH_PATTERN = "structured_data_doc.parquet"
27
  NUM_PROC = os.cpu_count()
@@ -117,28 +124,94 @@ async def create_upload_file(text_field: str, file: UploadFile = File(...)):
117
  )
118
  return {"filename": file.filename, "message": "Done"}
119
 
120
- app.add_middleware(
121
- CORSMiddleware,
122
- allow_origins=["*"],
123
- allow_credentials=True,
124
- allow_methods=["*"],
125
- allow_headers=["*"],
126
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
127
 
 
 
 
 
128
  @app.get("/search")
129
  def search(prompt: str):
 
 
 
 
130
  # Let's see what senators are saying about immigration policy
131
  hits = client2.search(
132
  collection_name="law",
133
  query_vector=model.encode(prompt).tolist(),
134
  limit=5
135
  )
 
136
  for hit in hits:
137
  print(hit.payload, "score:", hit.score)
 
 
 
 
 
 
 
138
  return hits
139
 
140
  @app.get("/download-database/")
141
  async def download_database():
 
 
 
 
142
  # Path to the database directory
143
  database_dir = join(os.getcwd(), 'database')
144
  # Path for the zip file
@@ -146,6 +219,12 @@ async def download_database():
146
 
147
  # Create a zip file of the database directory
148
  shutil.make_archive(zip_path.replace('.zip', ''), 'zip', database_dir)
 
 
 
 
 
 
149
 
150
  # Return the zip file as a response for download
151
  return FileResponse(zip_path, media_type='application/zip', filename='database.zip')
@@ -163,9 +242,7 @@ def neural_search(q: str, city: str, collection_name: str):
163
 
164
  elapsed_time = end_time - start_time
165
 
166
- print(f"Execution time: {elapsed_time:.6f} seconds")
167
-
168
- return {"result": neural_searcher.search(text=q, city=city)}
169
 
170
  @app.get("/hybrid_search")
171
  def hybrid_search(q: str, city: str, collection_name: str):
@@ -180,9 +257,7 @@ def hybrid_search(q: str, city: str, collection_name: str):
180
 
181
  elapsed_time = end_time - start_time
182
 
183
- print(f"Execution time: {elapsed_time:.6f} seconds")
184
-
185
- return {"result": hybrid_searcher.search(text=q, city=city)}
186
 
187
  @app.get("/")
188
  def api_home():
 
22
 
23
  app = FastAPI()
24
 
25
+ app.add_middleware(
26
+ CORSMiddleware,
27
+ allow_origins=["*"],
28
+ allow_credentials=True,
29
+ allow_methods=["*"],
30
+ allow_headers=["*"],
31
+ )
32
 
33
  FILEPATH_PATTERN = "structured_data_doc.parquet"
34
  NUM_PROC = os.cpu_count()
 
124
  )
125
  return {"filename": file.filename, "message": "Done"}
126
 
127
@app.post("/uploadfile4hypersearch/")
async def upload_file_4_hyper_search(collection_name: str, text_field: str, file: UploadFile = File(...)):
    """Index an uploaded JSON-lines file into a dense + sparse collection.

    The upload is validated, saved under ``temp_path`` and fully parsed
    *before* the target collection is recreated, so a bad upload can no
    longer wipe an existing collection and then fail half-way.

    Args:
        collection_name: Name of the collection to (re)create and fill.
        text_field: Key in every JSON object holding the text to embed; the
            remaining keys of the object are stored as metadata.
        file: Uploaded file with one JSON object per line.

    Returns:
        A dict with the uploaded ``filename``, a ``message`` and the
        ``execution_time`` in seconds.

    Raises:
        NotImplementedError: If the uploaded file is not a ``.json`` file.
        KeyError: If a JSON object lacks ``text_field``.
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    import json
    import time

    # perf_counter is monotonic, so the reported duration cannot go negative
    # when the wall clock is adjusted mid-request.
    start_time = time.perf_counter()

    # The filename is client-controlled: keep only its last path component so
    # a name like "../../app.py" cannot escape temp_path.
    safe_name = os.path.basename(file.filename or "")

    # Reject unsupported uploads before anything is written or dropped.
    # The check runs on the file name only, not on temp_path.
    if '.json' not in safe_name:
        raise NotImplementedError("This feature is not supported yet")

    file_savePath = join(temp_path, safe_name)
    with open(file_savePath, 'wb') as f:
        shutil.copyfileobj(file.file, f)

    print("Uploaded complete!")

    # Parse the whole file first; any error raised here leaves the existing
    # collection untouched.
    metadata = []
    documents = []
    with open(file_savePath, encoding="utf-8") as fd:
        for line in fd:
            if not line.strip():
                # Tolerate blank lines (e.g. a trailing newline at EOF).
                continue
            obj = json.loads(line)
            documents.append(obj.pop(text_field))
            metadata.append(obj)

    print("The documents and metadata is parsed complete!")

    client2.set_model("sentence-transformers/all-MiniLM-L6-v2")

    # comment this line to use dense vectors only
    client2.set_sparse_model("prithivida/Splade_PP_en_v1")

    # NOTE(review): recreate_collection is expected to replace any existing
    # collection with this name - confirm against the client version in use.
    client2.recreate_collection(
        collection_name=collection_name,
        vectors_config=client2.get_fastembed_vector_params(),
        # comment this line to use dense vectors only
        sparse_vectors_config=client2.get_fastembed_sparse_vector_params(),
    )

    print("The collection is created complete!")

    client2.add(
        collection_name=collection_name,
        documents=documents,
        metadata=metadata,
        parallel=0,  # Use all available CPU cores to encode data.
        # Requires wrapping code into if __name__ == '__main__' block
    )

    print("The documents and metadata is upserted complete!")

    elapsed_time = time.perf_counter() - start_time

    return {"filename": file.filename, "message": "Done", "execution_time": elapsed_time}
184
+
185
  @app.get("/search")
186
  def search(prompt: str):
187
+ import time
188
+
189
+ start_time = time.time()
190
+
191
  # Let's see what senators are saying about immigration policy
192
  hits = client2.search(
193
  collection_name="law",
194
  query_vector=model.encode(prompt).tolist(),
195
  limit=5
196
  )
197
+
198
  for hit in hits:
199
  print(hit.payload, "score:", hit.score)
200
+
201
+ end_time = time.time()
202
+
203
+ elapsed_time = end_time - start_time
204
+
205
+ print(f"Execution time: {elapsed_time:.6f} seconds")
206
+
207
  return hits
208
 
209
  @app.get("/download-database/")
210
  async def download_database():
211
+ import time
212
+
213
+ start_time = time.time()
214
+
215
  # Path to the database directory
216
  database_dir = join(os.getcwd(), 'database')
217
  # Path for the zip file
 
219
 
220
  # Create a zip file of the database directory
221
  shutil.make_archive(zip_path.replace('.zip', ''), 'zip', database_dir)
222
+
223
+ end_time = time.time()
224
+
225
+ elapsed_time = end_time - start_time
226
+
227
+ print(f"Execution time: {elapsed_time:.6f} seconds")
228
 
229
  # Return the zip file as a response for download
230
  return FileResponse(zip_path, media_type='application/zip', filename='database.zip')
 
242
 
243
  elapsed_time = end_time - start_time
244
 
245
+ return {"result": neural_searcher.search(text=q, city=city), "execution_time": elapsed_time}
 
 
246
 
247
  @app.get("/hybrid_search")
248
  def hybrid_search(q: str, city: str, collection_name: str):
 
257
 
258
  elapsed_time = end_time - start_time
259
 
260
+ return {"result": hybrid_searcher.search(text=q, city=city), "execution_time": elapsed_time}
 
 
261
 
262
  @app.get("/")
263
  def api_home():