QDrantRAG9

Sleeping

App Files Files Community

dinhquangson committed on Jun 20

Commit

67cd4b7

•

1 Parent(s): cea66bb

Update app.py

Browse files

Files changed (1) hide show

app.py +88 -13

app.py CHANGED Viewed

@@ -22,6 +22,13 @@ from hybrid_searcher import HybridSearcher
 app = FastAPI()
 FILEPATH_PATTERN = "structured_data_doc.parquet"
 NUM_PROC = os.cpu_count()
@@ -117,28 +124,94 @@ async def create_upload_file(text_field: str, file: UploadFile = File(...)):
         )
     return {"filename": file.filename, "message": "Done"}
-app.add_middleware(
-    CORSMiddleware,
-    allow_origins=["*"],
-    allow_credentials=True,
-    allow_methods=["*"],
-    allow_headers=["*"],
-)
 @app.get("/search")
 def search(prompt: str):
     # Let's see what senators are saying about immigration policy
     hits = client2.search(
         collection_name="law",
         query_vector=model.encode(prompt).tolist(),
         limit=5
     )
     for hit in hits:
       print(hit.payload, "score:", hit.score)
     return hits
 @app.get("/download-database/")
 async def download_database():
     # Path to the database directory
     database_dir = join(os.getcwd(), 'database')
     # Path for the zip file
@@ -146,6 +219,12 @@ async def download_database():
     # Create a zip file of the database directory
     shutil.make_archive(zip_path.replace('.zip', ''), 'zip', database_dir)
     # Return the zip file as a response for download
     return FileResponse(zip_path, media_type='application/zip', filename='database.zip')
@@ -163,9 +242,7 @@ def neural_search(q: str, city: str, collection_name: str):
     elapsed_time = end_time - start_time
-    print(f"Execution time: {elapsed_time:.6f} seconds")
-    return {"result": neural_searcher.search(text=q, city=city)}
 @app.get("/hybrid_search")
 def hybrid_search(q: str, city: str, collection_name: str):
@@ -180,9 +257,7 @@ def hybrid_search(q: str, city: str, collection_name: str):
     elapsed_time = end_time - start_time
-    print(f"Execution time: {elapsed_time:.6f} seconds")
-    return {"result": hybrid_searcher.search(text=q, city=city)}
 @app.get("/")
 def api_home():

 app = FastAPI()
+# Register CORS handling that accepts any origin, method and header.
+# NOTE(review): FastAPI's CORS documentation says wildcard origins/methods/
+# headers should not be combined with allow_credentials=True — confirm whether
+# credentials are actually needed, or list the allowed origins explicitly.
+app.add_middleware(
+    CORSMiddleware,
+    allow_origins=["*"],
+    allow_credentials=True,
+    allow_methods=["*"],
+    allow_headers=["*"],
+)
 # File name (pattern) of the parquet file holding the structured documents.
 FILEPATH_PATTERN = "structured_data_doc.parquet"
 # CPU count reported by the OS; note os.cpu_count() may return None.
 NUM_PROC = os.cpu_count()
         )
     return {"filename": file.filename, "message": "Done"}
+@app.post("/uploadfile4hypersearch/")
+async def upload_file_4_hyper_search(collection_name: str, text_field: str, file: UploadFile = File(...)):
+    import time
+    start_time = time.time()
+    file_savePath =  join(temp_path,file.filename)
+    client2.set_model("sentence-transformers/all-MiniLM-L6-v2")
+    # comment this line to use dense vectors only
+    client2.set_sparse_model("prithivida/Splade_PP_en_v1")
+    with open(file_savePath,'wb') as f:
+        shutil.copyfileobj(file.file, f)
+    print(f"Uploaded complete!")
+    client2.recreate_collection(
+        collection_name=collection_name,
+        vectors_config=client2.get_fastembed_vector_params(),
+        # comment this line to use dense vectors only
+        sparse_vectors_config=client2.get_fastembed_sparse_vector_params(),
+    )
+    print(f"The collection is created complete!")
+    # Here you can save the file and do other operations as needed
+    if '.json' in file_savePath:
+        import json
+        metadata = []
+        documents = []
+        with open(file_savePath) as fd:
+            for line in fd:
+                obj = json.loads(line)
+                documents.append(obj.pop(text_field))
+                metadata.append(obj)
+        print(f"The documents and metadata is parsed complete!")
+        client2.add(
+            collection_name=collection_name,
+            documents=documents,
+            metadata=metadata,
+            parallel=0,  # Use all available CPU cores to encode data.
+            # Requires wrapping code into if __name__ == '__main__' block
+        )
+        print(f"The documents and metadata is upserted complete!")
+    else:
+        raise NotImplementedError("This feature is not supported yet")
+    end_time = time.time()
+    elapsed_time = end_time - start_time
+    return {"filename": file.filename, "message": "Done", "execution_time": elapsed_time}
 @app.get("/search")
 def search(prompt: str):
+    """Return up to five hits from the "law" collection for *prompt*.
+
+    The prompt is embedded with ``model.encode`` and passed as the query
+    vector to ``client2.search``. Each hit's payload and score, and the
+    elapsed time of the request, are printed to stdout.
+    """
+    import time
+    start_time = time.time()
     # Let's see what senators are saying about immigration policy
+    # NOTE(review): the comment above looks like a leftover from an example;
+    # the query below targets the "law" collection — confirm and reword.
     hits = client2.search(
         collection_name="law",
         query_vector=model.encode(prompt).tolist(),
         limit=5
     )
     for hit in hits:
       print(hit.payload, "score:", hit.score)
+    end_time = time.time()
+    elapsed_time = end_time - start_time
+    print(f"Execution time: {elapsed_time:.6f} seconds")
     return hits
 @app.get("/download-database/")
 async def download_database():
+    import time
+    start_time = time.time()
     # Path to the database directory
     database_dir = join(os.getcwd(), 'database')
     # Path for the zip file
     # Create a zip file of the database directory
     shutil.make_archive(zip_path.replace('.zip', ''), 'zip', database_dir)
+    end_time = time.time()
+    elapsed_time = end_time - start_time
+    print(f"Execution time: {elapsed_time:.6f} seconds")
     # Return the zip file as a response for download
     return FileResponse(zip_path, media_type='application/zip', filename='database.zip')
     elapsed_time = end_time - start_time
+    return {"result": neural_searcher.search(text=q, city=city), "execution_time": elapsed_time}
 @app.get("/hybrid_search")
 def hybrid_search(q: str, city: str, collection_name: str):
     elapsed_time = end_time - start_time
+    return {"result": hybrid_searcher.search(text=q, city=city), "execution_time": elapsed_time}
 @app.get("/")
 def api_home():