Spaces:
Sleeping
Sleeping
dinhquangson
committed on
Commit
•
67cd4b7
1
Parent(s):
cea66bb
Update app.py
Browse files
app.py
CHANGED
@@ -22,6 +22,13 @@ from hybrid_searcher import HybridSearcher
|
|
22 |
|
23 |
app = FastAPI()
|
24 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
25 |
|
26 |
FILEPATH_PATTERN = "structured_data_doc.parquet"
|
27 |
NUM_PROC = os.cpu_count()
|
@@ -117,28 +124,94 @@ async def create_upload_file(text_field: str, file: UploadFile = File(...)):
|
|
117 |
)
|
118 |
return {"filename": file.filename, "message": "Done"}
|
119 |
|
120 |
-
app.
|
121 |
-
|
122 |
-
|
123 |
-
|
124 |
-
|
125 |
-
|
126 |
-
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
127 |
|
|
|
|
|
|
|
|
|
128 |
@app.get("/search")
|
129 |
def search(prompt: str):
|
|
|
|
|
|
|
|
|
130 |
# Let's see what senators are saying about immigration policy
|
131 |
hits = client2.search(
|
132 |
collection_name="law",
|
133 |
query_vector=model.encode(prompt).tolist(),
|
134 |
limit=5
|
135 |
)
|
|
|
136 |
for hit in hits:
|
137 |
print(hit.payload, "score:", hit.score)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
138 |
return hits
|
139 |
|
140 |
@app.get("/download-database/")
|
141 |
async def download_database():
|
|
|
|
|
|
|
|
|
142 |
# Path to the database directory
|
143 |
database_dir = join(os.getcwd(), 'database')
|
144 |
# Path for the zip file
|
@@ -146,6 +219,12 @@ async def download_database():
|
|
146 |
|
147 |
# Create a zip file of the database directory
|
148 |
shutil.make_archive(zip_path.replace('.zip', ''), 'zip', database_dir)
|
|
|
|
|
|
|
|
|
|
|
|
|
149 |
|
150 |
# Return the zip file as a response for download
|
151 |
return FileResponse(zip_path, media_type='application/zip', filename='database.zip')
|
@@ -163,9 +242,7 @@ def neural_search(q: str, city: str, collection_name: str):
|
|
163 |
|
164 |
elapsed_time = end_time - start_time
|
165 |
|
166 |
-
|
167 |
-
|
168 |
-
return {"result": neural_searcher.search(text=q, city=city)}
|
169 |
|
170 |
@app.get("/hybrid_search")
|
171 |
def hybrid_search(q: str, city: str, collection_name: str):
|
@@ -180,9 +257,7 @@ def hybrid_search(q: str, city: str, collection_name: str):
|
|
180 |
|
181 |
elapsed_time = end_time - start_time
|
182 |
|
183 |
-
|
184 |
-
|
185 |
-
return {"result": hybrid_searcher.search(text=q, city=city)}
|
186 |
|
187 |
@app.get("/")
|
188 |
def api_home():
|
|
|
22 |
|
23 |
app = FastAPI()
|
24 |
|
25 |
+
app.add_middleware(
|
26 |
+
CORSMiddleware,
|
27 |
+
allow_origins=["*"],
|
28 |
+
allow_credentials=True,
|
29 |
+
allow_methods=["*"],
|
30 |
+
allow_headers=["*"],
|
31 |
+
)
|
32 |
|
33 |
FILEPATH_PATTERN = "structured_data_doc.parquet"
|
34 |
NUM_PROC = os.cpu_count()
|
|
|
124 |
)
|
125 |
return {"filename": file.filename, "message": "Done"}
|
126 |
|
127 |
+
@app.post("/uploadfile4hypersearch/")
|
128 |
+
async def upload_file_4_hyper_search(collection_name: str, text_field: str, file: UploadFile = File(...)):
|
129 |
+
import time
|
130 |
+
|
131 |
+
start_time = time.time()
|
132 |
+
|
133 |
+
file_savePath = join(temp_path,file.filename)
|
134 |
+
client2.set_model("sentence-transformers/all-MiniLM-L6-v2")
|
135 |
+
|
136 |
+
# comment this line to use dense vectors only
|
137 |
+
client2.set_sparse_model("prithivida/Splade_PP_en_v1")
|
138 |
+
with open(file_savePath,'wb') as f:
|
139 |
+
shutil.copyfileobj(file.file, f)
|
140 |
+
|
141 |
+
print(f"Uploaded complete!")
|
142 |
+
|
143 |
+
client2.recreate_collection(
|
144 |
+
collection_name=collection_name,
|
145 |
+
vectors_config=client2.get_fastembed_vector_params(),
|
146 |
+
|
147 |
+
# comment this line to use dense vectors only
|
148 |
+
sparse_vectors_config=client2.get_fastembed_sparse_vector_params(),
|
149 |
+
)
|
150 |
+
|
151 |
+
print(f"The collection is created complete!")
|
152 |
+
|
153 |
+
# Here you can save the file and do other operations as needed
|
154 |
+
if '.json' in file_savePath:
|
155 |
+
import json
|
156 |
+
metadata = []
|
157 |
+
documents = []
|
158 |
+
|
159 |
+
with open(file_savePath) as fd:
|
160 |
+
for line in fd:
|
161 |
+
obj = json.loads(line)
|
162 |
+
documents.append(obj.pop(text_field))
|
163 |
+
metadata.append(obj)
|
164 |
+
|
165 |
+
print(f"The documents and metadata is parsed complete!")
|
166 |
+
|
167 |
+
client2.add(
|
168 |
+
collection_name=collection_name,
|
169 |
+
documents=documents,
|
170 |
+
metadata=metadata,
|
171 |
+
parallel=0, # Use all available CPU cores to encode data.
|
172 |
+
# Requires wrapping code into if __name__ == '__main__' block
|
173 |
+
)
|
174 |
+
|
175 |
+
print(f"The documents and metadata is upserted complete!")
|
176 |
+
else:
|
177 |
+
raise NotImplementedError("This feature is not supported yet")
|
178 |
+
|
179 |
+
end_time = time.time()
|
180 |
|
181 |
+
elapsed_time = end_time - start_time
|
182 |
+
|
183 |
+
return {"filename": file.filename, "message": "Done", "execution_time": elapsed_time}
|
184 |
+
|
185 |
@app.get("/search")
|
186 |
def search(prompt: str):
|
187 |
+
import time
|
188 |
+
|
189 |
+
start_time = time.time()
|
190 |
+
|
191 |
# Let's see what senators are saying about immigration policy
|
192 |
hits = client2.search(
|
193 |
collection_name="law",
|
194 |
query_vector=model.encode(prompt).tolist(),
|
195 |
limit=5
|
196 |
)
|
197 |
+
|
198 |
for hit in hits:
|
199 |
print(hit.payload, "score:", hit.score)
|
200 |
+
|
201 |
+
end_time = time.time()
|
202 |
+
|
203 |
+
elapsed_time = end_time - start_time
|
204 |
+
|
205 |
+
print(f"Execution time: {elapsed_time:.6f} seconds")
|
206 |
+
|
207 |
return hits
|
208 |
|
209 |
@app.get("/download-database/")
|
210 |
async def download_database():
|
211 |
+
import time
|
212 |
+
|
213 |
+
start_time = time.time()
|
214 |
+
|
215 |
# Path to the database directory
|
216 |
database_dir = join(os.getcwd(), 'database')
|
217 |
# Path for the zip file
|
|
|
219 |
|
220 |
# Create a zip file of the database directory
|
221 |
shutil.make_archive(zip_path.replace('.zip', ''), 'zip', database_dir)
|
222 |
+
|
223 |
+
end_time = time.time()
|
224 |
+
|
225 |
+
elapsed_time = end_time - start_time
|
226 |
+
|
227 |
+
print(f"Execution time: {elapsed_time:.6f} seconds")
|
228 |
|
229 |
# Return the zip file as a response for download
|
230 |
return FileResponse(zip_path, media_type='application/zip', filename='database.zip')
|
|
|
242 |
|
243 |
elapsed_time = end_time - start_time
|
244 |
|
245 |
+
return {"result": neural_searcher.search(text=q, city=city), "execution_time": elapsed_time}
|
|
|
|
|
246 |
|
247 |
@app.get("/hybrid_search")
|
248 |
def hybrid_search(q: str, city: str, collection_name: str):
|
|
|
257 |
|
258 |
elapsed_time = end_time - start_time
|
259 |
|
260 |
+
return {"result": hybrid_searcher.search(text=q, city=city), "execution_time": elapsed_time}
|
|
|
|
|
261 |
|
262 |
@app.get("/")
|
263 |
def api_home():
|