dinhquangson committed on
Commit
2b22ee8
1 Parent(s): a273ba0

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +9 -6
app.py CHANGED
@@ -185,7 +185,6 @@ def search(prompt: str):
185
  from haystack.components.generators import OpenAIGenerator
186
  from haystack.utils import Secret
187
  from haystack.components.builders import PromptBuilder
188
- from QueryMetadataExtractor import QueryMetadataExtractor
189
 
190
  start_time = time.time()
191
 
@@ -211,14 +210,12 @@ def search(prompt: str):
211
  model="meta-llama-3-8b-instruct",
212
  generation_kwargs = {"max_tokens": 512}
213
  )
214
- metadata_extractor = QueryMetadataExtractor()
215
 
216
  querying = Pipeline()
217
  querying.add_component("sparse_text_embedder", FastembedSparseTextEmbedder(model="Qdrant/bm42-all-minilm-l6-v2-attentions"))
218
  querying.add_component("dense_text_embedder", FastembedTextEmbedder(
219
  model="sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2", prefix="Đại diện cho câu này để tìm kiếm các đoạn văn có liên quan: ")
220
  )
221
- querying.add_component(instance=metadata_extractor, name="metadata_extractor")
222
  querying.add_component("retriever", QdrantHybridRetriever(document_store=document_store))
223
  querying.add_component("document_joiner", DocumentJoiner())
224
  querying.add_component("ranker", TransformersSimilarityRanker(model="BAAI/bge-m3"))
@@ -227,18 +224,15 @@ def search(prompt: str):
227
 
228
  querying.connect("sparse_text_embedder.sparse_embedding", "retriever.query_sparse_embedding")
229
  querying.connect("dense_text_embedder.embedding", "retriever.query_embedding")
230
- querying.connect("metadata_extractor.filters", "retriever.filters")
231
  querying.connect("retriever", "document_joiner")
232
  querying.connect("document_joiner", "ranker")
233
  querying.connect("ranker.documents", "prompt_builder.documents")
234
  querying.connect("prompt_builder", "llm")
235
  querying.debug=True
236
- metadata_fields = {"publish_date", "publisher", "document_type"}
237
  results = querying.run(
238
  {
239
  "dense_text_embedder": {"text": prompt},
240
  "sparse_text_embedder": {"text": prompt},
241
- "metadata_extractor": {"query": prompt, "metadata_fields": metadata_fields},
242
  "ranker": {"query": prompt},
243
  "prompt_builder": {"question": prompt}
244
  }
@@ -282,6 +276,15 @@ def truncate_text(text: str) -> str:
282
  else:
283
  return text[:3000]
284
 
 
 
 
 
 
 
 
 
 
285
  @app.post("/pdf2text/")
286
  async def convert_upload_file(file: UploadFile = File(...)):
287
  import pytesseract
 
185
  from haystack.components.generators import OpenAIGenerator
186
  from haystack.utils import Secret
187
  from haystack.components.builders import PromptBuilder
 
188
 
189
  start_time = time.time()
190
 
 
210
  model="meta-llama-3-8b-instruct",
211
  generation_kwargs = {"max_tokens": 512}
212
  )
 
213
 
214
  querying = Pipeline()
215
  querying.add_component("sparse_text_embedder", FastembedSparseTextEmbedder(model="Qdrant/bm42-all-minilm-l6-v2-attentions"))
216
  querying.add_component("dense_text_embedder", FastembedTextEmbedder(
217
  model="sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2", prefix="Đại diện cho câu này để tìm kiếm các đoạn văn có liên quan: ")
218
  )
 
219
  querying.add_component("retriever", QdrantHybridRetriever(document_store=document_store))
220
  querying.add_component("document_joiner", DocumentJoiner())
221
  querying.add_component("ranker", TransformersSimilarityRanker(model="BAAI/bge-m3"))
 
224
 
225
  querying.connect("sparse_text_embedder.sparse_embedding", "retriever.query_sparse_embedding")
226
  querying.connect("dense_text_embedder.embedding", "retriever.query_embedding")
 
227
  querying.connect("retriever", "document_joiner")
228
  querying.connect("document_joiner", "ranker")
229
  querying.connect("ranker.documents", "prompt_builder.documents")
230
  querying.connect("prompt_builder", "llm")
231
  querying.debug=True
 
232
  results = querying.run(
233
  {
234
  "dense_text_embedder": {"text": prompt},
235
  "sparse_text_embedder": {"text": prompt},
 
236
  "ranker": {"query": prompt},
237
  "prompt_builder": {"question": prompt}
238
  }
 
276
  else:
277
  return text[:3000]
278
 
279
+ @app.post("/query2metadata/")
280
+ async def extract_metadata_from_query(query: str):
281
+ from QueryMetadataExtractor import QueryMetadataExtractor
282
+
283
+ extractor = QueryMetadataExtractor()
284
+ metadata_fields = {"publisher", "publish_date", "document_type"}
285
+
286
+ result = extractor.run(query, metadata_fields)
287
+
288
  @app.post("/pdf2text/")
289
  async def convert_upload_file(file: UploadFile = File(...)):
290
  import pytesseract