dinhquangson committed on
Commit
3dcae65
1 Parent(s): 3d73876

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +17 -2
app.py CHANGED
@@ -139,6 +139,7 @@ async def create_upload_file(text_field: str, file: UploadFile = File(...), ocr:
139
  indexing.add_component("sparse_doc_embedder", FastembedSparseDocumentEmbedder(model="facebook/spar-wiki-bm25-lexmodel-context-encoder"))
140
  indexing.add_component("dense_doc_embedder", FastembedDocumentEmbedder(model="sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2"))
141
  indexing.add_component("writer", DocumentWriter(document_store=document_store, policy=DuplicatePolicy.OVERWRITE))
 
142
  indexing.connect("sparse_doc_embedder", "dense_doc_embedder")
143
  indexing.connect("dense_doc_embedder", "writer")
144
 
@@ -161,11 +162,20 @@ def search(prompt: str):
161
  )
162
  from haystack.components.rankers import TransformersSimilarityRanker
163
  from haystack.components.joiners import DocumentJoiner
164
-
 
 
165
  start_time = time.time()
166
 
167
  # Querying
168
 
 
 
 
 
 
 
 
169
  querying = Pipeline()
170
  querying.add_component("sparse_text_embedder", FastembedSparseTextEmbedder(model="facebook/spar-wiki-bm25-lexmodel-query-encoder'"))
171
  querying.add_component("dense_text_embedder", FastembedTextEmbedder(
@@ -174,15 +184,20 @@ def search(prompt: str):
174
  querying.add_component("retriever", QdrantHybridRetriever(document_store=document_store))
175
  querying.add_component("document_joiner", DocumentJoiner())
176
  querying.add_component("ranker", TransformersSimilarityRanker(model="BAAI/bge-m3"))
 
 
177
  querying.connect("sparse_text_embedder.sparse_embedding", "retriever.query_sparse_embedding")
178
  querying.connect("dense_text_embedder.embedding", "retriever.query_embedding")
179
  querying.connect("retriever", "document_joiner")
180
  querying.connect("document_joiner", "ranker")
 
181
 
182
  results = querying.run(
183
  {"dense_text_embedder": {"text": prompt},
184
  "sparse_text_embedder": {"text": prompt},
185
- "ranker": {"query": prompt}}
 
 
186
  )
187
 
188
 
 
139
  indexing.add_component("sparse_doc_embedder", FastembedSparseDocumentEmbedder(model="facebook/spar-wiki-bm25-lexmodel-context-encoder"))
140
  indexing.add_component("dense_doc_embedder", FastembedDocumentEmbedder(model="sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2"))
141
  indexing.add_component("writer", DocumentWriter(document_store=document_store, policy=DuplicatePolicy.OVERWRITE))
142
+
143
  indexing.connect("sparse_doc_embedder", "dense_doc_embedder")
144
  indexing.connect("dense_doc_embedder", "writer")
145
 
 
162
  )
163
  from haystack.components.rankers import TransformersSimilarityRanker
164
  from haystack.components.joiners import DocumentJoiner
165
+ from haystack.components.generators import OpenAIGenerator
166
+ from haystack.utils import Secret
167
+
168
  start_time = time.time()
169
 
170
  # Querying
171
 
172
+
173
+ generator = OpenAIGenerator(
174
+ api_key=Secret.from_env_var("OCTOAI_TOKEN"),
175
+ api_base_url="https://text.octoai.run/v1",
176
+ model="meta-llama-3-8b-instruct",
177
+ generation_kwargs = {"max_tokens": 512}
178
+ )
179
  querying = Pipeline()
180
  querying.add_component("sparse_text_embedder", FastembedSparseTextEmbedder(model="facebook/spar-wiki-bm25-lexmodel-query-encoder'"))
181
  querying.add_component("dense_text_embedder", FastembedTextEmbedder(
 
184
  querying.add_component("retriever", QdrantHybridRetriever(document_store=document_store))
185
  querying.add_component("document_joiner", DocumentJoiner())
186
  querying.add_component("ranker", TransformersSimilarityRanker(model="BAAI/bge-m3"))
187
+ querying.add_component("llm", generator)
188
+
189
  querying.connect("sparse_text_embedder.sparse_embedding", "retriever.query_sparse_embedding")
190
  querying.connect("dense_text_embedder.embedding", "retriever.query_embedding")
191
  querying.connect("retriever", "document_joiner")
192
  querying.connect("document_joiner", "ranker")
193
+ pipeline.connect("ranker", "llm")
194
 
195
  results = querying.run(
196
  {"dense_text_embedder": {"text": prompt},
197
  "sparse_text_embedder": {"text": prompt},
198
+ "ranker": {"query": prompt},
199
+ "llm": {"query": prompt},
200
+ }
201
  )
202
 
203