Spaces:
Sleeping
Sleeping
dinhquangson
committed on
Commit
•
3dcae65
1
Parent(s):
3d73876
Update app.py
Browse files
app.py
CHANGED
@@ -139,6 +139,7 @@ async def create_upload_file(text_field: str, file: UploadFile = File(...), ocr:
|
|
139 |
indexing.add_component("sparse_doc_embedder", FastembedSparseDocumentEmbedder(model="facebook/spar-wiki-bm25-lexmodel-context-encoder"))
|
140 |
indexing.add_component("dense_doc_embedder", FastembedDocumentEmbedder(model="sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2"))
|
141 |
indexing.add_component("writer", DocumentWriter(document_store=document_store, policy=DuplicatePolicy.OVERWRITE))
|
|
|
142 |
indexing.connect("sparse_doc_embedder", "dense_doc_embedder")
|
143 |
indexing.connect("dense_doc_embedder", "writer")
|
144 |
|
@@ -161,11 +162,20 @@ def search(prompt: str):
|
|
161 |
)
|
162 |
from haystack.components.rankers import TransformersSimilarityRanker
|
163 |
from haystack.components.joiners import DocumentJoiner
|
164 |
-
|
|
|
|
|
165 |
start_time = time.time()
|
166 |
|
167 |
# Querying
|
168 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
169 |
querying = Pipeline()
|
170 |
querying.add_component("sparse_text_embedder", FastembedSparseTextEmbedder(model="facebook/spar-wiki-bm25-lexmodel-query-encoder'"))
|
171 |
querying.add_component("dense_text_embedder", FastembedTextEmbedder(
|
@@ -174,15 +184,20 @@ def search(prompt: str):
|
|
174 |
querying.add_component("retriever", QdrantHybridRetriever(document_store=document_store))
|
175 |
querying.add_component("document_joiner", DocumentJoiner())
|
176 |
querying.add_component("ranker", TransformersSimilarityRanker(model="BAAI/bge-m3"))
|
|
|
|
|
177 |
querying.connect("sparse_text_embedder.sparse_embedding", "retriever.query_sparse_embedding")
|
178 |
querying.connect("dense_text_embedder.embedding", "retriever.query_embedding")
|
179 |
querying.connect("retriever", "document_joiner")
|
180 |
querying.connect("document_joiner", "ranker")
|
|
|
181 |
|
182 |
results = querying.run(
|
183 |
{"dense_text_embedder": {"text": prompt},
|
184 |
"sparse_text_embedder": {"text": prompt},
|
185 |
-
"ranker": {"query": prompt}
|
|
|
|
|
186 |
)
|
187 |
|
188 |
|
|
|
139 |
indexing.add_component("sparse_doc_embedder", FastembedSparseDocumentEmbedder(model="facebook/spar-wiki-bm25-lexmodel-context-encoder"))
|
140 |
indexing.add_component("dense_doc_embedder", FastembedDocumentEmbedder(model="sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2"))
|
141 |
indexing.add_component("writer", DocumentWriter(document_store=document_store, policy=DuplicatePolicy.OVERWRITE))
|
142 |
+
|
143 |
indexing.connect("sparse_doc_embedder", "dense_doc_embedder")
|
144 |
indexing.connect("dense_doc_embedder", "writer")
|
145 |
|
|
|
162 |
)
|
163 |
from haystack.components.rankers import TransformersSimilarityRanker
|
164 |
from haystack.components.joiners import DocumentJoiner
|
165 |
+
from haystack.components.generators import OpenAIGenerator
|
166 |
+
from haystack.utils import Secret
|
167 |
+
|
168 |
start_time = time.time()
|
169 |
|
170 |
# Querying
|
171 |
|
172 |
+
|
173 |
+
generator = OpenAIGenerator(
|
174 |
+
api_key=Secret.from_env_var("OCTOAI_TOKEN"),
|
175 |
+
api_base_url="https://text.octoai.run/v1",
|
176 |
+
model="meta-llama-3-8b-instruct",
|
177 |
+
generation_kwargs = {"max_tokens": 512}
|
178 |
+
)
|
179 |
querying = Pipeline()
|
180 |
querying.add_component("sparse_text_embedder", FastembedSparseTextEmbedder(model="facebook/spar-wiki-bm25-lexmodel-query-encoder'"))
|
181 |
querying.add_component("dense_text_embedder", FastembedTextEmbedder(
|
|
|
184 |
querying.add_component("retriever", QdrantHybridRetriever(document_store=document_store))
|
185 |
querying.add_component("document_joiner", DocumentJoiner())
|
186 |
querying.add_component("ranker", TransformersSimilarityRanker(model="BAAI/bge-m3"))
|
187 |
+
querying.add_component("llm", generator)
|
188 |
+
|
189 |
querying.connect("sparse_text_embedder.sparse_embedding", "retriever.query_sparse_embedding")
|
190 |
querying.connect("dense_text_embedder.embedding", "retriever.query_embedding")
|
191 |
querying.connect("retriever", "document_joiner")
|
192 |
querying.connect("document_joiner", "ranker")
|
193 |
+
pipeline.connect("ranker", "llm")
|
194 |
|
195 |
results = querying.run(
|
196 |
{"dense_text_embedder": {"text": prompt},
|
197 |
"sparse_text_embedder": {"text": prompt},
|
198 |
+
"ranker": {"query": prompt},
|
199 |
+
"llm": {"query": prompt},
|
200 |
+
}
|
201 |
)
|
202 |
|
203 |
|