Spaces:
Sleeping
Sleeping
dinhquangson
committed on
Commit
•
4520e07
1
Parent(s):
9f7a757
Update app.py
Browse files
app.py
CHANGED
@@ -136,14 +136,30 @@ async def create_upload_file(text_field: str, file: UploadFile = File(...), ocr:
|
|
136 |
|
137 |
|
138 |
indexing = Pipeline()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
139 |
indexing.add_component("sparse_doc_embedder", FastembedSparseDocumentEmbedder(model="Qdrant/bm42-all-minilm-l6-v2-attentions"))
|
140 |
indexing.add_component("dense_doc_embedder", FastembedDocumentEmbedder(model="sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2"))
|
141 |
indexing.add_component("writer", DocumentWriter(document_store=document_store, policy=DuplicatePolicy.OVERWRITE))
|
|
|
142 |
|
|
|
|
|
|
|
|
|
143 |
indexing.connect("sparse_doc_embedder", "dense_doc_embedder")
|
144 |
indexing.connect("dense_doc_embedder", "writer")
|
145 |
|
146 |
-
indexing.run({"
|
147 |
end_time = time.time()
|
148 |
|
149 |
elapsed_time = end_time - start_time
|
@@ -213,7 +229,7 @@ def search(prompt: str):
|
|
213 |
{"dense_text_embedder": {"text": prompt},
|
214 |
"sparse_text_embedder": {"text": prompt},
|
215 |
"ranker": {"query": prompt},
|
216 |
-
"prompt_builder": {"
|
217 |
"llm": {"query": prompt},
|
218 |
}
|
219 |
)
|
|
|
136 |
|
137 |
|
138 |
indexing = Pipeline()
|
139 |
+
|
140 |
+
document_joiner = DocumentJoiner()
|
141 |
+
|
142 |
+
|
143 |
+
document_cleaner = DocumentCleaner()
|
144 |
+
|
145 |
+
document_splitter = DocumentSplitter(split_by="word", split_length=1000, split_overlap=0)
|
146 |
+
|
147 |
+
indexing.add_component("document_joiner", document_joiner)
|
148 |
+
indexing.add_component("document_cleaner", document_cleaner)
|
149 |
+
indexing.add_component("document_splitter", document_splitter)
|
150 |
indexing.add_component("sparse_doc_embedder", FastembedSparseDocumentEmbedder(model="Qdrant/bm42-all-minilm-l6-v2-attentions"))
|
151 |
indexing.add_component("dense_doc_embedder", FastembedDocumentEmbedder(model="sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2"))
|
152 |
indexing.add_component("writer", DocumentWriter(document_store=document_store, policy=DuplicatePolicy.OVERWRITE))
|
153 |
+
|
154 |
|
155 |
+
indexing.connect("document_joiner", "document_cleaner")
|
156 |
+
indexing.connect("document_cleaner", "document_splitter")
|
157 |
+
indexing.connect("document_splitter", "document_embedder")
|
158 |
+
|
159 |
indexing.connect("sparse_doc_embedder", "dense_doc_embedder")
|
160 |
indexing.connect("dense_doc_embedder", "writer")
|
161 |
|
162 |
+
indexing.run({"document_joiner": {"documents": documents}})
|
163 |
end_time = time.time()
|
164 |
|
165 |
elapsed_time = end_time - start_time
|
|
|
229 |
{"dense_text_embedder": {"text": prompt},
|
230 |
"sparse_text_embedder": {"text": prompt},
|
231 |
"ranker": {"query": prompt},
|
232 |
+
"prompt_builder": {"query": prompt},
|
233 |
"llm": {"query": prompt},
|
234 |
}
|
235 |
)
|