using prepared doc_store
- .gitignore +1 -0
- app.py +5 -79
- doc_store.zip +3 -0
- requirements.txt +1 -1
.gitignore
CHANGED
@@ -1 +1,2 @@
 __pycache__/
+data/
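(The new data/ ignore entry presumably covers files unpacked from doc_store.zip at app startup; the archive's contents aren't shown in this diff, so that link is an assumption.)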
app.py
CHANGED
@@ -1,21 +1,14 @@
+from zipfile import ZipFile
+
 import os
 
 import streamlit as st
-from haystack.document_stores import FAISSDocumentStore
-from haystack.utils import convert_files_to_docs, fetch_archive_from_http, clean_wiki_text
-from haystack.nodes import DensePassageRetriever
-from haystack.utils import print_documents, print_answers
-from haystack.pipelines import DocumentSearchPipeline
-from haystack.nodes import Seq2SeqGenerator
-from haystack.pipelines import GenerativeQAPipeline
-from haystack.utils import convert_files_to_docs, clean_wiki_text
 
 from lfqa import prepare, answer
 
 
-
-
-faiss_file = './faiss_index.faiss'
+with ZipFile("doc_store.zip","r") as zip_ref:
+    zip_ref.extractall('.')
 
 # Sliders
 DEFAULT_DOCS_FROM_RETRIEVER = int(os.getenv("DEFAULT_DOCS_FROM_RETRIEVER", "3"))
@@ -130,72 +123,5 @@ def main(pipe):
         st.write(st.session_state.results['answers'][0].meta['content'][i])
         st.markdown('---\n')
 
-
-# if not os.path.exists(sql_file) or not os.path.exists(faiss_file):
-
-module_dir = os.path.dirname(os.path.abspath(__file__))
-os.chdir(module_dir)
-
-
-
-# %% Download/Load Docs
-
-# Get some files that we want to use
-# s3_url = "https://s3.eu-central-1.amazonaws.com/deepset.ai-farm-qa/datasets/documents/wiki_gameofthrones_txt12.zip"
-# fetch_archive_from_http(url=s3_url, output_dir=doc_dir)
-
-print('---> Loading Documents ...')
-
-# Convert files to docs + cleaning
-docs = convert_files_to_docs(dir_path=doc_dir,
-                             clean_func=clean_wiki_text,
-                             split_paragraphs=True)
-
-# %% Document Store
-
-print('---> Creating document store ...')
-# # custom path for sql file
-# document_store = FAISSDocumentStore(embedding_dim=128,
-#                                     faiss_index_factory_str="Flat",
-#                                     sql_url=f"sqlite:///{sql_file}")
-
-# In memory database
-document_store = FAISSDocumentStore(embedding_dim=128,
-                                    faiss_index_factory_str="Flat",
-                                    sql_url=f"sqlite://")
-
-# # default path for sql file
-# document_store = FAISSDocumentStore(embedding_dim=128,
-#                                     faiss_index_factory_str="Flat")
-
-
-
-# %% Retriever (DPR)
-
-print('---> Initializing retriever ...')
-retriever = DensePassageRetriever(
-    document_store=document_store,
-    query_embedding_model="vblagoje/dpr-question_encoder-single-lfqa-wiki",
-    passage_embedding_model="vblagoje/dpr-ctx_encoder-single-lfqa-wiki",
-    use_gpu=False
-)
-
-# %% Create Embeddings and save results
-document_store.update_embeddings(retriever)
-
-print('---> Saving results ...')
-# update db
-document_store.write_documents(docs)
-# save faiss file
-document_store.save(faiss_file)
-
-print('Done!')
-
-
-# %% ------------------------------------------- Main App
-
-generator = Seq2SeqGenerator(model_name_or_path="vblagoje/bart_lfqa", use_gpu=False)
-
-pipe = GenerativeQAPipeline(generator, retriever)
-# pipe = prepare()
+pipe = prepare()
 main(pipe)
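Aside: the indexing logic removed above still has to run somewhere, once, offline, to produce doc_store.zip. Below is a minimal sketch of such a build script, reassembled from the removed lines. The script name, the data/ source directory, and the file-backed SQLite path are assumptions; note also that the removed code used an in-memory sqlite:// URL (which cannot persist the documents alongside the saved index) and called update_embeddings() before write_documents() (which embeds an empty store), so both are corrected here.

# prepare_doc_store.py -- hypothetical one-off build script (name assumed)
from zipfile import ZipFile

from haystack.document_stores import FAISSDocumentStore
from haystack.nodes import DensePassageRetriever
from haystack.utils import convert_files_to_docs, clean_wiki_text

doc_dir = "data"                      # assumed location of the source text files
sql_file = "faiss_document_store.db"  # file-backed DB so the documents persist
faiss_file = "faiss_index.faiss"

# Convert raw files to Haystack documents, cleaning wiki markup.
docs = convert_files_to_docs(dir_path=doc_dir,
                             clean_func=clean_wiki_text,
                             split_paragraphs=True)

document_store = FAISSDocumentStore(embedding_dim=128,
                                    faiss_index_factory_str="Flat",
                                    sql_url=f"sqlite:///{sql_file}")

retriever = DensePassageRetriever(
    document_store=document_store,
    query_embedding_model="vblagoje/dpr-question_encoder-single-lfqa-wiki",
    passage_embedding_model="vblagoje/dpr-ctx_encoder-single-lfqa-wiki",
    use_gpu=False,
)

# Write the documents first, then embed them, then save the FAISS index
# together with its config so the store can be reloaded later.
document_store.write_documents(docs)
document_store.update_embeddings(retriever)
document_store.save(index_path=faiss_file, config_path=f"{faiss_file}.json")

# Bundle everything the app extracts at startup.
with ZipFile("doc_store.zip", "w") as zf:
    zf.write(faiss_file)
    zf.write(f"{faiss_file}.json")
    zf.write(sql_file)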
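On the app side, lfqa.prepare() (not part of this diff) presumably loads that saved index instead of rebuilding it. A hypothetical sketch under that assumption, reusing the model names from the removed code:

# Hypothetical sketch of lfqa.prepare(); the real lfqa.py is not shown in this commit.
from haystack.document_stores import FAISSDocumentStore
from haystack.nodes import DensePassageRetriever, Seq2SeqGenerator
from haystack.pipelines import GenerativeQAPipeline

def prepare():
    # load() restores the FAISS index and reconnects the SQLite document DB
    # recorded in the saved config.
    document_store = FAISSDocumentStore.load(index_path="faiss_index.faiss",
                                             config_path="faiss_index.faiss.json")
    retriever = DensePassageRetriever(
        document_store=document_store,
        query_embedding_model="vblagoje/dpr-question_encoder-single-lfqa-wiki",
        passage_embedding_model="vblagoje/dpr-ctx_encoder-single-lfqa-wiki",
        use_gpu=False,
    )
    generator = Seq2SeqGenerator(model_name_or_path="vblagoje/bart_lfqa", use_gpu=False)
    return GenerativeQAPipeline(generator, retriever)

Net effect of the commit: document conversion and DPR embedding move out of the Streamlit startup path, and the app only unzips the prebuilt store and wires up the pipeline.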
doc_store.zip
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f6c25c0f4d55c7d80aa4525d619a11531d9a5c316d5022cb8927bdd19c635747
+size 2589071
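This is a Git LFS pointer, not the archive itself: the actual ~2.5 MB doc_store.zip lives in LFS storage and is materialized on checkout, so the diff records only its hash and size.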
requirements.txt
CHANGED
@@ -1,3 +1,3 @@
-farm-haystack[
+farm-haystack[ocr,faiss]
 streamlit >= 1.9.0, < 2
 st-annotated-text >= 2.0.0, < 3
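Of the two extras, faiss is the relevant one here: it presumably supplies the FAISS bindings lfqa needs to load the prebuilt index, now that app.py itself no longer imports Haystack.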