|
|
|
|
|
""" |
|
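Index the Game of Thrones wiki text files into a FAISS document store
using DPR encoders, and save the resulting index to disk.

Created on Tue Dec 27 14:50:23 2022

@author: saeed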
|
""" |
|
|
|
import os |
|
from haystack.document_stores import FAISSDocumentStore |
|
from haystack.nodes import DensePassageRetriever |
|
from haystack.utils import convert_files_to_docs, clean_wiki_text |
|
|
|
|
|
# Work from this file's directory so the relative input/output paths below
# resolve the same way no matter where the script is launched from.
module_dir = os.path.dirname(os.path.abspath(__file__))
os.chdir(module_dir)

# Input corpus directory and the two output artifacts (SQLite database and
# FAISS index file), all relative to module_dir.
doc_dir = "data/wiki_gameofthrones_txt12"
sql_file = 'faiss_doc_store.db'
faiss_file = 'faiss_index.faiss'

print('---> Loading Documents ...')
docs = convert_files_to_docs(
    dir_path=doc_dir,
    clean_func=clean_wiki_text,
    split_paragraphs=True,
)

print('---> Creating document store ...')
# NOTE(review): embedding_dim=128 presumably matches the output size of the
# vblagoje LFQA DPR encoders configured below -- confirm against the model card.
# NOTE(review): sql_file is not removed between runs; a database left over
# from an earlier run may clash with a freshly built index -- confirm.
document_store = FAISSDocumentStore(
    embedding_dim=128,
    faiss_index_factory_str="Flat",
    sql_url=f"sqlite:///{sql_file}",
)

# The documents have to be in the store BEFORE embeddings are computed:
# update_embeddings() works on documents already written to the store, so
# calling it first (as the script originally did) embedded nothing and the
# saved index ended up without vectors for the corpus.
document_store.write_documents(docs)

print('---> Initializing retriever ...')
retriever = DensePassageRetriever(
    document_store=document_store,
    query_embedding_model="vblagoje/dpr-question_encoder-single-lfqa-wiki",
    passage_embedding_model="vblagoje/dpr-ctx_encoder-single-lfqa-wiki",
    use_gpu=True,
)

# Encode every stored document with the passage encoder and add the vectors
# to the FAISS index.
document_store.update_embeddings(retriever)

print('---> Saving results ...')
# Persist the FAISS index to faiss_file.
document_store.save(faiss_file)

print('Done!')
|
|