shahabkahn committed on
Commit
5bbafa5
·
verified ·
1 Parent(s): ebf631c

Update db.py

Browse files
Files changed (1) hide show
  1. db.py +28 -27
db.py CHANGED
@@ -1,27 +1,28 @@
1
- # ingest.py
2
- from langchain_community.embeddings import HuggingFaceEmbeddings
3
- from langchain_community.vectorstores import FAISS
4
- from langchain_community.document_loaders import PyPDFLoader, DirectoryLoader
5
- from langchain.text_splitter import RecursiveCharacterTextSplitter
6
-
7
- DATA_PATH = "data/"
8
- DB_FAISS_PATH = "vectorstore/db_faiss"
9
-
10
- def create_vector_db():
11
- loader = DirectoryLoader(
12
- DATA_PATH, glob="*.pdf", loader_cls=PyPDFLoader
13
- )
14
-
15
- documents = loader.load()
16
- text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
17
- texts = text_splitter.split_documents(documents)
18
-
19
- embeddings = HuggingFaceEmbeddings(
20
- model_name="sentence-transformers/all-MiniLM-L6-v2", model_kwargs={"device": "cpu"}
21
- )
22
-
23
- db = FAISS.from_documents(texts, embeddings)
24
- db.save_local(DB_FAISS_PATH)
25
-
26
- if __name__ == "__main__":
27
- create_vector_db()
 
 
1
+ from langchain_community.embeddings import HuggingFaceEmbeddings
2
+ from langchain_community.vectorstores import FAISS
3
+ from langchain_community.document_loaders import PyPDFLoader, DirectoryLoader
4
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
5
+ from dotenv import load_dotenv
6
+ import os
7
+ import logging
8
+
9
+
10
+ load_dotenv()
11
+
12
+ logging.basicConfig(level=logging.INFO)
13
+ logger = logging.getLogger(__name__)
14
+
15
+ def load_embeddings():
16
+ return HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2", model_kwargs={"device": "cpu"})
17
+
18
+ def load_vector_database(embeddings):
19
+ try:
20
+ db = FAISS.load_local("vectorstore/db_faiss", embeddings, allow_dangerous_deserialization=True)
21
+ logger.info("Vector database loaded successfully!")
22
+ return db
23
+ except Exception as e:
24
+ logger.error(f"Failed to load vector database: {e}")
25
+ raise e
26
+
27
+ embeddings = load_embeddings()
28
+ vector_db = load_vector_database(embeddings)