Spaces:
Runtime error
Runtime error
sanjeevl10
committed on
Commit
•
8b05ebc
1
Parent(s):
be508b8
removed unused versions, cleaned up Docker file
Browse files
- app.py +9 -11
- requirements.txt +2 -1
app.py
CHANGED
@@ -5,7 +5,7 @@ from operator import itemgetter
|
|
5 |
from langchain_huggingface import HuggingFaceEndpoint
|
6 |
from langchain_community.document_loaders import TextLoader
|
7 |
from langchain_text_splitters import RecursiveCharacterTextSplitter
|
8 |
-
from langchain.document_loaders import
|
9 |
from langchain_huggingface import HuggingFaceEndpointEmbeddings
|
10 |
from langchain_core.prompts import PromptTemplate
|
11 |
from langchain.schema.runnable.config import RunnableConfig
|
@@ -38,7 +38,7 @@ HF_TOKEN = os.environ["HF_TOKEN"]
|
|
38 |
4. Index Files if they do not exist, otherwise load the vectorstore
|
39 |
"""
|
40 |
#Load the Pdf Documents from airbnb-10k
|
41 |
-
documents =
|
42 |
|
43 |
### 2. CREATE TEXT SPLITTER AND SPLIT DOCUMENTS
|
44 |
text_splitter = RecursiveCharacterTextSplitter(
|
@@ -56,9 +56,7 @@ hf_embeddings = HuggingFaceEndpointEmbeddings(
|
|
56 |
huggingfacehub_api_token=HF_TOKEN,
|
57 |
)
|
58 |
|
59 |
-
|
60 |
-
vectordbfile = os.path.join(vectordbdir, "/vectorstore")
|
61 |
-
|
62 |
|
63 |
if os.path.exists(vectordbfile):
|
64 |
vectorstore = Qdrant.from_existing_collection(
|
@@ -70,12 +68,12 @@ if os.path.exists(vectordbfile):
|
|
70 |
else:
|
71 |
print("Indexing Files")
|
72 |
os.makedirs(vectordbfile, exist_ok=True)
|
73 |
-
|
74 |
-
|
75 |
-
|
76 |
-
|
77 |
-
|
78 |
-
|
79 |
hf_retriever = vectorstore.as_retriever()
|
80 |
|
81 |
### 4. INDEX FILES
|
|
|
5 |
from langchain_huggingface import HuggingFaceEndpoint
|
6 |
from langchain_community.document_loaders import TextLoader
|
7 |
from langchain_text_splitters import RecursiveCharacterTextSplitter
|
8 |
+
from langchain.document_loaders import UnstructuredPDFLoader
|
9 |
from langchain_huggingface import HuggingFaceEndpointEmbeddings
|
10 |
from langchain_core.prompts import PromptTemplate
|
11 |
from langchain.schema.runnable.config import RunnableConfig
|
|
|
38 |
4. Index Files if they do not exist, otherwise load the vectorstore
|
39 |
"""
|
40 |
#Load the Pdf Documents from airbnb-10k
|
41 |
+
documents = UnstructuredPDFLoader("data/airbnb-10k.pdf").load()
|
42 |
|
43 |
### 2. CREATE TEXT SPLITTER AND SPLIT DOCUMENTS
|
44 |
text_splitter = RecursiveCharacterTextSplitter(
|
|
|
56 |
huggingfacehub_api_token=HF_TOKEN,
|
57 |
)
|
58 |
|
59 |
+
vectordbfile = "./data/vectorstore"
|
|
|
|
|
60 |
|
61 |
if os.path.exists(vectordbfile):
|
62 |
vectorstore = Qdrant.from_existing_collection(
|
|
|
68 |
else:
|
69 |
print("Indexing Files")
|
70 |
os.makedirs(vectordbfile, exist_ok=True)
|
71 |
+
vectorstore = Qdrant.from_documents(
|
72 |
+
documents=split_documents,
|
73 |
+
embedding=hf_embeddings,
|
74 |
+
path=vectordbfile,
|
75 |
+
collection_name="airbnb-10k",
|
76 |
+
)
|
77 |
hf_retriever = vectorstore.as_retriever()
|
78 |
|
79 |
### 4. INDEX FILES
|
requirements.txt
CHANGED
@@ -6,4 +6,5 @@ langchain_huggingface==0.0.3
|
|
6 |
langchain_text_splitters==0.2.1
|
7 |
python-dotenv==1.0.1
|
8 |
pymupdf==1.24.5
|
9 |
-
qdrant-client==1.9.2
|
|
|
|
6 |
langchain_text_splitters==0.2.1
|
7 |
python-dotenv==1.0.1
|
8 |
pymupdf==1.24.5
|
9 |
+
qdrant-client==1.9.2
|
10 |
+
unstructured==0.5.6
|