Spaces:
Runtime error
Runtime error
sanjeevl10
committed on
Commit
•
d154644
1
Parent(s):
cb19fde
load the airbnb llm
Browse files- app.py +10 -9
- data/airbnb-10k.pdf +0 -0
app.py
CHANGED
@@ -6,12 +6,10 @@ from langchain_huggingface import HuggingFaceEndpoint
|
|
6 |
from langchain_community.document_loaders import TextLoader
|
7 |
from langchain_text_splitters import RecursiveCharacterTextSplitter
|
8 |
from langchain_community.vectorstores import FAISS
|
|
|
9 |
from langchain_huggingface import HuggingFaceEndpointEmbeddings
|
10 |
from langchain_core.prompts import PromptTemplate
|
11 |
-
from langchain.schema.output_parser import StrOutputParser
|
12 |
-
from langchain.schema.runnable import RunnablePassthrough
|
13 |
from langchain.schema.runnable.config import RunnableConfig
|
14 |
-
from pathlib import Path
|
15 |
|
16 |
# GLOBAL SCOPE - ENTIRE APPLICATION HAS ACCESS TO VALUES SET IN THIS SCOPE #
|
17 |
# ---- ENV VARIABLES ---- #
|
@@ -38,10 +36,13 @@ HF_TOKEN = os.environ["HF_TOKEN"]
|
|
38 |
3. Load HuggingFace Embeddings (remember to use the URL we set above)
|
39 |
4. Index Files if they do not exist, otherwise load the vectorstore
|
40 |
"""
|
41 |
-
|
42 |
-
|
43 |
-
|
44 |
-
|
|
|
|
|
|
|
45 |
|
46 |
### 2. CREATE TEXT SPLITTER AND SPLIT DOCUMENTS
|
47 |
text_splitter = RecursiveCharacterTextSplitter(
|
@@ -131,10 +132,10 @@ def rename(original_author: str):
|
|
131 |
"""
|
132 |
This function can be used to rename the 'author' of a message.
|
133 |
|
134 |
-
In this case, we're overriding the 'Assistant' author to be '
|
135 |
"""
|
136 |
rename_dict = {
|
137 |
-
"Assistant" : "
|
138 |
}
|
139 |
return rename_dict.get(original_author, original_author)
|
140 |
|
|
|
6 |
from langchain_community.document_loaders import TextLoader
|
7 |
from langchain_text_splitters import RecursiveCharacterTextSplitter
|
8 |
from langchain_community.vectorstores import FAISS
|
9 |
+
from langchain.document_loaders import PyMuPDFLoader
|
10 |
from langchain_huggingface import HuggingFaceEndpointEmbeddings
|
11 |
from langchain_core.prompts import PromptTemplate
|
|
|
|
|
12 |
from langchain.schema.runnable.config import RunnableConfig
|
|
|
13 |
|
14 |
# GLOBAL SCOPE - ENTIRE APPLICATION HAS ACCESS TO VALUES SET IN THIS SCOPE #
|
15 |
# ---- ENV VARIABLES ---- #
|
|
|
36 |
3. Load HuggingFace Embeddings (remember to use the URL we set above)
|
37 |
4. Index Files if they do not exist, otherwise load the vectorstore
|
38 |
"""
|
39 |
+
# Load the Airbnb 10-K PDF from the data folder
|
40 |
+
def load_pdfdocuments(self=None, path: str = "data/airbnb-10k.pdf"):
    """Load a PDF with PyMuPDFLoader and return the loaded documents.

    Args:
        self: Ignored. Kept (with a default) only so existing positional
            calls of the form ``load_pdfdocuments(obj, path)`` keep working;
            this is a module-level function, not a method.
        path: Location of the PDF to load. Defaults to the Airbnb 10-K
            file that was previously hard-coded in the body.

    Returns:
        Whatever ``PyMuPDFLoader(path).load()`` returns.
    """
    # Both parameters have defaults so the module-level call
    # `load_pdfdocuments()` no longer fails with a missing-argument TypeError.
    return PyMuPDFLoader(path).load()
|
43 |
+
|
44 |
+
# Load the PDF documents from airbnb-10k
|
45 |
+
documents = load_pdfdocuments()
|
46 |
|
47 |
### 2. CREATE TEXT SPLITTER AND SPLIT DOCUMENTS
|
48 |
text_splitter = RecursiveCharacterTextSplitter(
|
|
|
132 |
"""
|
133 |
This function can be used to rename the 'author' of a message.
|
134 |
|
135 |
+
In this case, we're overriding the 'Assistant' author to be 'AirBnb LLM Assistant'.
|
136 |
"""
|
137 |
rename_dict = {
|
138 |
+
"Assistant" : "AirBnB LLM Assitant"
|
139 |
}
|
140 |
return rename_dict.get(original_author, original_author)
|
141 |
|
data/airbnb-10k.pdf
ADDED
Binary file (596 kB). View file
|
|