sanjeevl10 committed on
Commit
d154644
1 Parent(s): cb19fde

load the airbnb llm

Browse files
Files changed (2) hide show
  1. app.py +10 -9
  2. data/airbnb-10k.pdf +0 -0
app.py CHANGED
@@ -6,12 +6,10 @@ from langchain_huggingface import HuggingFaceEndpoint
6
  from langchain_community.document_loaders import TextLoader
7
  from langchain_text_splitters import RecursiveCharacterTextSplitter
8
  from langchain_community.vectorstores import FAISS
 
9
  from langchain_huggingface import HuggingFaceEndpointEmbeddings
10
  from langchain_core.prompts import PromptTemplate
11
- from langchain.schema.output_parser import StrOutputParser
12
- from langchain.schema.runnable import RunnablePassthrough
13
  from langchain.schema.runnable.config import RunnableConfig
14
- from pathlib import Path
15
 
16
  # GLOBAL SCOPE - ENTIRE APPLICATION HAS ACCESS TO VALUES SET IN THIS SCOPE #
17
  # ---- ENV VARIABLES ---- #
@@ -38,10 +36,13 @@ HF_TOKEN = os.environ["HF_TOKEN"]
38
  3. Load HuggingFace Embeddings (remember to use the URL we set above)
39
  4. Index Files if they do not exist, otherwise load the vectorstore
40
  """
41
- ### 1. CREATE TEXT LOADER AND LOAD DOCUMENTS
42
- ### NOTE: PAY ATTENTION TO THE PATH THEY ARE IN.
43
- text_loader = TextLoader("data/paul_graham_essays.txt")
44
- documents = text_loader.load()
 
 
 
45
 
46
  ### 2. CREATE TEXT SPLITTER AND SPLIT DOCUMENTS
47
  text_splitter = RecursiveCharacterTextSplitter(
@@ -131,10 +132,10 @@ def rename(original_author: str):
131
  """
132
  This function can be used to rename the 'author' of a message.
133
 
134
- In this case, we're overriding the 'Assistant' author to be 'Paul Graham Essay Bot'.
135
  """
136
  rename_dict = {
137
- "Assistant" : "Paul Graham Essay Bot"
138
  }
139
  return rename_dict.get(original_author, original_author)
140
 
 
6
  from langchain_community.document_loaders import TextLoader
7
  from langchain_text_splitters import RecursiveCharacterTextSplitter
8
  from langchain_community.vectorstores import FAISS
9
+ from langchain.document_loaders import PyMuPDFLoader
10
  from langchain_huggingface import HuggingFaceEndpointEmbeddings
11
  from langchain_core.prompts import PromptTemplate
 
 
12
  from langchain.schema.runnable.config import RunnableConfig
 
13
 
14
  # GLOBAL SCOPE - ENTIRE APPLICATION HAS ACCESS TO VALUES SET IN THIS SCOPE #
15
  # ---- ENV VARIABLES ---- #
 
36
  3. Load HuggingFace Embeddings (remember to use the URL we set above)
37
  4. Index Files if they do not exist, otherwise load the vectorstore
38
  """
39
+ # Load the airbnb-10k PDF from the data folder
40
def load_pdfdocuments(self=None, path: str = "data/airbnb-10k.pdf"):
    """Load a PDF with PyMuPDFLoader and return the loaded documents.

    This is a module-level function that the file calls with no arguments,
    so both parameters carry defaults; requiring them made that call fail
    with a ``TypeError``.

    Args:
        self: Unused placeholder kept only so existing positional callers
            keep working. If an object is supplied, its ``documents``
            attribute is reset to an empty list, as the original body did.
        path: Location of the PDF to load. Defaults to
            ``data/airbnb-10k.pdf``, the file the original body hard-coded.

    Returns:
        The result of ``PyMuPDFLoader(path).load()`` -- presumably a list of
        LangChain ``Document`` objects; confirm against the loader's docs.
    """
    # Only touch ``self`` when a caller actually passed one; the normal
    # zero-argument call has no instance to mutate.
    if self is not None:
        self.documents = []
    # Honour the caller-supplied path instead of ignoring it.
    return PyMuPDFLoader(path).load()
43
+
44
+ # Load the PDF documents from airbnb-10k
45
+ documents = load_pdfdocuments()
46
 
47
  ### 2. CREATE TEXT SPLITTER AND SPLIT DOCUMENTS
48
  text_splitter = RecursiveCharacterTextSplitter(
 
132
  """
133
  This function can be used to rename the 'author' of a message.
134
 
135
+ In this case, we're overriding the 'Assistant' author to be 'AirBnb LLM Assistant'.
136
  """
137
  rename_dict = {
138
+ "Assistant" : "AirBnB LLM Assitant"
139
  }
140
  return rename_dict.get(original_author, original_author)
141
 
data/airbnb-10k.pdf ADDED
Binary file (596 kB). View file