sanjeevl10 committed on
Commit
f0cd44d
1 Parent(s): 1082375

update chainlit

Browse files
Files changed (3) hide show
  1. app.py +16 -13
  2. chainlit.md +6 -0
  3. requirements.txt +2 -1
app.py CHANGED
@@ -9,7 +9,7 @@ from langchain.document_loaders import PyMuPDFLoader
9
  from langchain_huggingface import HuggingFaceEndpointEmbeddings
10
  from langchain_core.prompts import PromptTemplate
11
  from langchain.schema.runnable.config import RunnableConfig
12
- from langchain_community.vectorstores import FAISS
13
 
14
 
15
  # GLOBAL SCOPE - ENTIRE APPLICATION HAS ACCESS TO VALUES SET IN THIS SCOPE #
@@ -42,7 +42,7 @@ documents = PyMuPDFLoader("data/airbnb-10k.pdf").load()
42
 
43
  ### 2. CREATE TEXT SPLITTER AND SPLIT DOCUMENTS
44
  text_splitter = RecursiveCharacterTextSplitter(
45
- chunk_size=1000,
46
  chunk_overlap=30,
47
  length_function=len,
48
  is_separator_regex=False,
@@ -58,22 +58,25 @@ hf_embeddings = HuggingFaceEndpointEmbeddings(
58
 
59
  #Initialize the Vector Store
60
  if os.path.exists("./vectorstore"):
61
- vectorstore = FAISS.load_local(
62
- "./vectorstore",
63
- hf_embeddings,
64
- allow_dangerous_deserialization=True # this is necessary to load the vectorstore from disk as it's stored as a `.pkl` file.
 
65
  )
 
66
  else:
67
  os.makedirs("./vectorstore", exist_ok=True)
68
  ### 4. INDEX FILES
69
  ### NOTE: REMEMBER TO BATCH THE DOCUMENTS WITH MAXIMUM BATCH SIZE = 32
70
- for i in range(0, len(split_documents), 32):
71
- if i == 0:
72
- vectorstore = FAISS.from_documents(split_documents[i:i+32], hf_embeddings)
73
- continue
74
- vectorstore.add_documents(split_documents[i:i+32])
75
- vectorstore.save_local("./vectorstore")
76
- hf_retriever = vectorstore.as_retriever()
 
77
 
78
  # -- AUGMENTED -- #
79
  """
 
9
  from langchain_huggingface import HuggingFaceEndpointEmbeddings
10
  from langchain_core.prompts import PromptTemplate
11
  from langchain.schema.runnable.config import RunnableConfig
12
+ from langchain_community.vectorstores import Qdrant
13
 
14
 
15
  # GLOBAL SCOPE - ENTIRE APPLICATION HAS ACCESS TO VALUES SET IN THIS SCOPE #
 
42
 
43
  ### 2. CREATE TEXT SPLITTER AND SPLIT DOCUMENTS
44
  text_splitter = RecursiveCharacterTextSplitter(
45
+ chunk_size=500,
46
  chunk_overlap=30,
47
  length_function=len,
48
  is_separator_regex=False,
 
58
 
59
  #Initialize the Vector Store
60
  if os.path.exists("./vectorstore"):
61
+ vectorstore = Qdrant.from_existing_collection(
62
+ path = "./vectorstore",
63
+ embeddings = hf_embeddings,
64
+ collection_name = "airbnb-10k",
65
+ batch_size=32,
66
  )
67
+ hf_retriever = vectorstore.as_retriever()
68
  else:
69
  os.makedirs("./vectorstore", exist_ok=True)
70
  ### 4. INDEX FILES
71
  ### NOTE: REMEMBER TO BATCH THE DOCUMENTS WITH MAXIMUM BATCH SIZE = 32
72
+ vectorstore = Qdrant.from_documents(
73
+ documents=split_documents,
74
+ embedding=hf_embeddings,
75
+ path= "./vectorstore",
76
+ collection_name="airbnb-10k",
77
+ batch_size=32,
78
+ )
79
+ hf_retriever = vectorstore.as_retriever()
80
 
81
  # -- AUGMENTED -- #
82
  """
chainlit.md CHANGED
@@ -7,3 +7,9 @@
7
  - Vector Store: FAISS
8
  - UI: Chainlit
9
  - Deployment: Docker on HuggingFace Spaces
 
 
 
 
 
 
 
7
  - Vector Store: FAISS
8
  - UI: Chainlit
9
  - Deployment: Docker on HuggingFace Spaces
10
+
11
+ # Model Evaluation
12
+ - Evaluate your answers to the following questions
13
+ - Q1 "What is Airbnb's 'Description of Business'?"
14
+ - Q2 "What was the total value of 'Cash and cash equivalents' as of December 31, 2023?"
15
+ - Q3 "What is the 'maximum number of shares to be sold under the 10b5-1 Trading plan' by Brian Chesky?"
requirements.txt CHANGED
@@ -6,4 +6,5 @@ langchain_huggingface==0.0.3
6
  langchain_text_splitters==0.2.1
7
  python-dotenv==1.0.1
8
  pymupdf==1.24.5
9
- faiss-cpu
 
 
6
  langchain_text_splitters==0.2.1
7
  python-dotenv==1.0.1
8
  pymupdf==1.24.5
9
+ faiss-cpu
10
+ qdrant-client==1.9.2