Sanjeev Lakkaraju committed on
Commit
d2e07cf
1 Parent(s): 35ab4db

fixed the Dockerfile

Browse files
Files changed (3) hide show
  1. Dockerfile +7 -2
  2. app.py +16 -22
  3. requirements.txt +1 -1
Dockerfile CHANGED
@@ -1,12 +1,17 @@
1
  FROM python:3.11
 
2
  RUN useradd -m -u 1000 user
 
3
  USER user
4
  ENV HOME=/home/user \
5
  PATH=/home/user/.local/bin:$PATH
 
6
  WORKDIR $HOME/app
7
- COPY . .
8
  COPY --chown=user . $HOME/app
 
9
  COPY ./requirements.txt ~/app/requirements.txt
 
10
  RUN pip install -r requirements.txt
11
- USER user
12
  CMD ["chainlit", "run", "app.py", "--port", "7860"]
 
# Image for the Chainlit application defined in app.py; the server listens on port 7860.
FROM python:3.11

# Create and switch to an unprivileged user with a fixed UID so the app does not run as root.
RUN useradd -m -u 1000 user
USER user

# /home/user/.local/bin is put on PATH so that console scripts from user-level
# pip installs (such as the `chainlit` command used by CMD) can be found.
ENV HOME=/home/user \
    PATH=/home/user/.local/bin:$PATH

WORKDIR $HOME/app

# Copy only the dependency manifest first so the pip layer below stays cached
# until requirements.txt itself changes.
# Docker does not expand `~` in COPY paths (it would create a literal "~"
# directory under WORKDIR), so the destination is spelled with $HOME.
COPY --chown=user requirements.txt $HOME/app/requirements.txt
RUN pip install --no-cache-dir -r requirements.txt

# Copy the application source last. --chown keeps the files owned by the runtime
# user; app.py creates and writes ./data/vectorstore at startup, so the working
# tree must be writable by `user`.
COPY --chown=user . $HOME/app

# Documentation only: the port passed to chainlit below.
EXPOSE 7860

CMD ["chainlit", "run", "app.py", "--port", "7860"]
app.py CHANGED
@@ -9,7 +9,7 @@ from langchain.document_loaders import PyMuPDFLoader
9
  from langchain_huggingface import HuggingFaceEndpointEmbeddings
10
  from langchain_core.prompts import PromptTemplate
11
  from langchain.schema.runnable.config import RunnableConfig
12
- from langchain_community.vectorstores import Qdrant
13
 
14
 
15
  # GLOBAL SCOPE - ENTIRE APPLICATION HAS ACCESS TO VALUES SET IN THIS SCOPE #
@@ -56,31 +56,25 @@ hf_embeddings = HuggingFaceEndpointEmbeddings(
56
  huggingfacehub_api_token=HF_TOKEN,
57
  )
58
 
59
- vectordbfile = "./data/vectorstore"
60
-
61
- if os.path.exists(vectordbfile):
62
- vectorstore = Qdrant.from_existing_collection(
63
- embedding=hf_embeddings,
64
- path=vectordbfile,
65
- collection_name="airbnb-10k",
66
  )
67
- hf_retriever = vectorstore.as_retriever()
68
  else:
69
- print("Indexing Files")
70
- os.makedirs(vectordbfile, exist_ok=True)
71
- vectorstore = Qdrant.from_documents(
72
- documents=split_documents,
73
- embedding=hf_embeddings,
74
- path=vectordbfile,
75
- collection_name="airbnb-10k",
76
- )
77
- hf_retriever = vectorstore.as_retriever()
78
-
79
  ### 4. INDEX FILES
80
  ### NOTE: REMEMBER TO BATCH THE DOCUMENTS WITH MAXIMUM BATCH SIZE = 32
81
-
82
-
83
-
 
 
 
 
 
84
  # -- AUGMENTED -- #
85
  """
86
  1. Define a String Template
 
9
  from langchain_huggingface import HuggingFaceEndpointEmbeddings
10
  from langchain_core.prompts import PromptTemplate
11
  from langchain.schema.runnable.config import RunnableConfig
12
+ from langchain_community.vectorstores import FAISS
13
 
14
 
15
  # GLOBAL SCOPE - ENTIRE APPLICATION HAS ACCESS TO VALUES SET IN THIS SCOPE #
 
56
  huggingfacehub_api_token=HF_TOKEN,
57
  )
58
 
59
+ #Initialize the Vector Store
60
+ if os.path.exists("./data/vectorstore"):
61
+ vectorstore = FAISS.load_local(
62
+ "./data/vectorstore",
63
+ hf_embeddings,
64
+ allow_dangerous_deserialization=True # this is necessary to load the vectorstore from disk as it's stored as a `.pkl` file.
 
65
  )
 
66
  else:
67
+ os.makedirs("./data/vectorstore", exist_ok=True)
 
 
 
 
 
 
 
 
 
68
  ### 4. INDEX FILES
69
  ### NOTE: REMEMBER TO BATCH THE DOCUMENTS WITH MAXIMUM BATCH SIZE = 32
70
+ for i in range(0, len(split_documents), 32):
71
+ if i == 0:
72
+ vectorstore = FAISS.from_documents(split_documents[i:i+32], hf_embeddings)
73
+ continue
74
+ vectorstore.add_documents(split_documents[i:i+32])
75
+ vectorstore.save_local("./data/vectorstore")
76
+ hf_retriever = vectorstore.as_retriever()
77
+
78
  # -- AUGMENTED -- #
79
  """
80
  1. Define a String Template
requirements.txt CHANGED
@@ -6,4 +6,4 @@ langchain_huggingface==0.0.3
6
  langchain_text_splitters==0.2.1
7
  python-dotenv==1.0.1
8
  pymupdf==1.24.5
9
- qdrant-client==1.9.2
 
6
  langchain_text_splitters==0.2.1
7
  python-dotenv==1.0.1
8
  pymupdf==1.24.5
9
+ faiss-cpu