sanjeevl10 commited on
Commit
544f61e
1 Parent(s): 892ebe9

LLM Optimizer

Browse files
Files changed (1) hide show
  1. app.py +7 -12
app.py CHANGED
@@ -3,11 +3,11 @@ import chainlit as cl
3
  from dotenv import load_dotenv
4
  from operator import itemgetter
5
  from langchain_huggingface import HuggingFaceEndpoint
6
- from langchain_community.document_loaders import TextLoader
7
  from langchain_text_splitters import RecursiveCharacterTextSplitter
8
  from langchain.document_loaders import PyMuPDFLoader
9
  from langchain_huggingface import HuggingFaceEndpointEmbeddings
10
  from langchain_core.prompts import PromptTemplate
 
11
  from langchain.schema.runnable.config import RunnableConfig
12
  from langchain_community.vectorstores import Qdrant
13
 
@@ -27,6 +27,7 @@ We will load our environment variables here.
27
  HF_LLM_ENDPOINT = os.environ["HF_LLM_ENDPOINT"]
28
  HF_EMBED_ENDPOINT = os.environ["HF_EMBED_ENDPOINT"]
29
  HF_TOKEN = os.environ["HF_TOKEN"]
 
30
 
31
  # ---- GLOBAL DECLARATIONS ---- #
32
 
@@ -47,20 +48,15 @@ text_splitter = RecursiveCharacterTextSplitter(
47
  )
48
  split_documents = text_splitter.split_documents(documents)
49
 
50
- ### 3. LOAD HUGGINGFACE EMBEDDINGS
51
- hf_embeddings = HuggingFaceEndpointEmbeddings(
52
- model=HF_EMBED_ENDPOINT,
53
- task="feature-extraction",
54
- huggingfacehub_api_token=HF_TOKEN,
55
- )
56
 
57
  #Initialize the Vector Store
58
  if os.path.exists("./vectorstore"):
59
  vectorstore = Qdrant.from_existing_collection(
 
60
  path = "./vectorstore",
61
- embeddings = hf_embeddings,
62
  collection_name = "airbnb-10k",
63
- batch_size=32,
64
  )
65
  hf_retriever = vectorstore.as_retriever()
66
  else:
@@ -68,11 +64,10 @@ else:
68
  ### 4. INDEX FILES
69
  ### NOTE: REMEMBER TO BATCH THE DOCUMENTS WITH MAXIMUM BATCH SIZE = 32
70
  vectorstore = Qdrant.from_documents(
71
- documents=split_documents,
72
- embedding=hf_embeddings,
73
  path= "./vectorstore",
74
  collection_name="airbnb-10k",
75
- batch_size=32,
76
  )
77
  hf_retriever = vectorstore.as_retriever()
78
 
 
3
  from dotenv import load_dotenv
4
  from operator import itemgetter
5
  from langchain_huggingface import HuggingFaceEndpoint
 
6
  from langchain_text_splitters import RecursiveCharacterTextSplitter
7
  from langchain.document_loaders import PyMuPDFLoader
8
  from langchain_huggingface import HuggingFaceEndpointEmbeddings
9
  from langchain_core.prompts import PromptTemplate
10
+ from langchain_openai.embeddings import OpenAIEmbeddings
11
  from langchain.schema.runnable.config import RunnableConfig
12
  from langchain_community.vectorstores import Qdrant
13
 
 
27
  HF_LLM_ENDPOINT = os.environ["HF_LLM_ENDPOINT"]
28
  HF_EMBED_ENDPOINT = os.environ["HF_EMBED_ENDPOINT"]
29
  HF_TOKEN = os.environ["HF_TOKEN"]
30
+ OPENAI_API_KEY = os.environ["OPENAI_API_KEY"]
31
 
32
  # ---- GLOBAL DECLARATIONS ---- #
33
 
 
48
  )
49
  split_documents = text_splitter.split_documents(documents)
50
 
51
+ ### 3. LOAD OPENAI EMBEDDINGS
52
+ embeddings = OpenAIEmbeddings()
 
 
 
 
53
 
54
  #Initialize the Vector Store
55
  if os.path.exists("./vectorstore"):
56
  vectorstore = Qdrant.from_existing_collection(
57
+ embeddings = embeddings,
58
  path = "./vectorstore",
 
59
  collection_name = "airbnb-10k",
 
60
  )
61
  hf_retriever = vectorstore.as_retriever()
62
  else:
 
64
  ### 4. INDEX FILES
65
  ### NOTE: REMEMBER TO BATCH THE DOCUMENTS WITH MAXIMUM BATCH SIZE = 32
66
  vectorstore = Qdrant.from_documents(
67
+ split_documents,
68
+ embeddings,
69
  path= "./vectorstore",
70
  collection_name="airbnb-10k",
 
71
  )
72
  hf_retriever = vectorstore.as_retriever()
73