sanjeevl10 committed
Commit 544f61e
Parent(s): 892ebe9

LLM Optimizer

app.py CHANGED
@@ -3,11 +3,11 @@ import chainlit as cl
 from dotenv import load_dotenv
 from operator import itemgetter
 from langchain_huggingface import HuggingFaceEndpoint
-from langchain_community.document_loaders import TextLoader
 from langchain_text_splitters import RecursiveCharacterTextSplitter
 from langchain.document_loaders import PyMuPDFLoader
 from langchain_huggingface import HuggingFaceEndpointEmbeddings
 from langchain_core.prompts import PromptTemplate
+from langchain_openai.embeddings import OpenAIEmbeddings
 from langchain.schema.runnable.config import RunnableConfig
 from langchain_community.vectorstores import Qdrant
 
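The only removal in the import block is the unused TextLoader; PDF ingestion stays on PyMuPDFLoader plus RecursiveCharacterTextSplitter. A minimal sketch of that load-and-split step as the surrounding code implies it, not taken from this commit; the file path and chunking settings are assumptions:

    # Sketch with assumed values: load the Airbnb 10-K PDF and chunk it for indexing.
    from langchain.document_loaders import PyMuPDFLoader
    from langchain_text_splitters import RecursiveCharacterTextSplitter

    documents = PyMuPDFLoader("./data/airbnb_10k.pdf").load()  # hypothetical path

    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=1000,   # assumed chunk size
        chunk_overlap=30,  # assumed overlap
    )
    split_documents = text_splitter.split_documents(documents)
    print(f"{len(split_documents)} chunks ready to index")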
@@ -27,6 +27,7 @@ We will load our environment variables here.
 HF_LLM_ENDPOINT = os.environ["HF_LLM_ENDPOINT"]
 HF_EMBED_ENDPOINT = os.environ["HF_EMBED_ENDPOINT"]
 HF_TOKEN = os.environ["HF_TOKEN"]
+OPENAPI_API_KEY = os.environ["OPENAI_API_KEY"]
 
 # ---- GLOBAL DECLARATIONS ---- #
 
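OpenAIEmbeddings() reads OPENAI_API_KEY from the environment on its own, so the OPENAPI_API_KEY assignment above mainly confirms the key is present before anything else runs. A minimal sketch of that wiring, not part of the commit; the explicit model name is an assumption:

    # Sketch: load the .env file and build the OpenAI embedding model the diff switches to.
    import os
    from dotenv import load_dotenv
    from langchain_openai.embeddings import OpenAIEmbeddings

    load_dotenv()  # pulls OPENAI_API_KEY and the HF_* variables from a local .env file
    assert "OPENAI_API_KEY" in os.environ, "OpenAI key missing"

    embeddings = OpenAIEmbeddings(model="text-embedding-3-small")  # model choice is an assumption

    # Quick smoke test: embed one query and check the vector size (1536 for this model).
    vector = embeddings.embed_query("How many nights were booked on Airbnb?")
    print(len(vector))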
@@ -47,20 +48,15 @@ text_splitter = RecursiveCharacterTextSplitter(
 )
 split_documents = text_splitter.split_documents(documents)
 
-### 3. LOAD
-hf_embeddings = HuggingFaceEndpointEmbeddings(
-    model=HF_EMBED_ENDPOINT,
-    task="feature-extraction",
-    huggingfacehub_api_token=HF_TOKEN,
-)
+### 3. LOAD open ai EMBEDDINGS
+embeddings = OpenAIEmbeddings()
 
 #Initialize the Vector Store
 if os.path.exists("./vectorstore"):
     vectorstore = Qdrant.from_existing_collection(
+        embeddings = embeddings,
         path = "./vectorstore",
-        embeddings = hf_embeddings,
         collection_name = "airbnb-10k",
-        batch_size=32,
     )
     hf_retriever = vectorstore.as_retriever()
 else:
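The swap works because both embedding clients implement LangChain's Embeddings interface (embed_documents / embed_query), so the Qdrant store and the retriever are untouched. A sketch of the two backends side by side, using only parameters that appear in this diff; the build_embeddings helper itself is hypothetical:

    # Sketch: pick an embedding backend at runtime; both satisfy the same interface.
    import os
    from langchain_core.embeddings import Embeddings
    from langchain_huggingface import HuggingFaceEndpointEmbeddings
    from langchain_openai.embeddings import OpenAIEmbeddings

    def build_embeddings(use_openai: bool) -> Embeddings:
        # Hypothetical helper, not in the commit.
        if use_openai:
            return OpenAIEmbeddings()  # reads OPENAI_API_KEY from the environment
        return HuggingFaceEndpointEmbeddings(
            model=os.environ["HF_EMBED_ENDPOINT"],
            task="feature-extraction",
            huggingfacehub_api_token=os.environ["HF_TOKEN"],
        )

    embeddings = build_embeddings(use_openai=True)
    query_vector = embeddings.embed_query("What are Airbnb's main risk factors?")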
@@ -68,11 +64,10 @@ else:
     ### 4. INDEX FILES
     ### NOTE: REMEMBER TO BATCH THE DOCUMENTS WITH MAXIMUM BATCH SIZE = 32
     vectorstore = Qdrant.from_documents(
-        documents=split_documents,
-        embedding=hf_embeddings,
+        split_documents,
+        embeddings,
         path= "./vectorstore",
         collection_name="airbnb-10k",
-        batch_size=32,
     )
     hf_retriever = vectorstore.as_retriever()
 
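The else branch indexes the split documents into a fresh local Qdrant collection. The commit passes split_documents and the OpenAI embeddings positionally and drops the batch_size=32 kwarg, even though the NOTE above still asks for batches of at most 32 documents. A sketch of one way to keep that cap, reusing names from the diff; the BATCH_SIZE constant and the add_documents loop are assumptions, not the commit's code:

    # Sketch: create the collection from the first batch, then append the rest.
    BATCH_SIZE = 32  # cap taken from the NOTE in the source

    batches = [
        split_documents[i : i + BATCH_SIZE]
        for i in range(0, len(split_documents), BATCH_SIZE)
    ]

    vectorstore = Qdrant.from_documents(
        batches[0],
        embeddings,
        path="./vectorstore",
        collection_name="airbnb-10k",
    )
    for batch in batches[1:]:
        vectorstore.add_documents(batch)

    hf_retriever = vectorstore.as_retriever()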