Spaces:
Runtime error
Runtime error
Added comments to app.py
Browse files
app.py
CHANGED
@@ -3,6 +3,8 @@ import os
|
|
3 |
# Hugging Face API token, supplied via the HF_TOKEN environment variable
# (configured as a Space secret); None when the variable is unset.
api_token = os.environ.get("HF_TOKEN")
|
4 |
|
5 |
|
|
|
|
|
6 |
from langchain_community.vectorstores import FAISS
|
7 |
from langchain_community.document_loaders import PyPDFLoader
|
8 |
from langchain.text_splitter import RecursiveCharacterTextSplitter
|
@@ -15,10 +17,13 @@ from langchain.memory import ConversationBufferMemory
|
|
15 |
from langchain_community.llms import HuggingFaceEndpoint
|
16 |
import torch
|
17 |
|
|
|
|
|
18 |
# Candidate LLM endpoints on the Hugging Face Hub (full repo ids).
list_llm = [
    "meta-llama/Meta-Llama-3-8B-Instruct",
    "mistralai/Mistral-7B-Instruct-v0.2",
]
# Short display names for the UI: the repo basename without the org prefix.
list_llm_simple = list(map(os.path.basename, list_llm))
|
20 |
|
21 |
# Load and split PDF document
|
|
|
22 |
def load_doc(list_file_path):
|
23 |
# Processing for one document only
|
24 |
# loader = PyPDFLoader(file_path)
|
@@ -34,14 +39,20 @@ def load_doc(list_file_path):
|
|
34 |
doc_splits = text_splitter.split_documents(pages)
|
35 |
return doc_splits
|
36 |
|
|
|
|
|
37 |
# Create vector database
|
|
|
38 |
def create_db(splits):
    """Embed the document chunks and index them in a FAISS vector store.

    splits: list of langchain Document chunks (as produced by load_doc).
    Returns the populated in-memory FAISS vector database.
    """
    # No model name is passed, so HuggingFaceEmbeddings falls back to its
    # default sentence-transformers model.
    embedder = HuggingFaceEmbeddings()
    return FAISS.from_documents(splits, embedder)
|
42 |
|
43 |
|
|
|
|
|
44 |
# Initialize langchain LLM chain
|
|
|
45 |
def initialize_llmchain(llm_model, temperature, max_tokens, top_k, vector_db, progress=gr.Progress()):
|
46 |
if llm_model == "meta-llama/Meta-Llama-3-8B-Instruct":
|
47 |
llm = HuggingFaceEndpoint(
|
@@ -77,7 +88,10 @@ def initialize_llmchain(llm_model, temperature, max_tokens, top_k, vector_db, pr
|
|
77 |
)
|
78 |
return qa_chain
|
79 |
|
|
|
|
|
80 |
# Initialize database
|
|
|
81 |
def initialize_database(list_file_obj, progress=gr.Progress()):
|
82 |
# Create a list of documents (when valid)
|
83 |
list_file_path = [x.name for x in list_file_obj if x is not None]
|
@@ -87,7 +101,11 @@ def initialize_database(list_file_obj, progress=gr.Progress()):
|
|
87 |
vector_db = create_db(doc_splits)
|
88 |
return vector_db, "Database created!"
|
89 |
|
|
|
|
|
|
|
90 |
# Initialize LLM
|
|
|
91 |
def initialize_LLM(llm_option, llm_temperature, max_tokens, top_k, vector_db, progress=gr.Progress()):
|
92 |
# print("llm_option",llm_option)
|
93 |
llm_name = list_llm[llm_option]
|
|
|
3 |
# Hugging Face Inference API token read from the environment; may be None
# if the HF_TOKEN secret is not configured for this Space.
api_token = os.getenv("HF_TOKEN")
|
4 |
|
5 |
|
6 |
+
# adding all the imports
|
7 |
+
|
8 |
from langchain_community.vectorstores import FAISS
|
9 |
from langchain_community.document_loaders import PyPDFLoader
|
10 |
from langchain.text_splitter import RecursiveCharacterTextSplitter
|
|
|
17 |
from langchain_community.llms import HuggingFaceEndpoint
|
18 |
import torch
|
19 |
|
20 |
+
# add more llms if needed
|
21 |
+
|
22 |
# HF Hub model repo ids selectable in the UI.
list_llm = ["meta-llama/Meta-Llama-3-8B-Instruct", "mistralai/Mistral-7B-Instruct-v0.2"]
# Strip the organisation prefix so the dropdown shows only the model name.
list_llm_simple = [os.path.basename(model_id) for model_id in list_llm]
|
24 |
|
25 |
# Load and split PDF document
|
26 |
+
|
27 |
def load_doc(list_file_path):
|
28 |
# Processing for one document only
|
29 |
# loader = PyPDFLoader(file_path)
|
|
|
39 |
doc_splits = text_splitter.split_documents(pages)
|
40 |
return doc_splits
|
41 |
|
42 |
+
|
43 |
+
|
44 |
# Create vector database
|
45 |
+
|
46 |
def create_db(splits):
    """Build a FAISS vector database over *splits*.

    Each chunk is embedded with the default HuggingFaceEmbeddings model and
    the resulting vectors are stored in an in-memory FAISS index, which is
    returned to the caller.
    """
    embedding_fn = HuggingFaceEmbeddings()
    vector_store = FAISS.from_documents(splits, embedding_fn)
    return vector_store
|
50 |
|
51 |
|
52 |
+
|
53 |
+
|
54 |
# Initialize langchain LLM chain
|
55 |
+
|
56 |
def initialize_llmchain(llm_model, temperature, max_tokens, top_k, vector_db, progress=gr.Progress()):
|
57 |
if llm_model == "meta-llama/Meta-Llama-3-8B-Instruct":
|
58 |
llm = HuggingFaceEndpoint(
|
|
|
88 |
)
|
89 |
return qa_chain
|
90 |
|
91 |
+
|
92 |
+
|
93 |
# Initialize database
|
94 |
+
|
95 |
def initialize_database(list_file_obj, progress=gr.Progress()):
|
96 |
# Create a list of documents (when valid)
|
97 |
list_file_path = [x.name for x in list_file_obj if x is not None]
|
|
|
101 |
vector_db = create_db(doc_splits)
|
102 |
return vector_db, "Database created!"
|
103 |
|
104 |
+
|
105 |
+
|
106 |
+
|
107 |
# Initialize LLM
|
108 |
+
|
109 |
def initialize_LLM(llm_option, llm_temperature, max_tokens, top_k, vector_db, progress=gr.Progress()):
|
110 |
# print("llm_option",llm_option)
|
111 |
llm_name = list_llm[llm_option]
|