Meetnote-Support committed on
Commit
417e90b
·
verified ·
1 Parent(s): a5c9a00

added comments in app.py

Browse files
Files changed (1) hide show
  1. app.py +18 -0
app.py CHANGED
@@ -3,6 +3,8 @@ import os
3
  api_token = os.getenv("HF_TOKEN")
4
 
5
 
 
 
6
  from langchain_community.vectorstores import FAISS
7
  from langchain_community.document_loaders import PyPDFLoader
8
  from langchain.text_splitter import RecursiveCharacterTextSplitter
@@ -15,10 +17,13 @@ from langchain.memory import ConversationBufferMemory
15
  from langchain_community.llms import HuggingFaceEndpoint
16
  import torch
17
 
 
 
18
  list_llm = ["meta-llama/Meta-Llama-3-8B-Instruct", "mistralai/Mistral-7B-Instruct-v0.2"]
19
  list_llm_simple = [os.path.basename(llm) for llm in list_llm]
20
 
21
  # Load and split PDF document
 
22
  def load_doc(list_file_path):
23
  # Processing for one document only
24
  # loader = PyPDFLoader(file_path)
@@ -34,14 +39,20 @@ def load_doc(list_file_path):
34
  doc_splits = text_splitter.split_documents(pages)
35
  return doc_splits
36
 
 
 
37
  # Create vector database
 
38
  def create_db(splits):
39
  embeddings = HuggingFaceEmbeddings()
40
  vectordb = FAISS.from_documents(splits, embeddings)
41
  return vectordb
42
 
43
 
 
 
44
  # Initialize langchain LLM chain
 
45
  def initialize_llmchain(llm_model, temperature, max_tokens, top_k, vector_db, progress=gr.Progress()):
46
  if llm_model == "meta-llama/Meta-Llama-3-8B-Instruct":
47
  llm = HuggingFaceEndpoint(
@@ -77,7 +88,10 @@ def initialize_llmchain(llm_model, temperature, max_tokens, top_k, vector_db, pr
77
  )
78
  return qa_chain
79
 
 
 
80
  # Initialize database
 
81
  def initialize_database(list_file_obj, progress=gr.Progress()):
82
  # Create a list of documents (when valid)
83
  list_file_path = [x.name for x in list_file_obj if x is not None]
@@ -87,7 +101,11 @@ def initialize_database(list_file_obj, progress=gr.Progress()):
87
  vector_db = create_db(doc_splits)
88
  return vector_db, "Database created!"
89
 
 
 
 
90
  # Initialize LLM
 
91
  def initialize_LLM(llm_option, llm_temperature, max_tokens, top_k, vector_db, progress=gr.Progress()):
92
  # print("llm_option",llm_option)
93
  llm_name = list_llm[llm_option]
 
3
  api_token = os.getenv("HF_TOKEN")
4
 
5
 
6
+ # Import the remaining libraries used by the app
7
+
8
  from langchain_community.vectorstores import FAISS
9
  from langchain_community.document_loaders import PyPDFLoader
10
  from langchain.text_splitter import RecursiveCharacterTextSplitter
 
17
  from langchain_community.llms import HuggingFaceEndpoint
18
  import torch
19
 
20
+ # Available LLMs; add more model IDs to this list if needed
21
+
22
  list_llm = ["meta-llama/Meta-Llama-3-8B-Instruct", "mistralai/Mistral-7B-Instruct-v0.2"]
23
  list_llm_simple = [os.path.basename(llm) for llm in list_llm]
24
 
25
  # Load and split PDF document
26
+
27
  def load_doc(list_file_path):
28
  # Processing for one document only
29
  # loader = PyPDFLoader(file_path)
 
39
  doc_splits = text_splitter.split_documents(pages)
40
  return doc_splits
41
 
42
+
43
+
44
  # Create vector database
45
+
46
  def create_db(splits):
47
  embeddings = HuggingFaceEmbeddings()
48
  vectordb = FAISS.from_documents(splits, embeddings)
49
  return vectordb
50
 
51
 
52
+
53
+
54
  # Initialize langchain LLM chain
55
+
56
  def initialize_llmchain(llm_model, temperature, max_tokens, top_k, vector_db, progress=gr.Progress()):
57
  if llm_model == "meta-llama/Meta-Llama-3-8B-Instruct":
58
  llm = HuggingFaceEndpoint(
 
88
  )
89
  return qa_chain
90
 
91
+
92
+
93
  # Initialize database
94
+
95
  def initialize_database(list_file_obj, progress=gr.Progress()):
96
  # Create a list of documents (when valid)
97
  list_file_path = [x.name for x in list_file_obj if x is not None]
 
101
  vector_db = create_db(doc_splits)
102
  return vector_db, "Database created!"
103
 
104
+
105
+
106
+
107
  # Initialize LLM
108
+
109
  def initialize_LLM(llm_option, llm_temperature, max_tokens, top_k, vector_db, progress=gr.Progress()):
110
  # print("llm_option",llm_option)
111
  llm_name = list_llm[llm_option]