HareemFatima committed on
Commit
f32095c
·
verified ·
1 Parent(s): bedddd8

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +8 -28
app.py CHANGED
@@ -15,7 +15,7 @@ from dotenv import load_dotenv
15
  from langchain_community.embeddings import HuggingFaceEmbeddings
16
  import nltk
17
  from urllib.parse import urljoin, urlparse
18
- from langchain.memory import ConversationBufferMemory
19
 
20
  # Load environment variables (if needed for API keys)
21
  load_dotenv()
@@ -37,12 +37,6 @@ def preprocess_text(text):
37
  cleaned_text = " ".join([word for word in tokens if word not in STOPWORDS]) # Remove stopwords
38
  return cleaned_text
39
 
40
- # Function to Save Processed Data to a Document
41
- def save_data_to_document(data, filename="processed_data.json"):
42
- with open(filename, 'w') as f:
43
- json.dump(data, f, indent=4)
44
- st.success(f"Data has been saved to {filename}")
45
-
46
  # Scrape Website with BeautifulSoup
47
  def scrape_website(url):
48
  visited_urls = set()
@@ -122,7 +116,7 @@ def create_faiss_with_uuid(text_chunks):
122
  return unique_id, faiss_directory # Return the UUID and the directory path
123
 
124
  # Build Conversational Chain
125
- def get_conversational_chain(memory):
126
  prompt_template = """
127
  Answer the question as detailed as possible from the provided context. If the answer is not in
128
  provided context, just say, "answer is not available in the context." Don't provide the wrong answer.\n\n
@@ -131,27 +125,23 @@ def get_conversational_chain(memory):
131
 
132
  Answer:
133
  """
134
- model = Ollama(model="phi") # Initialize LLaMA model
135
  prompt = PromptTemplate(template=prompt_template, input_variables=["context", "question"])
136
 
137
- # Add memory to the chain
138
- chain = load_qa_chain(model, chain_type="stuff", prompt=prompt, memory=memory)
139
 
140
  return chain
141
 
142
  # Handle User Input and Process Questions with UUID-based FAISS Index
143
- def user_input(user_question, faiss_directory, memory):
144
  # Load the FAISS index based on the given directory (UUID-based)
145
  new_db = FAISS.load_local(faiss_directory, embeddings, allow_dangerous_deserialization=True)
146
 
147
  # Perform similarity search and answer the user's question
148
  docs = new_db.similarity_search(user_question)
149
- chain = get_conversational_chain(memory)
150
 
151
- # Update memory with the question and response
152
  response = chain({"input_documents": docs, "question": user_question}, return_only_outputs=True)
153
- memory.save_context({"input": user_question}, {"output": response["output_text"]})
154
-
155
  st.write("Reply: ", response["output_text"])
156
 
157
  # Main Function for Streamlit App
@@ -159,14 +149,11 @@ def main():
159
  st.set_page_config("Chat PDF & URL", layout="wide")
160
  st.header("Chat with PDF or URL using Ollama πŸ’")
161
 
162
- # Initialize memory for conversation history
163
- memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)
164
-
165
  user_question = st.text_input("Ask a Question from the Processed Data")
166
 
167
  if user_question and 'faiss_directory' in st.session_state:
168
  faiss_directory = st.session_state['faiss_directory']
169
- user_input(user_question, faiss_directory, memory)
170
 
171
  with st.sidebar:
172
  st.title("Menu:")
@@ -182,10 +169,6 @@ def main():
182
  text_chunks = get_text_chunks(raw_text)
183
  unique_id, faiss_directory = create_faiss_with_uuid(text_chunks)
184
  st.session_state['faiss_directory'] = faiss_directory
185
-
186
- # Save the cleaned PDF data to a document
187
- save_data_to_document({"pdf_data": raw_text}, f"pdf_data_{unique_id}.json")
188
-
189
  st.success("PDF data is ready for queries!")
190
  else:
191
  st.error("No PDF files were uploaded.")
@@ -206,10 +189,7 @@ def main():
206
  text_chunks = get_text_chunks(raw_text)
207
  unique_id, faiss_directory = create_faiss_with_uuid(text_chunks)
208
  st.session_state['faiss_directory'] = faiss_directory
209
-
210
- # Save the cleaned URL data to a document
211
- save_data_to_document({"url_data": scraped_data}, f"url_data_{unique_id}.json")
212
-
213
  st.success("Scraped data is ready for queries!")
214
  except Exception as e:
215
  st.error(f"Failed to scrape or process data: {e}")
 
15
  from langchain_community.embeddings import HuggingFaceEmbeddings
16
  import nltk
17
  from urllib.parse import urljoin, urlparse
18
+ import faiss
19
 
20
  # Load environment variables (if needed for API keys)
21
  load_dotenv()
 
37
  cleaned_text = " ".join([word for word in tokens if word not in STOPWORDS]) # Remove stopwords
38
  return cleaned_text
39
 
 
 
 
 
 
 
40
  # Scrape Website with BeautifulSoup
41
  def scrape_website(url):
42
  visited_urls = set()
 
116
  return unique_id, faiss_directory # Return the UUID and the directory path
117
 
118
  # Build Conversational Chain
119
+ def get_conversational_chain():
120
  prompt_template = """
121
  Answer the question as detailed as possible from the provided context. If the answer is not in
122
  provided context, just say, "answer is not available in the context." Don't provide the wrong answer.\n\n
 
125
 
126
  Answer:
127
  """
128
+ model = Ollama(model="qwen2.5:0.5b") # Initialize LLaMA model
129
  prompt = PromptTemplate(template=prompt_template, input_variables=["context", "question"])
130
 
131
+ chain = load_qa_chain(model, chain_type="stuff", prompt=prompt)
 
132
 
133
  return chain
134
 
135
  # Handle User Input and Process Questions with UUID-based FAISS Index
136
+ def user_input(user_question, faiss_directory):
137
  # Load the FAISS index based on the given directory (UUID-based)
138
  new_db = FAISS.load_local(faiss_directory, embeddings, allow_dangerous_deserialization=True)
139
 
140
  # Perform similarity search and answer the user's question
141
  docs = new_db.similarity_search(user_question)
142
+ chain = get_conversational_chain()
143
 
 
144
  response = chain({"input_documents": docs, "question": user_question}, return_only_outputs=True)
 
 
145
  st.write("Reply: ", response["output_text"])
146
 
147
  # Main Function for Streamlit App
 
149
  st.set_page_config("Chat PDF & URL", layout="wide")
150
  st.header("Chat with PDF or URL using Ollama πŸ’")
151
 
 
 
 
152
  user_question = st.text_input("Ask a Question from the Processed Data")
153
 
154
  if user_question and 'faiss_directory' in st.session_state:
155
  faiss_directory = st.session_state['faiss_directory']
156
+ user_input(user_question, faiss_directory)
157
 
158
  with st.sidebar:
159
  st.title("Menu:")
 
169
  text_chunks = get_text_chunks(raw_text)
170
  unique_id, faiss_directory = create_faiss_with_uuid(text_chunks)
171
  st.session_state['faiss_directory'] = faiss_directory
 
 
 
 
172
  st.success("PDF data is ready for queries!")
173
  else:
174
  st.error("No PDF files were uploaded.")
 
189
  text_chunks = get_text_chunks(raw_text)
190
  unique_id, faiss_directory = create_faiss_with_uuid(text_chunks)
191
  st.session_state['faiss_directory'] = faiss_directory
192
+
 
 
 
193
  st.success("Scraped data is ready for queries!")
194
  except Exception as e:
195
  st.error(f"Failed to scrape or process data: {e}")