Spaces:
Sleeping
Sleeping
HareemFatima
committed on
Update app.py
Browse files
app.py
CHANGED
@@ -15,7 +15,7 @@ from dotenv import load_dotenv
|
|
15 |
from langchain_community.embeddings import HuggingFaceEmbeddings
|
16 |
import nltk
|
17 |
from urllib.parse import urljoin, urlparse
|
18 |
-
|
19 |
|
20 |
# Load environment variables (if needed for API keys)
|
21 |
load_dotenv()
|
@@ -37,12 +37,6 @@ def preprocess_text(text):
|
|
37 |
cleaned_text = " ".join([word for word in tokens if word not in STOPWORDS]) # Remove stopwords
|
38 |
return cleaned_text
|
39 |
|
40 |
-
# Function to Save Processed Data to a Document
|
41 |
-
def save_data_to_document(data, filename="processed_data.json"):
|
42 |
-
with open(filename, 'w') as f:
|
43 |
-
json.dump(data, f, indent=4)
|
44 |
-
st.success(f"Data has been saved to {filename}")
|
45 |
-
|
46 |
# Scrape Website with BeautifulSoup
|
47 |
def scrape_website(url):
|
48 |
visited_urls = set()
|
@@ -122,7 +116,7 @@ def create_faiss_with_uuid(text_chunks):
|
|
122 |
return unique_id, faiss_directory # Return the UUID and the directory path
|
123 |
|
124 |
# Build Conversational Chain
|
125 |
-
def get_conversational_chain(
|
126 |
prompt_template = """
|
127 |
Answer the question as detailed as possible from the provided context. If the answer is not in
|
128 |
provided context, just say, "answer is not available in the context." Don't provide the wrong answer.\n\n
|
@@ -131,27 +125,23 @@ def get_conversational_chain(memory):
|
|
131 |
|
132 |
Answer:
|
133 |
"""
|
134 |
-
model = Ollama(model="
|
135 |
prompt = PromptTemplate(template=prompt_template, input_variables=["context", "question"])
|
136 |
|
137 |
-
|
138 |
-
chain = load_qa_chain(model, chain_type="stuff", prompt=prompt, memory=memory)
|
139 |
|
140 |
return chain
|
141 |
|
142 |
# Handle User Input and Process Questions with UUID-based FAISS Index
|
143 |
-
def user_input(user_question, faiss_directory
|
144 |
# Load the FAISS index based on the given directory (UUID-based)
|
145 |
new_db = FAISS.load_local(faiss_directory, embeddings, allow_dangerous_deserialization=True)
|
146 |
|
147 |
# Perform similarity search and answer the user's question
|
148 |
docs = new_db.similarity_search(user_question)
|
149 |
-
chain = get_conversational_chain(
|
150 |
|
151 |
-
# Update memory with the question and response
|
152 |
response = chain({"input_documents": docs, "question": user_question}, return_only_outputs=True)
|
153 |
-
memory.save_context({"input": user_question}, {"output": response["output_text"]})
|
154 |
-
|
155 |
st.write("Reply: ", response["output_text"])
|
156 |
|
157 |
# Main Function for Streamlit App
|
@@ -159,14 +149,11 @@ def main():
|
|
159 |
st.set_page_config("Chat PDF & URL", layout="wide")
|
160 |
st.header("Chat with PDF or URL using Ollama π")
|
161 |
|
162 |
-
# Initialize memory for conversation history
|
163 |
-
memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)
|
164 |
-
|
165 |
user_question = st.text_input("Ask a Question from the Processed Data")
|
166 |
|
167 |
if user_question and 'faiss_directory' in st.session_state:
|
168 |
faiss_directory = st.session_state['faiss_directory']
|
169 |
-
user_input(user_question, faiss_directory
|
170 |
|
171 |
with st.sidebar:
|
172 |
st.title("Menu:")
|
@@ -182,10 +169,6 @@ def main():
|
|
182 |
text_chunks = get_text_chunks(raw_text)
|
183 |
unique_id, faiss_directory = create_faiss_with_uuid(text_chunks)
|
184 |
st.session_state['faiss_directory'] = faiss_directory
|
185 |
-
|
186 |
-
# Save the cleaned PDF data to a document
|
187 |
-
save_data_to_document({"pdf_data": raw_text}, f"pdf_data_{unique_id}.json")
|
188 |
-
|
189 |
st.success("PDF data is ready for queries!")
|
190 |
else:
|
191 |
st.error("No PDF files were uploaded.")
|
@@ -206,10 +189,7 @@ def main():
|
|
206 |
text_chunks = get_text_chunks(raw_text)
|
207 |
unique_id, faiss_directory = create_faiss_with_uuid(text_chunks)
|
208 |
st.session_state['faiss_directory'] = faiss_directory
|
209 |
-
|
210 |
-
# Save the cleaned URL data to a document
|
211 |
-
save_data_to_document({"url_data": scraped_data}, f"url_data_{unique_id}.json")
|
212 |
-
|
213 |
st.success("Scraped data is ready for queries!")
|
214 |
except Exception as e:
|
215 |
st.error(f"Failed to scrape or process data: {e}")
|
|
|
15 |
from langchain_community.embeddings import HuggingFaceEmbeddings
|
16 |
import nltk
|
17 |
from urllib.parse import urljoin, urlparse
|
18 |
+
import faiss
|
19 |
|
20 |
# Load environment variables (if needed for API keys)
|
21 |
load_dotenv()
|
|
|
37 |
cleaned_text = " ".join([word for word in tokens if word not in STOPWORDS]) # Remove stopwords
|
38 |
return cleaned_text
|
39 |
|
|
|
|
|
|
|
|
|
|
|
|
|
40 |
# Scrape Website with BeautifulSoup
|
41 |
def scrape_website(url):
|
42 |
visited_urls = set()
|
|
|
116 |
return unique_id, faiss_directory # Return the UUID and the directory path
|
117 |
|
118 |
# Build Conversational Chain
|
119 |
+
def get_conversational_chain():
|
120 |
prompt_template = """
|
121 |
Answer the question as detailed as possible from the provided context. If the answer is not in
|
122 |
provided context, just say, "answer is not available in the context." Don't provide the wrong answer.\n\n
|
|
|
125 |
|
126 |
Answer:
|
127 |
"""
|
128 |
+
model = Ollama(model="qwen2.5:0.5b") # Initialize the Qwen2.5 (0.5B) model served through Ollama
|
129 |
prompt = PromptTemplate(template=prompt_template, input_variables=["context", "question"])
|
130 |
|
131 |
+
chain = load_qa_chain(model, chain_type="stuff", prompt=prompt)
|
|
|
132 |
|
133 |
return chain
|
134 |
|
135 |
# Handle User Input and Process Questions with UUID-based FAISS Index
|
136 |
+
def user_input(user_question, faiss_directory):
|
137 |
# Load the FAISS index based on the given directory (UUID-based)
|
138 |
new_db = FAISS.load_local(faiss_directory, embeddings, allow_dangerous_deserialization=True)
|
139 |
|
140 |
# Perform similarity search and answer the user's question
|
141 |
docs = new_db.similarity_search(user_question)
|
142 |
+
chain = get_conversational_chain()
|
143 |
|
|
|
144 |
response = chain({"input_documents": docs, "question": user_question}, return_only_outputs=True)
|
|
|
|
|
145 |
st.write("Reply: ", response["output_text"])
|
146 |
|
147 |
# Main Function for Streamlit App
|
|
|
149 |
st.set_page_config("Chat PDF & URL", layout="wide")
|
150 |
st.header("Chat with PDF or URL using Ollama π")
|
151 |
|
|
|
|
|
|
|
152 |
user_question = st.text_input("Ask a Question from the Processed Data")
|
153 |
|
154 |
if user_question and 'faiss_directory' in st.session_state:
|
155 |
faiss_directory = st.session_state['faiss_directory']
|
156 |
+
user_input(user_question, faiss_directory)
|
157 |
|
158 |
with st.sidebar:
|
159 |
st.title("Menu:")
|
|
|
169 |
text_chunks = get_text_chunks(raw_text)
|
170 |
unique_id, faiss_directory = create_faiss_with_uuid(text_chunks)
|
171 |
st.session_state['faiss_directory'] = faiss_directory
|
|
|
|
|
|
|
|
|
172 |
st.success("PDF data is ready for queries!")
|
173 |
else:
|
174 |
st.error("No PDF files were uploaded.")
|
|
|
189 |
text_chunks = get_text_chunks(raw_text)
|
190 |
unique_id, faiss_directory = create_faiss_with_uuid(text_chunks)
|
191 |
st.session_state['faiss_directory'] = faiss_directory
|
192 |
+
|
|
|
|
|
|
|
193 |
st.success("Scraped data is ready for queries!")
|
194 |
except Exception as e:
|
195 |
st.error(f"Failed to scrape or process data: {e}")
|