# app.py — Urdu voice/text chatbot over the Constitution of Pakistan.
# (Hugging Face page residue removed from code: "seek007's picture /
#  Create app.py / f337b6a verified" was upload metadata, not Python,
#  and made the file unparseable.)
from langchain.chains import RetrievalQA
from langchain_qdrant import QdrantVectorStore
from langchain_community.embeddings import OpenAIEmbeddings
from langchain_community.chat_models import ChatOpenAI
from langchain.prompts import PromptTemplate
# Import additional libraries for web application and audio processing
import streamlit as st
import gradio as gr
import speech_recognition as sr
from gtts import gTTS
from io import BytesIO
import base64
from streamlit_chat import message
import os
# --- Environment & model setup ---------------------------------------------
# Read the OpenAI key and fail fast with a clear message. The original
# `os.environ['OPENAI_API_KEY'] = os.environ.get("OPENAI_API_KEY")` was a
# no-op when the variable existed and raised a confusing
# `TypeError: str expected, not NoneType` when it did not.
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")
if not OPENAI_API_KEY:
    raise EnvironmentError("OPENAI_API_KEY environment variable is not set.")
os.environ["OPENAI_API_KEY"] = OPENAI_API_KEY

# Qdrant cloud API key (env var name kept as-is for deployment compatibility).
QDRANT_API_KEY = os.environ.get("Qdrant_API_KEY")

# Embedding model used to vectorize queries for similarity search.
embed_model = OpenAIEmbeddings()

# Qdrant cluster that already holds the embedded Constitution text.
qdrant_url = "https://ee4d124d-d295-4df3-ad6b-47fe60d3f80d.europe-west3-0.gcp.cloud.qdrant.io:6333"

# Connect to the pre-built collection (ingestion was done offline):
# vectorstore = QdrantVectorStore.from_documents(chunks, embed_model, url=qdrant_url, api_key=QDRANT_API_KEY, collection_name="Pakistan_Constitution_eng")
vectorstore = QdrantVectorStore.from_existing_collection(
    embedding=embed_model,
    url=qdrant_url,
    api_key=QDRANT_API_KEY,
    collection_name="Pakistan_Constitution_eng",
)

# Deterministic (temperature=0) chat model for legal Q&A.
llm = ChatOpenAI(model_name="gpt-4o-mini", temperature=0)
# ---------------------------------------------------------------------------
# Prompt and retrieval chain
# ---------------------------------------------------------------------------
# System instructions for the legal assistant; {context} and {question} are
# filled in by the RetrievalQA chain at query time.
template = """You are a legal assistant providing accurate and concise information based on the Constitution of Pakistan.
You will receive questions in Urdu language and you have to answer in Urdu language.
Use the following pieces of context to answer the question at the end.
Give accurate references to the articles and clauses.
If you don't know the answer, just say that you don't know, don't try to make up an answer.
Provide proper relevant citations or references to the exact articles or clauses in the Constitution.
Keep the answer as concise as possible. Always say "thanks!" in urdu at the end of the answer.
{context}
Question: {question}
Helpful Answer:"""

# Wrap the template; input variables ("context", "question") are inferred
# from the {placeholders} by from_template.
QA_CHAIN_PROMPT = PromptTemplate.from_template(template)

# Retrieval-augmented QA: fetch relevant chunks from Qdrant, then answer
# with the LLM using the prompt above.
qa_chain = RetrievalQA.from_chain_type(
    llm,
    retriever=vectorstore.as_retriever(),
    chain_type_kwargs={"prompt": QA_CHAIN_PROMPT},
)
# Text-to-speech helper: Urdu gTTS audio rendered entirely in memory.
def speak_urdu(text):
    """Synthesize Urdu speech for *text* and return it as base64-encoded MP3."""
    buffer = BytesIO()
    # Render the Urdu TTS audio straight into the in-memory buffer.
    gTTS(text=text, lang='ur').write_to_fp(buffer)
    # getvalue() returns the full contents regardless of stream position,
    # so no explicit seek(0)/read() dance is needed.
    return base64.b64encode(buffer.getvalue()).decode()
# Microphone capture + Urdu transcription via Google's Web Speech API.
def recognize_speech():
    """Listen on the default microphone and return recognized Urdu text.

    On failure, returns a human-readable English error string instead of
    raising (callers display whatever comes back).
    """
    recognizer = sr.Recognizer()
    with sr.Microphone() as mic:
        st.write("Listening...")  # give the user feedback while recording
        captured = recognizer.listen(mic)
    try:
        # language="ur" selects the Urdu recognition model.
        return recognizer.recognize_google(captured, language="ur")
    except sr.UnknownValueError:
        # Audio was captured but could not be transcribed.
        return "Sorry, I couldn't understand your speech."
    except sr.RequestError:
        # The recognition service could not be reached or errored out.
        return "Error: Unable to process your request."
# Route one user question through the retrieval QA chain.
def invoke_chatbot(user_input):
    """Run *user_input* through the RetrievalQA chain and return the answer text."""
    chain_output = qa_chain.invoke(user_input)
    # RetrievalQA returns a dict; the generated answer lives under "result".
    return chain_output["result"]
# Helper to autoplay the synthesized answer via a hidden HTML <audio> tag.
def autoplay_audio(audio_base64):
    """Inject a hidden, autoplaying <audio> element for *audio_base64* (MP3).

    Note: `controls` is an HTML boolean attribute — its mere presence enables
    it, so the original `controls="false"` actually meant "show controls".
    Since the element is hidden anyway, the attribute is simply omitted.
    """
    audio_html = f"""
    <audio autoplay="true" style="display:none;">
        <source src="data:audio/mp3;base64,{audio_base64}" type="audio/mp3">
    </audio>
    """
    # unsafe_allow_html is required for Streamlit to render raw HTML.
    st.markdown(audio_html, unsafe_allow_html=True)
# ---------------------------------------------------------------------------
# Session state defaults (survive Streamlit reruns)
# ---------------------------------------------------------------------------
_SESSION_DEFAULTS = {
    "history": [],           # list of {"user": ..., "bot": ...} chat turns
    "input_text": "",        # current text-box contents
    "voice_input": "",       # last recognized speech (transient)
    "audio_playback": None,  # base64 MP3 queued for autoplay, or None
}
for _key, _default in _SESSION_DEFAULTS.items():
    if _key not in st.session_state:
        st.session_state[_key] = _default
# Reset the conversation in one place.
def clear_chat():
    """Wipe chat history, the pending input text, and any queued audio."""
    for key, reset_value in (("history", []), ("input_text", ""), ("audio_playback", None)):
        st.session_state[key] = reset_value
# Sidebar for developer details, disclaimer, and copyright information.
# Statement order here is the on-screen order Streamlit renders.
# NOTE(review): use_column_width is deprecated in newer Streamlit releases
# in favor of use_container_width — confirm the deployed version.
st.sidebar.image("c.png", use_column_width=True)
st.sidebar.title("Developer Details")
st.sidebar.write("Developed by: **Abdul S.**")
st.sidebar.write("@XevenSolutions")
st.sidebar.write("LinkedIn: [linkedin.com/in/kushikhlaq](https://www.linkedin.com/in/kushikhlaq/)")
st.sidebar.title("Disclaimer")
# Adjacent string literals are concatenated into one paragraph.
st.sidebar.write("This chatbot provides information on the Constitution of Pakistan in Urdu. "
                 "Please note that the information may not be comprehensive or up to date. "
                 "For official references, please consult legal professionals.")
st.sidebar.title("Copyright")
st.sidebar.write("© 2024 Abdul Samad. All rights reserved.")
# Main page layout for user interaction.
st.title("Urdu Chatbot - Constitution of Pakistan")
st.write("Chat with the Constitution of Pakistan in Urdu, either by typing or speaking.")
# Button to clear chat; clicking reruns the script with the button True.
if st.button("Clear Chat"):
    clear_chat()  # reset history, input text, and queued audio
# Callback for the text box: answer the typed question and queue audio.
def handle_user_input():
    """on_change callback: run the typed question through the QA chain."""
    question = st.session_state["input_text"]
    if not question:
        return  # nothing typed — ignore the callback
    answer = invoke_chatbot(question)
    # Record the exchange so the history loop below can render it.
    st.session_state["history"].append({"user": question, "bot": answer})
    # Queue spoken (Urdu TTS) playback of the answer.
    st.session_state["audio_playback"] = speak_urdu(answer)
    # Clearing the widget's key here is legal: callbacks run before the
    # next rerun re-instantiates the widget.
    st.session_state["input_text"] = ""
# Text input bound to session state via its key; handle_user_input fires when
# the user submits new text. The `value=` argument is intentionally omitted:
# supplying both `value=` and a session-state-backed `key=` makes Streamlit
# emit a "widget was created with a default value but also had its value set
# via the Session State API" warning — the key alone carries the state.
st.text_input(
    "Type your question in Urdu",
    key="input_text",              # binds the widget to st.session_state["input_text"]
    on_change=handle_user_input,   # callback runs before the next rerun
)
# Voice input: record, transcribe, and answer — mirroring the text flow.
if st.button("Speak Now"):
    recognized_text = recognize_speech()
    # recognize_speech() reports failures as English error strings ("Sorry...",
    # "Error: ..."); previously those were fed to the Urdu QA chain and logged
    # as if the user had asked them. Guard against that and surface the error.
    if recognized_text and not recognized_text.startswith(("Sorry", "Error:")):
        chatbot_response = invoke_chatbot(recognized_text)
        st.session_state["history"].append({"user": recognized_text, "bot": chatbot_response})
        # Queue spoken (Urdu TTS) playback of the answer.
        st.session_state["audio_playback"] = speak_urdu(chatbot_response)
        st.session_state["voice_input"] = ""  # clear transient voice buffer
    else:
        st.warning(recognized_text)  # show the recognition error to the user
# Render the conversation so far (oldest first) with streamlit_chat bubbles.
for turn_index, turn in enumerate(st.session_state["history"]):
    message(turn["user"], is_user=True, key=f"user_{turn_index}")  # user bubble
    message(turn["bot"], key=f"bot_{turn_index}")                  # bot bubble
# Play any audio queued for the latest answer, then discard it so a rerun
# does not replay the same clip.
if st.session_state["audio_playback"]:
    autoplay_audio(st.session_state["audio_playback"])
    st.session_state["audio_playback"] = None