from langchain.chains import RetrievalQA
from langchain_qdrant import QdrantVectorStore
from langchain_openai import OpenAIEmbeddings, ChatOpenAI
from langchain.prompts import PromptTemplate
# Import additional libraries for the web application and audio processing
import streamlit as st
import speech_recognition as sr
from gtts import gTTS
from io import BytesIO
import base64
from streamlit_chat import message
import os
# API keys are read from the environment; OpenAIEmbeddings and ChatOpenAI pick
# up OPENAI_API_KEY automatically, so no reassignment is needed.
QDRANT_API_KEY = os.environ.get("Qdrant_API_KEY")
# Initialize the embedding model for vector representations of text
embed_model = OpenAIEmbeddings()
# Define the Qdrant URL for vector storage
qdrant_url = "https://ee4d124d-d295-4df3-ad6b-47fe60d3f80d.europe-west3-0.gcp.cloud.qdrant.io:6333"
# Create a Qdrant vector store from the document chunks and embeddings
# vectorstore = QdrantVectorStore.from_documents(chunks, embed_model, url=qdrant_url, api_key=QDRANT_API_KEY, collection_name="Pakistan_Constitution_eng")
# Read from the existing collection instead of re-indexing on every run
vectorstore = QdrantVectorStore.from_existing_collection(
    embedding=embed_model,
    url=qdrant_url,
    api_key=QDRANT_API_KEY,
    collection_name="Pakistan_Constitution_eng",
)
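# The commented-out from_documents call above references `chunks`, which is not
# defined in this script. A minimal sketch of how such chunks might have been
# produced (hypothetical helper and file name, assuming the PDF loader and text
# splitter from the LangChain ecosystem; kept commented out, like the indexing
# call itself, so it never runs here):
# from langchain_community.document_loaders import PyPDFLoader
# from langchain_text_splitters import RecursiveCharacterTextSplitter
#
# def build_chunks(pdf_path="constitution_of_pakistan.pdf"):
#     pages = PyPDFLoader(pdf_path).load()  # One Document per PDF page
#     splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
#     return splitter.split_documents(pages)  # Overlapping chunks for retrieval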
# Initialize the language model for chat interactions
llm = ChatOpenAI(model_name="gpt-4o-mini", temperature=0)
# Build the prompt template for the QA system
template = """You are a legal assistant providing accurate and concise information based on the Constitution of Pakistan.
You will receive questions in Urdu and you must answer in Urdu.
Use the following pieces of context to answer the question at the end.
Give accurate references to the articles and clauses.
If you don't know the answer, just say that you don't know; don't try to make up an answer.
Provide relevant citations or references to the exact articles or clauses in the Constitution.
Keep the answer as concise as possible. Always say "thanks!" in Urdu at the end of the answer.
{context}
Question: {question}
Helpful Answer:"""
# Define the prompt template for the retrieval QA chain
QA_CHAIN_PROMPT = PromptTemplate(input_variables=["context", "question"], template=template)
# Create a RetrievalQA chain combining the language model and vector store retriever
qa_chain = RetrievalQA.from_chain_type(
    llm,
    retriever=vectorstore.as_retriever(),
    chain_type_kwargs={"prompt": QA_CHAIN_PROMPT},
)
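# For reference, a direct (non-UI) call to the chain looks like this; the
# question is only an illustrative example:
# result = qa_chain.invoke({"query": "آئین کا آرٹیکل 25 کیا کہتا ہے؟"})
# print(result["result"])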
# Function to convert text to speech and return it as base64-encoded audio
def speak_urdu(text):
    tts = gTTS(text=text, lang='ur')  # Convert text to speech in Urdu
    audio_fp = BytesIO()  # Create a file-like object in memory
    tts.write_to_fp(audio_fp)  # Write the MP3 audio into the buffer
    audio_fp.seek(0)  # Reset the pointer to the beginning
    audio_bytes = audio_fp.read()  # Read the audio bytes
    audio_base64 = base64.b64encode(audio_bytes).decode()  # Encode the bytes as base64
    return audio_base64
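# Usage sketch: base64.b64decode(speak_urdu("شکریہ")) would yield the raw MP3
# bytes again; autoplay_audio() below instead embeds the base64 string
# directly in a data: URI inside an HTML <audio> tag.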
# Function to recognize Urdu speech from the microphone
def recognize_speech():
    recognizer = sr.Recognizer()  # Initialize the speech recognizer
    # sr.Microphone requires PyAudio and a microphone attached to the machine
    # running this script, so voice input only works for local runs.
    with sr.Microphone() as source:
        st.write("Listening...")  # Indicate that the app is listening
        audio = recognizer.listen(source)  # Listen for audio input
    try:
        # Recognize speech using Google's web API, in Urdu
        return recognizer.recognize_google(audio, language="ur-PK")
    except sr.UnknownValueError:
        st.error("Sorry, I couldn't understand your speech.")  # Unrecognized speech
    except sr.RequestError:
        st.error("Error: Unable to process your request.")  # API request failed
    return None
# Query the RetrievalQA chain with the user's question
def invoke_chatbot(user_input):
    response = qa_chain.invoke({"query": user_input})  # "query" is the chain's input key
    return response["result"]  # Return the generated answer
# Helper function to autoplay audio using a hidden HTML <audio> element
def autoplay_audio(audio_base64):
    audio_html = f"""
    <audio autoplay style="display:none;">
        <source src="data:audio/mp3;base64,{audio_base64}" type="audio/mp3">
    </audio>
    """
    st.markdown(audio_html, unsafe_allow_html=True)  # Render the audio tag in Streamlit
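# Note: recent Streamlit releases also support autoplay natively, which could
# replace the raw-HTML approach above (a hedged alternative, not used here):
# st.audio(base64.b64decode(audio_base64), format="audio/mp3", autoplay=True)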
# Initialize session state variables for chat history, input, and audio
if "history" not in st.session_state:
    st.session_state["history"] = []  # Chat history
if "input_text" not in st.session_state:
    st.session_state["input_text"] = ""  # User input text
if "voice_input" not in st.session_state:
    st.session_state["voice_input"] = ""  # Voice input text
if "audio_playback" not in st.session_state:
    st.session_state["audio_playback"] = None  # Current audio for playback
# Clear-chat function to reset session state
def clear_chat():
    st.session_state["history"] = []  # Clear chat history
    st.session_state["input_text"] = ""  # Clear user input text
    st.session_state["audio_playback"] = None  # Clear pending audio playback
# Sidebar for developer details, disclaimer, and copyright information
st.sidebar.image("c.png", use_container_width=True)
st.sidebar.title("Developer Details")
st.sidebar.write("Developed by: **Abdul S.**")
st.sidebar.write("@XevenSolutions")
st.sidebar.write("LinkedIn: [linkedin.com/in/kushikhlaq](https://www.linkedin.com/in/kushikhlaq/)")
st.sidebar.title("Disclaimer")
st.sidebar.write("This chatbot provides information on the Constitution of Pakistan in Urdu. "
                 "Please note that the information may not be comprehensive or up to date. "
                 "For official references, please consult legal professionals.")
st.sidebar.title("Copyright")
st.sidebar.write("© 2024 Abdul Samad. All rights reserved.")
# Streamlit app layout for user interaction
st.title("Urdu Chatbot - Constitution of Pakistan")
st.write("Chat with the Constitution of Pakistan in Urdu, either by typing or speaking.")
# Button to clear the chat
if st.button("Clear Chat"):
    clear_chat()  # Reset the session state
# Handle user text input submitted via the text box
def handle_user_input():
    user_input = st.session_state["input_text"]  # Get the user input from session state
    if user_input:
        # Pass the user input to the QA chain
        chatbot_response = invoke_chatbot(user_input)
        # Store the exchange in the chat history
        st.session_state["history"].append({"user": user_input, "bot": chatbot_response})
        # Generate audio for the chatbot response
        audio_base64 = speak_urdu(chatbot_response)
        st.session_state["audio_playback"] = audio_base64  # Store the audio for playback
        # Clear the input after submission (allowed inside a widget callback)
        st.session_state["input_text"] = ""
# Text input field with a callback to handle user input; the widget's value
# comes from session state via its key, so no `value` argument is passed
# (passing both would trigger a Streamlit warning)
st.text_input(
    "Type your question in Urdu",
    key="input_text",  # Key bound to session state
    on_change=handle_user_input,  # Callback invoked when the value changes
)
# Voice input option: recognize speech and handle it like a typed question
if st.button("Speak Now"):
    recognized_text = recognize_speech()  # Get the recognized speech, or None on failure
    if recognized_text:
        # Handle the voice input by appending it to the chat history
        chatbot_response = invoke_chatbot(recognized_text)
        st.session_state["history"].append({"user": recognized_text, "bot": chatbot_response})
        # Generate audio for the chatbot response
        audio_base64 = speak_urdu(chatbot_response)
        st.session_state["audio_playback"] = audio_base64  # Store the audio for playback
        st.session_state["voice_input"] = ""  # Clear the voice input after processing
# Display the chat history using the streamlit_chat component
for idx, chat in enumerate(st.session_state["history"]):
    message(chat["user"], is_user=True, key=f"user_{idx}")  # Display the user message
    message(chat["bot"], key=f"bot_{idx}")  # Display the bot response
# Autoplay audio for the latest chatbot response, then reset it so it does not
# replay on the next rerun
if st.session_state["audio_playback"]:
    autoplay_audio(st.session_state["audio_playback"])  # Play the audio
    st.session_state["audio_playback"] = None  # Reset the playback state