from langchain.chains import RetrievalQA
from langchain_qdrant import QdrantVectorStore
from langchain_community.embeddings import OpenAIEmbeddings
from langchain_community.chat_models import ChatOpenAI
from langchain.prompts import PromptTemplate

# Additional libraries for the web application and audio processing
import streamlit as st
import gradio as gr
import speech_recognition as sr
from gtts import gTTS
from io import BytesIO
import base64
from streamlit_chat import message
import os

# Fail fast with a clear message if the OpenAI key is missing.
# (The original `os.environ['OPENAI_API_KEY'] = os.environ.get(...)` was a
# no-op when the key existed and raised a confusing TypeError — you cannot
# assign None into os.environ — when it did not.)
if not os.environ.get("OPENAI_API_KEY"):
    raise EnvironmentError("OPENAI_API_KEY environment variable is not set.")

Qdrant_API_KEY = os.environ.get("Qdrant_API_KEY")

# Embedding model used to vectorize text for retrieval
embed_model = OpenAIEmbeddings()

# Qdrant cluster endpoint used for vector storage
qdrant_url = "https://ee4d124d-d295-4df3-ad6b-47fe60d3f80d.europe-west3-0.gcp.cloud.qdrant.io:6333"

# Connect to the pre-populated collection (ingestion was done separately):
# vectorstore = QdrantVectorStore.from_documents(chunks, embed_model, url=qdrant_url, api_key=Qdrant_API_KEY, collection_name="Pakistan_Constitution_eng")
vectorstore = QdrantVectorStore.from_existing_collection(
    embedding=embed_model,
    url=qdrant_url,
    api_key=Qdrant_API_KEY,
    collection_name="Pakistan_Constitution_eng",
)  # read from existing collection

# Language model used for chat interactions; temperature 0 for deterministic,
# citation-style answers
llm = ChatOpenAI(model_name="gpt-4o-mini", temperature=0)

# Prompt template for the QA system (answers in Urdu with article citations)
template = """You are a legal assistant providing accurate and concise information based on the Constitution of Pakistan. You will receive questions in Urdu language and you have to answer in Urdu language. Use the following pieces of context to answer the question at the end. Give accurate references to the articles and clauses. If you don't know the answer, just say that you don't know, don't try to make up an answer. Provide proper relevant citations or references to the exact articles or clauses in the Constitution. Keep the answer as concise as possible. 
Always say "thanks!" in urdu at the end of the answer. {context} Question: {question} Helpful Answer:"""

# Prompt object wired into the retrieval QA chain
QA_CHAIN_PROMPT = PromptTemplate(input_variables=["context", "question"], template=template)

# RetrievalQA chain: retrieves constitution chunks from Qdrant and feeds them
# to the LLM through the prompt above
qa_chain = RetrievalQA.from_chain_type(
    llm,
    retriever=vectorstore.as_retriever(),
    chain_type_kwargs={"prompt": QA_CHAIN_PROMPT},
)


def speak_urdu(text):
    """Synthesize Urdu speech for *text* and return it as base64-encoded MP3.

    The base64 string is later embedded in a data-URI <audio> tag for
    in-browser playback.
    """
    tts = gTTS(text=text, lang='ur')  # Convert text to speech in Urdu
    audio_fp = BytesIO()              # In-memory file-like buffer
    tts.write_to_fp(audio_fp)         # Stream the MP3 bytes into the buffer
    # getvalue() returns the whole buffer regardless of the stream position,
    # so no seek(0)/read() dance is needed.
    return base64.b64encode(audio_fp.getvalue()).decode()


def recognize_speech():
    """Capture microphone audio and transcribe it as Urdu text.

    Returns the recognized text, or an English error message string when the
    speech is unintelligible or the recognition service cannot be reached.
    """
    recognizer = sr.Recognizer()  # Initialize the speech recognizer
    with sr.Microphone() as source:
        st.write("Listening...")            # Indicate that the app is listening
        audio = recognizer.listen(source)   # Block until audio is captured
    try:
        # Recognize speech using Google's API in Urdu
        return recognizer.recognize_google(audio, language="ur")
    except sr.UnknownValueError:
        # Audio captured but not understood
        return "Sorry, I couldn't understand your speech."
    except sr.RequestError:
        # Recognition service unreachable or quota exceeded
        return "Error: Unable to process your request."
# Chatbot invocation helper
def invoke_chatbot(user_input):
    """Run the RetrievalQA chain on *user_input* and return the answer text."""
    response = qa_chain.invoke(user_input)  # Invoke the QA chain with user input
    return response["result"]               # Extract the answer from the chain output


def autoplay_audio(audio_base64):
    """Render an autoplaying HTML <audio> element for a base64-encoded MP3.

    BUG FIX: the original f-string was empty, so no audio element was ever
    emitted and playback silently did nothing. The MP3 is embedded as a
    data URI so no file needs to be served.
    """
    audio_html = f"""
        <audio autoplay="true">
            <source src="data:audio/mp3;base64,{audio_base64}" type="audio/mp3">
        </audio>
    """
    st.markdown(audio_html, unsafe_allow_html=True)  # Render audio in Streamlit


# Initialize session state variables for chat history and input
if "history" not in st.session_state:
    st.session_state["history"] = []           # Chat history (list of user/bot dicts)
if "input_text" not in st.session_state:
    st.session_state["input_text"] = ""        # User input text
if "voice_input" not in st.session_state:
    st.session_state["voice_input"] = ""       # Voice input text
if "audio_playback" not in st.session_state:
    st.session_state["audio_playback"] = None  # Base64 audio pending playback


def clear_chat():
    """Reset chat history, typed input, and pending audio in session state."""
    st.session_state["history"] = []
    st.session_state["input_text"] = ""
    st.session_state["audio_playback"] = None


# Sidebar: developer details, disclaimer, and copyright information
st.sidebar.image("c.png", use_column_width=True)
st.sidebar.title("Developer Details")
st.sidebar.write("Developed by: **Abdul S.**")
st.sidebar.write("@XevenSolutions")
st.sidebar.write("LinkedIn: [linkedin.com/in/kushikhlaq](https://www.linkedin.com/in/kushikhlaq/)")
st.sidebar.title("Disclaimer")
st.sidebar.write("This chatbot provides information on the Constitution of Pakistan in Urdu. "
                 "Please note that the information may not be comprehensive or up to date. "
                 "For official references, please consult legal professionals.")
st.sidebar.title("Copyright")
st.sidebar.write("© 2024 Abdul Samad. All rights reserved.")

# Main layout
st.title("Urdu Chatbot - Constitution of Pakistan")
st.write("Chat with the Constitution of Pakistan in Urdu, either by typing or speaking.")

# Button to clear chat
if st.button("Clear Chat"):
    clear_chat()


def handle_user_input():
    """on_change callback: answer the typed question and queue its audio."""
    user_input = st.session_state["input_text"]
    if user_input:
        chatbot_response = invoke_chatbot(user_input)
        # Append the exchange to the chat history
        st.session_state["history"].append({"user": user_input, "bot": chatbot_response})
        # Generate Urdu speech for the response and queue it for playback
        st.session_state["audio_playback"] = speak_urdu(chatbot_response)
        # Clear the input box after submission (allowed inside the callback)
        st.session_state["input_text"] = ""


# Text input field. The widget value is driven solely by the session-state
# key; passing value= together with key= triggers a Streamlit state/default
# conflict warning, so the redundant value= argument is omitted.
st.text_input(
    "Type your question in Urdu",
    key="input_text",               # Session-state key backing the widget
    on_change=handle_user_input,    # Callback fired when the input changes
)

# Voice input: record speech, answer it, and queue the spoken response
if st.button("Speak Now"):
    recognized_text = recognize_speech()
    if recognized_text:
        chatbot_response = invoke_chatbot(recognized_text)
        st.session_state["history"].append({"user": recognized_text, "bot": chatbot_response})
        st.session_state["audio_playback"] = speak_urdu(chatbot_response)
        st.session_state["voice_input"] = ""  # Clear voice input after processing

# Display chat history using the streamlit_chat component
for idx, chat in enumerate(st.session_state["history"]):
    message(chat["user"], is_user=True, key=f"user_{idx}")  # User message
    message(chat["bot"], key=f"bot_{idx}")                  # Bot response

# Autoplay audio for the latest chatbot response, then reset the queue so the
# clip does not replay on the next rerun
if st.session_state["audio_playback"]:
    autoplay_audio(st.session_state["audio_playback"])
    st.session_state["audio_playback"] = None