# Urdu voice/text chatbot over the Constitution of Pakistan.
# Stack: Streamlit UI + LangChain RetrievalQA + Qdrant vector store + gTTS audio.
from langchain.chains import RetrievalQA
from langchain_qdrant import QdrantVectorStore
from langchain_community.embeddings import OpenAIEmbeddings
from langchain_community.chat_models import ChatOpenAI
from langchain.prompts import PromptTemplate
# Import additional libraries for web application and audio processing
import streamlit as st
import gradio as gr
import speech_recognition as sr
from gtts import gTTS
from io import BytesIO
import base64
from streamlit_chat import message
import os
# --- Environment & model setup -------------------------------------------
# Fail fast with a clear message when the OpenAI key is missing: the previous
# `os.environ['OPENAI_API_KEY'] = os.environ.get("OPENAI_API_KEY")` pattern
# raised an opaque TypeError ("str expected, not NoneType") if the variable
# was unset, because environ values must be strings.
_openai_key = os.environ.get("OPENAI_API_KEY")
if not _openai_key:
    raise RuntimeError("OPENAI_API_KEY environment variable is not set.")
os.environ["OPENAI_API_KEY"] = _openai_key

# Qdrant credentials (note: the *environment variable* is named Qdrant_API_KEY).
QDRANT_API_KEY = os.environ.get("Qdrant_API_KEY")

# Embedding model used to vectorize queries for similarity search.
embed_model = OpenAIEmbeddings()

# Qdrant cluster that holds the pre-built constitution embeddings.
qdrant_url = "https://ee4d124d-d295-4df3-ad6b-47fe60d3f80d.europe-west3-0.gcp.cloud.qdrant.io:6333"

# Connect to the existing collection; ingestion was done separately via
# QdrantVectorStore.from_documents(chunks, embed_model, ...).
vectorstore = QdrantVectorStore.from_existing_collection(
    embedding=embed_model,
    url=qdrant_url,
    api_key=QDRANT_API_KEY,
    collection_name="Pakistan_Constitution_eng",
)

# Chat model; temperature 0 keeps legal answers deterministic.
llm = ChatOpenAI(model_name="gpt-4o-mini", temperature=0)

# Prompt contract: questions arrive in Urdu, answers are returned in Urdu
# with citations to the relevant articles/clauses.
template = """You are a legal assistant providing accurate and concise information based on the Constitution of Pakistan.
You will receive questions in Urdu language and you have to answer in Urdu language.
Use the following pieces of context to answer the question at the end.
Give accurate references to the articles and clauses.
If you don't know the answer, just say that you don't know, don't try to make up an answer.
Provide proper relevant citations or references to the exact articles or clauses in the Constitution.
Keep the answer as concise as possible. Always say "thanks!" in urdu at the end of the answer.
{context}
Question: {question}
Helpful Answer:"""

# Prompt template consumed by the retrieval QA chain.
QA_CHAIN_PROMPT = PromptTemplate(input_variables=["context", "question"], template=template)

# Retrieval-augmented QA chain: fetch relevant chunks, then answer with the LLM.
qa_chain = RetrievalQA.from_chain_type(
    llm,
    retriever=vectorstore.as_retriever(),
    chain_type_kwargs={"prompt": QA_CHAIN_PROMPT},
)
# Function to convert text to speech and return as base64-encoded audio
def speak_urdu(text):
    """Synthesize Urdu speech for *text* and return it as base64-encoded MP3 data."""
    buffer = BytesIO()                                # in-memory audio sink
    gTTS(text=text, lang='ur').write_to_fp(buffer)    # render speech without touching disk
    return base64.b64encode(buffer.getvalue()).decode()
# Function to recognize Urdu speech
def recognize_speech():
    """Capture microphone audio and transcribe it as Urdu text.

    Returns the transcript on success, or an error message string when the
    speech is unintelligible or the recognition service cannot be reached.
    """
    recognizer = sr.Recognizer()
    with sr.Microphone() as mic:
        st.write("Listening...")           # feedback while blocking on the mic
        captured = recognizer.listen(mic)  # record one utterance
    try:
        return recognizer.recognize_google(captured, language="ur")
    except sr.UnknownValueError:
        return "Sorry, I couldn't understand your speech."
    except sr.RequestError:
        return "Error: Unable to process your request."
# Placeholder function for Gradio chatbot interaction
def invoke_chatbot(user_input):
    """Run *user_input* through the retrieval QA chain and return the answer text."""
    return qa_chain.invoke(user_input)["result"]
# Helper function to autoplay audio using HTML
def autoplay_audio(audio_base64):
    """Inject a hidden, autoplaying <audio> element carrying the given base64 MP3.

    Fix: `controls` is an HTML *boolean* attribute — writing controls="false"
    still enables the controls because the attribute is present. Since the
    element is hidden anyway, the attribute is dropped entirely.
    """
    audio_html = f"""
    <audio autoplay="true" style="display:none;">
        <source src="data:audio/mp3;base64,{audio_base64}" type="audio/mp3">
    </audio>
    """
    # unsafe_allow_html is required for Streamlit to render raw HTML.
    st.markdown(audio_html, unsafe_allow_html=True)
# Initialize session state variables for chat history and input
# Seed st.session_state with the keys this app relies on (first run only);
# setdefault leaves existing values untouched on Streamlit reruns.
st.session_state.setdefault("history", [])          # list of {"user": ..., "bot": ...} turns
st.session_state.setdefault("input_text", "")       # current text-box contents
st.session_state.setdefault("voice_input", "")      # last recognized speech (transient)
st.session_state.setdefault("audio_playback", None) # base64 audio queued for autoplay
# Clear chat function to reset session state
def clear_chat():
    """Reset the conversation: wipe history, pending input, and queued audio."""
    st.session_state.update(history=[], input_text="", audio_playback=None)
# Sidebar for developer details, disclaimer, and copyright information
# --- Sidebar: credits, disclaimer, copyright ------------------------------
with st.sidebar:
    st.image("c.png", use_column_width=True)
    st.title("Developer Details")
    st.write("Developed by: **Abdul S.**")
    st.write("@XevenSolutions")
    st.write("LinkedIn: [linkedin.com/in/kushikhlaq](https://www.linkedin.com/in/kushikhlaq/)")
    st.title("Disclaimer")
    st.write("This chatbot provides information on the Constitution of Pakistan in Urdu. "
             "Please note that the information may not be comprehensive or up to date. "
             "For official references, please consult legal professionals.")
    st.title("Copyright")
    st.write("© 2024 Abdul Samad. All rights reserved.")

# --- Main page header -----------------------------------------------------
st.title("Urdu Chatbot - Constitution of Pakistan")
st.write("Chat with the Constitution of Pakistan in Urdu, either by typing or speaking.")

# Wipe the conversation on demand.
if st.button("Clear Chat"):
    clear_chat()
# Handle user text input
def handle_user_input():
    """on_change callback for the text box: answer the query, store the turn,
    queue spoken audio, and clear the input field."""
    query = st.session_state["input_text"]
    if not query:  # nothing typed — nothing to do
        return
    reply = invoke_chatbot(query)
    st.session_state["history"].append({"user": query, "bot": reply})
    # Queue the Urdu TTS rendering of the answer for autoplay on this rerun.
    st.session_state["audio_playback"] = speak_urdu(reply)
    # Safe to reset the widget's value here because we are inside its callback.
    st.session_state["input_text"] = ""
# Text input field with callback to handle user input
# Text entry; submitting fires handle_user_input via the on_change callback.
st.text_input(
    "Type your question in Urdu",
    value=st.session_state["input_text"],
    key="input_text",
    on_change=handle_user_input,
)

# Voice entry: transcribe speech, answer it, and queue the spoken response.
if st.button("Speak Now"):
    spoken_query = recognize_speech()
    if spoken_query:
        bot_reply = invoke_chatbot(spoken_query)
        st.session_state["history"].append({"user": spoken_query, "bot": bot_reply})
        st.session_state["audio_playback"] = speak_urdu(bot_reply)
        st.session_state["voice_input"] = ""  # transient value no longer needed

# Render the transcript, oldest turn first; keys keep widgets stable across reruns.
for turn_index, turn in enumerate(st.session_state["history"]):
    message(turn["user"], is_user=True, key=f"user_{turn_index}")
    message(turn["bot"], key=f"bot_{turn_index}")

# Play the most recent answer once, then clear it so later reruns stay silent.
if st.session_state["audio_playback"]:
    autoplay_audio(st.session_state["audio_playback"])
    st.session_state["audio_playback"] = None