import asyncio
import os

import gradio as gr
import edge_tts
from huggingface_hub import hf_hub_download
from faster_whisper import WhisperModel

# Build a CUDA-enabled llama-cpp-python before importing Llama so the GPU build
# is the one that actually gets loaded.
os.environ["CUDACXX"] = "/usr/local/cuda/bin/nvcc"
os.system('python -m unidic download')
os.system('CMAKE_ARGS="-DLLAMA_CUBLAS=on" pip install llama-cpp-python==0.2.11 --verbose')

from llama_cpp import Llama
from utilsasync import get_sentence, tts_interface

# The device to load the model onto.
#
# Available device types:
# "cuda" - NVIDIA GPU
# "cpu"  - Plain CPU
# "mps"  - Apple silicon
device = "cuda"

# Load the Mistral LLM (quantized GGUF) and offload most layers to the GPU
print("Loading Mistral LLM")
hf_hub_download(
    repo_id="TheBloke/Mistral-7B-Instruct-v0.1-GGUF",
    local_dir=".",
    filename="mistral-7b-instruct-v0.1.Q5_K_M.gguf",
)
mistral_model_path = "./mistral-7b-instruct-v0.1.Q5_K_M.gguf"
mistral_llm = Llama(
    model_path=mistral_model_path,
    n_gpu_layers=35,
    max_new_tokens=256,
    context_window=4096,
    n_ctx=4096,
    n_batch=128,
    verbose=False,
)

# Load the Whisper ASR model
print("Loading Whisper ASR")
whisper_model = WhisperModel("large-v3", device="cpu", compute_type="float32")


# Get all available voices from edge_tts
async def get_voices():
    voices = await edge_tts.list_voices()
    return {f"{v['ShortName']} - {v['Locale']} ({v['Gender']})": v['ShortName'] for v in voices}


# Triggered on text submit: appends the user message to the chat history and clears the textbox
def add_text(chatbot_history, text):
    chatbot_history = [] if chatbot_history is None else chatbot_history
    chatbot_history = chatbot_history + [(text, None)]
    return chatbot_history, gr.update(value="", interactive=True)


# Triggered on voice submit: transcribes the recording and appends it to the chat history
def add_audio(chatbot_history, audio):
    chatbot_history = [] if chatbot_history is None else chatbot_history
    # Transcribe with Whisper; only the first segment is used, stripped of surrounding whitespace
    segments, _ = whisper_model.transcribe(audio)
    text = list(segments)[0].text.strip()
    print("Transcribed text:", text)
    chatbot_history = chatbot_history + [(text, None)]
    return chatbot_history, gr.update(value="", interactive=True)


# Streams a response from the LLM sentence by sentence and turns each sentence into audio with the TTS model
def respond(chat_history, voice):
    if not voice:
        return None, gr.Warning("Please select a voice.")
    for sentence, chatbot_history in get_sentence(chat_history, mistral_llm):
        print("Inserting sentence to queue")
        print(sentence)
        audiopb = tts_interface(sentence, voice)
        yield chatbot_history, sentence, audiopb


# Gradio interface
async def create_demo():
    voices = await get_voices()

    # Interface code
    with gr.Blocks(title="Chat with LLM - POC") as demo:
        DESCRIPTION = """# Chat with LLM - POC"""
        gr.Markdown(DESCRIPTION)
        with gr.Row():
            with gr.Column(scale=1, min_width=300):
                voice = gr.Dropdown(choices=[""] + list(voices.keys()), label="Select Voice", value="")
                user_msg = gr.Textbox(placeholder="Enter text here or speak into your microphone")
                audio_record = gr.Audio(sources=["microphone"], type="filepath", scale=4)
                ai_response = gr.Label(show_label=True, label="LLM sentence currently being processed")
                submit_button = gr.Button("Submit")
                speech_button = gr.Button("Test Speech", visible=False)
                audio_playback = gr.Audio(
                    value=None,
                    label="Generated audio response",
                    streaming=True,
                    autoplay=True,
                    interactive=False,
                    show_label=True,
                    visible=False,
                )
            with gr.Column(scale=1, min_width=300):
                # Chatbot component with an initial greeting
                chatbot = gr.Chatbot(
                    value=[(None, "Hi, I'm an AI training assistant. Let's get going, how should we start?")],
                    elem_id="Conversation",
                    bubble_full_width=False,
                )

        speech_button.click(fn=tts_interface, inputs=[user_msg, voice], outputs=[audio_playback])

        audio_record.stop_recording(
            fn=add_audio, inputs=[chatbot, audio_record], outputs=[chatbot, user_msg], queue=False
        ).then(
            fn=respond, inputs=[chatbot, voice], outputs=[chatbot, ai_response, audio_playback]
        )

        submit_button.click(
            fn=add_text, inputs=[chatbot, user_msg], outputs=[chatbot, user_msg], queue=False
        ).then(
            fn=respond, inputs=[chatbot, voice], outputs=[chatbot, ai_response, audio_playback]
        )

    return demo


# Run the application
demo = asyncio.run(create_demo())
demo.launch()