"""Gradio demo: transcribe and diarize a customer-support call, then
summarize the conversation and classify the customer's per-sentence emotions."""

import os
from functools import partial

import torch
import gradio as gr
from transformers import pipeline
from pyannote.audio import Pipeline
import whisperx

from utils import split
from utils import speech_to_text as stt

# Silence the fork warning from the HuggingFace tokenizers library.
os.environ["TOKENIZERS_PARALLELISM"] = "false"

# transformers pipelines take an int device index (-1 = CPU, 0 = first GPU).
device = 0 if torch.cuda.is_available() else -1

color_map = {
    "joy": "green",
    "anger": "red",
    "surprise": "yellow",
    "sadness": "blue",
    "fear": "orange",
    "love": "purple",
}

# Audio components
whisper_device = "cuda" if torch.cuda.is_available() else "cpu"
whisper = whisperx.load_model("tiny.en", whisper_device)
alignment_model, metadata = whisperx.load_align_model(language_code="en", device=whisper_device)
speaker_segmentation = Pipeline.from_pretrained(
    "pyannote/speaker-diarization@2.1", use_auth_token=os.environ["ENO_TOKEN"]
)

# Bind the loaded models so the Gradio callback only receives the audio path.
speech_to_text = partial(
    stt,
    speaker_segmentation=speaker_segmentation,
    whisper=whisper,
    alignment_model=alignment_model,
    metadata=metadata,
    whisper_device=whisper_device,
)

# Text components
emotion_pipeline = pipeline(
    "text-classification",
    model="bhadresh-savani/distilbert-base-uncased-emotion",
    device=device,
)
summarization_pipeline = pipeline(
    "summarization",
    model="knkarthick/MEETING_SUMMARY",
    device=device,
)


def summarize(diarized, summarization_pipeline):
    # `diarized` is a list of (text, speaker) tuples; rebuild the transcript
    # as "Speaker: text" lines before summarizing.
    text = ""
    for speech, speaker in diarized:
        text += f"\n{speaker}: {speech}"
    return summarization_pipeline(text)[0]["summary_text"]


def sentiment(diarized, emotion_pipeline):
    customer_sentiments = []
    # Speakers alternate turns, so every other entry belongs to the same
    # speaker; the "Customer" check keeps only the customer's turns.
    for i in range(0, len(diarized), 2):
        speaker_speech, speaker_id = diarized[i]
        if "Customer" in speaker_id:
            sentences = split(speaker_speech)
            outputs = emotion_pipeline(sentences)
            for output, sentence in zip(outputs, sentences):
                customer_sentiments.append((sentence, output["label"]))
    return customer_sentiments


EXAMPLES = [["Customer_Support_Call.wav"]]

with gr.Blocks() as demo:
    with gr.Row():
        with gr.Column():
            audio = gr.Audio(label="Audio file", type="filepath")
            btn = gr.Button("Transcribe and Diarize")
            gr.Markdown("**Call Transcript:**")
            diarized = gr.HighlightedText(label="Call Transcript")
            gr.Markdown("Summarize Speaker")
            sum_btn = gr.Button("Get Summary")
            summary = gr.Textbox(lines=4)
            sentiment_btn = gr.Button("Get Customer Sentiment")
            analyzed = gr.HighlightedText(color_map=color_map)
        with gr.Column():
            gr.Markdown("## Example Files")
            gr.Examples(
                examples=EXAMPLES,
                inputs=[audio],
                outputs=[diarized],
                fn=speech_to_text,
                cache_examples=True,
            )

    # When the transcribe button is clicked, convert the audio file to text and diarize it.
    btn.click(fn=speech_to_text, inputs=audio, outputs=diarized)

    # When the summary button is clicked, summarize the diarized transcript.
    sum_btn.click(
        fn=partial(summarize, summarization_pipeline=summarization_pipeline),
        inputs=[diarized],
        outputs=summary,
    )

    # When the sentiment button is clicked, highlight the customer's sentences by emotion.
    sentiment_btn.click(
        fn=partial(sentiment, emotion_pipeline=emotion_pipeline),
        inputs=diarized,
        outputs=[analyzed],
    )

demo.launch(debug=True)