import torch
import scipy
import os
import streamlit as st
import pandas as pd
from transformers import set_seed, pipeline
from transformers import VitsTokenizer, VitsModel
from datasets import load_dataset, Audio
from src import *

#from huggingface_hub import login
#from dotenv import load_dotenv


# Language codes offered by every language selector in the app; each selector
# renders them through decode_iso (presumably ISO 639-3 codes for Mooré,
# French and English -- confirm against decode_iso in src).
language_list = ["mos", "fra", "eng"]

st.title("Demo: Finetuning models | Mooré Language")

# One tab per capability plus an "About" tab. The returned handles are used
# as context managers below to place widgets inside the matching tab.
tts, stt, trans, lid, about = st.tabs(
    [
        "Text to speech",
        "Speech to text",
        "Translation",
        "Language ID",
        "**About**",
    ]
)

# --- Text-to-speech tab -----------------------------------------------------
# Collects a text and a language, synthesizes speech with synthesize_facebook
# (imported from src) and plays the result back in the page.
with tts:
    tts_text = st.text_area(label="Please enter your text here:", value="", placeholder="ne y wĩndga")

    # Two equal-width columns; only the first one is populated.
    tts_col1, tts_col2 = st.columns(2)

    with tts_col1:
        tts_lang = st.selectbox('Language of text', language_list, format_func=decode_iso)

    if st.button("Speak"):
        # The spinner is shown only while the model is running.
        with st.spinner(":rainbow[Synthesizing, please wait...]"):
            synth = synthesize_facebook(tts_text, tts_lang)
        # Play the synthesized audio. The original statement was truncated to
        # ", sample_rate=16_000)"; it is restored here as an st.audio call.
        # NOTE(review): assumes synthesize_facebook returns a raw waveform
        # sampled at 16 kHz -- confirm against src.
        st.audio(synth, sample_rate=16_000)

# --- Speech-to-text tab -----------------------------------------------------
# Lets the user upload a clip and transcribe it with transcribe (imported from
# src), then shows the pre-computed transcriptions stored in
# data/speech_to_text.csv.
with stt:
    stt_file = st.file_uploader("Please upload an audio file:", type=['mp3', 'm4a'], key="stt_uploader")
    stt_lang = st.selectbox("Please select the language:", language_list, format_func=decode_iso)

    if st.button("Transcribe"):
        if stt_file is None:
            # st.file_uploader returns None until a file has been chosen;
            # do not hand None to the model.
            st.warning("Please upload an audio file first.")
        else:
            with st.spinner(":rainbow[Received your file, please wait while I process it...]"):
                # Stored under its own name so the `stt` tab handle is not
                # overwritten by the transcription result.
                transcription = transcribe(stt_file, stt_lang)
                # Explicit st.write instead of bare-string "magic", so the
                # output does not depend on the magic setting being enabled.
                st.write(":violet[The transcription is:]")
                st.write(f':violet[ "{transcription}"]')

    st.write("Using the supplied clips, here are the transcriptions:")
    df = pd.read_csv("data/speech_to_text.csv")
    # Positional relabeling: the CSV is expected to have exactly these five
    # columns in this order.
    df.columns = ['Clip ID', 'Spoken in Moore', 'Spoken in French', 'Transcription in Moore', 'Transcription in French']
    df.set_index('Clip ID', inplace=True)
    # One table per spoken language, pairing the spoken text with its transcription.
    st.table(df[['Spoken in Moore', 'Transcription in Moore']])
    st.table(df[['Spoken in French', 'Transcription in French']])

# --- Translation tab --------------------------------------------------------
# Translates free text between the supported languages with translate
# (imported from src), then shows the pre-computed translations stored in
# data/translated_eng.csv.
with trans:
    trans_text = st.text_area(label="Please enter your translation text here:", value="", placeholder="ne y wĩndga")

    # NOTE: a third column with a translation-model selector
    # (Facebook nllb-200-distilled-600M / Helsinki NLP opus-mt-mos-en /
    # Masakhane m2m100_418m_mos_fr_news) is currently disabled, and translate
    # is called without a model argument.
    trans_col1, trans_col2 = st.columns(2)

    with trans_col1:
        src_lang = st.selectbox('Translate from:', language_list, format_func=decode_iso)
    with trans_col2:
        # index=1 preselects the second language so source and target differ by default.
        target_lang = st.selectbox('Translate to:', language_list, format_func=decode_iso, index=1)

    if st.button("Translate"):
        with st.spinner(
            f":rainbow[Translating from {decode_iso(src_lang)} into {decode_iso(target_lang)}, please wait...]"
        ):
            translation = translate(trans_text, src_lang, target_lang)
        # The result used to be computed and then dropped; show it to the user.
        st.write(translation)

    st.write("Using the supplied clips, here are the translations:")
    # Rename by source column name rather than by position: read_csv ignores
    # the order of `usecols` and returns columns in file order, so assigning
    # df.columns positionally can attach the wrong label to a column.
    column_labels = {
        'ID': 'Clip ID',
        'Moore': 'Original Moore',
        'French': 'Original French',
        'English': 'Original English',
        'tr_meta_mos_eng': 'Moore-English Translation',
        'tr_meta_mos_fra': 'Moore-French Translation',
        'tr_meta_eng_mos': 'English-Moore Translation',
        'tr_meta_fra_mos': 'French-Moore Translation',
    }
    df = pd.read_csv("data/translated_eng.csv", usecols=list(column_labels))
    df = df.rename(columns=column_labels)
    df.set_index('Clip ID', inplace=True)
    # One table per source language, next to its machine translations.
    st.table(df[['Original Moore', 'Moore-French Translation', 'Moore-English Translation']])
    st.table(df[['Original French', 'French-Moore Translation']])
    st.table(df[['Original English', 'English-Moore Translation']])

# --- Language-identification tab --------------------------------------------
# Detects the spoken language of an uploaded clip with identify_language
# (imported from src), then shows the pre-computed results stored in
# data/language_id.csv.
with lid:
    langid_file = st.file_uploader("Please upload an audio file:", type=['mp3', 'm4a'], key="lid_uploader")

    if st.button("Identify"):
        if langid_file is None:
            # st.file_uploader returns None until a file has been chosen;
            # do not hand None to the model.
            st.warning("Please upload an audio file first.")
        else:
            with st.spinner(":rainbow[Received your file, please wait while I process it...]"):
                lang = identify_language(langid_file)
                # Convert the returned code to the display form used by the selectors.
                lang = decode_iso(lang)
                # supported colors: blue, green, orange, red, violet, gray/grey, rainbow.
                st.write(f":violet[The detected language is {lang}]")

    st.write("Using the supplied clips, here are the recognized languages:")
    df = pd.read_csv("data/language_id.csv")
    # Positional relabeling: the CSV is expected to have exactly these three
    # columns in this order.
    df.columns = ['Clip ID', 'Language detected when speaking Mooré', 'Language detected when speaking French']
    df.set_index('Clip ID', inplace=True)
    # The table was prepared but never rendered, leaving the heading above
    # without content; display it like the other tabs do.
    st.table(df)

with about:
    #st.header("How it works")
**Text to speech**, **speech to text**, and **language identification** capabilities are provided by Meta's [Massively Multilingual Speech (MMS)]( model, which supports over 1000 languages.[^1]

**Translation** capabilities are provided primarily by Meta's [No Language Left Behind (NLLB)]( model, which supports translation between 200 languages.[^3]
We compare Meta's NLLB translations to two other translation alternatives. Masakhane, an African NLP initiative, offers endpoints for translations between Mooré and French.[^4] Helsinki NLP offers endpoints between Mooré and English, and one endpoint from French to Mooré.[^5]

Facebook has since released [SeamlessM4T]( which also provides support for audio-to-audio translation; however, Mooré is not currently one of the included languages.
[^1]: Endpoints used: TTS ([English](, 
    [LID](  For language ID, the 256-language variant was chosen as this was the model with the smallest number of languages, which still included Mooré.   
    Learn more:
    [Docs]( | 
    [Paper]( | 
    [Supported languages](
[^3]: Endpoint used: [NLLB](   
    Learn more: 
    [Docs]( | 
    [Paper]( | 
    [Supported languages](
[^4]: Endpoint used: [Mooré to French](, 
    [French to Mooré](   
    Learn more:
    [Docs]( |
[^5]: Endpoints used: [Mooré to English](,
    [English to Mooré](,
    [French to Mooré](   
    Learn more: