import asyncio
import tempfile

import edge_tts
import gradio as gr
from gradio_client import Client
import pyarabic.araby as araby

# Voice catalogue for the UI: maps a language display name to a mapping of
# speaker display name -> Edge TTS voice identifier. The outer keys fill the
# "Languages" dropdown, the inner keys fill the "Speakers" dropdown, and the
# inner values are passed to `edge_tts.Communicate` as the voice.
# NOTE(review): the "Mandarin" group also lists zh-HK and zh-TW voices —
# confirm that grouping is intended.
language_dict = {
  "English": {
    "Jenny": "en-US-JennyNeural",
    "Guy": "en-US-GuyNeural",
    "Ana": "en-US-AnaNeural",
    "Aria": "en-US-AriaNeural",
    "Christopher": "en-US-ChristopherNeural",
    "Eric": "en-US-EricNeural",
    "Michelle": "en-US-MichelleNeural",
    "Roger": "en-US-RogerNeural",
    "Natasha": "en-AU-NatashaNeural",
    "William": "en-AU-WilliamNeural",
    "Clara": "en-CA-ClaraNeural",
    "Liam": "en-CA-LiamNeural",
    "Libby": "en-GB-LibbyNeural",
    "Maisie": "en-GB-MaisieNeural",
    "Ryan": "en-GB-RyanNeural",
    "Sonia": "en-GB-SoniaNeural",
    "Thomas": "en-GB-ThomasNeural",
    "Sam": "en-HK-SamNeural",
    "Yan": "en-HK-YanNeural",
    "Connor": "en-IE-ConnorNeural",
    "Emily": "en-IE-EmilyNeural",
    "Neerja": "en-IN-NeerjaNeural",
    "Prabhat": "en-IN-PrabhatNeural",
    "Asilia": "en-KE-AsiliaNeural",
    "Chilemba": "en-KE-ChilembaNeural",
    "Abeo": "en-NG-AbeoNeural",
    "Ezinne": "en-NG-EzinneNeural",
    "Mitchell": "en-NZ-MitchellNeural",
    "James": "en-PH-JamesNeural",
    "Rosa": "en-PH-RosaNeural",
    "Luna": "en-SG-LunaNeural",
    "Wayne": "en-SG-WayneNeural",
    "Elimu": "en-TZ-ElimuNeural",
    "Imani": "en-TZ-ImaniNeural",
    "Leah": "en-ZA-LeahNeural",
    "Luke": "en-ZA-LukeNeural"
  },
  "Spanish": {
    "Elena": "es-AR-ElenaNeural",
    "Tomas": "es-AR-TomasNeural",
    "Marcelo": "es-BO-MarceloNeural",
    "Sofia": "es-BO-SofiaNeural",
    "Gonzalo": "es-CO-GonzaloNeural",
    "Salome": "es-CO-SalomeNeural",
    "Juan": "es-CR-JuanNeural",
    "Maria": "es-CR-MariaNeural",
    "Belkys": "es-CU-BelkysNeural",
    "Emilio": "es-DO-EmilioNeural",
    "Ramona": "es-DO-RamonaNeural",
    "Andrea": "es-EC-AndreaNeural",
    "Luis": "es-EC-LuisNeural",
    "Alvaro": "es-ES-AlvaroNeural",
    "Elvira": "es-ES-ElviraNeural",
    "Teresa": "es-GQ-TeresaNeural",
    "Andres": "es-GT-AndresNeural",
    "Marta": "es-GT-MartaNeural",
    "Carlos": "es-HN-CarlosNeural",
    "Karla": "es-HN-KarlaNeural",
    "Federico": "es-NI-FedericoNeural",
    "Yolanda": "es-NI-YolandaNeural",
    "Margarita": "es-PA-MargaritaNeural",
    "Roberto": "es-PA-RobertoNeural",
    "Alex": "es-PE-AlexNeural",
    "Camila": "es-PE-CamilaNeural",
    "Karina": "es-PR-KarinaNeural",
    "Victor": "es-PR-VictorNeural",
    "Mario": "es-PY-MarioNeural",
    "Tania": "es-PY-TaniaNeural",
    "Lorena": "es-SV-LorenaNeural",
    "Rodrigo": "es-SV-RodrigoNeural",
    "Alonso": "es-US-AlonsoNeural",
    "Paloma": "es-US-PalomaNeural",
    "Mateo": "es-UY-MateoNeural",
    "Valentina": "es-UY-ValentinaNeural",
    "Paola": "es-VE-PaolaNeural",
    "Sebastian": "es-VE-SebastianNeural"
  },
  "Arabic": {
    "Hamed": "ar-SA-HamedNeural",
    "Zariyah": "ar-SA-ZariyahNeural",
    "Fatima": "ar-AE-FatimaNeural",
    "Hamdan": "ar-AE-HamdanNeural",
    "Ali": "ar-BH-AliNeural",
    "Laila": "ar-BH-LailaNeural",
    "Ismael": "ar-DZ-IsmaelNeural",
    "Salma": "ar-EG-SalmaNeural",
    "Shakir": "ar-EG-ShakirNeural",
    "Bassel": "ar-IQ-BasselNeural",
    "Rana": "ar-IQ-RanaNeural",
    "Sana": "ar-JO-SanaNeural",
    "Taim": "ar-JO-TaimNeural",
    "Fahed": "ar-KW-FahedNeural",
    "Noura": "ar-KW-NouraNeural",
    "Layla": "ar-LB-LaylaNeural",
    "Rami": "ar-LB-RamiNeural",
    "Iman": "ar-LY-ImanNeural",
    "Omar": "ar-LY-OmarNeural",
    "Jamal": "ar-MA-JamalNeural",
    "Mouna": "ar-MA-MounaNeural",
    "Abdullah": "ar-OM-AbdullahNeural",
    "Aysha": "ar-OM-AyshaNeural",
    "Amal": "ar-QA-AmalNeural",
    "Moaz": "ar-QA-MoazNeural",
    "Amany": "ar-SY-AmanyNeural",
    "Laith": "ar-SY-LaithNeural",
    "Hedi": "ar-TN-HediNeural",
    "Reem": "ar-TN-ReemNeural",
    "Maryam": "ar-YE-MaryamNeural",
    "Saleh": "ar-YE-SalehNeural"
  },
  "Korean": {
    "Sun-Hi": "ko-KR-SunHiNeural",
    "InJoon": "ko-KR-InJoonNeural"
  },
  "Thai": {
    "Premwadee": "th-TH-PremwadeeNeural",
    "Niwat": "th-TH-NiwatNeural"
  },
  "Vietnamese": {
    "HoaiMy": "vi-VN-HoaiMyNeural",
    "NamMinh": "vi-VN-NamMinhNeural"
  },
  "Japanese": {
    "Nanami": "ja-JP-NanamiNeural",
    "Keita": "ja-JP-KeitaNeural"
  },
  "French": {
    "Denise": "fr-FR-DeniseNeural",
    "Eloise": "fr-FR-EloiseNeural",
    "Henri": "fr-FR-HenriNeural",
    "Sylvie": "fr-CA-SylvieNeural",
    "Antoine": "fr-CA-AntoineNeural",
    "Jean": "fr-CA-JeanNeural",
    "Ariane": "fr-CH-ArianeNeural",
    "Fabrice": "fr-CH-FabriceNeural",
    "Charline": "fr-BE-CharlineNeural",
    "Gerard": "fr-BE-GerardNeural"
  },
  "Portuguese": {
    "Francisca": "pt-BR-FranciscaNeural",
    "Antonio": "pt-BR-AntonioNeural",
    "Duarte": "pt-PT-DuarteNeural",
    "Raquel": "pt-PT-RaquelNeural"
  },
  "Indonesian": {
    "Ardi": "id-ID-ArdiNeural",
    "Gadis": "id-ID-GadisNeural"
  },
  "Hebrew": {
    "Avri": "he-IL-AvriNeural",
    "Hila": "he-IL-HilaNeural"
  },
  "Italian": {
    "Isabella": "it-IT-IsabellaNeural",
    "Diego": "it-IT-DiegoNeural",
    "Elsa": "it-IT-ElsaNeural"
  },
  "Dutch": {
    "Colette": "nl-NL-ColetteNeural",
    "Fenna": "nl-NL-FennaNeural",
    "Maarten": "nl-NL-MaartenNeural",
    "Arnaud": "nl-BE-ArnaudNeural",
    "Dena": "nl-BE-DenaNeural"
  },
  "Malay": {
    "Osman": "ms-MY-OsmanNeural",
    "Yasmin": "ms-MY-YasminNeural"
  },
  "Norwegian": {
    "Pernille": "nb-NO-PernilleNeural",
    "Finn": "nb-NO-FinnNeural"
  },
  "Swedish": {
    "Sofie": "sv-SE-SofieNeural",
    "Mattias": "sv-SE-MattiasNeural"
  },
  "Greek": {
    "Athina": "el-GR-AthinaNeural",
    "Nestoras": "el-GR-NestorasNeural"
  },
  "German": {
    "Katja": "de-DE-KatjaNeural",
    "Amala": "de-DE-AmalaNeural",
    "Conrad": "de-DE-ConradNeural",
    "Killian": "de-DE-KillianNeural",
    "Ingrid": "de-AT-IngridNeural",
    "Jonas": "de-AT-JonasNeural",
    "Jan": "de-CH-JanNeural",
    "Leni": "de-CH-LeniNeural"
  },
  "Afrikaans": {
    "Adri": "af-ZA-AdriNeural",
    "Willem": "af-ZA-WillemNeural"
  },
  "Amharic": {
    "Ameha": "am-ET-AmehaNeural",
    "Mekdes": "am-ET-MekdesNeural"
  },
  "Azerbaijani": {
    "Babek": "az-AZ-BabekNeural",
    "Banu": "az-AZ-BanuNeural"
  },
  "Bulgarian": {
    "Borislav": "bg-BG-BorislavNeural",
    "Kalina": "bg-BG-KalinaNeural"
  },
  "Bengali": {
    "Nabanita": "bn-BD-NabanitaNeural",
    "Pradeep": "bn-BD-PradeepNeural",
    "Bashkar": "bn-IN-BashkarNeural",
    "Tanishaa": "bn-IN-TanishaaNeural"
  },
  "Bosnian": {
    "Goran": "bs-BA-GoranNeural",
    "Vesna": "bs-BA-VesnaNeural"
  },
  "Catalan": {
    "Joana": "ca-ES-JoanaNeural",
    "Enric": "ca-ES-EnricNeural"
  },
  "Czech": {
    "Antonin": "cs-CZ-AntoninNeural",
    "Vlasta": "cs-CZ-VlastaNeural"
  },
  "Welsh": {
    "Aled": "cy-GB-AledNeural",
    "Nia": "cy-GB-NiaNeural"
  },
  "Danish": {
    "Christel": "da-DK-ChristelNeural",
    "Jeppe": "da-DK-JeppeNeural"
  },
  "Estonian": {
    "Anu": "et-EE-AnuNeural",
    "Kert": "et-EE-KertNeural"
  },
  "Persian": {
    "Dilara": "fa-IR-DilaraNeural",
    "Farid": "fa-IR-FaridNeural"
  },
  "Finnish": {
    "Harri": "fi-FI-HarriNeural",
    "Noora": "fi-FI-NooraNeural"
  },
  "Irish": {
    "Colm": "ga-IE-ColmNeural",
    "Orla": "ga-IE-OrlaNeural"
  },
  "Galician": {
    "Roi": "gl-ES-RoiNeural",
    "Sabela": "gl-ES-SabelaNeural"
  },
  "Gujarati": {
    "Dhwani": "gu-IN-DhwaniNeural",
    "Niranjan": "gu-IN-NiranjanNeural"
  },
  "Hindi": {
    "Madhur": "hi-IN-MadhurNeural",
    "Swara": "hi-IN-SwaraNeural"
  },
  "Croatian": {
    "Gabrijela": "hr-HR-GabrijelaNeural",
    "Srecko": "hr-HR-SreckoNeural"
  },
  "Hungarian": {
    "Noemi": "hu-HU-NoemiNeural",
    "Tamas": "hu-HU-TamasNeural"
  },
  "Icelandic": {
    "Gudrun": "is-IS-GudrunNeural",
    "Gunnar": "is-IS-GunnarNeural"
  },
  "Javanese": {
    "Dimas": "jv-ID-DimasNeural",
    "Siti": "jv-ID-SitiNeural"
  },
  "Georgian": {
    "Eka": "ka-GE-EkaNeural",
    "Giorgi": "ka-GE-GiorgiNeural"
  },
  "Kazakh": {
    "Aigul": "kk-KZ-AigulNeural",
    "Daulet": "kk-KZ-DauletNeural"
  },
  "Khmer": {
    "Piseth": "km-KH-PisethNeural",
    "Sreymom": "km-KH-SreymomNeural"
  },
  "Kannada": {
    "Gagan": "kn-IN-GaganNeural",
    "Sapna": "kn-IN-SapnaNeural"
  },
  "Lao": {
    "Chanthavong": "lo-LA-ChanthavongNeural",
    "Keomany": "lo-LA-KeomanyNeural"
  },
  "Lithuanian": {
    "Leonas": "lt-LT-LeonasNeural",
    "Ona": "lt-LT-OnaNeural"
  },
  "Latvian": {
    "Everita": "lv-LV-EveritaNeural",
    "Nils": "lv-LV-NilsNeural"
  },
  "Macedonian": {
    "Aleksandar": "mk-MK-AleksandarNeural",
    "Marija": "mk-MK-MarijaNeural"
  },
  "Malayalam": {
    "Midhun": "ml-IN-MidhunNeural",
    "Sobhana": "ml-IN-SobhanaNeural"
  },
  "Mongolian": {
    "Bataa": "mn-MN-BataaNeural",
    "Yesui": "mn-MN-YesuiNeural"
  },
  "Marathi": {
    "Aarohi": "mr-IN-AarohiNeural",
    "Manohar": "mr-IN-ManoharNeural"
  },
  "Maltese": {
    "Grace": "mt-MT-GraceNeural",
    "Joseph": "mt-MT-JosephNeural"
  },
  "Burmese": {
    "Nilar": "my-MM-NilarNeural",
    "Thiha": "my-MM-ThihaNeural"
  },
  "Nepali": {
    "Hemkala": "ne-NP-HemkalaNeural",
    "Sagar": "ne-NP-SagarNeural"
  },
  "Polish": {
    "Marek": "pl-PL-MarekNeural",
    "Zofia": "pl-PL-ZofiaNeural"
  },
  "Pashto": {
    "Gul Nawaz": "ps-AF-GulNawazNeural",
    "Latifa": "ps-AF-LatifaNeural"
  },
  "Romanian": {
    "Alina": "ro-RO-AlinaNeural",
    "Emil": "ro-RO-EmilNeural"
  },
  "Russian": {
    "Svetlana": "ru-RU-SvetlanaNeural",
    "Dmitry": "ru-RU-DmitryNeural"
  },
  "Sinhala": {
    "Sameera": "si-LK-SameeraNeural",
    "Thilini": "si-LK-ThiliniNeural"
  },
  "Slovak": {
    "Lukas": "sk-SK-LukasNeural",
    "Viktoria": "sk-SK-ViktoriaNeural"
  },
  "Slovenian": {
    "Petra": "sl-SI-PetraNeural",
    "Rok": "sl-SI-RokNeural"
  },
  "Somali": {
    "Muuse": "so-SO-MuuseNeural",
    "Ubax": "so-SO-UbaxNeural"
  },
  "Albanian": {
    "Anila": "sq-AL-AnilaNeural",
    "Ilir": "sq-AL-IlirNeural"
  },
  "Serbian": {
    "Nicholas": "sr-RS-NicholasNeural",
    "Sophie": "sr-RS-SophieNeural"
  },
  "Sundanese": {
    "Jajang": "su-ID-JajangNeural",
    "Tuti": "su-ID-TutiNeural"
  },
  "Swahili": {
    "Rafiki": "sw-KE-RafikiNeural",
    "Zuri": "sw-KE-ZuriNeural",
    "Daudi": "sw-TZ-DaudiNeural",
    "Rehema": "sw-TZ-RehemaNeural"
    },
  "Tamil": {
    "Pallavi": "ta-IN-PallaviNeural",
    "Valluvar": "ta-IN-ValluvarNeural",
    "Kumar": "ta-LK-KumarNeural",
    "Saranya": "ta-LK-SaranyaNeural",
    "Kani": "ta-MY-KaniNeural",
    "Surya": "ta-MY-SuryaNeural",
    "Anbu": "ta-SG-AnbuNeural"
  },
  "Telugu": {
    "Mohan": "te-IN-MohanNeural",
    "Shruti": "te-IN-ShrutiNeural"
  },
  "Turkish": {
    "Ahmet": "tr-TR-AhmetNeural",
    "Emel": "tr-TR-EmelNeural"
  },
  "Ukrainian": {
    "Ostap": "uk-UA-OstapNeural",
    "Polina": "uk-UA-PolinaNeural"
  },
  "Urdu": {
    "Gul": "ur-IN-GulNeural",
    "Salman": "ur-IN-SalmanNeural",
    "Asad": "ur-PK-AsadNeural",
    "Uzma": "ur-PK-UzmaNeural"
  },
  "Uzbek": {
    "Madina": "uz-UZ-MadinaNeural",
    "Sardor": "uz-UZ-SardorNeural"
  },
  "Mandarin": {
    "Xiaoxiao": "zh-CN-XiaoxiaoNeural",
    "Yunyang": "zh-CN-YunyangNeural",
    "Yunxi": "zh-CN-YunxiNeural",
    "Xiaoyi": "zh-CN-XiaoyiNeural",
    "Yunjian": "zh-CN-YunjianNeural",
    "Yunxia": "zh-CN-YunxiaNeural",
    "Xiaobei": "zh-CN-liaoning-XiaobeiNeural",
    "Xiaoni": "zh-CN-shaanxi-XiaoniNeural",
    "HiuMaan": "zh-HK-HiuMaanNeural",
    "HiuGaai": "zh-HK-HiuGaaiNeural",
    "WanLung": "zh-HK-WanLungNeural",
    "HsiaoChen": "zh-TW-HsiaoChenNeural",
    "HsiaoYu": "zh-TW-HsiaoYuNeural",
    "YunJhe": "zh-TW-YunJheNeural"
  },
  "Zulu": {
    "Thando": "zu-ZA-ThandoNeural",
    "Themba": "zu-ZA-ThembaNeural"
  }
}

# Shared gradio_client handle for the "MohamedRashad/arabic-auto-tashkeel"
# app; `text_to_speech_edge` calls its "/infer_shakkala" endpoint to add
# Arabic diacritics. Created once at import time and reused by every request.
# NOTE(review): constructing the client presumably contacts the remote app,
# so importing this module likely requires network access — confirm.
client = Client("MohamedRashad/arabic-auto-tashkeel")

async def text_to_speech_edge(text, language_code, speaker, tashkeel_checkbox=False):
    """Synthesize *text* with Edge TTS and return the spoken text and audio path.

    Args:
        text: Text to convert to speech.
        language_code: Language display name; a top-level key of
            ``language_dict`` (e.g. ``"Arabic"``).
        speaker: Speaker display name; a key of ``language_dict[language_code]``.
        tashkeel_checkbox: When true and the language is ``"Arabic"``, existing
            diacritics are stripped and the text is re-diacritized through the
            remote ``/infer_shakkala`` endpoint before synthesis.

    Returns:
        A ``(text, path)`` tuple: the text that was synthesized (the
        diacritized version when tashkeel was applied) and the path of the
        generated ``.mp3`` temporary file.

    Raises:
        gr.Error: If the text is empty or the language/speaker pair is not in
            ``language_dict``.
    """
    # Nothing to synthesize: report it in the UI instead of failing later.
    if not text or not text.strip():
        raise gr.Error("Please enter some text to convert to speech.")

    # Resolve the voice before any remote call. The UI starts with no language
    # or speaker selected, so a missing key is an expected user error.
    try:
        voice = language_dict[language_code][speaker]
    except KeyError:
        raise gr.Error("Please select a language and a speaker first.") from None

    if language_code == "Arabic" and tashkeel_checkbox:
        # Strip existing diacritics, then let the remote model add tashkeel.
        bare_text = araby.strip_diacritics(text)
        # `client.predict` is a blocking call; run it in the default executor
        # so this coroutine does not stall the event loop while waiting.
        loop = asyncio.get_running_loop()
        text = await loop.run_in_executor(
            None,
            lambda: client.predict(input_text=bare_text, api_name="/infer_shakkala"),
        )

    communicate = edge_tts.Communicate(text, voice)
    # delete=False keeps the file on disk after this function returns so the
    # returned path stays valid. Only the path is needed, so our own handle is
    # closed before the audio is saved to that path.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp_file:
        tmp_path = tmp_file.name
    await communicate.save(tmp_path)

    return text, tmp_path


def get_speakers(language):
    """Build the UI updates that follow a change of the selected language.

    Args:
        language: Language display name; expected to be a key of
            ``language_dict``.

    Returns:
        A ``(speaker_dropdown, tashkeel_checkbox)`` pair of component updates.
        For a known language the dropdown lists its speakers with the first
        one preselected, and the checkbox is visible only for ``"Arabic"``.
        For a missing or unknown language both fall back to the initial
        empty, non-interactive, hidden state.
    """
    voices = language_dict.get(language)
    if not voices:
        # Selection cleared or unknown: avoid a KeyError and reset the controls.
        return (
            gr.Dropdown(choices=[], value=None, interactive=False),
            gr.Checkbox(visible=False, interactive=False),
        )

    speakers = list(voices)
    return (
        gr.Dropdown(choices=speakers, value=speakers[0], interactive=True),
        gr.Checkbox(visible=language == "Arabic", interactive=True),
    )


# No language or speaker is preselected; the speaker list is filled in by
# `get_speakers` once the user picks a language.
default_language = None
default_speaker = None
with gr.Blocks(title="Multilingual TTS") as demo:
    # Page header: title, number of supported languages, and their names.
    gr.HTML("<center><h1>Multilingual TTS (Edge TTS)</h1></center>")
    gr.HTML(f"<h2 style='color:Tomato;'> {len(language_dict)} languages supported</h2>")
    gr.HTML(f"<p> {', '.join(language_dict)} </p>")
    gr.Markdown("**Note:** A special feature is added for Arabic language only.")
    with gr.Row():
        # Left column: inputs.
        with gr.Column():
            input_text = gr.Textbox(lines=5, label="Input Text", placeholder="Enter text to convert to speech")
            language = gr.Dropdown(
                choices=list(language_dict), value=default_language, label="Languages", interactive=True
            )
            # Starts empty and disabled until a language has been chosen.
            speaker = gr.Dropdown(choices=[], value=default_speaker, label="Speakers", interactive=False)
            # Hidden by default; `get_speakers` reveals it only for Arabic.
            tashkeel_checkbox = gr.Checkbox(label="Tashkeel", value=False, visible=False, interactive=False)
            run_btn = gr.Button(value="Generate Audio", variant="primary")

        # Right column: the text that was synthesized and the resulting audio.
        with gr.Column():
            output_text = gr.Textbox(label="Output Text")
            output_audio = gr.Audio(type="filepath", label="Audio Output")

    # Changing the language refreshes the speaker list and the checkbox state.
    language.change(get_speakers, inputs=[language], outputs=[speaker, tashkeel_checkbox])
    run_btn.click(text_to_speech_edge, inputs=[input_text, language, speaker, tashkeel_checkbox], outputs=[output_text, output_audio])

if __name__ == "__main__":
    # Script entry point: turn on the request queue, then launch the app
    # with share=False.
    queued_app = demo.queue()
    queued_app.launch(share=False)