Spaces:
Running
Running
File size: 4,214 Bytes
e95bc25 15e7b85 e95bc25 2c32692 5536535 2c32692 e95bc25 ad86f4b e95bc25 4e602d2 edbfc9a 4e602d2 ad86f4b 845cab6 e95bc25 ad86f4b e95bc25 5536535 e95bc25 5536535 ad86f4b e95bc25 15e7b85 e95bc25 ad86f4b f194fff 4e602d2 ad86f4b f194fff 96fabd6 f194fff 5536535 e95bc25 15e7b85 4e602d2 ad86f4b 15e7b85 ad86f4b 15e7b85 5536535 15e7b85 e95bc25 15e7b85 e95bc25 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 |
import gradio as gr
import tempfile
from TTS.api import TTS
from huggingface_hub import hf_hub_download
import torch
import json
from snfl_imdann import TifinaghNumberConverter
import re
# --- Runtime / model-source configuration -------------------------------
CUDA = torch.cuda.is_available()  # synthesize on GPU when one is present
REPO_ID = "ayymen/Coqui-TTS-Vits-Multispeaker"  # HF Hub repo with the VITS checkpoint

# Coqui voice-conversion models offered in the UI (UI key -> Coqui model name).
VOICE_CONVERSION_MODELS = dict(
    freevc24='voice_conversion_models/multilingual/vctk/freevc24',
    openvoice_v1='voice_conversion_models/multilingual/multi-dataset/openvoice_v1',
    openvoice_v2='voice_conversion_models/multilingual/multi-dataset/openvoice_v2',
)

# UI label -> language code the model was trained with.
VARIANTS = dict(Tachelhit="shi", Tarifit="rif")
SPEAKERS = ["yan", "sin", "idj"]  # speaker ids of the multispeaker checkpoint

# --- UI copy ------------------------------------------------------------
my_title = "ⴰⴹⵕⵉⵚ ⵙ ⵉⵎⵙⵍⵉ - Tamazight Text-to-Speech"
my_description = (
    "This model is based on [VITS](https://github.com/jaywalnut310/vits), "
    "thanks to 🐸 [Coqui.ai](https://coqui.ai/)."
)
# Example rows: [text, variant code, speaker id, split_sentences].
my_examples = [
    # Tachelhit examples
    ["ⴰⵣⵓⵍ. ⵎⴰⵏⵣⴰⴽⵉⵏ?", "shi", "yan", True],
    ["ⵡⴰ ⵜⴰⵎⵖⴰⵔⵜ ⵎⴰ ⴷ ⵓⴽⴰⵏ ⵜⵙⴽⵔⵜ?", "shi", "sin", False],
    ["ⴳⵏ ⴰⴷ ⴰⴽ ⵉⵙⵙⴳⵏ ⵕⴱⴱⵉ ⵉⵜⵜⵓ ⴽ.", "shi", "yan", False],
    ["ⴰⵔⵔⴰⵡ ⵏ ⵍⵀⵎⵎ ⵢⵓⴽⵔ ⴰⵖ ⵉⵀⴷⵓⵎⵏ ⵏⵏⵖ!", "shi", "yan", False],
    # Tarifit examples
    ["ⴰⵣⵓⵍ. ⵎⴰⵎⵛ ⵜⴷⵊⵉⵜ?", "rif", "idj", True],
    ["ⴰⵇⵎⵎⵓⵎ ⵉⵇⵏⴻⵏ ⵓⵔ ⵜ ⵜⵜⵉⴷⴼⵏ ⵉⵣⴰⵏ.", "rif", "idj", False],
    ["ⵇⵇⵉⵎ ⵅ ⵜⴰⴷⴷⴰⵔⵜ ⵏⵏⵛ!", "rif", "idj", False],
    ["ⵜⴻⵜⵜⵏ ⴰⴳ ⵡⵓⵛⵛⵏ, ⵜⵜⵔⵓⵏ ⵅ ⵓⵎⴽⵙⴰ.", "rif", "idj", False],
]
# Fetch the trained checkpoint and its config from the Hugging Face Hub
# (cached locally by hf_hub_download after the first run).
best_model_path = hf_hub_download(repo_id=REPO_ID, filename="checkpoint_390000.pth")
config_path = hf_hub_download(repo_id=REPO_ID, filename="config.json")
# Load the Coqui TTS model once at startup and move it to GPU when available.
api = TTS(model_path=best_model_path, config_path=config_path).to("cuda" if CUDA else "cpu")
# pre-download voice conversion models
for model in VOICE_CONVERSION_MODELS.values():
    api.load_vc_model_by_name(model, gpu=CUDA)
# Read the character set the model was trained on, to show users what input
# it can handle.
with open(config_path, "r") as f:
    config = json.load(f)
available_chars = config["characters"]["characters"]
available_punct = config["characters"]["punctuations"]
# Digits 0-9 are also accepted: tts() converts them to their spoken form.
available_chars = available_chars + "".join([str(i) for i in range(10)])
placeholder = f"The available characters are: {available_chars} and the available punctuation is: {available_punct}"
# Input widgets, bound positionally to the parameters of tts().
_text_input = gr.Textbox(lines=5, label="Input Text", placeholder=placeholder)
# (label, value) tuples: the UI shows the variant name, tts() receives the code.
_variant_input = gr.Dropdown(label="Variant", choices=list(VARIANTS.items()), value="shi")
_speaker_input = gr.Dropdown(label="Speaker", choices=SPEAKERS, value="yan")
_split_input = gr.Checkbox(label="Split Sentences (each sentence will be generated separately)", value=False)
_clone_wav_input = gr.Audio(type="filepath", label="Speaker audio for voice cloning (optional)")
_vc_model_input = gr.Dropdown(label="Voice Conversion Model", choices=list(VOICE_CONVERSION_MODELS.keys()))
my_inputs = [
    _text_input,
    _variant_input,
    _speaker_input,
    _split_input,
    _clone_wav_input,
    _vc_model_input,
]
# Single output: path to the synthesized wav, played automatically.
my_outputs = gr.Audio(type="filepath", label="Output Audio", autoplay=True)
# Single-character OOV replacements, applied in one C-level pass. Outputs
# ("," and " ") are not themselves keys, so this is order-independent and
# equivalent to the former chain of str.replace calls.
_OOV_TABLE = str.maketrans({"(": ",", ")": ",", '"': ",", "'": ",", ";": ",", "-": " "})

def tts(text: str, variant: str = "shi", speaker: str = "yan", split_sentences: bool = False, speaker_wav: str = None, voice_cv_model: str = 'freevc24'):
    """Synthesize ``text`` to a WAV file and return the file's path.

    Args:
        text: Input text; out-of-vocabulary characters and digits are
            normalized before synthesis.
        variant: Language code expected by the model ("shi" or "rif").
        speaker: Speaker id of the multispeaker checkpoint.
        split_sentences: When True, each sentence is generated separately.
        speaker_wav: Optional path to reference audio; when given, voice
            conversion clones that voice onto the synthesized speech.
        voice_cv_model: Key into VOICE_CONVERSION_MODELS. May arrive as
            None from the UI, since the dropdown has no default value.

    Returns:
        Path to the generated WAV file (kept on disk for Gradio to serve).
    """
    # Replace characters the model was not trained on.
    text = text.replace("\n", ". ")
    text = text.translate(_OOV_TABLE)
    # Convert runs of digits to their spoken form.
    text = re.sub(r"\d+", lambda m: TifinaghNumberConverter.convert(int(m.group(0))), text)
    # delete=False: Gradio reads the file after this function returns.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as fp:
        if speaker_wav:
            # The VC dropdown has no default value, so Gradio passes None when
            # the user never opens it — fall back to freevc24 instead of
            # raising KeyError on VOICE_CONVERSION_MODELS[None].
            vc_name = VOICE_CONVERSION_MODELS[voice_cv_model or 'freevc24']
            api.load_vc_model_by_name(vc_name, gpu=CUDA)
            api.tts_with_vc_to_file(text, speaker_wav=speaker_wav, file_path=fp.name, split_sentences=split_sentences, speaker=speaker, language=variant)
        else:
            api.tts_to_file(text, file_path=fp.name, split_sentences=split_sentences, speaker=speaker, language=variant)
    return fp.name
# Assemble the demo; cache_examples pre-renders audio for the example rows.
_interface_config = dict(
    fn=tts,
    inputs=my_inputs,
    outputs=my_outputs,
    title=my_title,
    description=my_description,
    examples=my_examples,
    cache_examples=True,
)
iface = gr.Interface(**_interface_config)
iface.launch()
|