Spaces:
Running
Running
NorHsangPha
committed on
Commit
•
d1d6cc0
1
Parent(s):
4067b9d
Initial: initial commit
Browse files
- app.py +24 -0
- requirements.txt +5 -0
- tts.py +76 -0
app.py
ADDED
@@ -0,0 +1,24 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import gradio as gr
|
2 |
+
from tts import synthesize, TTS_EXAMPLES
|
3 |
+
|
4 |
+
# Input widgets, listed in the positional order of
# synthesize(model, input_string, speed).
model_choice = gr.Dropdown(["original", "nova", "homhom"], label="Model", value="nova")
text_input = gr.Textbox(label="Input text")
speed_slider = gr.Slider(minimum=0.1, maximum=2.0, value=1.0, step=0.1, label="Speed")

# Output widgets: synthesize returns ((sample_rate, waveform), processed_text).
audio_output = gr.Audio(label="Generated Audio", type="numpy")
filtered_text_output = gr.Textbox(label="Filtered text after processing")

# Single-function interface wrapping the TTS entry point.
mms_synthesize = gr.Interface(
    fn=synthesize,
    inputs=[model_choice, text_input, speed_slider],
    outputs=[audio_output, filtered_text_output],
    examples=TTS_EXAMPLES,
    title="Text-to-Speech Demo",
    description="Generate audio in your desired language from input text.",
    allow_flagging="never",
)

# Host the interface inside a Blocks app and start the server.
with gr.Blocks() as demo:
    mms_synthesize.render()

demo.launch()
|
requirements.txt
ADDED
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
gradio
|
2 |
+
transformers
|
3 |
+
torch
|
4 |
+
torchaudio
|
5 |
+
git+https://github.com/NoerNova/ShanNLP
|
tts.py
ADDED
@@ -0,0 +1,76 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
from transformers import VitsModel, VitsTokenizer
|
3 |
+
import torch
|
4 |
+
from shannlp import util, word_tokenize
|
5 |
+
|
6 |
+
|
7 |
+
def preprocess_string(input_string: str) -> str:
    """Normalize raw input text before it is handed to the TTS tokenizer.

    Parentheses are removed, the text is split with ``word_tokenize`` from
    ``shannlp``, and every token that consists solely of decimal digits is
    replaced by the result of ``util.num_to_shanword``. The tokens are then
    concatenated back together without a separator.

    Args:
        input_string: Text supplied by the user.

    Returns:
        The processed text as a single string.
    """
    # Drop both kinds of parenthesis in a single pass.
    without_parens = input_string.translate(str.maketrans("", "", "()"))

    converted = []
    for token in word_tokenize(without_parens):
        digits = token.strip()
        # isdecimal() rather than isdigit(): isdigit() also accepts characters
        # such as superscript digits ("²") that int() rejects with ValueError.
        # Such tokens are now passed through unchanged instead of crashing.
        if digits.isdecimal():
            converted.append(util.num_to_shanword(int(digits)))
        else:
            converted.append(token)

    return "".join(converted)
|
21 |
+
|
22 |
+
|
23 |
+
def synthesize(model: str, input_string: str, speed: float = 1.0):
    """Generate speech audio for ``input_string`` with the selected VITS model.

    Args:
        model: Key selecting the checkpoint: ``"original"``, ``"nova"`` or
            ``"homhom"``.
        input_string: Text to speak. It is run through ``preprocess_string``
            before tokenization.
        speed: Speaking-rate multiplier. Must be greater than zero.

    Returns:
        ``((sample_rate, waveform), processed_string)`` where ``waveform`` is
        a NumPy array and ``processed_string`` is the text that was actually
        fed to the tokenizer.

    Raises:
        ValueError: If ``speed`` is not greater than zero.
        KeyError: If ``model`` is not one of the known keys.
    """
    # Validate cheap arguments first so bad input fails before any model is
    # downloaded or loaded.
    if speed <= 0:
        raise ValueError(f"speed must be greater than 0, got {speed!r}")

    checkpoints = {
        "original": "facebook/mms-tts-shn",
        "nova": "NorHsangPha/mms-tts-nova-train",
        "homhom": "NorHsangPha/mms-tts-shn-train",
    }
    try:
        model_id = checkpoints[model]
    except KeyError:
        raise KeyError(
            f"Unknown model {model!r}; expected one of {sorted(checkpoints)}"
        ) from None

    # Fall back to True when the secret is not set, as the original code did.
    auth_token = os.environ.get("TOKEN_READ_SECRET") or True

    # NOTE(review): the checkpoint and tokenizer are loaded on every call;
    # consider caching them per model_id if request latency matters.
    vits = VitsModel.from_pretrained(model_id, token=auth_token)
    tokenizer = VitsTokenizer.from_pretrained(model_id, token=auth_token)

    # Prefer CUDA, then Apple MPS, then CPU.
    if torch.cuda.is_available():
        device = torch.device("cuda")
    elif torch.backends.mps.is_available():
        device = torch.device("mps")
    else:
        device = torch.device("cpu")
    vits.to(device)

    processed_string = preprocess_string(input_string)
    inputs = tokenizer(processed_string, return_tensors="pt").to(device)

    # Fixed seed so repeated requests with the same input sound the same.
    torch.manual_seed(42)

    vits.speaking_rate = speed
    vits.noise_scale = 0.2
    # NOTE(review): the Hugging Face VitsModel appears to read
    # `noise_scale_duration` and to derive the length scale from
    # `speaking_rate`, so the next two assignments may have no effect.
    # They are kept unchanged so the generated audio does not change.
    vits.noise_scale_w = 0.2
    vits.length_scale = 1.0 / speed

    with torch.no_grad():
        waveform = vits(**inputs).waveform

    waveform = waveform.squeeze().cpu().numpy()

    # Report the checkpoint's own sample rate instead of a hard-coded 16 kHz.
    sample_rate = vits.config.sampling_rate

    return ((sample_rate, waveform), processed_string)
|
61 |
+
|
62 |
+
|
63 |
+
# Example rows for the demo UI. Each row is [model key, input text, speed],
# matching the positional parameters of synthesize().
TTS_EXAMPLES = [
    [
        "nova",
        "မႂ်ႇသုင်ၶႃႈ ယူႇလီၵိၼ်ဝၢၼ်ၵတ်းယဵၼ် လီယူႇၶႃႈၼေႃႈ။",
        1.0,
    ],
    [
        "original",
        "ပဵၼ်ယၢမ်းဢၼ် ၸႂ်တိုၼ်ႇတဵၼ်ႈ ၽူင်ႉပိဝ် တႃႇၼုမ်ႇယိင်းၼုမ်ႇၸၢႆးၶဝ် ၸိူဝ်းဢၼ် တေလႆႈၶိုၼ်ႈႁဵၼ်းၼၼ်ႉယူႇ",
        1.0,
    ],
    [
        "homhom",
        "မိူဝ်ႈပီ 1958 လိူၼ်မေႊ 21 ဝၼ်းၼၼ်ႉ ၸဝ်ႈၼွႆႉသေႃးယၼ်ႇတ ဢမ်ႇၼၼ် ၸဝ်ႈၼွႆႉ ဢွၼ်ႁူဝ် ၽူႈႁၵ်ႉၸိူဝ်ႉၸၢတ်ႈ 31 ၵေႃႉသေ တိူင်ႇၵၢဝ်ႇယၼ်ႇၸႂ် ၵိၼ်ၼမ်ႉသတ်ႉၸႃႇ တႃႇၵေႃႇတင်ႈပူၵ်းပွင် ၵၢၼ်လုၵ်ႉၽိုၼ်ႉ တီႈႁူၺ်ႈပူႉ ႁိမ်းသူပ်းၼမ်ႉၵျွတ်ႈ ၼႂ်းဢိူင်ႇမိူင်းႁၢင် ၸႄႈဝဵင်းမိူင်းတူၼ် ၸိုင်ႈတႆးပွတ်းဢွၵ်ႇၶူင်း လႅၼ်လိၼ်ၸိုင်ႈထႆး။",
        1.0,
    ],
    [
        "nova",
        "ပဵၼ်ၵၢၼ်ၾုၵ်ႇၾင်ၸႂ်ၵၼ်ၼႅၼ်ႈ ၼၵ်းပၵ်းၸႂ် ယွင်ႈၵုၼ်းယွင်ႈမုၼ်ဢူငဝ်း ၸိူဝ်းၽူႈလဵပ်ႈႁဵၼ်းႁူႉပိုၼ်း ၸဵမ်လဵၵ်ႉယႂ်ႇၼုမ်ႇထဝ်ႈ ၼႂ်းၸိူဝ်း ၽူႈႁၵ်ႉ ၸိူဝ်ႉၸၢတ်ႈလၢႆပၢၼ်လၢႆသႅၼ်းမႃး 66 ပီ ၼပ်ႉတင်ႈတႄႇ 1958 ဝၼ်းတီႈ 21 လိူၼ်မေႊ။",
        1.0,
    ],
]
|