NorHsangPha committed on
Commit
d1d6cc0
1 Parent(s): 4067b9d

Initial: initial commit

Browse files
Files changed (3) hide show
  1. app.py +24 -0
  2. requirements.txt +5 -0
  3. tts.py +76 -0
app.py ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from tts import synthesize, TTS_EXAMPLES
3
+
4
# Input widgets, listed in the positional order of
# `synthesize(model, input_string, speed)`.
input_components = [
    gr.Dropdown(["original", "nova", "homhom"], label="Model", value="nova"),
    gr.Textbox(label="Input text"),
    gr.Slider(minimum=0.1, maximum=2.0, value=1.0, step=0.1, label="Speed"),
]

# Output widgets: the synthesized audio, then the text that was actually
# fed to the model after preprocessing.
output_components = [
    gr.Audio(label="Generated Audio", type="numpy"),
    gr.Textbox(label="Filtered text after processing"),
]

# Single-function interface wrapping the TTS entry point.
mms_synthesize = gr.Interface(
    fn=synthesize,
    inputs=input_components,
    outputs=output_components,
    examples=TTS_EXAMPLES,
    title="Text-to-Speech Demo",
    description="Generate audio in your desired language from input text.",
    allow_flagging="never",
)

# Host the interface inside a Blocks app and start serving it.
with gr.Blocks() as demo:
    mms_synthesize.render()

demo.launch()
requirements.txt ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ gradio
2
+ transformers
3
+ torch
4
+ torchaudio
5
+ git+https://github.com/NoerNova/ShanNLP
tts.py ADDED
@@ -0,0 +1,76 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from transformers import VitsModel, VitsTokenizer
3
+ import torch
4
+ from shannlp import util, word_tokenize
5
+
6
+
7
def preprocess_string(input_string: str):
    """Normalize text for synthesis: drop parentheses and spell out numbers.

    The text is stripped of "(" and ")", split with ``word_tokenize``, and
    every token made up solely of decimal digits (ignoring surrounding
    whitespace) is replaced by ``util.num_to_shanword(int(token))``. All
    tokens are then concatenated back together without a separator.

    Returns the processed string.
    """
    without_parens = input_string.replace("(", "").replace(")", "")
    to_words = util.num_to_shanword

    # str.isdecimal() is used instead of str.isdigit() on purpose: isdigit()
    # also accepts characters such as "²" that int() cannot parse, which
    # would raise ValueError on otherwise harmless input.
    return "".join(
        to_words(int(token)) if token.strip().isdecimal() else token
        for token in word_tokenize(without_parens)
    )
21
+
22
+
23
# Maps the model names accepted by `synthesize` to Hugging Face Hub repo ids.
_MODEL_IDS = {
    "original": "facebook/mms-tts-shn",
    "nova": "NorHsangPha/mms-tts-nova-train",
    "homhom": "NorHsangPha/mms-tts-shn-train",
}

# (model, tokenizer) pairs keyed by Hub repo id, so every checkpoint is
# loaded at most once per process instead of once per request.
_LOADED_MODELS = {}


def _select_device():
    """Return the preferred torch device: CUDA, then MPS, then CPU."""
    if torch.cuda.is_available():
        return torch.device("cuda")
    if torch.backends.mps.is_available():
        return torch.device("mps")
    return torch.device("cpu")


def _load_model(model_id):
    """Return the cached ``(model, tokenizer)`` pair for *model_id*."""
    if model_id not in _LOADED_MODELS:
        # Fall back to None rather than True: token=True makes the Hub client
        # require a locally stored login token and fail when there is none,
        # even for public checkpoints. None still lets the client pick up a
        # stored token when one exists.
        auth_token = os.environ.get("TOKEN_READ_SECRET") or None
        vits = VitsModel.from_pretrained(model_id, token=auth_token)
        tokenizer = VitsTokenizer.from_pretrained(model_id, token=auth_token)
        _LOADED_MODELS[model_id] = (vits, tokenizer)
    return _LOADED_MODELS[model_id]


def synthesize(model: str, input_string: str, speed: float = 1.0):
    """Synthesize speech for *input_string* with the selected VITS model.

    Args:
        model: One of the keys of ``_MODEL_IDS`` ("original", "nova",
            "homhom").
        input_string: Raw text; it is run through ``preprocess_string``
            before tokenization.
        speed: Speaking-rate multiplier; must be greater than zero.

    Returns:
        ``((sampling_rate, waveform), processed_string)`` where ``waveform``
        is a NumPy array and ``processed_string`` is the text that was
        actually fed to the model.

    Raises:
        ValueError: If *speed* is not positive.
        KeyError: If *model* is not a known model name.
    """
    # Reject bad arguments before any expensive model loading happens.
    if speed <= 0:
        raise ValueError(f"speed must be positive, got {speed!r}")
    model_id = _MODEL_IDS[model]

    vits, tokenizer = _load_model(model_id)

    device = _select_device()
    vits.to(device)

    processed_string = preprocess_string(input_string)
    inputs = tokenizer(processed_string, return_tensors="pt").to(device)

    # Fixed seed so the same text always produces the same audio.
    torch.manual_seed(42)

    # NOTE: the cached model is shared between calls; these attributes are
    # re-assigned on every call, so concurrent callers would need to be
    # serialized externally.
    vits.speaking_rate = speed
    vits.noise_scale = 0.2
    # transformers' VitsModel reads `noise_scale_duration`; the previous
    # `noise_scale_w` / `length_scale` assignments were silently ignored
    # (`speaking_rate` already controls the length scale).
    vits.noise_scale_duration = 0.2

    with torch.no_grad():
        waveform = vits(**inputs).waveform

    audio = waveform.squeeze().cpu().numpy()

    # Report the rate the checkpoint was trained at instead of assuming 16 kHz.
    return ((vits.config.sampling_rate, audio), processed_string)
61
+
62
+
63
# Example rows for the demo UI, one [model name, input text, speed] list per
# example. Every example uses the same speed of 1.0.
TTS_EXAMPLES = [
    [model_name, sentence, 1.0]
    for model_name, sentence in (
        (
            "nova",
            "မႂ်ႇသုင်ၶႃႈ ယူႇလီၵိၼ်ဝၢၼ်ၵတ်းယဵၼ် လီယူႇၶႃႈၼေႃႈ။",
        ),
        (
            "original",
            "ပဵၼ်ယၢမ်းဢၼ် ၸႂ်တိုၼ်ႇတဵၼ်ႈ ၽူင်ႉပိဝ် တႃႇၼုမ်ႇယိင်းၼုမ်ႇၸၢႆးၶဝ် ၸိူဝ်းဢၼ် တေလႆႈၶိုၼ်ႈႁဵၼ်းၼၼ်ႉယူႇ",
        ),
        (
            "homhom",
            "မိူဝ်ႈပီ 1958 လိူၼ်မေႊ 21 ဝၼ်းၼၼ်ႉ ၸဝ်ႈၼွႆႉသေႃးယၼ်ႇတ ဢမ်ႇၼၼ် ၸဝ်ႈၼွႆႉ ဢွၼ်ႁူဝ် ၽူႈႁၵ်ႉၸိူဝ်ႉၸၢတ်ႈ 31 ၵေႃႉသေ တိူင်ႇၵၢဝ်ႇယၼ်ႇၸႂ် ၵိၼ်ၼမ်ႉသတ်ႉၸႃႇ တႃႇၵေႃႇတင်ႈပူၵ်းပွင် ၵၢၼ်လုၵ်ႉၽိုၼ်ႉ တီႈႁူၺ်ႈပူႉ ႁိမ်းသူပ်းၼမ်ႉၵျွတ်ႈ ၼႂ်းဢိူင်ႇမိူင်းႁၢင် ၸႄႈဝဵင်းမိူင်းတူၼ် ၸိုင်ႈတႆးပွတ်းဢွၵ်ႇၶူင်း လႅၼ်လိၼ်ၸိုင်ႈထႆး။",
        ),
        (
            "nova",
            "ပဵၼ်ၵၢၼ်ၾုၵ်ႇၾင်ၸႂ်ၵၼ်ၼႅၼ်ႈ ၼၵ်းပၵ်းၸႂ် ယွင်ႈၵုၼ်းယွင်ႈမုၼ်ဢူငဝ်း ၸိူဝ်းၽူႈလဵပ်ႈႁဵၼ်းႁူႉပိုၼ်း ၸဵမ်လဵၵ်ႉယႂ်ႇၼုမ်ႇထဝ်ႈ ၼႂ်းၸိူဝ်း ၽူႈႁၵ်ႉ ၸိူဝ်ႉၸၢတ်ႈလၢႆပၢၼ်လၢႆသႅၼ်းမႃး 66 ပီ ၼပ်ႉတင်ႈတႄႇ 1958 ဝၼ်းတီႈ 21 လိူၼ်မေႊ။",
        ),
    )
]