Spaces: Running on CPU Upgrade
fastpitch diff return response
Browse files
- app.py +47 -31
- gr_client.py +22 -9
app.py
CHANGED
@@ -26,7 +26,7 @@ current_voice_type = None
|
|
26 |
base_speaker_emb = ''
|
27 |
|
28 |
def load_model(voice_model_name):
|
29 |
-
global current_voice_model, current_voice_type
|
30 |
|
31 |
if voice_model_name == 'x_selpahi':
|
32 |
# Lojban
|
@@ -47,8 +47,6 @@ def load_model(voice_model_name):
|
|
47 |
'pluginsContext': '{}',
|
48 |
}
|
49 |
|
50 |
-
embs = base_speaker_emb
|
51 |
-
|
52 |
print('Loading voice model...')
|
53 |
try:
|
54 |
json_data = xvaserver.loadModel(data)
|
@@ -59,13 +57,13 @@ def load_model(voice_model_name):
|
|
59 |
voice_model_json = json.load(f)
|
60 |
|
61 |
if model_type == 'xVAPitch':
|
62 |
-
|
63 |
elif model_type == 'FastPitch1.1':
|
64 |
-
|
65 |
except requests.exceptions.RequestException as err:
|
66 |
print(f'FAILED to load voice model: {err}')
|
67 |
|
68 |
-
return
|
69 |
|
70 |
|
71 |
class LocalBlocksDemo(BlocksDemo):
|
@@ -83,12 +81,14 @@ class LocalBlocksDemo(BlocksDemo):
|
|
83 |
surprise,
|
84 |
use_deepmoji
|
85 |
):
|
|
|
|
|
86 |
# grab only the first 1000 characters
|
87 |
input_text = input_text[:1000]
|
88 |
|
89 |
# load voice model if not the current model
|
90 |
if (current_voice_model != voice):
|
91 |
-
|
92 |
|
93 |
model_type = current_voice_type
|
94 |
pace = pacing if pacing else 1.0
|
@@ -144,34 +144,50 @@ class LocalBlocksDemo(BlocksDemo):
|
|
144 |
# with open('resources/app/server.log', 'r') as f:
|
145 |
# print(f.read())
|
146 |
|
147 |
-
arpabet_html = '
|
148 |
-
|
149 |
-
|
150 |
-
|
151 |
-
|
152 |
-
|
153 |
-
|
154 |
-
|
155 |
-
|
156 |
-
|
157 |
-
|
158 |
-
|
159 |
-
|
160 |
-
|
161 |
-
|
162 |
-
|
163 |
-
|
164 |
-
|
165 |
-
|
166 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
167 |
|
168 |
return [
|
169 |
save_path,
|
170 |
arpabet_html,
|
171 |
-
|
172 |
-
|
173 |
-
|
174 |
-
|
175 |
json_data
|
176 |
]
|
177 |
|
|
|
26 |
base_speaker_emb = ''
|
27 |
|
28 |
def load_model(voice_model_name):
|
29 |
+
global current_voice_model, current_voice_type, base_speaker_emb
|
30 |
|
31 |
if voice_model_name == 'x_selpahi':
|
32 |
# Lojban
|
|
|
47 |
'pluginsContext': '{}',
|
48 |
}
|
49 |
|
|
|
|
|
50 |
print('Loading voice model...')
|
51 |
try:
|
52 |
json_data = xvaserver.loadModel(data)
|
|
|
57 |
voice_model_json = json.load(f)
|
58 |
|
59 |
if model_type == 'xVAPitch':
|
60 |
+
base_speaker_emb = voice_model_json['games'][0]['base_speaker_emb']
|
61 |
elif model_type == 'FastPitch1.1':
|
62 |
+
base_speaker_emb = voice_model_json['games'][0]['resemblyzer']
|
63 |
except requests.exceptions.RequestException as err:
|
64 |
print(f'FAILED to load voice model: {err}')
|
65 |
|
66 |
+
return base_speaker_emb
|
67 |
|
68 |
|
69 |
class LocalBlocksDemo(BlocksDemo):
|
|
|
81 |
surprise,
|
82 |
use_deepmoji
|
83 |
):
|
84 |
+
global current_voice_model, current_voice_type, base_speaker_emb
|
85 |
+
|
86 |
# grab only the first 1000 characters
|
87 |
input_text = input_text[:1000]
|
88 |
|
89 |
# load voice model if not the current model
|
90 |
if (current_voice_model != voice):
|
91 |
+
load_model(voice)
|
92 |
|
93 |
model_type = current_voice_type
|
94 |
pace = pacing if pacing else 1.0
|
|
|
144 |
# with open('resources/app/server.log', 'r') as f:
|
145 |
# print(f.read())
|
146 |
|
147 |
+
arpabet_html = ''
|
148 |
+
if voice == 'x_selpahi':
|
149 |
+
angry = 0
|
150 |
+
happy = 0
|
151 |
+
sad = 0
|
152 |
+
surprise = 0
|
153 |
+
else:
|
154 |
+
arpabet_html = '<h6>ARPAbet & Durations</h6>'
|
155 |
+
arpabet_html += '<table style="margin: 0 var(--size-2)"><tbody><tr>'
|
156 |
+
arpabet_nopad = json_data['arpabet'].split('|PAD|')
|
157 |
+
arpabet_symbols = json_data['arpabet'].split('|')
|
158 |
+
wpad_len = len(arpabet_symbols)
|
159 |
+
nopad_len = len(arpabet_nopad)
|
160 |
+
total_dur_length = 0
|
161 |
+
for symb_i in range(wpad_len):
|
162 |
+
if (arpabet_symbols[symb_i] == '<PAD>'):
|
163 |
+
continue
|
164 |
+
total_dur_length += float(json_data['durations'][symb_i])
|
165 |
+
|
166 |
+
for symb_i in range(wpad_len):
|
167 |
+
if (arpabet_symbols[symb_i] == '<PAD>'):
|
168 |
+
continue
|
169 |
+
|
170 |
+
arpabet_length = float(json_data['durations'][symb_i])
|
171 |
+
cell_width = round(arpabet_length / total_dur_length * 100, 2)
|
172 |
+
arpabet_html += '<td class="arpabet" style="width: '\
|
173 |
+
+ str(cell_width)\
|
174 |
+
+'%">'\
|
175 |
+
+ arpabet_symbols[symb_i]\
|
176 |
+
+ '</td> '
|
177 |
+
arpabet_html += '<tr></tbody></table>'
|
178 |
+
|
179 |
+
angry = round(json_data['em_angry'][0], 2),
|
180 |
+
happy = round(json_data['em_happy'][0], 2),
|
181 |
+
sad = round(json_data['em_sad'][0], 2),
|
182 |
+
surprise = round(json_data['em_surprise'][0], 2),
|
183 |
|
184 |
return [
|
185 |
save_path,
|
186 |
arpabet_html,
|
187 |
+
angry,
|
188 |
+
happy,
|
189 |
+
sad,
|
190 |
+
surprise,
|
191 |
json_data
|
192 |
]
|
193 |
|
gr_client.py
CHANGED
@@ -502,6 +502,7 @@ class BlocksDemo:
|
|
502 |
inputs=voice_radio,
|
503 |
outputs=output_wav,
|
504 |
queue=True,
|
|
|
505 |
)
|
506 |
|
507 |
# Switched to Lojban voice
|
@@ -546,9 +547,16 @@ class BlocksDemo:
|
|
546 |
|
547 |
json_data = json.loads(response.replace("'", '"'))
|
548 |
|
549 |
-
arpabet_html = '
|
550 |
-
|
551 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
552 |
arpabet_symbols = json_data['arpabet'].split('|')
|
553 |
wpad_len = len(arpabet_symbols)
|
554 |
nopad_len = len(arpabet_nopad)
|
@@ -568,16 +576,21 @@ class BlocksDemo:
|
|
568 |
+ str(cell_width)\
|
569 |
+'%">'\
|
570 |
+ arpabet_symbols[symb_i]\
|
571 |
-
|
572 |
-
|
|
|
|
|
|
|
|
|
|
|
573 |
|
574 |
return [
|
575 |
wav_path,
|
576 |
arpabet_html,
|
577 |
-
|
578 |
-
|
579 |
-
|
580 |
-
|
581 |
response
|
582 |
]
|
583 |
|
|
|
502 |
inputs=voice_radio,
|
503 |
outputs=output_wav,
|
504 |
queue=True,
|
505 |
+
trigger_mode='once',
|
506 |
)
|
507 |
|
508 |
# Switched to Lojban voice
|
|
|
547 |
|
548 |
json_data = json.loads(response.replace("'", '"'))
|
549 |
|
550 |
+
arpabet_html = ''
|
551 |
+
if voice == 'x_selpahi':
|
552 |
+
angry = 0
|
553 |
+
happy = 0
|
554 |
+
sad = 0
|
555 |
+
surprise = 0
|
556 |
+
else:
|
557 |
+
arpabet_html = '<h6>ARPAbet & Durations</h6>'
|
558 |
+
arpabet_html += '<table style="margin: 0 var(--size-2)"><tbody><tr>'
|
559 |
+
arpabet_nopad = json_data['arpabet'].split('|PAD|')
|
560 |
arpabet_symbols = json_data['arpabet'].split('|')
|
561 |
wpad_len = len(arpabet_symbols)
|
562 |
nopad_len = len(arpabet_nopad)
|
|
|
576 |
+ str(cell_width)\
|
577 |
+'%">'\
|
578 |
+ arpabet_symbols[symb_i]\
|
579 |
+
+ '</td> '
|
580 |
+
arpabet_html += '<tr></tbody></table>'
|
581 |
+
|
582 |
+
angry = round(json_data['em_angry'][0], 2),
|
583 |
+
happy = round(json_data['em_happy'][0], 2),
|
584 |
+
sad = round(json_data['em_sad'][0], 2),
|
585 |
+
surprise = round(json_data['em_surprise'][0], 2),
|
586 |
|
587 |
return [
|
588 |
wav_path,
|
589 |
arpabet_html,
|
590 |
+
angry,
|
591 |
+
happy,
|
592 |
+
sad,
|
593 |
+
surprise,
|
594 |
response
|
595 |
]
|
596 |
|