Spaces:
Sleeping
Sleeping
stevenhillis
committed on
Commit
•
9c1a82f
1
Parent(s):
9bcb0d8
Use simple api call, write binary response to tmp file and read back out
Browse files
app.py
CHANGED
@@ -14,17 +14,20 @@ token_str = os.environ['DG_TOKEN']
|
|
14 |
def tts_fn(text, prompt_audio, prompt_seconds, inference_steps, inference_temperature, pitch_steps):
|
15 |
texts = [text]
|
16 |
sr = prompt_audio[0]
|
17 |
-
prompt_audio =
|
18 |
byte_io = io.BytesIO(bytes())
|
19 |
wavfile.write(byte_io, sr, prompt_audio)
|
20 |
prompt_audio_bytes = byte_io.read()
|
21 |
-
prompt_audio = [base64.b64encode(prompt_audio_bytes).decode('utf-8')] * len(texts)
|
22 |
params={'synthesize': 'true', 'pitch_steps': int(pitch_steps), 'soundstorm_steps': inference_steps, 'temperature': inference_temperature, 'prompt_seconds': prompt_seconds}
|
23 |
-
|
24 |
-
response = requests.post(base_url, files=files, params=params, headers={'Authorization': f'Token {token_str}'}).json()
|
25 |
try:
|
26 |
-
|
27 |
-
|
|
|
|
|
|
|
|
|
|
|
28 |
except Exception:
|
29 |
print(response)
|
30 |
return (sample_rate, audio)
|
|
|
14 |
def tts_fn(text, prompt_audio, prompt_seconds, inference_steps, inference_temperature, pitch_steps):
    """Post a prompt recording to the synthesis endpoint and return the audio it sends back.

    Args:
        text: Text supplied by the caller. NOTE(review): this value is not
            part of the request built below -- confirm whether the endpoint
            expects it as a query parameter or in the body.
        prompt_audio: ``(sample_rate, samples)`` pair where ``samples`` is a
            NumPy array. Presumably 16-bit PCM, since it is divided by
            32768 before upload -- confirm against the caller.
        prompt_seconds: Sent as the ``prompt_seconds`` query parameter.
        inference_steps: Sent as the ``soundstorm_steps`` query parameter.
        inference_temperature: Sent as the ``temperature`` query parameter.
        pitch_steps: Sent as ``pitch_steps`` after conversion to ``int``.

    Returns:
        ``(sample_rate, audio)`` decoded from the WAV response, with
        ``audio`` rescaled and converted to ``int16``.

    Raises:
        ValueError: If ``prompt_audio`` is ``None``.
        RuntimeError: If the endpoint answers with an error status, or the
            response body cannot be decoded as a WAV file.
    """
    if prompt_audio is None:
        raise ValueError('prompt_audio is required: no prompt recording was supplied')

    sr = prompt_audio[0]
    prompt_samples = prompt_audio[1].astype(np.float32, order='C') / 32768.0

    # Serialise the prompt as an in-memory WAV. getvalue() returns the whole
    # buffer regardless of where the stream position was left by the writer.
    byte_io = io.BytesIO()
    wavfile.write(byte_io, sr, prompt_samples)
    prompt_audio_bytes = byte_io.getvalue()

    params = {
        'synthesize': 'true',
        'pitch_steps': int(pitch_steps),
        'soundstorm_steps': inference_steps,
        'temperature': inference_temperature,
        'prompt_seconds': prompt_seconds,
    }
    response = requests.post(
        base_url,
        data=prompt_audio_bytes,
        params=params,
        headers={'Authorization': f'Token {token_str}'},
    )

    # Fail loudly with the server's own message instead of trying to parse an
    # error payload as audio.
    if not response.ok:
        raise RuntimeError(
            f'Synthesis request failed with HTTP {response.status_code}: {response.text[:500]}'
        )

    # Decode straight from memory: a fixed on-disk file name would be shared
    # (and overwritten) by concurrent requests.
    try:
        sample_rate, audio = wavfile.read(io.BytesIO(response.content))
    except Exception as err:
        raise RuntimeError('Synthesis response could not be decoded as a WAV file') from err

    # The 1.414 divisor is kept from the original code (presumably sqrt(2)
    # headroom on the returned float samples -- TODO confirm). Clip before the
    # cast so out-of-range samples saturate instead of wrapping around.
    scaled = audio / 1.414 * 32767
    audio = np.clip(scaled, -32768, 32767).astype(np.int16)
    return (sample_rate, audio)
|