Spaces:

stevenhillis
/

intone_mvp

Sleeping

stevenhillis committed on Sep 20, 2023

Commit

9bcb0d8

•

1 Parent(s): 0270007

prompt audio as bytestring

Files changed (1) hide show

app.py CHANGED Viewed

@@ -14,9 +14,13 @@ token_str = os.environ['DG_TOKEN']
 def tts_fn(text, prompt_audio, prompt_seconds, inference_steps, inference_temperature, pitch_steps):
     texts = [text]
     sr = prompt_audio[0]
-    prompt_audio = np.reshape(prompt_audio[1], (1, 1, -1)).astype(np.float32, order='C') / 32768.0
     params={'synthesize': 'true', 'pitch_steps': int(pitch_steps), 'soundstorm_steps': inference_steps, 'temperature': inference_temperature, 'prompt_seconds': prompt_seconds}
-    files=[('texts', ('texts', json.dumps(texts), 'application/json')), ('prompt_audio', ('prompt_audio', json.dumps(prompt_audio.tolist()), 'application/json'))]
     response = requests.post(base_url, files=files, params=params, headers={'Authorization': f'Token {token_str}'}).json()
     try:
         sample_rate = int(response['results'][0]['sample_rate'])

 def tts_fn(text, prompt_audio, prompt_seconds, inference_steps, inference_temperature, pitch_steps):
     texts = [text]
     sr = prompt_audio[0]
+    prompt_audio = np.reshape(prompt_audio[1], (1, -1)).astype(np.float32, order='C') / 32768.0
+    byte_io = io.BytesIO(bytes())
+    wavfile.write(byte_io, sr, prompt_audio)
+    prompt_audio_bytes = byte_io.read()
+    prompt_audio = [base64.b64encode(prompt_audio_bytes).decode('utf-8')] * len(texts)
     params={'synthesize': 'true', 'pitch_steps': int(pitch_steps), 'soundstorm_steps': inference_steps, 'temperature': inference_temperature, 'prompt_seconds': prompt_seconds}
+    files=[('texts', ('texts', json.dumps(texts), 'application/json')), ('prompt_audio', ('prompt_audio', json.dumps(prompt_audio), 'application/json'))]
     response = requests.post(base_url, files=files, params=params, headers={'Authorization': f'Token {token_str}'}).json()
     try:
         sample_rate = int(response['results'][0]['sample_rate'])