stevenhillis committed on
Commit
9c1a82f
1 Parent(s): 9bcb0d8

Use simple api call, write binary response to tmp file and read back out

Browse files
Files changed (1) hide show
  1. app.py +9 -6
app.py CHANGED
@@ -14,17 +14,20 @@ token_str = os.environ['DG_TOKEN']
14
  def tts_fn(text, prompt_audio, prompt_seconds, inference_steps, inference_temperature, pitch_steps):
15
  texts = [text]
16
  sr = prompt_audio[0]
17
- prompt_audio = np.reshape(prompt_audio[1], (1, -1)).astype(np.float32, order='C') / 32768.0
18
  byte_io = io.BytesIO(bytes())
19
  wavfile.write(byte_io, sr, prompt_audio)
20
  prompt_audio_bytes = byte_io.read()
21
- prompt_audio = [base64.b64encode(prompt_audio_bytes).decode('utf-8')] * len(texts)
22
  params={'synthesize': 'true', 'pitch_steps': int(pitch_steps), 'soundstorm_steps': inference_steps, 'temperature': inference_temperature, 'prompt_seconds': prompt_seconds}
23
- files=[('texts', ('texts', json.dumps(texts), 'application/json')), ('prompt_audio', ('prompt_audio', json.dumps(prompt_audio), 'application/json'))]
24
- response = requests.post(base_url, files=files, params=params, headers={'Authorization': f'Token {token_str}'}).json()
25
  try:
26
- sample_rate = int(response['results'][0]['sample_rate'])
27
- audio = (np.array(response['results'][0]['audio']).transpose() / 1.414 * 32767).astype(np.int16)
 
 
 
 
 
28
  except Exception:
29
  print(response)
30
  return (sample_rate, audio)
 
14
  def tts_fn(text, prompt_audio, prompt_seconds, inference_steps, inference_temperature, pitch_steps):
15
  texts = [text]
16
  sr = prompt_audio[0]
17
+ prompt_audio = prompt_audio[1].astype(np.float32, order='C') / 32768.0
18
  byte_io = io.BytesIO(bytes())
19
  wavfile.write(byte_io, sr, prompt_audio)
20
  prompt_audio_bytes = byte_io.read()
 
21
  params={'synthesize': 'true', 'pitch_steps': int(pitch_steps), 'soundstorm_steps': inference_steps, 'temperature': inference_temperature, 'prompt_seconds': prompt_seconds}
22
+ response = requests.post(base_url, data=prompt_audio_bytes, params=params, headers={'Authorization': f'Token {token_str}'})
 
23
  try:
24
+ with open('result.wav', 'wb') as f:
25
+ for chunk in response.iter_content(chunk_size=1024):
26
+ if chunk: f.write(chunk)
27
+ sample_rate, audio = wavfile.read('result.wav')
28
+ print(audio.dtype)
29
+ audio = (audio / 1.414 * 32767).astype(np.int16)
30
+ print(audio.dtype)
31
  except Exception:
32
  print(response)
33
  return (sample_rate, audio)