Helw150 committed on
Commit
67da1a1
1 Parent(s): 3583d5c

Add Buffering to Avoid Speech Gaps due to Orca Slowdown

Browse files
Files changed (2) hide show
  1. app.py +9 -5
  2. requirements.txt +1 -1
app.py CHANGED
@@ -69,7 +69,7 @@ def response(state: AppState, audio: tuple):
69
  if not audio:
70
  return AppState()
71
 
72
- file_name = f"/tmp/{xxhash.xxh32(bytes(audio[1])).hexdigest()}.wav"
73
 
74
  sf.write(file_name, audio[1], audio[0], format="wav")
75
 
@@ -103,7 +103,8 @@ def response(state: AppState, audio: tuple):
103
  state.model_outs = None
104
  prev_outs = causal_outs
105
  stream = orca.stream_open()
106
-
 
107
  for resp, outs in diva_audio(
108
  (audio[0], audio[1]),
109
  prev_outs=(prev_outs if prev_outs is not None else None),
@@ -112,15 +113,18 @@ def response(state: AppState, audio: tuple):
112
  if prev_resp == LOADER_STR:
113
  prev_resp = ""
114
  state.conversation[-1]["content"] = resp
115
- pcm = stream.synthesize(resp[len(prev_resp) :])
116
  audio_chunk = None
 
117
  if pcm is not None:
 
 
118
  mp3_io = io.BytesIO()
119
  sf.write(
120
- mp3_io, np.asarray(pcm).astype(np.int16), orca.sample_rate, format="mp3"
121
  )
122
  audio_chunk = mp3_io.getvalue()
123
  mp3_io.close()
 
124
  yield state, state.conversation, audio_chunk
125
 
126
  del outs.logits
@@ -256,4 +260,4 @@ with gr.Blocks(theme=theme, js=js) as demo:
256
  )
257
 
258
  if __name__ == "__main__":
259
- demo.launch()
 
69
  if not audio:
70
  return AppState()
71
 
72
+ file_name = f"./{xxhash.xxh32(bytes(audio[1])).hexdigest()}.wav"
73
 
74
  sf.write(file_name, audio[1], audio[0], format="wav")
75
 
 
103
  state.model_outs = None
104
  prev_outs = causal_outs
105
  stream = orca.stream_open()
106
+ i = 0
107
+ buff = []
108
  for resp, outs in diva_audio(
109
  (audio[0], audio[1]),
110
  prev_outs=(prev_outs if prev_outs is not None else None),
 
113
  if prev_resp == LOADER_STR:
114
  prev_resp = ""
115
  state.conversation[-1]["content"] = resp
 
116
  audio_chunk = None
117
+ pcm = stream.synthesize(resp[len(prev_resp) :])
118
  if pcm is not None:
119
+ buff.extend(pcm)
120
+ if len(buff) > (orca.sample_rate*2):
121
  mp3_io = io.BytesIO()
122
  sf.write(
123
+ mp3_io, np.asarray(buff[:orca.sample_rate]).astype(np.int16), orca.sample_rate, format="mp3"
124
  )
125
  audio_chunk = mp3_io.getvalue()
126
  mp3_io.close()
127
+ buff = buff[orca.sample_rate:]
128
  yield state, state.conversation, audio_chunk
129
 
130
  del outs.logits
 
260
  )
261
 
262
  if __name__ == "__main__":
263
+ demo.launch(share=True)
requirements.txt CHANGED
@@ -1,5 +1,5 @@
1
  transformers==4.43.3
2
- gradio==5.0.1
3
  spaces
4
  accelerate
5
 
 
1
  transformers==4.43.3
2
+ gradio==5.1.0
3
  spaces
4
  accelerate
5