Spaces:

Baghdad99
/

ha-en

Sleeping

Baghdad99 committed on Dec 7, 2023

Commit

22cb79a

•

1 Parent(s): 03b277d

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -3,6 +3,7 @@ import requests
 import soundfile as sf
 import numpy as np
 import tempfile
 # Define the Hugging Face Inference API URLs and headers
 ASR_API_URL = "https://api-inference.huggingface.co/models/Baghdad99/saad-speech-recognition-hausa-audio-to-text"
@@ -47,11 +48,13 @@ def translate_speech(audio):
     response = requests.post(TTS_API_URL, headers=headers, json={"inputs": translated_text})
     audio_bytes = response.content
-    # Convert the audio bytes to numpy array
-    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as f:
-        f.write(audio_bytes)
-        audio_file = f.name
-    audio_data, _ = sf.read(audio_file)
     return audio_data

 import soundfile as sf
 import numpy as np
 import tempfile
+from pydub import AudioSegment
 # Define the Hugging Face Inference API URLs and headers
 ASR_API_URL = "https://api-inference.huggingface.co/models/Baghdad99/saad-speech-recognition-hausa-audio-to-text"
     response = requests.post(TTS_API_URL, headers=headers, json={"inputs": translated_text})
     audio_bytes = response.content
+    # Convert the audio bytes to an audio segment
+    audio_segment = AudioSegment.from_file(io.BytesIO(audio_bytes), format="wav")
+    # Convert the audio segment to a numpy array
+    audio_data = np.array(audio_segment.get_array_of_samples())
+    if audio_segment.channels == 2:
+        audio_data = audio_data.reshape((-1, 2))
     return audio_data