Baghdad99 committed on
Commit
22cb79a
1 Parent(s): 03b277d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +8 -5
app.py CHANGED
@@ -3,6 +3,7 @@ import requests
3
  import soundfile as sf
4
  import numpy as np
5
  import tempfile
 
6
 
7
  # Define the Hugging Face Inference API URLs and headers
8
  ASR_API_URL = "https://api-inference.huggingface.co/models/Baghdad99/saad-speech-recognition-hausa-audio-to-text"
@@ -47,11 +48,13 @@ def translate_speech(audio):
47
  response = requests.post(TTS_API_URL, headers=headers, json={"inputs": translated_text})
48
  audio_bytes = response.content
49
 
50
- # Convert the audio bytes to numpy array
51
- with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as f:
52
- f.write(audio_bytes)
53
- audio_file = f.name
54
- audio_data, _ = sf.read(audio_file)
 
 
55
 
56
  return audio_data
57
 
 
3
  import soundfile as sf
4
  import numpy as np
5
  import tempfile
6
+ from pydub import AudioSegment
7
 
8
  # Define the Hugging Face Inference API URLs and headers
9
  ASR_API_URL = "https://api-inference.huggingface.co/models/Baghdad99/saad-speech-recognition-hausa-audio-to-text"
 
48
  response = requests.post(TTS_API_URL, headers=headers, json={"inputs": translated_text})
49
  audio_bytes = response.content
50
 
51
+ # Convert the audio bytes to an audio segment
52
+ audio_segment = AudioSegment.from_file(io.BytesIO(audio_bytes), format="wav")
53
+
54
+ # Convert the audio segment to a numpy array
55
+ audio_data = np.array(audio_segment.get_array_of_samples())
56
+ if audio_segment.channels == 2:
57
+ audio_data = audio_data.reshape((-1, 2))
58
 
59
  return audio_data
60