Baghdad99 committed on
Commit
fb1d852
1 Parent(s): 47453aa

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +21 -10
app.py CHANGED
@@ -1,7 +1,8 @@
1
  import gradio as gr
2
- from transformers import pipeline, AutoTokenizer
3
  import numpy as np
4
  from pydub import AudioSegment
 
 
5
 
6
  # Load the pipeline for speech recognition and translation
7
  pipe = pipeline(
@@ -13,22 +14,32 @@ translator = pipeline("text2text-generation", model="Baghdad99/saad-hausa-text-t
13
  tts = pipeline("text-to-speech", model="Baghdad99/english_voice_tts")
14
 
15
  def translate_speech(audio_data_tuple):
16
- print(f"Type of audio: {type(audio_data_tuple)}, Value of audio: {audio_data_tuple}") # Debug line
17
-
18
  # Extract the audio data from the tuple
19
  sample_rate, audio_data = audio_data_tuple
20
 
21
- # Print the shape and type of the audio data
22
- print(f"Audio data type: {type(audio_data)}, Audio data shape: {audio_data.shape}")
 
 
 
 
 
 
 
 
23
 
24
- # Normalize the audio data to the range [-1, 1]
25
- audio_data_normalized = audio_data / np.iinfo(audio_data.dtype).max
 
26
 
27
- # Convert the normalized audio data to float64
28
- audio_data_float64 = audio_data_normalized.astype(np.float64)
 
 
29
 
 
30
  # Use the speech recognition pipeline to transcribe the audio
31
- output = pipe(audio_data_float64)
32
 
33
  print(f"Output: {output}") # Print the output to see what it contains
34
 
 
1
  import gradio as gr
 
2
  import numpy as np
3
  from pydub import AudioSegment
4
+ import io
5
+ from transformers import pipeline, AutoTokenizer
6
 
7
  # Load the pipeline for speech recognition and translation
8
  pipe = pipeline(
 
14
  tts = pipeline("text-to-speech", model="Baghdad99/english_voice_tts")
15
 
16
  def translate_speech(audio_data_tuple):
 
 
17
  # Extract the audio data from the tuple
18
  sample_rate, audio_data = audio_data_tuple
19
 
20
+ # Convert the audio data to int16 format
21
+ audio_data_int16 = audio_data.astype(np.int16)
22
+
23
+ # Create an AudioSegment from the audio data
24
+ audio_segment = AudioSegment(
25
+ audio_data_int16.tobytes(), # Audio data as bytes
26
+ frame_rate=sample_rate,
27
+ sample_width=audio_data_int16.dtype.itemsize, # Width in bytes
28
+ channels=1
29
+ )
30
 
31
+ # Export the AudioSegment as MP3
32
+ mp3_buffer = io.BytesIO()
33
+ audio_segment.export(mp3_buffer, format="mp3")
34
 
35
+ # Now you have an MP3 file in a BytesIO buffer. You can write it to a file,
36
+ # send it over a network, etc. Here's how you can write it to a file:
37
+ with open("audio.mp3", "wb") as f:
38
+ f.write(mp3_buffer.getvalue())
39
 
40
+ # Now you can feed the MP3 file to your model
41
  # Use the speech recognition pipeline to transcribe the audio
42
+ output = pipe("audio.mp3")
43
 
44
  print(f"Output: {output}") # Print the output to see what it contains
45