Abbas0786 committed on
Commit
ab0b8b5
·
verified ·
1 Parent(s): f156c67

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +29 -26
app.py CHANGED
@@ -1,46 +1,49 @@
1
  import streamlit as st
2
  from transformers import pipeline
 
 
3
  import numpy as np
4
- from pydub import AudioSegment
5
  import io
6
 
7
- # Load the ASR pipeline with Whisper model
8
- pipe = pipeline("automatic-speech-recognition", model="openai/whisper-large-v3")
9
 
10
- def audio_to_numpy(audio_file):
11
- # Load the audio file into an AudioSegment object
12
- audio = AudioSegment.from_file(io.BytesIO(audio_file.read()))
 
13
 
14
- # Convert audio to mono and set sample rate to 16000
15
- audio = audio.set_channels(1).set_frame_rate(16000)
16
 
17
- # Convert to numpy array
18
- samples = np.array(audio.get_array_of_samples())
19
-
20
- # Normalize the data
21
- samples = samples.astype(np.float32) / np.iinfo(audio.sample_width * 8).max
22
-
23
- return samples
24
 
25
- def transcribe_audio(audio_file):
26
- # Convert audio bytes to numpy array
27
- audio_numpy = audio_to_numpy(audio_file)
 
28
 
29
  # Transcribe audio
30
- transcription = pipe(audio_numpy)
 
31
  return transcription['text']
32
 
33
  # Streamlit UI
34
- st.title("Speech-to-Text Transcription App")
35
- st.write("Upload an audio file to transcribe its content into text.")
36
 
37
- uploaded_file = st.file_uploader("Choose an audio file...", type=["wav", "mp3", "flac"])
38
 
39
  if uploaded_file is not None:
40
  try:
41
- with st.spinner("Transcribing..."):
42
- text = transcribe_audio(uploaded_file)
43
- st.subheader("Transcription Result:")
44
- st.write(text)
 
 
 
 
 
45
  except Exception as e:
46
  st.error(f"An error occurred: {e}")
 
1
  import streamlit as st
2
  from transformers import pipeline
3
+ import librosa
4
+ import soundfile as sf
5
  import numpy as np
 
6
  import io
7
 
8
# Streamlit re-executes this script on every user interaction, so the model is
# built through a cached loader to avoid reloading it on each rerun.
@st.cache_resource
def _load_asr_pipeline():
    """Build the Urdu wav2vec2 speech-recognition pipeline (cached across reruns)."""
    return pipeline("automatic-speech-recognition", model="kingabzpro/wav2vec2-large-xls-r-300m-Urdu")


# Module-level handle used by transcribe_audio().
pipe = _load_asr_pipeline()
10
 
11
def load_audio(audio_file):
    """Decode an uploaded audio file into a waveform.

    Args:
        audio_file: Object exposing ``read()`` that yields the encoded audio
            bytes (here, the file returned by the Streamlit uploader).

    Returns:
        The ``(samples, sample_rate)`` pair produced by ``librosa.load`` when
        asked for a 16000 Hz sample rate.
    """
    # Wrap the raw bytes in an in-memory buffer so librosa can read them
    # like a file, and hand its result straight back to the caller.
    buffer = io.BytesIO(audio_file.read())
    return librosa.load(buffer, sr=16000)
 
 
 
 
 
 
20
 
21
def transcribe_audio(audio_np):
    """Transcribe a waveform using the module-level ASR pipeline.

    Args:
        audio_np: NumPy array of audio samples. It is assumed to be sampled at
            16000 Hz, which is the rate ``load_audio`` requests from librosa.

    Returns:
        The recognized text, taken from the ``'text'`` entry of the pipeline
        output.
    """
    # The previous version passed the result of sf.write(...) to the pipeline,
    # but sf.write returns None, so the model never received any audio.
    # The pipeline accepts the raw samples plus their sampling rate directly,
    # so no intermediate WAV encoding is needed.
    transcription = pipe({"raw": audio_np, "sampling_rate": 16000})

    return transcription['text']
30
 
31
  # Streamlit UI
32
st.title("Urdu Speech-to-Text Transcription App")
st.write("Upload an audio file to transcribe its content into Urdu text.")

# Only WAV and MP3 uploads are offered by the widget.
uploaded_file = st.file_uploader(
    "Choose an audio file...",
    type=["wav", "mp3"],
)

if uploaded_file is not None:
    try:
        # Decode the upload, run it through the model, then show the text.
        waveform, _sample_rate = load_audio(uploaded_file)
        transcript = transcribe_audio(waveform)

        st.subheader("Transcription Result:")
        st.write(transcript)
    except Exception as e:
        # Top-level UI boundary: report any failure in the page itself.
        st.error(f"An error occurred: {e}")