fargerm committed on
Commit 9bc5a61
1 Parent(s): 9cd0c5f

Update app.py

Files changed (1)
  1. app.py +50 -44
app.py CHANGED
@@ -1,40 +1,49 @@
  import streamlit as st
- from transformers import pipeline, Wav2Vec2ForCTC, Wav2Vec2Processor
- import torch
- from io import BytesIO
- import soundfile as sf
-
- # Title
- st.title("Text/Audio Translator")
-
- # Text Input
- text_input = st.text_area("Enter text in English:")
-
- # Audio Input
- audio_file = st.file_uploader("Or upload an audio file:", type=["wav", "mp3"])
-
- # Initialize variables
- transcription = ""
- translated_text = ""
-
- # Speech-to-Text Conversion
- if audio_file is not None:
-     st.write("Processing audio file...")
-     # Load Wav2Vec2 model for speech-to-text
-     processor = Wav2Vec2Processor.from_pretrained("facebook/wav2vec2-large-960h")
-     model = Wav2Vec2ForCTC.from_pretrained("facebook/wav2vec2-large-960h")
-
-     # Read and process the audio input
-     audio_input, _ = sf.read(BytesIO(audio_file.read()))
-     input_values = processor(torch.tensor(audio_input), return_tensors="pt", padding="longest").input_values
-
-     # Perform speech-to-text
-     logits = model(input_values).logits
-     predicted_ids = torch.argmax(logits, dim=-1)
-     transcription = processor.decode(predicted_ids[0])
      st.write("Transcription:", transcription)

  # Language Translation
  if text_input or transcription:
      st.write("Translating text...")
      # Select the language to translate into
@@ -45,33 +54,30 @@ if text_input or transcription:

      # Choose the correct translation model based on the selected language
      if target_language == "French":
-         translator = pipeline("translation_en_to_fr")
      elif target_language == "Chinese":
-         translator = pipeline("translation_en_to_zh")
      elif target_language == "Italian":
-         translator = pipeline("translation_en_to_it")
      elif target_language == "Urdu":
-         translator = pipeline("translation_en_to_ur")  # Make sure you have the correct model for this
      elif target_language == "Hindi":
-         translator = pipeline("translation_en_to_hi")  # Make sure you have the correct model for this
      elif target_language == "Punjabi":
-         translator = pipeline("translation_en_to_pa")  # Make sure you have the correct model for this
      elif target_language == "Saraiki":
-         translator = pipeline("translation_en_to_skr")  # Custom model for Saraiki, if available
      elif target_language == "Pashto":
-         translator = pipeline("translation_en_to_ps")  # Custom model for Pashto, if available
-
-     # Translate the text
-     translated_text = translator(text_input or transcription)
-     st.write("Translated Text:", translated_text[0]['translation_text'])

  # Text-to-Speech
- if translated_text:
      st.write("Generating speech...")
      tts = pipeline("text-to-speech", model="microsoft/speecht5_tts")
      tts_audio = tts(translated_text[0]['translation_text'])[0]
      st.audio(tts_audio, format="audio/wav")
-
- # Instructions for deployment on Hugging Face Spaces (not part of the app code)
- st.write("Deploy this app on Hugging Face Spaces by pushing this code to your repository.")
-
  import streamlit as st
+ import whisper
+ import numpy as np
+ import sounddevice as sd
+ import tempfile
+ import os
+ import io

+ # Load Whisper model
+ model = whisper.load_model("base")  # You can choose a larger model if needed

+ # Real-time audio recording
+ def record_audio(duration=5, samplerate=16000):
+     st.write("Recording...")
+     audio = sd.rec(int(duration * samplerate), samplerate=samplerate, channels=1, dtype='int16')
+     sd.wait()
+     return audio.flatten()

+ # Convert recorded audio to WAV format
+ def audio_to_wav(audio, samplerate=16000):
+     temp_file = tempfile.NamedTemporaryFile(delete=False, suffix='.wav')
+     with open(temp_file.name, 'wb') as f:
+         f.write(audio)
+     return temp_file.name

+ # Function to transcribe audio using Whisper
+ def transcribe_audio(audio):
+     result = model.transcribe(audio)
+     return result['text']

+ # Streamlit interface
+ st.title("Text/Audio Translator")

+ # Recording button
+ if st.button("Record"):
+     audio = record_audio(duration=5)  # Record for 5 seconds
+     st.write("Processing audio...")
+     wav_path = audio_to_wav(audio)
+     transcription = transcribe_audio(wav_path)
      st.write("Transcription:", transcription)

+ # Text Input
+ text_input = st.text_area("Or enter text in English:")
+
  # Language Translation
+ translator = None
  if text_input or transcription:
      st.write("Translating text...")
      # Select the language to translate into

      # Choose the correct translation model based on the selected language
      if target_language == "French":
+         translator = pipeline("translation", model="Helsinki-NLP/opus-mt-en-fr")
      elif target_language == "Chinese":
+         translator = pipeline("translation", model="Helsinki-NLP/opus-mt-en-zh")
      elif target_language == "Italian":
+         translator = pipeline("translation", model="Helsinki-NLP/opus-mt-en-it")
      elif target_language == "Urdu":
+         translator = pipeline("translation", model="Helsinki-NLP/opus-mt-en-ur")
      elif target_language == "Hindi":
+         translator = pipeline("translation", model="Helsinki-NLP/opus-mt-en-hi")
      elif target_language == "Punjabi":
+         translator = pipeline("translation", model="Helsinki-NLP/opus-mt-en-pa")
      elif target_language == "Saraiki":
+         st.write("Saraiki model not available.")
      elif target_language == "Pashto":
+         st.write("Pashto model not available.")
+
+     if translator:
+         text = text_input or transcription
+         translated_text = translator(text)
+         st.write("Translated Text:", translated_text[0]['translation_text'])

  # Text-to-Speech
+ if translator:
      st.write("Generating speech...")
      tts = pipeline("text-to-speech", model="microsoft/speecht5_tts")
      tts_audio = tts(translated_text[0]['translation_text'])[0]
      st.audio(tts_audio, format="audio/wav")
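Review note: the new version still calls pipeline() for translation and text-to-speech, but the transformers import was dropped, and transcription is only assigned inside the Record branch, so `if text_input or transcription:` raises a NameError on a fresh page load. A minimal sketch of the missing setup, assuming the rest of app.py stays as committed:

import streamlit as st
import whisper
from transformers import pipeline  # still needed for the translation and text-to-speech pipelines

model = whisper.load_model("base")

# Give transcription a default so the later `if text_input or transcription:`
# check works even before the Record button has been pressed.
transcription = ""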
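Review note: audio_to_wav() writes raw int16 samples into a .wav file with no RIFF/WAVE header, so ffmpeg (which Whisper uses to load files) may refuse to read it. A sketch of a header-writing replacement using the standard-library wave module; it keeps the committed function name and the 16 kHz mono int16 format produced by record_audio(), and is an illustration rather than the author's fix:

import tempfile
import wave
import numpy as np

def audio_to_wav(audio: np.ndarray, samplerate: int = 16000) -> str:
    """Write mono int16 samples to a valid WAV file and return its path."""
    path = tempfile.NamedTemporaryFile(delete=False, suffix=".wav").name
    with wave.open(path, "wb") as wav_file:
        wav_file.setnchannels(1)        # mono, as recorded by record_audio()
        wav_file.setsampwidth(2)        # int16 -> 2 bytes per sample
        wav_file.setframerate(samplerate)
        wav_file.writeframes(audio.tobytes())
    return path

Alternatively, Whisper's transcribe() also accepts a float32 waveform directly, so the temporary file could be skipped with model.transcribe(audio.astype(np.float32) / 32768.0).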
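Review note: the text-to-speech block is carried over unchanged and indexes the pipeline result with [0] before handing it to st.audio as if it were WAV bytes. In transformers, the text-to-speech pipeline returns a dict containing an audio array and a sampling rate, and microsoft/speecht5_tts additionally expects speaker embeddings. A hedged sketch of that step; the xvector dataset and index follow the SpeechT5 model card and are assumptions here, as is a Streamlit version new enough to accept st.audio(..., sample_rate=...):

import torch
import streamlit as st
from datasets import load_dataset
from transformers import pipeline

tts = pipeline("text-to-speech", model="microsoft/speecht5_tts")

# SpeechT5 needs a speaker embedding; this dataset/index mirrors the model card example (assumption).
xvectors = load_dataset("Matthijs/cmu-arctic-xvectors", split="validation")
speaker_embedding = torch.tensor(xvectors[7306]["xvector"]).unsqueeze(0)

speech = tts(
    "Bonjour le monde",  # in the app this would be translated_text[0]['translation_text']
    forward_params={"speaker_embeddings": speaker_embedding},
)
# The pipeline returns {"audio": np.ndarray, "sampling_rate": int}.
st.audio(speech["audio"], sample_rate=speech["sampling_rate"])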