Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -1,40 +1,49 @@
|
|
1 |
import streamlit as st
|
2 |
-
|
3 |
-
import
|
4 |
-
|
5 |
-
import
|
|
|
|
|
6 |
|
7 |
-
#
|
8 |
-
|
9 |
-
|
10 |
-
# Text Input
|
11 |
-
text_input = st.text_area("Enter text in English:")
|
12 |
|
13 |
-
#
|
14 |
-
|
|
|
|
|
|
|
|
|
15 |
|
16 |
-
#
|
17 |
-
|
18 |
-
|
|
|
|
|
|
|
19 |
|
20 |
-
#
|
21 |
-
|
22 |
-
|
23 |
-
|
24 |
-
processor = Wav2Vec2Processor.from_pretrained("facebook/wav2vec2-large-960h")
|
25 |
-
model = Wav2Vec2ForCTC.from_pretrained("facebook/wav2vec2-large-960h")
|
26 |
|
27 |
-
|
28 |
-
|
29 |
-
input_values = processor(torch.tensor(audio_input), return_tensors="pt", padding="longest").input_values
|
30 |
|
31 |
-
|
32 |
-
|
33 |
-
|
34 |
-
|
|
|
|
|
35 |
st.write("Transcription:", transcription)
|
36 |
|
|
|
|
|
|
|
37 |
# Language Translation
|
|
|
38 |
if text_input or transcription:
|
39 |
st.write("Translating text...")
|
40 |
# Select the language to translate into
|
@@ -45,33 +54,30 @@ if text_input or transcription:
|
|
45 |
|
46 |
# Choose the correct translation model based on the selected language
|
47 |
if target_language == "French":
|
48 |
-
translator = pipeline("
|
49 |
elif target_language == "Chinese":
|
50 |
-
translator = pipeline("
|
51 |
elif target_language == "Italian":
|
52 |
-
translator = pipeline("
|
53 |
elif target_language == "Urdu":
|
54 |
-
translator = pipeline("
|
55 |
elif target_language == "Hindi":
|
56 |
-
translator = pipeline("
|
57 |
elif target_language == "Punjabi":
|
58 |
-
translator = pipeline("
|
59 |
elif target_language == "Saraiki":
|
60 |
-
|
61 |
elif target_language == "Pashto":
|
62 |
-
|
63 |
-
|
64 |
-
|
65 |
-
|
66 |
-
|
|
|
67 |
|
68 |
# Text-to-Speech
|
69 |
-
if
|
70 |
st.write("Generating speech...")
|
71 |
tts = pipeline("text-to-speech", model="microsoft/speecht5_tts")
|
72 |
tts_audio = tts(translated_text[0]['translation_text'])[0]
|
73 |
st.audio(tts_audio, format="audio/wav")
|
74 |
-
|
75 |
-
# Instructions for deployment on Hugging Face Spaces (not part of the app code)
|
76 |
-
st.write("Deploy this app on Hugging Face Spaces by pushing this code to your repository.")
|
77 |
-
|
|
|
1 |
import streamlit as st
|
2 |
+
import whisper
|
3 |
+
import numpy as np
|
4 |
+
import sounddevice as sd
|
5 |
+
import tempfile
|
6 |
+
import os
|
7 |
+
import io
|
8 |
|
9 |
+
# Load Whisper model
# Streamlit re-executes this script on every widget interaction; caching the
# loaded model as a shared resource avoids re-reading the weights each time.
@st.cache_resource
def _load_whisper_model(name="base"):
    """Load the Whisper checkpoint called ``name`` (cached across script re-runs)."""
    return whisper.load_model(name)


model = _load_whisper_model()  # You can choose a larger model if needed
|
|
|
|
|
|
|
11 |
|
12 |
+
# Real-time audio recording
|
13 |
+
def record_audio(duration=5, samplerate=16000):
    """Record one channel of 16-bit audio and return it as a flat array.

    Args:
        duration: Length of the recording in seconds.
        samplerate: Sampling rate in Hz; together with ``duration`` it fixes
            the number of frames requested from the audio device.

    Returns:
        The recorded samples, flattened to one dimension.
    """
    st.write("Recording...")
    recording = sd.rec(
        int(duration * samplerate),
        samplerate=samplerate,
        channels=1,
        dtype='int16',
    )
    # sd.rec() was started above; wait for it before touching the buffer.
    sd.wait()
    flat_samples = recording.flatten()
    return flat_samples
|
18 |
|
19 |
+
# Convert recorded audio to WAV format
|
20 |
+
def audio_to_wav(audio, samplerate=16000):
    """Write mono 16-bit PCM audio to a temporary WAV file and return its path.

    The previous implementation dumped the raw sample bytes into a file that
    was merely *named* ``.wav``; without a RIFF/WAV header the sample rate and
    sample format are unknown to any decoder. This version writes a proper
    WAV container using the standard-library ``wave`` module.

    Args:
        audio: Samples to store. Either a bytes-like object that already holds
            little-endian 16-bit PCM, or an array-like of samples. Integer
            arrays are stored as int16; floating-point arrays are treated as
            normalised to [-1.0, 1.0] and scaled to the int16 range.
        samplerate: Sampling rate in Hz recorded in the WAV header.

    Returns:
        Path of the created file. The file is not deleted automatically; the
        caller is responsible for removing it.
    """
    import wave  # local import: only this function needs it

    if isinstance(audio, (bytes, bytearray, memoryview)):
        pcm = bytes(audio)
    else:
        samples = np.asarray(audio)
        if np.issubdtype(samples.dtype, np.floating):
            samples = np.clip(samples, -1.0, 1.0) * 32767
        # WAV stores PCM little-endian regardless of the host byte order.
        pcm = samples.astype("<i2").ravel().tobytes()

    # Reserve a unique name, then close the handle right away so it is not
    # leaked and the path can be reopened on every platform.
    with tempfile.NamedTemporaryFile(delete=False, suffix='.wav') as temp_file:
        wav_path = temp_file.name

    with wave.open(wav_path, 'wb') as wav_file:
        wav_file.setnchannels(1)
        wav_file.setsampwidth(2)  # 2 bytes per sample == 16-bit audio
        wav_file.setframerate(int(samplerate))
        wav_file.writeframes(pcm)

    return wav_path
|
25 |
|
26 |
+
# Function to transcribe audio using Whisper
|
27 |
+
def transcribe_audio(audio):
    """Run the module-level Whisper ``model`` on ``audio`` and return its text.

    Args:
        audio: Input forwarded unchanged to ``model.transcribe``.

    Returns:
        The ``'text'`` entry of the transcription result.
    """
    return model.transcribe(audio)['text']
|
|
|
|
|
30 |
|
31 |
+
# Streamlit interface
|
32 |
+
st.title("Text/Audio Translator")
|
|
|
33 |
|
34 |
+
# Recording button
# `transcription` must exist on every run: the translation step below reads it
# even when the button was not pressed. Because Streamlit re-executes the
# script on each interaction, the last result is kept in session state so it
# is still available on the re-run triggered by another widget.
transcription = st.session_state.get("transcription", "")
if st.button("Record"):
    audio = record_audio(duration=5)  # Record for 5 seconds
    st.write("Processing audio...")
    wav_path = audio_to_wav(audio)
    try:
        transcription = transcribe_audio(wav_path)
    finally:
        # The temporary file is created with delete=False; remove it here so
        # recordings do not pile up on disk.
        os.remove(wav_path)
    st.session_state["transcription"] = transcription
if transcription:
    st.write("Transcription:", transcription)
|
41 |
|
42 |
+
# Text Input
|
43 |
+
text_input = st.text_area("Or enter text in English:")
|
44 |
+
|
45 |
# Language Translation
|
46 |
+
translator = None
|
47 |
if text_input or transcription:
|
48 |
st.write("Translating text...")
|
49 |
# Select the language to translate into
|
|
|
54 |
|
55 |
# Choose the correct translation model based on the selected language
|
56 |
if target_language == "French":
|
57 |
+
translator = pipeline("translation", model="Helsinki-NLP/opus-mt-en-fr")
|
58 |
elif target_language == "Chinese":
|
59 |
+
translator = pipeline("translation", model="Helsinki-NLP/opus-mt-en-zh")
|
60 |
elif target_language == "Italian":
|
61 |
+
translator = pipeline("translation", model="Helsinki-NLP/opus-mt-en-it")
|
62 |
elif target_language == "Urdu":
|
63 |
+
translator = pipeline("translation", model="Helsinki-NLP/opus-mt-en-ur")
|
64 |
elif target_language == "Hindi":
|
65 |
+
translator = pipeline("translation", model="Helsinki-NLP/opus-mt-en-hi")
|
66 |
elif target_language == "Punjabi":
|
67 |
+
translator = pipeline("translation", model="Helsinki-NLP/opus-mt-en-pa")
|
68 |
elif target_language == "Saraiki":
|
69 |
+
st.write("Saraiki model not available.")
|
70 |
elif target_language == "Pashto":
|
71 |
+
st.write("Pashto model not available.")
|
72 |
+
|
73 |
+
if translator:
|
74 |
+
text = text_input or transcription
|
75 |
+
translated_text = translator(text)
|
76 |
+
st.write("Translated Text:", translated_text[0]['translation_text'])
|
77 |
|
78 |
# Text-to-Speech
if translator:
    st.write("Generating speech...")
    tts = pipeline("text-to-speech", model="microsoft/speecht5_tts")
    # The text-to-speech pipeline returns a dict holding the waveform and its
    # sampling rate, not a list, so it must not be indexed with [0].
    # NOTE(review): SpeechT5 appears to require speaker embeddings (passed via
    # forward_params) — confirm and supply them before relying on this output.
    speech = tts(translated_text[0]['translation_text'])
    # A raw waveform needs its sampling rate to be played back correctly.
    st.audio(speech["audio"], sample_rate=speech["sampling_rate"])
|
|
|
|
|
|
|
|