Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -1,39 +1,77 @@
|
|
1 |
import streamlit as st
|
2 |
-
from transformers import pipeline
|
3 |
-
from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor
|
4 |
import torch
|
5 |
from io import BytesIO
|
6 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
7 |
audio_file = st.file_uploader("Or upload an audio file:", type=["wav", "mp3"])
|
|
|
|
|
|
|
|
|
|
|
|
|
8 |
if audio_file is not None:
|
|
|
|
|
9 |
processor = Wav2Vec2Processor.from_pretrained("facebook/wav2vec2-large-960h")
|
10 |
model = Wav2Vec2ForCTC.from_pretrained("facebook/wav2vec2-large-960h")
|
11 |
-
|
|
|
|
|
12 |
input_values = processor(torch.tensor(audio_input), return_tensors="pt", padding="longest").input_values
|
|
|
|
|
13 |
logits = model(input_values).logits
|
14 |
predicted_ids = torch.argmax(logits, dim=-1)
|
15 |
transcription = processor.decode(predicted_ids[0])
|
16 |
st.write("Transcription:", transcription)
|
17 |
-
translator = pipeline("translation_en_to_fr") # Change to the desired language pair
|
18 |
-
translated_text = translator(st.session_state.text_input or transcription)
|
19 |
-
st.write("Translated Text:", translated_text[0]['translation_text'])
|
20 |
-
tts = pipeline("text-to-speech", model="microsoft/speecht5_tts")
|
21 |
-
tts_audio = tts(translated_text[0]['translation_text'])[0]
|
22 |
-
st.audio(tts_audio, format="audio/wav")
|
23 |
-
st.title("Text/Audio Translator")
|
24 |
-
|
25 |
-
st.text_area("Enter text in English:", key="text_input")
|
26 |
-
audio_file = st.file_uploader("Or upload an audio file:", type=["wav", "mp3"])
|
27 |
|
28 |
-
#
|
29 |
-
if
|
30 |
-
|
|
|
|
|
|
|
|
|
|
|
31 |
|
32 |
-
#
|
33 |
-
if
|
34 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
35 |
|
36 |
# Text-to-Speech
|
37 |
if translated_text:
|
38 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
39 |
|
|
|
import streamlit as st
from transformers import pipeline, Wav2Vec2ForCTC, Wav2Vec2Processor
import torch
from io import BytesIO
import soundfile as sf

# Streamlit app: takes English text (or an uploaded audio clip, which is
# transcribed first), translates it into a user-selected language, and
# speaks the translation aloud.

st.title("Text/Audio Translator")

# --- Inputs -----------------------------------------------------------------
text_input = st.text_area("Enter text in English:")
audio_file = st.file_uploader("Or upload an audio file:", type=["wav", "mp3"])

transcription = ""
translated_text = ""  # becomes the pipeline's list-of-dicts once translated

# --- Speech-to-Text ---------------------------------------------------------
if audio_file is not None:
    st.write("Processing audio file...")
    # Wav2Vec2 large model fine-tuned on 960h of LibriSpeech (English ASR).
    processor = Wav2Vec2Processor.from_pretrained("facebook/wav2vec2-large-960h")
    model = Wav2Vec2ForCTC.from_pretrained("facebook/wav2vec2-large-960h")

    # NOTE(review): wav2vec2-large-960h expects 16 kHz mono input; uploads at
    # other sample rates should be resampled before inference — TODO confirm.
    audio_input, sample_rate = sf.read(BytesIO(audio_file.read()))
    input_values = processor(
        torch.tensor(audio_input), return_tensors="pt", padding="longest"
    ).input_values

    # Inference only — disable autograd to save memory.
    with torch.no_grad():
        logits = model(input_values).logits
    predicted_ids = torch.argmax(logits, dim=-1)
    transcription = processor.decode(predicted_ids[0])
    st.write("Transcription:", transcription)

# --- Translation ------------------------------------------------------------
# Dispatch table replaces the original if/elif ladder: maps each display name
# to its transformers translation task and guarantees `translator` is never
# left undefined for an unmatched selection.
TRANSLATION_TASKS = {
    "French": "translation_en_to_fr",
    "Chinese": "translation_en_to_zh",
    "Italian": "translation_en_to_it",
    "Urdu": "translation_en_to_ur",
    "Hindi": "translation_en_to_hi",
    "Punjabi": "translation_en_to_pa",
    "Saraiki": "translation_en_to_skr",  # custom model required, if available
    "Pashto": "translation_en_to_ps",    # custom model required, if available
}

if text_input or transcription:
    st.write("Translating text...")
    target_language = st.selectbox(
        "Select target language:",
        list(TRANSLATION_TASKS),
    )
    try:
        # Most of these pairs have no default model in transformers; loading
        # can fail, so surface the error instead of crashing the whole app.
        translator = pipeline(TRANSLATION_TASKS[target_language])
        # Typed text takes priority over the audio transcription when both exist.
        translated_text = translator(text_input or transcription)
        st.write("Translated Text:", translated_text[0]['translation_text'])
    except Exception as exc:  # model-loading/inference errors vary by backend
        st.error(f"Translation to {target_language} is unavailable: {exc}")
        translated_text = ""

# --- Text-to-Speech ---------------------------------------------------------
if translated_text:
    st.write("Generating speech...")
    tts = pipeline("text-to-speech", model="microsoft/speecht5_tts")
    # Fix: the TTS pipeline returns a dict {"audio": ndarray, "sampling_rate":
    # int}; the original indexed it with [0], which raises KeyError on a dict,
    # and passed no sample rate to st.audio.
    speech = tts(translated_text[0]['translation_text'])
    st.audio(speech["audio"], sample_rate=speech["sampling_rate"])

# Instructions for deployment on Hugging Face Spaces (not part of the app logic)
st.write("Deploy this app on Hugging Face Spaces by pushing this code to your repository.")