fargerm committed on
Commit
242815e
1 Parent(s): ae15bc4

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +59 -21
app.py CHANGED
import streamlit as st
from transformers import pipeline, Wav2Vec2ForCTC, Wav2Vec2Processor
import torch
from io import BytesIO
import soundfile as sf

# Target-language -> translation checkpoint. The generic "translation" task
# with an explicit model is used because shorthand tasks such as
# "translation_en_to_zh" have no registered default model and raise at runtime.
# NOTE(review): Punjabi, Saraiki and Pashto have no widely available public
# en->X checkpoints; they are listed but disabled until a model id is confirmed.
TRANSLATION_MODELS = {
    "French": "Helsinki-NLP/opus-mt-en-fr",
    "Chinese": "Helsinki-NLP/opus-mt-en-zh",
    "Italian": "Helsinki-NLP/opus-mt-en-it",
    "Urdu": "Helsinki-NLP/opus-mt-en-ur",
    "Hindi": "Helsinki-NLP/opus-mt-en-hi",
    "Punjabi": None,
    "Saraiki": None,
    "Pashto": None,
}


@st.cache_resource
def load_asr_model():
    """Load and cache the Wav2Vec2 speech-to-text processor and model.

    facebook/wav2vec2-large-960h is trained on 16 kHz mono English speech.
    Cached so the weights are downloaded/loaded once per session, not on
    every Streamlit rerun.
    """
    processor = Wav2Vec2Processor.from_pretrained("facebook/wav2vec2-large-960h")
    model = Wav2Vec2ForCTC.from_pretrained("facebook/wav2vec2-large-960h")
    return processor, model


@st.cache_resource
def load_translator(model_name: str):
    """Load and cache a translation pipeline for the given checkpoint."""
    return pipeline("translation", model=model_name)


@st.cache_resource
def load_tts():
    """Load and cache the SpeechT5 text-to-speech pipeline."""
    return pipeline("text-to-speech", model="microsoft/speecht5_tts")


# Title
st.title("Text/Audio Translator")

# Text Input
text_input = st.text_area("Enter text in English:")

# Audio Input
audio_file = st.file_uploader("Or upload an audio file:", type=["wav", "mp3"])

# Initialize variables so the later stages can be tested unconditionally.
transcription = ""
translated_text = ""

# Speech-to-Text Conversion
if audio_file is not None:
    st.write("Processing audio file...")
    processor, model = load_asr_model()

    # Decode the uploaded bytes into a float waveform + its sample rate.
    audio_input, sample_rate = sf.read(BytesIO(audio_file.read()))

    # Collapse stereo to mono: Wav2Vec2 expects a 1-D waveform.
    if audio_input.ndim > 1:
        audio_input = audio_input.mean(axis=1)

    # The model was trained on 16 kHz audio; warn instead of silently
    # producing garbage for other rates.
    if sample_rate != 16000:
        st.warning("Audio is not 16 kHz; transcription quality may suffer.")

    input_values = processor(
        torch.tensor(audio_input, dtype=torch.float32),
        sampling_rate=16000,
        return_tensors="pt",
        padding="longest",
    ).input_values

    # Inference only — no gradients needed.
    with torch.no_grad():
        logits = model(input_values).logits
    predicted_ids = torch.argmax(logits, dim=-1)
    transcription = processor.decode(predicted_ids[0])
    st.write("Transcription:", transcription)

# Language Translation
if text_input or transcription:
    # Select the language to translate into.
    target_language = st.selectbox(
        "Select target language:",
        list(TRANSLATION_MODELS),
    )

    model_name = TRANSLATION_MODELS[target_language]
    if model_name is None:
        st.error(f"No translation model is configured for {target_language} yet.")
    else:
        st.write("Translating text...")
        translator = load_translator(model_name)
        # Typed text takes precedence over the transcription.
        result = translator(text_input or transcription)
        translated_text = result[0]["translation_text"]
        st.write("Translated Text:", translated_text)

# Text-to-Speech
if translated_text:
    st.write("Generating speech...")
    # NOTE(review): SpeechT5 is an English TTS model and normally expects
    # speaker embeddings via forward_params; confirm output quality for
    # non-English targets before shipping.
    tts = load_tts()
    # The pipeline returns a dict: {"audio": ndarray, "sampling_rate": int}.
    speech = tts(translated_text)
    st.audio(speech["audio"], sample_rate=speech["sampling_rate"])

# Instructions for deployment on Hugging Face Spaces (not part of the app code)
st.write("Deploy this app on Hugging Face Spaces by pushing this code to your repository.")
77