deepugaur committed on
Commit
52d87a4
1 Parent(s): 5b6f753

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +38 -32
app.py CHANGED
@@ -1,40 +1,46 @@
1
- import streamlit as st
2
- import tensorflow as tf
3
- import numpy as np
4
  import librosa
5
- from datetime import datetime
6
 
7
- # Load models
8
- speech_to_text_model = tf.keras.models.load_model('speech_to_text_model.h5')
9
- translation_model = tf.keras.models.load_model('translation_model.h5')
 
10
 
11
- def preprocess_audio(file):
12
- audio, sr = librosa.load(file, sr=16000)
13
- mfccs = librosa.feature.mfcc(y=audio, sr=sr, n_mfcc=13)
14
- return np.expand_dims(mfccs, axis=0)
15
 
16
- def translate_speech_to_text(audio_file):
17
- audio_features = preprocess_audio(audio_file)
18
- predicted_text = speech_to_text_model.predict(audio_features)
19
- translated_text = translation_model.predict([predicted_text])
20
- return translated_text
21
 
22
- def is_after_six_pm():
23
- current_time = datetime.now()
24
- return current_time.hour >= 18
25
 
26
- def main():
27
- st.title("Audio Translation App")
 
28
 
29
- uploaded_file = st.file_uploader("Choose an audio file", type="wav")
 
 
 
 
30
 
31
- if uploaded_file is not None:
32
- if is_after_six_pm():
33
- st.write("Processing...")
34
- translated_text = translate_speech_to_text(uploaded_file)
35
- st.write("Translated Text:", translated_text)
36
- else:
37
- st.write("Service available only after 6 PM IST.")
38
-
39
- if __name__ == "__main__":
40
- main()
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import librosa
2
+ import numpy as np
3
 
4
def preprocess_audio(file_path):
    """Load an audio file and compute its mel spectrogram.

    Args:
        file_path: Location of the audio file; handed directly to
            ``librosa.load``.

    Returns:
        The result of ``librosa.feature.melspectrogram`` for the loaded
        signal.
    """
    # Request a 16 kHz sample rate on load so the spectrogram is always
    # computed at the same rate.
    waveform, sample_rate = librosa.load(file_path, sr=16000)
    return librosa.feature.melspectrogram(y=waveform, sr=sample_rate)
8
 
9
+ from transformers import Wav2Vec2ForCTC, Wav2Vec2Tokenizer, MarianMTModel, MarianTokenizer
 
 
 
10
 
11
# Pre-trained checkpoints, loaded once when this module is imported.
# NOTE(review): "facebook/wav2vec2-large-xlsr-53" looks like a
# pretraining-only checkpoint -- confirm it actually provides a trained CTC
# head and a tokenizer vocabulary before relying on its transcriptions.
_SPEECH_CHECKPOINT = "facebook/wav2vec2-large-xlsr-53"
_TRANSLATION_CHECKPOINT = "Helsinki-NLP/opus-mt-en-hi"

# Speech recognition: audio samples -> text.
speech_to_text_model = Wav2Vec2ForCTC.from_pretrained(_SPEECH_CHECKPOINT)
speech_to_text_tokenizer = Wav2Vec2Tokenizer.from_pretrained(_SPEECH_CHECKPOINT)

# Machine translation of the transcribed text.
translation_model = MarianMTModel.from_pretrained(_TRANSLATION_CHECKPOINT)
translation_tokenizer = MarianTokenizer.from_pretrained(_TRANSLATION_CHECKPOINT)
 
17
 
18
def translate_audio(file_path):
    """Transcribe the speech in an audio file and translate the transcript.

    The audio is decoded to a 16 kHz waveform, transcribed with the
    module-level speech-to-text model, and the transcript is then run
    through the module-level translation model.

    Args:
        file_path: Location of the audio file; handed to ``librosa.load``.

    Returns:
        The translated text for the (single) input recording, as a string.
    """
    # The speech tokenizer expects raw audio samples, not a file path, so the
    # file has to be decoded first. The previous code passed the path string
    # itself and computed a mel spectrogram that was never used.
    waveform, _ = librosa.load(file_path, sr=16000)

    # Speech -> text: take the highest-scoring token per frame and decode it.
    audio_input = speech_to_text_tokenizer(waveform, return_tensors="pt").input_values
    logits = speech_to_text_model(audio_input).logits
    predicted_ids = logits.argmax(dim=-1)
    transcription = speech_to_text_tokenizer.batch_decode(predicted_ids)[0]

    # Text -> translated text.
    translation_input = translation_tokenizer(transcription, return_tensors="pt")
    translated_output = translation_model.generate(**translation_input)
    return translation_tokenizer.batch_decode(
        translated_output, skip_special_tokens=True
    )[0]
34
+
35
+ import datetime
36
+
37
# India Standard Time is a fixed UTC+05:30 offset (no daylight saving).
_IST = datetime.timezone(datetime.timedelta(hours=5, minutes=30), "IST")


def should_translate(now=None):
    """Return True when it is 18:00 or later in India Standard Time.

    The user-facing message in this file promises availability "after
    6 PM IST", so the cutoff is evaluated in IST rather than in whatever
    timezone the host machine happens to use.

    Args:
        now: Optional ``datetime.datetime`` to evaluate instead of the
            current time. A timezone-aware value is converted to IST; a
            naive value is taken to already be IST wall-clock time.
            Defaults to the current time in IST.

    Returns:
        bool: True if the hour in IST is 18 or greater, otherwise False.
    """
    if now is None:
        now = datetime.datetime.now(tz=_IST)
    elif now.tzinfo is not None:
        now = now.astimezone(_IST)
    return now.hour >= 18
40
+
41
def handle_translation(file_path):
    """Translate an audio file, but only inside the allowed time window.

    Args:
        file_path: Location of the audio file to translate.

    Returns:
        The output of ``translate_audio(file_path)`` when
        ``should_translate()`` is true; otherwise a fixed notice string
        stating when the service is available.
    """
    # Guard clause: refuse early when outside the service window.
    if not should_translate():
        return "Translation is only available after 6 PM IST."
    return translate_audio(file_path)
46
+