Spaces:
Runtime error
Runtime error
Upload 2 files
Browse files- app.py +81 -0
- requirements.txt +4 -0
app.py
ADDED
@@ -0,0 +1,81 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
from PIL import Image
import torch
import whisper
import joblib
import os
import json
import pydub

# FLEURS-style locale codes -> human-readable language names.
LANGUAGES = {
    "af_za": "Afrikaans", "am_et": "Amharic",
    "ar_eg": "Arabic", "as_in": "Assamese",
    "az_az": "Azerbaijani", "be_by": "Belarusian",
    "bg_bg": "Bulgarian", "bn_in": "Bengali",
    "bs_ba": "Bosnian", "ca_es": "Catalan",
    "cmn_hans_cn": "Chinese", "cs_cz": "Czech",
    "cy_gb": "Welsh", "da_dk": "Danish",
    "de_de": "German", "el_gr": "Greek",
    "en_us": "English", "es_419": "Spanish",
    "et_ee": "Estonian", "fa_ir": "Persian",
    "fi_fi": "Finnish", "fil_ph": "Tagalog",
    "fr_fr": "French", "gl_es": "Galician",
    "gu_in": "Gujarati", "ha_ng": "Hausa",
    "he_il": "Hebrew", "hi_in": "Hindi",
    "hr_hr": "Croatian", "hu_hu": "Hungarian",
    "hy_am": "Armenian", "id_id": "Indonesian",
    "is_is": "Icelandic", "it_it": "Italian",
    "ja_jp": "Japanese", "jv_id": "Javanese",
    "ka_ge": "Georgian", "kk_kz": "Kazakh",
    "km_kh": "Khmer", "kn_in": "Kannada",
    "ko_kr": "Korean", "lb_lu": "Luxembourgish",
    "ln_cd": "Lingala", "lo_la": "Lao",
    "lt_lt": "Lithuanian", "lv_lv": "Latvian",
    "mi_nz": "Maori", "mk_mk": "Macedonian",
    "ml_in": "Malayalam", "mn_mn": "Mongolian",
    "mr_in": "Marathi", "ms_my": "Malay",
    "mt_mt": "Maltese", "my_mm": "Myanmar",
    "nb_no": "Norwegian", "ne_np": "Nepali",
    "nl_nl": "Dutch", "oc_fr": "Occitan",
    "pa_in": "Punjabi", "pl_pl": "Polish",
    "ps_af": "Pashto", "pt_br": "Portuguese",
    "ro_ro": "Romanian", "ru_ru": "Russian",
    "sd_in": "Sindhi", "sk_sk": "Slovak",
    "sl_si": "Slovenian", "sn_zw": "Shona", "so_so": "Somali",
    "sr_rs": "Serbian", "sv_se": "Swedish", "sw_ke": "Swahili",
    "ta_in": "Tamil", "te_in": "Telugu", "tg_tj": "Tajik",
    "th_th": "Thai", "tr_tr": "Turkish", "uk_ua": "Ukrainian",
    "ur_pk": "Urdu", "uz_uz": "Uzbek", "vi_vn": "Vietnamese",
    "yo_ng": "Yoruba",
}

# Whisper reports a bare ISO-639-1 code (e.g. "en"), so index by the
# part of the locale code before the first underscore.
DECODE_LANGUAGE = {k.split("_")[0]: v for k, v in LANGUAGES.items()}

st.set_page_config(page_title="Detect the language of the audio file App", layout="centered")
st.image("image.jpg", caption='Detect the language of the audio file')

local_path = None  # path of the re-encoded upload on local disk, set inside the form

with st.form("Prediction_form"):
    uploadFile = st.file_uploader(label="Please upload your file ", type=["mp3"])
    if uploadFile is not None:
        # Re-encode the upload to a local mp3 so whisper (which reads a
        # file path via ffmpeg) can open it.  basename() strips any
        # directory components a hostile filename might carry.
        local_path = os.path.basename(uploadFile.name)
        pydub.AudioSegment.from_mp3(uploadFile).export(local_path, format="mp3")
        # Fix: the original opened this file and never closed the handle.
        with open(local_path, "rb") as audio_file:
            st.audio(audio_file.read(), format='audio/ogg')  # play back the upload
    submit = st.form_submit_button("Detect Language")

if submit:
    if local_path is None:
        # Fix: the original crashed with AttributeError when the button
        # was pressed before any file was uploaded.
        st.warning("Please upload an mp3 file first.")
    else:
        # NOTE(review): "large" on CPU is very slow and is reloaded on
        # every submit; consider caching the model or using a smaller one.
        model = whisper.load_model("large").to('cpu')
        # Load the audio and pad/trim it to whisper's 30-second window.
        waveform = whisper.pad_or_trim(whisper.load_audio(local_path))
        # Log-Mel spectrogram on the same device as the model.
        mel = whisper.log_mel_spectrogram(waveform).to(model.device)
        # decode() performs language detection itself (no `language`
        # option given), so the original's separate detect_language()
        # call was redundant work and its result was unused.
        options = whisper.DecodingOptions(fp16=False)
        result = whisper.decode(model, mel, options)
        # Fix: fall back to whisper's raw code instead of raising
        # KeyError for languages missing from the FLEURS table.
        language = DECODE_LANGUAGE.get(result.language, result.language)
        # Fix: typo "Gnereated" and mislabeled "Translated Text" — this
        # app detects the language, it does not translate.
        st.header("Summary Generated")
        st.success(f"Language Detected: {language} Text: {result.text}")
|
requirements.txt
ADDED
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
1 |
+
git+https://github.com/openai/whisper.git
ffmpeg-python==0.2.0
pydub==0.25.1
torch==1.13.0
streamlit==1.12.0
|