Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
@@ -41,7 +41,35 @@ frame_asr = FrameBatchMultiTaskAED(
|
|
41 |
amp_dtype = torch.float16
|
42 |
|
43 |
|
44 |
-
def
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
45 |
|
46 |
if audio_filepath is None:
|
47 |
raise gr.Error("Please provide some input audio: either upload an audio file or use the microphone")
|
@@ -50,32 +78,9 @@ def transcribe(audio_filepath, src_lang="en", tgt_lang="en", pnc="yes"):
|
|
50 |
with tempfile.TemporaryDirectory() as tmpdir:
|
51 |
converted_audio_filepath, duration = convert_audio(audio_filepath, tmpdir, str(utt_id))
|
52 |
|
53 |
-
# map src_lang and tgt_lang from long versions to short
|
54 |
-
LANG_LONG_TO_LANG_SHORT = {
|
55 |
-
"English": "en",
|
56 |
-
"Spanish": "es",
|
57 |
-
"French": "fr",
|
58 |
-
"German": "de",
|
59 |
-
}
|
60 |
-
if src_lang not in LANG_LONG_TO_LANG_SHORT.keys():
|
61 |
-
raise ValueError(f"src_lang must be one of {LANG_LONG_TO_LANG_SHORT.keys()}")
|
62 |
-
else:
|
63 |
-
src_lang = LANG_LONG_TO_LANG_SHORT[src_lang]
|
64 |
|
65 |
-
|
66 |
-
|
67 |
-
else:
|
68 |
-
tgt_lang = LANG_LONG_TO_LANG_SHORT[tgt_lang]
|
69 |
-
|
70 |
-
|
71 |
-
# infer taskname from src_lang and tgt_lang
|
72 |
-
if src_lang == tgt_lang:
|
73 |
-
taskname = "asr"
|
74 |
-
else:
|
75 |
-
taskname = "s2t_translation"
|
76 |
-
|
77 |
-
# update pnc variable to be "yes" or "no"
|
78 |
-
pnc = "yes" if pnc else "no"
|
79 |
|
80 |
# make manifest file and save
|
81 |
manifest_data = {
|
|
|
41 |
amp_dtype = torch.float16
|
42 |
|
43 |
|
44 |
+
def convert_audio(audio_filepath, tmpdir, utt_id):
    """
    Re-encode an input recording as a single-channel WAV file at SAMPLE_RATE.

    The audio is loaded at its native sampling rate and downmixed to mono.
    If it runs longer than MAX_AUDIO_MINUTES, a gr.Error is raised and no
    file is written. Otherwise the samples are resampled to SAMPLE_RATE
    (only when the native rate differs) and saved as ``<utt_id>.wav``
    inside ``tmpdir``.

    Returns a tuple ``(out_filename, duration)``: the path of the written
    WAV file and the duration of the input audio in seconds.
    """
    # sr=None keeps the file's own sampling rate; resampling is done
    # explicitly below, after the length check has passed.
    samples, native_sr = librosa.load(audio_filepath, sr=None, mono=True)
    duration = librosa.get_duration(y=samples, sr=native_sr)

    # duration is in seconds, the limit is expressed in minutes
    exceeds_limit = duration / 60.0 > MAX_AUDIO_MINUTES
    if exceeds_limit:
        message = (
            f"This demo can transcribe up to {MAX_AUDIO_MINUTES} minutes of audio. "
            "If you wish, you may trim the audio using the Audio viewer in Step 1 "
            "(click on the scissors icon to start trimming audio)."
        )
        raise gr.Error(message)

    needs_resampling = native_sr != SAMPLE_RATE
    if needs_resampling:
        samples = librosa.resample(
            samples,
            orig_sr=native_sr,
            target_sr=SAMPLE_RATE,
        )

    # write the converted audio next to the other per-utterance temp files
    out_filename = os.path.join(tmpdir, utt_id + '.wav')
    sf.write(out_filename, samples, SAMPLE_RATE)

    return out_filename, duration
|
71 |
+
|
72 |
+
def transcribe(audio_filepath):
|
73 |
|
74 |
if audio_filepath is None:
|
75 |
raise gr.Error("Please provide some input audio: either upload an audio file or use the microphone")
|
|
|
78 |
with tempfile.TemporaryDirectory() as tmpdir:
|
79 |
converted_audio_filepath, duration = convert_audio(audio_filepath, tmpdir, str(utt_id))
|
80 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
81 |
|
82 |
+
taskname = "asr"
|
83 |
+
pnc = "no"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
84 |
|
85 |
# make manifest file and save
|
86 |
manifest_data = {
|