gdnartea committed on
Commit
db28023
·
verified ·
1 Parent(s): b09cd28

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +31 -26
app.py CHANGED
@@ -41,7 +41,35 @@ frame_asr = FrameBatchMultiTaskAED(
41
  amp_dtype = torch.float16
42
 
43
 
44
- def transcribe(audio_filepath, src_lang="en", tgt_lang="en", pnc="yes"):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
45
 
46
  if audio_filepath is None:
47
  raise gr.Error("Please provide some input audio: either upload an audio file or use the microphone")
@@ -50,32 +78,9 @@ def transcribe(audio_filepath, src_lang="en", tgt_lang="en", pnc="yes"):
50
  with tempfile.TemporaryDirectory() as tmpdir:
51
  converted_audio_filepath, duration = convert_audio(audio_filepath, tmpdir, str(utt_id))
52
 
53
- # map src_lang and tgt_lang from long versions to short
54
- LANG_LONG_TO_LANG_SHORT = {
55
- "English": "en",
56
- "Spanish": "es",
57
- "French": "fr",
58
- "German": "de",
59
- }
60
- if src_lang not in LANG_LONG_TO_LANG_SHORT.keys():
61
- raise ValueError(f"src_lang must be one of {LANG_LONG_TO_LANG_SHORT.keys()}")
62
- else:
63
- src_lang = LANG_LONG_TO_LANG_SHORT[src_lang]
64
 
65
- if tgt_lang not in LANG_LONG_TO_LANG_SHORT.keys():
66
- raise ValueError(f"tgt_lang must be one of {LANG_LONG_TO_LANG_SHORT.keys()}")
67
- else:
68
- tgt_lang = LANG_LONG_TO_LANG_SHORT[tgt_lang]
69
-
70
-
71
- # infer taskname from src_lang and tgt_lang
72
- if src_lang == tgt_lang:
73
- taskname = "asr"
74
- else:
75
- taskname = "s2t_translation"
76
-
77
- # update pnc variable to be "yes" or "no"
78
- pnc = "yes" if pnc else "no"
79
 
80
  # make manifest file and save
81
  manifest_data = {
 
41
  amp_dtype = torch.float16
42
 
43
 
44
def convert_audio(audio_filepath, tmpdir, utt_id):
    """Write a single-channel copy of the input audio at ``SAMPLE_RATE``.

    The recording is decoded at its native sample rate and downmixed to
    mono. Audio longer than ``MAX_AUDIO_MINUTES`` is rejected before any
    resampling or file output takes place.

    Args:
        audio_filepath: Path of the audio file to convert.
        tmpdir: Directory in which the converted file is created.
        utt_id: String used as the stem of the output file name.

    Returns:
        A ``(out_filename, duration)`` tuple: the path of the written
        ``.wav`` file and the duration reported by
        ``librosa.get_duration`` for the decoded audio.

    Raises:
        gr.Error: If the audio exceeds ``MAX_AUDIO_MINUTES`` minutes.
    """
    # sr=None keeps the file's own sample rate so the duration is measured
    # on the audio as it was provided.
    samples, native_rate = librosa.load(audio_filepath, sr=None, mono=True)
    duration = librosa.get_duration(y=samples, sr=native_rate)

    minutes = duration / 60.0
    if minutes > MAX_AUDIO_MINUTES:
        raise gr.Error(
            f"This demo can transcribe up to {MAX_AUDIO_MINUTES} minutes of audio. "
            "If you wish, you may trim the audio using the Audio viewer in Step 1 "
            "(click on the scissors icon to start trimming audio)."
        )

    # Resample only when the native rate differs from the target rate.
    needs_resampling = native_rate != SAMPLE_RATE
    if needs_resampling:
        samples = librosa.resample(
            samples, orig_sr=native_rate, target_sr=SAMPLE_RATE
        )

    wav_name = utt_id + '.wav'
    out_filename = os.path.join(tmpdir, wav_name)

    # Persist the converted audio.
    sf.write(out_filename, samples, SAMPLE_RATE)

    return out_filename, duration
71
+
72
+ def transcribe(audio_filepath):
73
 
74
  if audio_filepath is None:
75
  raise gr.Error("Please provide some input audio: either upload an audio file or use the microphone")
 
78
  with tempfile.TemporaryDirectory() as tmpdir:
79
  converted_audio_filepath, duration = convert_audio(audio_filepath, tmpdir, str(utt_id))
80
 
 
 
 
 
 
 
 
 
 
 
 
81
 
82
+ taskname = "asr"
83
+ pnc = "no"
 
 
 
 
 
 
 
 
 
 
 
 
84
 
85
  # make manifest file and save
86
  manifest_data = {