Spaces:

gdnartea
/

Chatty_Ashe

Runtime error

gdnartea committed on May 1, 2024

Commit

db28023

verified ·

1 Parent(s): b09cd28

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -41,7 +41,35 @@ frame_asr = FrameBatchMultiTaskAED(
 amp_dtype = torch.float16
-def transcribe(audio_filepath, src_lang="en", tgt_lang="en", pnc="yes"):
 	if audio_filepath is None:
 		raise gr.Error("Please provide some input audio: either upload an audio file or use the microphone")
@@ -50,32 +78,9 @@ def transcribe(audio_filepath, src_lang="en", tgt_lang="en", pnc="yes"):
 	with tempfile.TemporaryDirectory() as tmpdir:
 		converted_audio_filepath, duration = convert_audio(audio_filepath, tmpdir, str(utt_id))
-		# map src_lang and tgt_lang from long versions to short
-		LANG_LONG_TO_LANG_SHORT = {
-			"English": "en",
-			"Spanish": "es",
-			"French": "fr",
-			"German": "de",
-		}
-		if src_lang not in LANG_LONG_TO_LANG_SHORT.keys():
-			raise ValueError(f"src_lang must be one of {LANG_LONG_TO_LANG_SHORT.keys()}")
-		else:
-			src_lang = LANG_LONG_TO_LANG_SHORT[src_lang]
-		if tgt_lang not in LANG_LONG_TO_LANG_SHORT.keys():
-			raise ValueError(f"tgt_lang must be one of {LANG_LONG_TO_LANG_SHORT.keys()}")
-		else:
-			tgt_lang = LANG_LONG_TO_LANG_SHORT[tgt_lang]
-		# infer taskname from src_lang and tgt_lang
-		if src_lang == tgt_lang:
-			taskname = "asr"
-		else:
-			taskname = "s2t_translation"
-		# update pnc variable to be "yes" or "no"
-		pnc = "yes" if pnc else "no"
 		# make manifest file and save
 		manifest_data = {

 amp_dtype = torch.float16
+def convert_audio(audio_filepath, tmpdir, utt_id):
+	"""
+	Convert all files to monochannel 16 kHz wav files.
+	Do not convert and raise error if audio too long.
+	Returns output filename and duration.
+	"""
+	data, sr = librosa.load(audio_filepath, sr=None, mono=True)
+	duration = librosa.get_duration(y=data, sr=sr)
+	if duration / 60.0 > MAX_AUDIO_MINUTES:
+		raise gr.Error(
+			f"This demo can transcribe up to {MAX_AUDIO_MINUTES} minutes of audio. "
+			"If you wish, you may trim the audio using the Audio viewer in Step 1 "
+			"(click on the scissors icon to start trimming audio)."
+		)
+	if sr != SAMPLE_RATE:
+		data = librosa.resample(data, orig_sr=sr, target_sr=SAMPLE_RATE)
+	out_filename = os.path.join(tmpdir, utt_id + '.wav')
+	# save output audio
+	sf.write(out_filename, data, SAMPLE_RATE)
+	return out_filename, duration
+def transcribe(audio_filepath):
 	if audio_filepath is None:
 		raise gr.Error("Please provide some input audio: either upload an audio file or use the microphone")
 	with tempfile.TemporaryDirectory() as tmpdir:
 		converted_audio_filepath, duration = convert_audio(audio_filepath, tmpdir, str(utt_id))
+        taskname = "asr"
+		pnc =  "no"
 		# make manifest file and save
 		manifest_data = {