Spaces:
Paused
Paused
Update app.py
Browse files
app.py
CHANGED
@@ -35,7 +35,7 @@ def preprocess_audio(audio_tensor, original_sample_rate, apply_normalization):
35 |
36 | if apply_normalization:
37 | audio_tensor = audio_tensor / torch.max(torch.abs(audio_tensor)) # Normalize
38 | - audio_tensor = torch.clamp(audio_tensor, min=-1, max=1)
39 |
40 | audio_tensor = torchaudio.functional.resample(audio_tensor, orig_freq=original_sample_rate, new_freq=16000) # Resample
41 | return audio_tensor
@@ -82,10 +82,13 @@ def transcribe_from_youtube(url, apply_wiener_filter, apply_normalization, apply
82 |
83 | transcription, _ = transcribe_speech(audio)
84 |
85 | except Exception as e:
86 | return str(e), None
87 |
88 | - return transcription, (16000, …
89 |
90 | def populate_metadata(url):
91 | yt = YouTube(url)
|
|
35 |
36 | if apply_normalization:
37 | audio_tensor = audio_tensor / torch.max(torch.abs(audio_tensor)) # Normalize
38 | + # audio_tensor = torch.clamp(audio_tensor, min=-1, max=1)
39 |
40 | audio_tensor = torchaudio.functional.resample(audio_tensor, orig_freq=original_sample_rate, new_freq=16000) # Resample
41 | return audio_tensor
|
|
82 |
83 | transcription, _ = transcribe_speech(audio)
84 |
85 | + # Convert to 32-bit float for Gradio output
86 | + audio_output = audio.numpy().astype(np.float32)
87 | +
88 | except Exception as e:
89 | return str(e), None
90 |
91 | + return transcription, (16000, audio_output)
92 |
93 | def populate_metadata(url):
94 | yt = YouTube(url)