Spaces:

sp-uhh
/

urusha

Runtime error

App Files Files Community

Shokoufehhh committed on Oct 23

Commit

dfb36ea

•

1 Parent(s): c65e0e9

Update app.py

Browse files

Files changed (1) hide show

app.py +40 -24

app.py CHANGED Viewed

@@ -1,34 +1,50 @@
-import gradio as gr
 import torch
-from sgmse.model import SGMSENoiseReducer  # Adjust import as per your model structure
-import soundfile as sf
-# Load your pre-trained model
 model = SGMSENoiseReducer.from_pretrained("sp-uhh/speech-enhancement-sgmse")
-# Define a function to process the uploaded file
-def enhance_speech(noisy_audio):
-    # Load noisy audio file
-    noisy, sr = sf.read(noisy_audio)
-    # Apply your model to enhance the speech
-    enhanced_audio = model.enhance(noisy, sr)
-    # Save enhanced audio to a temporary file
-    output_file = "enhanced_output.wav"
-    sf.write(output_file, enhanced_audio, sr)
     return output_file
-# Set up the Gradio interface
-interface = gr.Interface(
-    fn=enhance_speech,
-    inputs=gr.Audio(source="upload", type="filepath"),
-    outputs=gr.Audio(type="file"),
-    title="SGMSE Speech Enhancement",
-    description="Upload a noisy audio file and download the enhanced (clean) version."
-)
-# Launch the interface
-if __name__ == "__main__":
-    interface.launch()

 import torch
+import torchaudio
+from sgmse.model import ScoreModel
+import gradio as gr
+from sgmse.util.other import pad_spec
+# Load the pre-trained model. NOTE(review): SGMSENoiseReducer is not imported on this side of the diff (only ScoreModel is) — confirm which class should load the checkpoint.
 model = SGMSENoiseReducer.from_pretrained("sp-uhh/speech-enhancement-sgmse")
+def enhance_speech(audio_file):
+    # Load and process the audio file
+    y, sr = torchaudio.load(audio_file)
+    T_orig = y.size(1)
+    # Normalize
+    norm_factor = y.abs().max()
+    y = y / norm_factor
+    # Prepare DNN input
+    Y = torch.unsqueeze(model._forward_transform(model._stft(y.to(args.device))), 0)
+    Y = pad_spec(Y, mode=pad_mode)
+    # Reverse sampling
+    sampler = model.get_pc_sampler(
+        'reverse_diffusion', args.corrector, Y.to(args.device), N=args.N,
+        corrector_steps=args.corrector_steps, snr=args.snr)
+    sample, _ = sampler()
+    # Backward transform in time domain
+    x_hat = model.to_audio(sample.squeeze(), T_orig)
+    # Renormalize
+    x_hat = x_hat * norm_factor
+    # Save the enhanced audio
+    output_file = 'enhanced_output.wav'
+    torchaudio.save(output_file, x_hat.cpu().numpy(), sr)
     return output_file
+# Gradio interface setup
+inputs = gr.Audio(label="Input Audio", type="filepath")
+outputs = gr.Audio(label="Output Audio", type="filepath")
+title = "Speech Enhancement using SGMSE"
+description = "This Gradio demo uses the SGMSE model for speech enhancement. Upload your audio file to enhance it."
+article = "<p style='text-align: center'><a href='https://huggingface.co/SP-UHH/speech-enhancement-sgmse' target='_blank'>Model Card</a></p>"
+# Launch without share=True (as it's not supported on Hugging Face Spaces)
+gr.Interface(fn=enhance_speech, inputs=inputs, outputs=outputs, title=title, description=description, article=article).launch()