Wendyellé Abubakrh Alban NYANTUDRE committed on
Commit
5da6c3d
1 Parent(s): af8f854

set up everything

Browse files
Files changed (5) hide show
  1. Makefile +17 -0
  2. README.md +29 -0
  3. app.py +96 -0
  4. requirements.txt +18 -0
  5. resemble-enhance +1 -0
Makefile ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Developer workflow targets. None of them produces a file named after the
# target, so they are all declared phony: otherwise make would skip a target
# whenever a file or directory with the same name exists in the checkout.
.PHONY: install test debug format lint all

# Upgrade pip, then install the pinned dependencies from requirements.txt.
install:
	pip install --upgrade pip &&\
		pip install -r requirements.txt

# Run the application entry point directly.
test:
	python app.py

# Placeholder: the pytest/pdb invocation is currently commented out.
debug:
	#python -m pytest -vv --pdb #Debugger is invoked

# Placeholder: the black invocation is currently commented out.
format:
	#black *.py

# Placeholder: the pylint invocation is currently commented out.
lint:
	#pylint --disable=R,C *.py

# Run the full pipeline in the listed order.
all: install lint test format
README.md CHANGED
@@ -1,2 +1,31 @@
1
  # resemble-enhance-hf-demo
2
  Demo of Resemble Enhance, an AI-powered tool that aims to improve the overall quality of speech by performing denoising and enhancement.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  # resemble-enhance-hf-demo
2
  Demo of Resemble Enhance, an AI-powered tool that aims to improve the overall quality of speech by performing denoising and enhancement.
3
+
4
+
5
+
6
+
7
+ ---
8
+ title: Demo
9
+ emoji: 🌖
10
+ colorFrom: purple
11
+ colorTo: purple
12
+ sdk: gradio
13
+ sdk_version: 4.44.0
14
+ app_file: app.py
15
+ pinned: false
16
+ license: cc
17
+ ---
18
+
19
+
20
+ [![Sync to Hugging Face hub](https://github.com/nogibjj/hugging-face/actions/workflows/main.yml/badge.svg)](https://github.com/nogibjj/hugging-face/actions/workflows/main.yml)
21
+
22
+
23
+ [Try Demo Text Summarization Here](https://huggingface.co/spaces/noahgift/demo)
24
+
25
+
26
+ ![mlops-hugging-face](https://user-images.githubusercontent.com/58792/170845235-7f00d61c-ea36-4d28-82d0-3a9b8c0f1769.png)
27
+
28
+
29
+ ## References
30
+
31
+ [Watch YouTube Walkthrough](https://youtu.be/VYSGjUa5sc4)
app.py ADDED
@@ -0,0 +1,96 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import sys
3
+ import argparse
4
+ from functools import partial
5
+
6
+ import gradio as gr
7
+ import torch
8
+ import torchaudio
9
+
10
+ # Add the directory containing resemble-enhance to the Python path
11
+ sys.path.append(os.path.abspath('resemble-enhance'))
12
+
13
+ from resemble_enhance.enhancer.inference import denoise, enhance
14
+
15
# Run inference on the GPU when CUDA is available; otherwise fall back to CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"
19
+
20
+
21
def _fn(path, solver, nfe, tau, denoising, unlimited):
    """Run denoising and enhancement on one audio file for the Gradio UI.

    Args:
        path: Filesystem path of the uploaded audio, or ``None`` when nothing
            was uploaded.
        solver: Solver name as shown in the dropdown; lower-cased before being
            passed to ``enhance``.
        nfe: Number of function evaluations; converted to ``int``.
        tau: Forwarded unchanged to ``enhance`` as ``tau``.
        denoising: When truthy ``lambd`` is 0.9, otherwise 0.1.
        unlimited: When truthy the 60-second duration check is skipped.

    Returns:
        A pair ``((sr, denoised), (sr, enhanced))`` where the waveforms are
        NumPy arrays and ``sr`` is the sample rate reported by ``enhance``,
        or ``(None, None)`` when the input is missing or too long.
    """
    if path is None:
        gr.Warning("Please upload an audio file.")
        return None, None

    # Metadata is always read; the duration itself is only computed when the
    # limit is actually enforced.
    meta = torchaudio.info(path)
    too_long = (not unlimited) and meta.num_frames / meta.sample_rate > 60
    if too_long:
        gr.Warning("Only audio files shorter than 60 seconds are supported.")
        return None, None

    solver_name = solver.lower()
    n_evals = int(nfe)
    if denoising:
        lambd = 0.9
    else:
        lambd = 0.1

    waveform, sr = torchaudio.load(path)
    # Collapse the first dimension by averaging (presumably the channel axis,
    # giving a mono signal -- confirm against torchaudio.load's layout).
    mono = waveform.mean(dim=0)

    denoised, new_sr = denoise(mono, sr, device)
    enhanced, new_sr = enhance(
        mono,
        sr,
        device,
        nfe=n_evals,
        solver=solver_name,
        lambd=lambd,
        tau=tau,
    )

    # Move results to host memory as NumPy arrays for the output components.
    denoised_np = denoised.cpu().numpy()
    enhanced_np = enhanced.cpu().numpy()

    return (new_sr, denoised_np), (new_sr, enhanced_np)
45
+
46
+
47
def main():
    """Parse command-line flags, assemble the Gradio interface, and launch it.

    The only flag is ``--unlimited``; its value is bound to the ``unlimited``
    parameter of ``_fn``, which skips the 60-second input check there.
    """
    cli = argparse.ArgumentParser()
    cli.add_argument("--unlimited", action="store_true")
    options = cli.parse_args()

    # Input components, in the positional order expected by ``_fn``:
    # path, solver, nfe, tau, denoising.
    audio_in = gr.Audio(type="filepath", label="Input Audio")
    solver_in = gr.Dropdown(
        choices=["Midpoint", "RK4", "Euler"],
        value="Midpoint",
        label="CFM ODE Solver (Midpoint is recommended)",
    )
    nfe_in = gr.Slider(
        minimum=1,
        maximum=128,
        value=64,
        step=1,
        label="CFM Number of Function Evaluations (higher values in general yield better quality but may be slower)",
    )
    tau_in = gr.Slider(
        minimum=0,
        maximum=1,
        value=0.5,
        step=0.01,
        label="CFM Prior Temperature (higher values can improve quality but can reduce stability)",
    )
    denoise_in = gr.Checkbox(
        value=False,
        label="Denoise Before Enhancement (tick if your audio contains heavy background noise)",
    )

    # Output components, matching the two tuples returned by ``_fn``.
    denoised_out = gr.Audio(label="Output Denoised Audio")
    enhanced_out = gr.Audio(label="Output Enhanced Audio")

    handler = partial(_fn, unlimited=options.unlimited)

    demo = gr.Interface(
        fn=handler,
        title="Resemble Enhance",
        description="AI-driven audio enhancement for your audio files, powered by Resemble AI.",
        inputs=[audio_in, solver_in, nfe_in, tau_in, denoise_in],
        outputs=[denoised_out, enhanced_out],
    )

    demo.launch()
93
+
94
+
95
+ if __name__ == "__main__":
96
+ main()
requirements.txt ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ celluloid==0.2.0
2
+ deepspeed==0.15.1
3
+ librosa==0.10.2.post1
4
+ matplotlib==3.9.2
5
+ numpy==2.0.2
6
+ omegaconf==2.3.0
7
+ pandas==2.2.2
8
+ ptflops==0.7.3
9
+ resampy==0.4.3
10
+ rich==13.8.1
11
+ scipy==1.14.1
12
+ soundfile==0.12.1
13
+ tabulate==0.9.0
14
+ torch==2.4.0
15
+ torchaudio==2.4.0
16
+ torchvision==0.19.0
17
+ tqdm==4.66.5
18
+ gradio==4.44.0
resemble-enhance ADDED
@@ -0,0 +1 @@
 
 
1
+ Subproject commit bd713fae892212e0ae3bf76eabf4f5665e95b370