File size: 1,567 Bytes
aa0e4dc
2ad0ccc
 
4a33c20
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
aa0e4dc
 
4a33c20
2ad0ccc
 
 
 
aa0e4dc
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
import gradio as gr
import os
import bark
from bark import generate_audio, preload_models, SAMPLE_RATE
import time
import scipy
import noisereduce as nr

########################
# Voice cloning functions

# Force CPU-only inference by hiding every CUDA device from this process.
os.environ["CUDA_VISIBLE_DEVICES"] = ""
# Request Bark's smaller checkpoints.
# NOTE(review): Bark appears to read SUNO_USE_SMALL_MODELS once, when the
# `bark` package is first imported. The import at the top of this file has
# already run by the time this line executes, so the variable alone may be
# ignored. It is kept for any code that reads it later, and the same choice
# is passed explicitly to preload_models() below so it reliably takes effect.
os.environ["SUNO_USE_SMALL_MODELS"] = "1"

# Load the models once at start-up so the first request does not pay the
# loading cost. The explicit *_use_small flags select the small variants
# regardless of when the environment variable above was read.
preload_models(
    text_use_small=True,
    coarse_use_small=True,
    fine_use_small=True,
)


def generate_cloned_voice_audio(text_prompt, history_prompt):
    """Generate speech for ``text_prompt`` and save it as WAV files.

    The audio is produced by ``generate_audio`` and written twice to the
    current working directory: once as generated (``output_audio.wav``) and
    once after noise reduction (``output_noisereduced_audio.wav``). Both
    files are overwritten on every call.

    Args:
        text_prompt: Text to be spoken; forwarded to ``generate_audio``.
        history_prompt: Voice prompt forwarded unchanged to
            ``generate_audio`` as ``history_prompt`` (presumably the cloned
            voice to imitate; confirm the accepted formats against Bark).

    Returns:
        The path of the WAV file holding the audio as generated, i.e. the
        version *without* noise reduction.
    """
    # Import the submodule explicitly: a bare ``import scipy`` does not
    # guarantee that ``scipy.io.wavfile`` is available as an attribute, which
    # would otherwise surface as an AttributeError at write time.
    from scipy.io import wavfile

    # Time the generation with a monotonic clock so the measurement cannot be
    # skewed by wall-clock adjustments.
    started_at = time.perf_counter()

    # generate cloned voice audio
    audio_array = generate_audio(
        text_prompt,
        history_prompt=history_prompt,
    )

    generation_duration_s = time.perf_counter() - started_at
    # Number of samples divided by samples-per-second gives the clip length.
    audio_duration_s = audio_array.shape[0] / SAMPLE_RATE
    print(f"took {generation_duration_s:.0f}s to generate {audio_duration_s:.0f}s of audio")

    # reduce noise
    reduced_noise_audio_array = nr.reduce_noise(y=audio_array, sr=SAMPLE_RATE)

    # write both variants to file
    audio_output_path = "output_audio.wav"
    noisereduced_audio_output_path = "output_noisereduced_audio.wav"
    wavfile.write(audio_output_path, rate=SAMPLE_RATE, data=audio_array)
    wavfile.write(
        noisereduced_audio_output_path,
        rate=SAMPLE_RATE,
        data=reduced_noise_audio_array,
    )

    return audio_output_path

########################

def greet(name):
    """Return a greeting for ``name`` followed by the voice-file status.

    The status reports whether a file called ``pm_voice.npz`` exists in the
    current working directory.
    """
    voice_file_status = (
        "Found the voice file"
        if os.path.isfile("pm_voice.npz")
        else "Voice file not found"
    )
    # str.join keeps the original's requirement that ``name`` be a string.
    return "".join(("Hello ", name, "!!", voice_file_status))

# Build a minimal text-in / text-out web UI around greet() and start serving it.
iface = gr.Interface(
    fn=greet,
    inputs="text",
    outputs="text",
)
iface.launch()