Spaces:
Paused
Paused
File size: 4,422 Bytes
eb21a2f |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 |
from TTS.api import TTS
import json
import gradio as gr
from share_btn import community_icon_html, loading_icon_html, share_js
import os
import shutil
import re
import numpy as np
from scipy.io import wavfile
from scipy.io.wavfile import write, read
from pydub import AudioSegment
# Optional feature flag read from the environment; None when the variable is unset.
file_upload_available = os.environ.get("ALLOW_FILE_UPLOAD")
MAX_NUMBER_SENTENCES = 10

# Read the character catalogue. The encoding is pinned to UTF-8 so that
# non-ASCII titles decode the same way on every platform instead of
# depending on the locale's default encoding.
with open("characters.json", "r", encoding="utf-8") as file:
    data = json.load(file)

# Keep only the three fields the app needs from each catalogue entry.
characters = [
    {
        "image": item["image"],
        "title": item["title"],
        "speaker": item["speaker"],
    }
    for item in data
]

# Load the multilingual Bark model once at import time, on CPU (gpu=False).
tts = TTS("tts_models/multilingual/multi-dataset/bark", gpu=False)
def load_hidden_mic(audio_in):
    """Drop the folders left by the previous recording and pass the new one on.

    Removes ``bark_voices/audio-0-100`` and ``bark_voices/audio-0-100_cleaned``
    when they exist, printing what happened for each. A failed deletion is
    reported with ``print`` and otherwise ignored.

    Args:
        audio_in: The newly recorded sample, exactly as handed in by the caller.

    Returns:
        ``audio_in``, unchanged.
    """
    print("USER RECORDED A NEW SAMPLE")

    voices_root = 'bark_voices'
    raw_dir = os.path.join(voices_root, 'audio-0-100')
    cleaned_dir = os.path.join(voices_root, 'audio-0-100_cleaned')

    print("We need to clean previous util files, if needed:")

    # One row per folder: (path, message once removed, message when absent).
    leftovers = (
        (
            raw_dir,
            f"Successfully deleted the folder previously created from last raw recorded sample: {raw_dir}",
            f"OK, the folder a raw recorded sample does not exist: {raw_dir}",
        ),
        (
            cleaned_dir,
            f"Successfully deleted the folder previously created from last cleaned recorded sample: {cleaned_dir}",
            f"Ok, the folderfor a cleaned recorded sample does not exist: {cleaned_dir}",
        ),
    )

    for directory, removed_msg, absent_msg in leftovers:
        if not os.path.exists(directory):
            print(absent_msg)
            continue
        try:
            shutil.rmtree(directory)
            print(removed_msg)
        except OSError as err:
            print(f"Error: {directory} - {err.strerror}")

    return audio_in
def infer(hidden_numpy_audio):
    """Clone the recorded voice and speak the fixed demo prompt with it.

    The recording is written to ``bark_voices/audio-0-100/audio-0-100.wav``
    (the same folder name ``load_hidden_mic`` clears on every new recording)
    and that folder name is passed to the TTS model as the speaker. The
    synthesized speech is written to ``output.wav`` and a waveform video is
    rendered from it.

    Args:
        hidden_numpy_audio: Value of the hidden ``gr.Audio(type="numpy")``
            component. Expected to be a ``(sample_rate, samples)`` pair, or
            ``None`` when nothing has been recorded yet.

    Returns:
        A 2-tuple ``(audio_path, video)`` matching the two output components
        wired to the submit button: the path of the generated wav file and
        the value returned by ``gr.make_waveform``.

    Raises:
        gr.Error: If no recording is available.
    """
    print("\n—————\nNEW INFERENCE:\n———————\n")

    if hidden_numpy_audio is None:
        # Surface a readable message in the UI instead of a traceback.
        raise gr.Error("Please record a voice sample before submitting.")

    sample_rate, samples = hidden_numpy_audio

    # Bark is pointed at voice_dir="bark_voices/" with speaker=<name>, so the
    # reference recording goes to bark_voices/<name>/<name>.wav.
    # NOTE(review): layout taken from the Coqui Bark docs — confirm against
    # the installed TTS version.
    file_name = "audio-0-100"
    destination_path = os.path.join("bark_voices", file_name)
    os.makedirs(destination_path, exist_ok=True)
    write(os.path.join(destination_path, f"{file_name}.wav"), sample_rate, samples)

    prompt = "Hi mom, I have a broken tire and need a transfer. Can you send me some money please?"

    gr.Info("Generating audio from prompt")
    tts.tts_to_file(text=prompt,
                    file_path="output.wav",
                    voice_dir="bark_voices/",
                    speaker=f"{file_name}")

    print("Preparing final waveform video ...")
    tts_video = gr.make_waveform(audio="output.wav")
    print(tts_video)
    print("FINISHED")

    # Exactly two values: the click handler declares [cloned_out, video_out].
    return "output.wav", tts_video
# Custom stylesheet passed to gr.Blocks(css=css).
# It sizes the button inside `.mic-wrap` (full width, 60px tall, larger font)
# and overrides the layout and size of the record icon and its dot.
# NOTE(review): the `.svelte-1thnwz` suffix looks like a build-generated class
# name — presumably tied to one gradio release; verify after upgrading gradio.
css = """
.mic-wrap > button {
width: 100%;
height: 60px;
font-size: 1.4em!important;
}
.record-icon.svelte-1thnwz {
display: flex;
position: relative;
margin-right: var(--size-2);
width: unset;
height: unset;
}
span.record-icon > span.dot.svelte-1thnwz {
width: 20px!important;
height: 20px!important;
}
"""
# Intro banner shown at the top of the page via gr.Markdown(html_header).
# The quoted sentence must stay identical to the `prompt` string hard-coded
# in infer(), since the text tells the user what will be synthesized.
html_header = """
<h1 style="text-align: center;">Coqui + Bark Voice Cloning</h1>
<p style="text-align: center;">
Mimic any voice character in less than 2 minutes with this <a href="https://tts.readthedocs.io/en/dev/models/bark.html" target="_blank">Coqui TTS + Bark</a> demo ! <br />
Record a clean 20 seconds voice using the microphone provided.<br />
The hard-coded TTS prompt is: “Hi mom, I have a broken tire and need a transfer. Can you send me some money please?”<br />
</p>
"""
# Page layout and event wiring. Components are created in display order, so
# the statement order below is significant.
with gr.Blocks(css=css) as demo:
    gr.Markdown(html_header)

    # Microphone recorder; its value is a file path (type="filepath").
    micro_in = gr.Audio(
        label="Record voice to clone",
        type="filepath",
        source="microphone",
        interactive=True,
    )

    # Invisible component that receives the recording and hands it to
    # infer() in numpy form.
    hidden_audio_numpy = gr.Audio(type="numpy", visible=False)

    micro_submit_btn = gr.Button("Submit")

    # When recording stops, clear old voice folders and copy the sample into
    # the hidden component; this step bypasses the queue.
    micro_in.stop_recording(
        fn=load_hidden_mic,
        inputs=[micro_in],
        outputs=[hidden_audio_numpy],
        queue=False,
    )

    # Generated speech (kept hidden) and the waveform video built from it.
    cloned_out = gr.Audio(label="Text to speech output", visible=False)
    video_out = gr.Video(label="Waveform video", elem_id="voice-video-out")

    # Submit runs the cloning + synthesis and fills both outputs.
    micro_submit_btn.click(
        fn=infer,
        inputs=[hidden_audio_numpy],
        outputs=[cloned_out, video_out],
    )

# Enable the request queue (at most 10 waiting, API access closed), then serve.
demo.queue(api_open=False, max_size=10)
demo.launch()
|