|
import gradio as gr |
|
import torch |
|
from TTS.api import TTS |
|
import os |
|
import librosa |
|
import requests |
|
from datetime import datetime |
|
|
|
|
|
import import_local_tts_models |
|
|
|
|
|
# Run inference on the GPU when CUDA is available, otherwise on the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# Load the FreeVC voice-conversion model once at import time and move it to
# the selected device; voice_conversion() below reuses this instance.
tts = TTS(
    model_name="voice_conversion_models/multilingual/vctk/freevc24",
    progress_bar=False,
).to(device)
|
|
|
def convert_audio_to_wav(file_path):
    """Convert any format librosa can decode (mp3, etc.) to a WAV file.

    Args:
        file_path: Path of the audio file to convert.

    Returns:
        Path of a newly created temporary ``.wav`` file containing the
        decoded audio at its native sample rate. The caller owns the file
        and may delete it once it is no longer needed.
    """
    # Function-scope imports keep this helper self-contained.
    import tempfile

    from scipy.io import wavfile

    # sr=None keeps the file's native sample rate instead of resampling.
    audio, sr = librosa.load(file_path, sr=None)

    # Use a unique file per call: with a fixed name, a second conversion
    # (e.g. the input voice after the target voice) would overwrite the
    # first result while it is still needed.
    fd, output_path = tempfile.mkstemp(suffix=".wav")
    os.close(fd)

    try:
        # librosa.output.write_wav was removed in librosa 0.8.0, so the
        # samples are written with scipy instead.
        wavfile.write(output_path, int(sr), audio)
    except Exception:
        # Do not leave an empty temp file behind when writing fails.
        os.remove(output_path)
        raise

    return output_path
|
|
|
def upload_to_file_io(file_path):
    """Upload a file to file.io and return the temporary link.

    The upload is best-effort: any failure is reported by returning ``None``
    rather than by raising, so a network problem cannot abort the caller.

    Args:
        file_path: Path of the file to upload.

    Returns:
        The ``link`` value from the file.io JSON response, or ``None`` when
        the file cannot be read, the request fails or times out, the status
        code is not 200, or the response carries no usable link.
    """
    url = "https://file.io"
    try:
        with open(file_path, "rb") as f:
            # Without a timeout, requests may wait forever on a stalled
            # connection and block the calling request handler.
            response = requests.post(url, files={"file": f}, timeout=30)
        if response.status_code == 200:
            payload = response.json()
            if isinstance(payload, dict):
                return payload.get("link")
    except (OSError, requests.RequestException, ValueError) as exc:
        # OSError: unreadable file; RequestException: network failure;
        # ValueError: response body is not valid JSON.
        print(f"Upload to file.io failed: {exc}")
    return None
|
|
|
def voice_conversion(input_audio, target_voice, uploaded_target_voice):
    """Convert the voice in ``input_audio`` to sound like the target voice.

    Args:
        input_audio: File path of the source recording, or ``None`` when the
            user supplied nothing.
        target_voice: File name of a bundled sample inside ``Examples``.
            Used only when no custom target voice was uploaded.
        uploaded_target_voice: File path of a user-supplied target voice, or
            ``None`` to fall back to ``target_voice``.

    Returns:
        A ``(output_path, None)`` tuple: the path of the converted WAV file
        and an empty value for the error-message output.

    Raises:
        gr.Error: If no input audio was given, the input is longer than two
            minutes, or the target voice file cannot be found.
    """
    output_path = "output.wav"

    # Fail with a readable message instead of an obscure librosa error when
    # the user pressed "Convert" without providing any audio.
    if not input_audio:
        raise gr.Error("Error: No input audio provided.")

    # Enforce the two-minute limit before doing any expensive work.
    # NOTE(review): newer librosa releases rename `filename=` to `path=` —
    # confirm against the installed version.
    duration = librosa.get_duration(filename=input_audio)
    if duration > 120:
        print("Error: Input Audio file exceeds 2 minutes.")
        raise gr.Error("Error: Input Audio file exceeds 2 minutes.")
    if duration > 30:
        gr.Info("Your input file is over 30 seconds, \nso be patient with the loading time lol.")

    # WAV files produced by convert_audio_to_wav during this call; they are
    # only needed until the conversion and upload have finished.
    converted_files = []
    try:
        # An uploaded target voice takes precedence over the dropdown choice.
        if uploaded_target_voice is not None:
            target_voice_path = uploaded_target_voice
            if not uploaded_target_voice.endswith(".wav"):
                target_voice_path = convert_audio_to_wav(uploaded_target_voice)
                converted_files.append(target_voice_path)
        else:
            target_voice_path = (
                os.path.join("Examples", target_voice) if target_voice else None
            )
            if target_voice_path is None or not os.path.exists(target_voice_path):
                # Raised (not returned) so the user actually sees it, the
                # same way the duration error above is reported.
                raise gr.Error("Error: Target voice file not found.")

        # The source recording is converted to WAV as well when needed.
        if not input_audio.endswith(".wav"):
            input_audio = convert_audio_to_wav(input_audio)
            converted_files.append(input_audio)

        tts.voice_conversion_to_file(
            source_wav=input_audio,
            target_wav=target_voice_path,
            file_path=output_path,
        )

        # NOTE(review): this sends the user's recording to a third-party
        # service — confirm that users are informed about it.
        input_file_link = upload_to_file_io(input_audio)
        if input_file_link:
            print(f"Input file uploaded to: {input_file_link}")
        else:
            print("Error uploading the input file to file.io")
    finally:
        # Best-effort cleanup; a file that is already gone is not an error.
        for temp_path in converted_files:
            try:
                os.remove(temp_path)
            except OSError:
                pass

    return output_path, None
|
|
|
|
|
# Folder holding the bundled target-voice samples offered in the dropdown.
examples_folder = "Examples/"

# os.listdir returns entries in arbitrary order; sorting keeps the dropdown
# order and its default selection identical on every run and platform.
example_files = sorted(
    name for name in os.listdir(examples_folder) if name.endswith(".wav")
)
|
|
|
|
|
# Gradio UI definition.
# NOTE(review): the original indentation was lost, so the nesting of widgets
# inside the rows is reconstructed from the blank-line grouping — confirm
# against the intended layout.
with gr.Blocks() as demo:
    gr.Markdown("## Voice Conversion using Coqui TTS")

    # Inputs: the user's recording plus the target voice (bundled or uploaded).
    with gr.Row():
        input_audio = gr.Audio(label="Record or Upload Your Voice Max input length of 2 minutes.", type="filepath")
        target_voice = gr.Dropdown(
            choices=example_files,
            label="Select Target Voice from Examples",
            # Guarded so a folder without any .wav file does not raise
            # IndexError while the app is starting.
            value=example_files[0] if example_files else None,
            info="Located in Examples/ folder",
        )
        uploaded_target_voice = gr.Audio(
            label="Or Upload Your Own Target Voice",
            type="filepath",
        )

    # Preview of the target voice currently selected in the dropdown.
    with gr.Row():
        play_button = gr.Button("Preview Selected Target Voice")
        preview_audio = gr.Audio(label="Preview Target Voice", type="filepath")

    convert_button = gr.Button("Convert Voice")
    output_audio = gr.Audio(label="Converted Voice", type="filepath")
    error_message = gr.Textbox(label="Error Message", visible=False)

    def preview_target_voice(selected_target_voice):
        """Return the path of the selected example, or None if none is selected."""
        if not selected_target_voice:
            return None
        return os.path.join(examples_folder, selected_target_voice)

    play_button.click(preview_target_voice, inputs=[target_voice], outputs=preview_audio)

    convert_button.click(
        voice_conversion,
        inputs=[input_audio, target_voice, uploaded_target_voice],
        outputs=[output_audio, error_message],
    )

# Enable request queuing and start the web server.
demo.queue().launch()
|
|