# Source: Hugging Face Space "Kevin676/ChatGPT-with-Voice-Cloning-for-All"
# Space status shown on the page: Runtime error
# File size: 4,860 Bytes

import gradio as gr

from TTS.api import TTS

# Multilingual "your_tts" model, loaded on CPU (gpu=False) when this module is
# imported. english() uses it to synthesize speech from a reference recording.
tts1 = TTS(model_name="tts_models/multilingual/multi-dataset/your_tts", progress_bar=False, gpu=False)

# Mandarin (zh-CN, "baker" dataset) Tacotron2 model. chinese() uses it through
# tts_with_vc_to_file, i.e. synthesis followed by voice conversion.
tts2 = TTS("tts_models/zh-CN/baker/tacotron2-DDC-GST")

import os

import openai

import torch
import torchaudio
from speechbrain.pretrained import SpectralMaskEnhancement

# Speech-enhancement model fetched from the "speechbrain/metricgan-plus-voicebank"
# source and stored under pretrained_models/. english() runs its output
# through this model before returning it.
enhance_model = SpectralMaskEnhancement.from_hparams(
source="speechbrain/metricgan-plus-voicebank",
savedir="pretrained_models/metricgan-plus-voicebank",
# Disabled option: uncomment to run the enhancement model on a CUDA device.
#run_opts={"device":"cuda"},
)

# Conversation history sent to the chat model, seeded with the system prompt.
# chatgpt() adds every user/assistant turn to this one module-level list, so
# it keeps growing for the lifetime of the process.
mes = [
    dict(
        role="system",
        content="You are my personal assistant. Try to be helpful.",
    ),
]

def chatgpt(apikey, result):
    """Send ``result`` to gpt-3.5-turbo and return the assistant's reply.

    The request contains the whole history stored in the module-level ``mes``
    list plus the new user message. The user message and the reply are added
    to ``mes`` only after the API call succeeds, so a failed request (bad key,
    network error, rate limit, ...) does not leave an unanswered user turn in
    the history that every later request would re-send.

    Args:
        apikey: OpenAI API key used for this request.
        result: The user's message text.

    Returns:
        The text content of the first completion choice.

    Raises:
        Whatever ``openai.ChatCompletion.create`` raises; ``mes`` is left
        untouched in that case.
    """
    # NOTE(review): both the API key (set globally on the openai module) and
    # the history list are process-wide, so every caller of this function
    # shares one conversation. Confirm that this is intended.
    openai.api_key = apikey

    user_turn = {"role": "user", "content": result}

    completion = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        # Build a new list for the request instead of mutating the history.
        messages=mes + [user_turn],
    )

    chat_response = completion.choices[0].message.content

    # Commit the exchange as a pair so the history always alternates
    # user -> assistant.
    mes.extend([user_turn, {"role": "assistant", "content": chat_response}])

    return chat_response

def english(text_en, upload, VoiceMicrophone):
    """Speak ``text_en`` in English with a cloned voice, then enhance the audio.

    Args:
        text_en: Text to synthesize.
        upload: File path of an uploaded reference recording, or ``None``.
        VoiceMicrophone: File path of a microphone reference recording; used
            only when ``upload`` is ``None``.

    Returns:
        The path ``"enhanced.wav"`` of the enhanced audio file.

    Raises:
        ValueError: If neither reference recording was provided.
    """
    # The uploaded file takes precedence over the microphone recording.
    speaker_wav = upload if upload is not None else VoiceMicrophone
    if speaker_wav is None:
        # Fail early with a clear message instead of passing None to the model.
        raise ValueError(
            "No reference voice provided: upload an audio file or record one "
            "with the microphone."
        )

    tts1.tts_to_file(
        text_en,
        speaker_wav=speaker_wav,
        language="en",
        file_path="output.wav",
    )

    # unsqueeze(0) adds a leading dimension so the single clip forms a batch.
    noisy = enhance_model.load_audio("output.wav").unsqueeze(0)

    # presumably lengths=[1.] marks the clip as full-length (relative length)
    # -- confirm against the SpeechBrain API.
    enhanced = enhance_model.enhance_batch(noisy, lengths=torch.tensor([1.]))
    torchaudio.save("enhanced.wav", enhanced.cpu(), 16000)

    return "enhanced.wav"

def chinese(text_cn, upload1, VoiceMicrophone1):
    """Speak ``text_cn`` in Chinese and convert it to the reference voice.

    Args:
        text_cn: Text to synthesize; a full stop ("。") is appended before
            synthesis.
        upload1: File path of an uploaded reference recording, or ``None``.
        VoiceMicrophone1: File path of a microphone reference recording; used
            only when ``upload1`` is ``None``.

    Returns:
        The path ``"ouptut1.wav"`` of the generated audio file.

    Raises:
        ValueError: If neither reference recording was provided.
    """
    # The uploaded file takes precedence over the microphone recording.
    speaker_wav = upload1 if upload1 is not None else VoiceMicrophone1
    if speaker_wav is None:
        # Fail early with a clear message instead of passing None to the model.
        raise ValueError(
            "No reference voice provided: upload an audio file or record one "
            "with the microphone."
        )

    # The file name (sic) is kept unchanged so the output path stays the same.
    output_path = "ouptut1.wav"
    tts2.tts_with_vc_to_file(
        text_cn + "。",
        speaker_wav=speaker_wav,
        file_path=output_path,
    )

    return output_path

# Gradio UI. Components are laid out in the order they are created inside the
# nested context managers below.
block = gr.Blocks()

with block:
    with gr.Group():
        # Page title and tagline.
        gr.Markdown(
            """ # <center>🥳💬💕 - TalktoAI，随时随地，谈天说地！</center>
            
            ## <center>🤖 - 让有人文关怀的AI造福每一个人！AI向善，文明璀璨！TalktoAI - Enable the future！</center>
            
      """
        )
        
        # Chat section: API key (masked input), user prompt, and a start button.
        with gr.Box():
            with gr.Row().style(mobile_collapse=False, equal_height=True):
              
                inp1 = gr.Textbox(label='请输入您的Openai-API-Key', type = "password")
                inp2 = gr.Textbox(label='说点什么吧(中英皆可)')

                btn = gr.Button("开始对话吧")

        # Shows the reply returned by chatgpt().
        texts1 = gr.Textbox(lines=3, label="ChatGPT的回答")
              
        # chatgpt(apikey, result) receives the API key and the prompt.
        btn.click(chatgpt, [inp1, inp2], [texts1])

        # Voice-cloning section: reference audio from a file upload or from
        # the microphone, plus the Chinese synthesis button.
        with gr.Box():
            with gr.Row().style(mobile_collapse=False, equal_height=True):
                # No new component here: the reply textbox is reused as the
                # text input of both synthesis functions.
                inp3 = texts1
                inp4 = gr.Audio(source="upload", label = "请上传您喜欢的声音(wav/mp3文件)", type="filepath")
                inp5 = gr.Audio(source="microphone", type="filepath", label = '请用麦克风上传您喜欢的声音，与文件上传二选一即可')

                btn1 = gr.Button("用喜欢的声音听一听吧(中文)")

        out1 = gr.Audio(label="合成的专属声音(中文)")

        # chinese(text_cn, upload1, VoiceMicrophone1) returns an audio file path.
        btn1.click(chinese, [inp3, inp4, inp5], [out1])

        # English synthesis button; it uses the same three inputs as above.
        with gr.Box():
            with gr.Row().style(mobile_collapse=False, equal_height=True):

                btn2 = gr.Button("用喜欢的声音听一听吧(英文)")

        out2 = gr.Audio(label="合成的专属声音(英文)")
        
        # english(text_en, upload, VoiceMicrophone) returns an audio file path.
        btn2.click(english, [inp3, inp4, inp5], [out2])

        # Usage disclaimer and credits.
        gr.Markdown(
            """ ### <center>注意❗：请不要输入或生成会对个人以及组织造成侵害的内容，此程序仅供科研、学习及娱乐使用。用户输入或生成的内容与程序开发者无关，请自觉合法合规使用，违反者一切后果自负。</center>
            
            ### <center>Powered by [ChatGPT](https://chat.openai.com/). Please follow me on [Bilibili](https://space.bilibili.com/501495851?spm_id_from=333.1007.0.0).</center>
            
      """
        )
        
        # Footer: a quote and a note that Chinese cloning is done by voice
        # conversion.
        gr.HTML('''
        <div class="footer">
                    <p>🎶🖼️🎡 - It’s the intersection of technology and liberal arts that makes our hearts sing. - Steve Jobs
                    </p>
                    <p>注：中文声音克隆实际上是由声音转换(Voice Conversion)实现，所以输出结果可能更像是一种新的声音，效果不一定很理想，希望大家理解(之后也会不断迭代的)！为了更好的效果，使用中文声音克隆时请尽量上传女声。
                    </p>
        </div>
        ''')

# Start the app at import time; show_error=True is passed so that errors
# raised by the callbacks are reported in the UI.
block.launch(show_error=True)