File size: 4,860 Bytes
be1a4e4
 
 
 
8cd7a5e
be1a4e4
 
 
 
 
 
 
 
 
 
 
 
 
 
8cd7a5e
be1a4e4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
22c3d58
 
 
be1a4e4
22c3d58
 
 
be1a4e4
ad65957
be1a4e4
 
 
 
 
 
 
c921881
22c3d58
 
 
 
 
 
 
 
 
 
c921881
22c3d58
 
 
2c7e417
be1a4e4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a3da95c
be1a4e4
 
 
 
 
 
 
 
 
 
 
22c3d58
be1a4e4
 
 
 
 
22c3d58
be1a4e4
 
 
 
 
 
 
 
22c3d58
be1a4e4
 
 
 
 
 
 
 
 
 
 
 
 
22c3d58
be1a4e4
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
import gradio as gr

from TTS.api import TTS

# Voice-cloning model used for English output: the multilingual YourTTS
# checkpoint, run on CPU with the download progress bar disabled.
tts1 = TTS(
    model_name="tts_models/multilingual/multi-dataset/your_tts",
    progress_bar=False,
    gpu=False,
)

# Model used for Chinese output (zh-CN Baker Tacotron2-DDC-GST checkpoint),
# created with the library's default options.
tts2 = TTS(model_name="tts_models/zh-CN/baker/tacotron2-DDC-GST")

import os

import openai

import torch
import torchaudio
from speechbrain.pretrained import SpectralMaskEnhancement

# Speech-enhancement model applied to the English TTS output. The
# "speechbrain/metricgan-plus-voicebank" checkpoint is fetched from the hub
# and stored under `savedir`.
enhance_model = SpectralMaskEnhancement.from_hparams(
    source="speechbrain/metricgan-plus-voicebank",
    savedir="pretrained_models/metricgan-plus-voicebank",
    # run_opts={"device": "cuda"},  # left disabled; would select a CUDA device
)

# Conversation history sent to the chat model. It starts with the system
# prompt and is extended in place by `chatgpt` on every successful exchange.
mes = [
    dict(role="system", content="You are my personal assistant. Try to be helpful."),
]

def chatgpt(apikey, result):
    """Send one user message to gpt-3.5-turbo and return the reply text.

    The conversation history lives in the module-level list ``mes``, so it is
    shared by every caller in this process and grows with each exchange.

    Args:
        apikey: OpenAI API key; it is assigned to ``openai.api_key`` (a
            module-wide setting) before the request is made.
        result: The user's message text.

    Returns:
        The content of the first choice returned by the chat completion.

    Raises:
        Whatever ``openai.ChatCompletion.create`` raises (for example on an
        invalid key or a network failure). In that case ``mes`` is left
        untouched.
    """
    openai.api_key = apikey

    user_turn = {"role": "user", "content": result}

    # Send a *copy* of the history plus the new turn. Appending to `mes`
    # before the request would leave a dangling user message (with no
    # assistant reply) in the shared history whenever the request fails,
    # and that orphan would then be re-sent on every later call.
    completion = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        messages=mes + [user_turn],
    )

    chat_response = completion.choices[0].message.content

    # Commit both turns together, only after the request has succeeded.
    mes.extend([user_turn, {"role": "assistant", "content": chat_response}])

    return chat_response

def english(text_en, upload, VoiceMicrophone):
    """Speak ``text_en`` in English using a reference voice, then enhance it.

    The raw synthesis is written to ``output.wav``; the enhanced result is
    written to ``enhanced.wav``. Both are fixed paths in the working
    directory, so each call overwrites the previous files.

    Args:
        text_en: Text to synthesize.
        upload: File path of an uploaded reference recording, or ``None``.
        VoiceMicrophone: File path of a microphone recording. It is used only
            when ``upload`` is ``None``.

    Returns:
        The string ``"enhanced.wav"``.

    Raises:
        ValueError: If neither ``upload`` nor ``VoiceMicrophone`` is given.
    """
    # The uploaded file takes priority over the microphone recording.
    speaker_wav = upload if upload is not None else VoiceMicrophone
    if speaker_wav is None:
        # Fail early with a readable message instead of passing None into
        # the TTS model.
        raise ValueError(
            "请先上传或录制一段参考声音 / "
            "Please upload or record a reference voice first."
        )

    tts1.tts_to_file(
        text_en,
        speaker_wav=speaker_wav,
        language="en",
        file_path="output.wav",
    )

    # unsqueeze(0) adds a leading batch dimension for enhance_batch.
    noisy = enhance_model.load_audio("output.wav").unsqueeze(0)

    # NOTE(review): lengths=[1.0] presumably marks the single batch item as
    # full-length (relative length) — confirm against the SpeechBrain docs.
    enhanced = enhance_model.enhance_batch(noisy, lengths=torch.tensor([1.0]))
    torchaudio.save("enhanced.wav", enhanced.cpu(), 16000)

    return "enhanced.wav"

def chinese(text_cn, upload1, VoiceMicrophone1):
    """Speak ``text_cn`` in Chinese and convert it toward a reference voice.

    A full stop ("。") is appended to the text before synthesis. The result
    is written to a fixed path in the working directory, so each call
    overwrites the previous file.

    Args:
        text_cn: Text to synthesize.
        upload1: File path of an uploaded reference recording, or ``None``.
        VoiceMicrophone1: File path of a microphone recording. It is used
            only when ``upload1`` is ``None``.

    Returns:
        The path of the generated audio file.

    Raises:
        ValueError: If neither ``upload1`` nor ``VoiceMicrophone1`` is given.
    """
    # The uploaded file takes priority over the microphone recording.
    speaker_wav = upload1 if upload1 is not None else VoiceMicrophone1
    if speaker_wav is None:
        # Fail before any synthesis work is done instead of handing None to
        # the voice-conversion call.
        raise ValueError(
            "请先上传或录制一段参考声音 / "
            "Please upload or record a reference voice first."
        )

    # The file name (including its spelling) is kept as-is; it is only used
    # here, for both writing and returning.
    out_path = "ouptut1.wav"
    tts2.tts_with_vc_to_file(
        text_cn + "。",
        speaker_wav=speaker_wav,
        file_path=out_path,
    )

    return out_path

# Gradio UI: a ChatGPT text chat whose answer can be read aloud in a
# user-supplied voice, in Chinese (`chinese`) or English (`english`).
# Components are rendered in the order they are created below.
# NOTE(review): `gr.Box` and `Row().style(...)` look like Gradio 3.x APIs —
# confirm the pinned Gradio version before upgrading.
block = gr.Blocks()

with block:
    with gr.Group():
        # Page title and tagline.
        gr.Markdown(
            """ # <center>🥳💬💕 - TalktoAI,随时随地,谈天说地!</center>
            
            ## <center>🤖 - 让有人文关怀的AI造福每一个人!AI向善,文明璀璨!TalktoAI - Enable the future!</center>
            
      """
        )
        
        # Chat section: API key (masked input), user prompt, and send button.
        with gr.Box():
            with gr.Row().style(mobile_collapse=False, equal_height=True):
              
                inp1 = gr.Textbox(label='请输入您的Openai-API-Key', type = "password")
                inp2 = gr.Textbox(label='说点什么吧(中英皆可)')

                btn = gr.Button("开始对话吧")

        # Shows the ChatGPT reply; also reused below as the text to synthesize.
        texts1 = gr.Textbox(lines=3, label="ChatGPT的回答")
              
        # chatgpt(apikey, result) -> reply text
        btn.click(chatgpt, [inp1, inp2], [texts1])

        # Voice section: reference voice via file upload or microphone.
        with gr.Box():
            with gr.Row().style(mobile_collapse=False, equal_height=True):
                # Alias: the text fed to both synthesis functions is the ChatGPT reply box.
                inp3 = texts1
                inp4 = gr.Audio(source="upload", label = "请上传您喜欢的声音(wav/mp3文件)", type="filepath")
                inp5 = gr.Audio(source="microphone", type="filepath", label = '请用麦克风上传您喜欢的声音,与文件上传二选一即可')

                btn1 = gr.Button("用喜欢的声音听一听吧(中文)")

        out1 = gr.Audio(label="合成的专属声音(中文)")

        # chinese(text_cn, upload1, VoiceMicrophone1) -> audio file path
        btn1.click(chinese, [inp3, inp4, inp5], [out1])

        with gr.Box():
            with gr.Row().style(mobile_collapse=False, equal_height=True):

                btn2 = gr.Button("用喜欢的声音听一听吧(英文)")

        out2 = gr.Audio(label="合成的专属声音(英文)")
        
        # english(text_en, upload, VoiceMicrophone) -> audio file path;
        # shares the same text and reference-voice inputs as the Chinese button.
        btn2.click(english, [inp3, inp4, inp5], [out2])

        # Usage disclaimer and credits.
        gr.Markdown(
            """ ### <center>注意❗:请不要输入或生成会对个人以及组织造成侵害的内容,此程序仅供科研、学习及娱乐使用。用户输入或生成的内容与程序开发者无关,请自觉合法合规使用,违反者一切后果自负。</center>
            
            ### <center>Powered by [ChatGPT](https://chat.openai.com/). Please follow me on [Bilibili](https://space.bilibili.com/501495851?spm_id_from=333.1007.0.0).</center>
            
      """
        )
        
        # Footer: quote plus a note on the Chinese voice-conversion quality.
        gr.HTML('''
        <div class="footer">
                    <p>🎶🖼️🎡 - It’s the intersection of technology and liberal arts that makes our hearts sing. - Steve Jobs
                    </p>
                    <p>注:中文声音克隆实际上是由声音转换(Voice Conversion)实现,所以输出结果可能更像是一种新的声音,效果不一定很理想,希望大家理解(之后也会不断迭代的)!为了更好的效果,使用中文声音克隆时请尽量上传女声。
                    </p>
        </div>
        ''')

# Start the web server; show_error=True is passed so handler exceptions are shown in the UI.
block.launch(show_error=True)