File size: 4,850 Bytes
d2b7e94
 
ebc4336
 
d2b7e94
ebc4336
627d3d7
ebc4336
 
 
 
 
ba0472f
37195a7
 
 
 
 
ebc4336
 
 
37195a7
ebc4336
 
 
 
 
 
 
d5d0921
ebc4336
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
import tempfile

import gradio as gr
import torch

from modules.speaker import Speaker
from modules.utils.hf import spaces
from modules.webui import webui_config
from modules.webui.webui_utils import tts_generate


@torch.inference_mode()
@spaces.GPU(duration=120)
def test_spk_voice(
    spk_file,
    text: str,
    progress=gr.Progress(track_tqdm=True),
):
    """Synthesize ``text`` using the speaker stored in ``spk_file``.

    Args:
        spk_file: Speaker file reference passed to ``Speaker.from_file``.
            ``None`` or an empty string means no file has been selected.
        text: The text to synthesize.
        progress: Gradio progress tracker, forwarded to ``tts_generate``.

    Returns:
        The result of ``tts_generate``, or ``None`` when no speaker file
        was supplied.
    """
    missing = spk_file == "" or spk_file is None
    if not missing:
        speaker = Speaker.from_file(spk_file)
        return tts_generate(spk=speaker, text=text, progress=progress)

    # Nothing to synthesize without a speaker file.
    return None


def speaker_editor_ui():
    """Build the speaker editor UI and wire up its events.

    The layout has two columns. The left one holds the speaker file upload,
    three metadata textboxes (name, gender, description) and a button that
    saves the edited speaker to a new ``.pt`` file. The right one holds four
    independent "test voice" cards that synthesize text with the uploaded
    speaker.

    Components are created at call time, so this is presumably meant to be
    invoked inside an active ``gr.Blocks`` context (confirm against caller).
    Returns nothing.
    """

    def on_generate(spk_file, name, gender, desc):
        """Apply the edited metadata to the speaker and save it to a temp file.

        Returns the path of the newly written ``.pt`` file.
        """
        speaker: Speaker = Speaker.from_file(spk_file)
        speaker.name, speaker.gender, speaker.describe = name, gender, desc

        # delete=False keeps the file on disk after the handle is closed;
        # only its path is handed back to the caller.
        handle = tempfile.NamedTemporaryFile(delete=False, suffix=".pt")
        with handle:
            torch.save(speaker, handle)
        return handle.name

    def spk_file_change(spk_file):
        """Return updates for the three textboxes and five buttons.

        With no file, the textboxes are reset to "*" and everything is
        disabled; otherwise the textboxes show the speaker's metadata and
        everything becomes interactive.
        """
        if spk_file is None or spk_file == "":
            enabled = False
            field_values = ("*", "*", "*")
        else:
            loaded: Speaker = Speaker.from_file(spk_file)
            enabled = True
            field_values = (loaded.name, loaded.gender, loaded.describe)

        textbox_updates = [
            gr.Textbox(value=value, interactive=enabled) for value in field_values
        ]
        # One update for the save button plus one per test-voice button.
        button_updates = [gr.Button(interactive=enabled) for _ in range(5)]
        return textbox_updates + button_updates

    def create_test_voice_card(spk_file):
        """Create one test-voice card and return its (initially disabled) button."""
        with gr.Group():
            gr.Markdown("🎤Test voice")
            with gr.Row():
                trigger_btn = gr.Button(
                    "Test Voice", variant="secondary", interactive=False
                )

                with gr.Column(scale=4):
                    text_box = gr.Textbox(
                        label="Test Text",
                        placeholder="Please input test text",
                        value=webui_config.localization.DEFAULT_SPEAKER_TEST_TEXT,
                    )
                    with gr.Row(), gr.Column(scale=4):
                        audio_out = gr.Audio(label="Output Audio", format="mp3")

        trigger_btn.click(
            fn=test_spk_voice,
            inputs=[spk_file, text_box],
            outputs=[audio_out],
        )

        return trigger_btn

    def _locked_textbox(label, placeholder):
        """Create a read-only textbox showing the "*" placeholder value."""
        return gr.Textbox(
            label=label,
            placeholder=placeholder,
            value="*",
            interactive=False,
        )

    # Not referenced by any event in this function; kept so the component
    # is still instantiated.
    has_file = gr.State(False)

    # TODO: maybe a usage guide should be written here?
    # gr.Markdown("SPEAKER_CREATOR_GUIDE")

    with gr.Row():
        with gr.Column(scale=2):
            with gr.Group():
                gr.Markdown("💼Speaker file")
                spk_file = gr.File(label="*.pt file", file_types=[".pt"])

            with gr.Group():
                gr.Markdown("ℹ️Speaker info")
                name_input = _locked_textbox("Name", "Enter speaker name")
                gender_input = _locked_textbox("Gender", "Enter gender")
                desc_input = _locked_textbox("Description", "Enter description")

            with gr.Group():
                gr.Markdown("🔊Generate speaker.pt")
                generate_button = gr.Button("Save .pt file", interactive=False)
                output_file = gr.File(label="Save to File")

        with gr.Column(scale=5):
            # Four identical cards, created in order.
            test_buttons = [
                create_test_voice_card(spk_file=spk_file) for _ in range(4)
            ]

    generate_button.click(
        fn=on_generate,
        inputs=[spk_file, name_input, gender_input, desc_input],
        outputs=[output_file],
    )

    # Output order must match the list returned by spk_file_change:
    # three textboxes, the save button, then the four test buttons.
    spk_file.change(
        fn=spk_file_change,
        inputs=[spk_file],
        outputs=[
            name_input,
            gender_input,
            desc_input,
            generate_button,
            *test_buttons,
        ],
    )