File size: 4,775 Bytes
ebc4336
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
import gradio as gr
import torch
from modules.speaker import Speaker
from modules.hf import spaces
from modules.webui import webui_config
from modules.webui.webui_utils import tts_generate

import tempfile


@torch.inference_mode()
@spaces.GPU
def test_spk_voice(spk_file, text: str):
    """Synthesize ``text`` with the speaker stored in ``spk_file``.

    Args:
        spk_file: Speaker file handed over by the UI; ``None`` or ``""``
            means nothing has been uploaded.
        text: Text to synthesize.

    Returns:
        ``None`` when no speaker file is given, otherwise whatever
        ``tts_generate`` returns for the loaded speaker and ``text``.
    """
    # Nothing uploaded yet: there is no voice to audition.
    if spk_file is None or spk_file == "":
        return None

    return tts_generate(spk=Speaker.from_file(spk_file), text=text)


def speaker_editor_ui():
    """Build the speaker editor UI.

    Lays out three areas and wires their events:

    * a file input for a ``*.pt`` speaker file,
    * text boxes for the speaker's name, gender and description plus a
      button that saves the edited speaker to a new ``.pt`` file,
    * several identical "test voice" cards that synthesize a sample text
      with the loaded speaker.

    The function creates Gradio components and registers event handlers as
    a side effect; it returns nothing.
    """
    # Number of independent "test voice" cards shown next to the editor.
    test_card_count = 4

    def is_empty(spk_file):
        """Return True when the file input holds no speaker file."""
        return spk_file is None or spk_file == ""

    def on_generate(spk_file, name, gender, desc):
        """Apply the edited metadata to the loaded speaker and save it.

        Returns:
            Path of a temporary ``.pt`` file holding the updated speaker, or
            ``None`` when no speaker file is loaded.
        """
        if is_empty(spk_file):
            return None

        spk: Speaker = Speaker.from_file(spk_file)
        spk.name = name
        spk.gender = gender
        # The description is read back from ``describe`` when a file is
        # loaded (see spk_file_change), so it must be written to the same
        # attribute; writing to ``desc`` would silently drop the edit.
        spk.describe = desc

        # delete=False: the file has to outlive this handler so the output
        # component can serve it for download.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".pt") as tmp_file:
            torch.save(spk, tmp_file)
            tmp_file_path = tmp_file.name

        return tmp_file_path

    def create_test_voice_card(spk_file):
        """Create one "test voice" card bound to ``spk_file``.

        Returns:
            The card's button, so the caller can toggle its interactivity.
        """
        with gr.Group():
            gr.Markdown("🎤Test voice")
            with gr.Row():
                # Disabled until a speaker file has been loaded.
                test_voice_btn = gr.Button(
                    "Test Voice", variant="secondary", interactive=False
                )

                with gr.Column(scale=4):
                    test_text = gr.Textbox(
                        label="Test Text",
                        placeholder="Please input test text",
                        value=webui_config.localization.DEFAULT_SPEAKER_TEST_TEXT,
                    )
                    with gr.Row():
                        with gr.Column(scale=4):
                            output_audio = gr.Audio(label="Output Audio", format="mp3")

        test_voice_btn.click(
            fn=test_spk_voice,
            inputs=[spk_file, test_text],
            outputs=[output_audio],
        )

        return test_voice_btn

    # NOTE: currently not wired to any event.
    has_file = gr.State(False)

    # TODO: maybe a usage guide should be added here?
    # gr.Markdown("SPEAKER_CREATOR_GUIDE")

    with gr.Row():
        with gr.Column(scale=2):
            with gr.Group():
                gr.Markdown("💼Speaker file")
                spk_file = gr.File(label="*.pt file", file_types=[".pt"])

            with gr.Group():
                gr.Markdown("ℹ️Speaker info")
                # "*" is the placeholder value shown while no file is loaded.
                name_input = gr.Textbox(
                    label="Name",
                    placeholder="Enter speaker name",
                    value="*",
                    interactive=False,
                )
                gender_input = gr.Textbox(
                    label="Gender",
                    placeholder="Enter gender",
                    value="*",
                    interactive=False,
                )
                desc_input = gr.Textbox(
                    label="Description",
                    placeholder="Enter description",
                    value="*",
                    interactive=False,
                )
            with gr.Group():
                gr.Markdown("🔊Generate speaker.pt")
                generate_button = gr.Button("Save .pt file", interactive=False)
                output_file = gr.File(label="Save to File")
        with gr.Column(scale=5):
            test_buttons = [
                create_test_voice_card(spk_file=spk_file)
                for _ in range(test_card_count)
            ]

    generate_button.click(
        fn=on_generate,
        inputs=[spk_file, name_input, gender_input, desc_input],
        outputs=[output_file],
    )

    def spk_file_change(spk_file):
        """Refresh the info fields and enable/disable the buttons.

        Returns:
            Updates for the three info text boxes, followed by one update for
            the save button and one per test button, in the same order as
            the ``outputs`` list registered below.
        """
        if is_empty(spk_file):
            loaded = False
            values = ("*", "*", "*")
        else:
            spk: Speaker = Speaker.from_file(spk_file)
            loaded = True
            values = (spk.name, spk.gender, spk.describe)

        textbox_updates = [
            gr.Textbox(value=value, interactive=loaded) for value in values
        ]
        # One update for the save button plus one per test-voice button.
        button_updates = [
            gr.Button(interactive=loaded) for _ in range(1 + len(test_buttons))
        ]
        return textbox_updates + button_updates

    spk_file.change(
        fn=spk_file_change,
        inputs=[spk_file],
        outputs=[
            name_input,
            gender_input,
            desc_input,
            generate_button,
            *test_buttons,
        ],
    )