File size: 3,008 Bytes
01e655b
02e90e4
01e655b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
02e90e4
 
 
 
01e655b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
02e90e4
01e655b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
from pydantic import BaseModel
from typing import Any, Union

import torch

from modules.speaker import Speaker, speaker_mgr


from modules.data import styles_mgr

from pydub import AudioSegment

from modules.ssml import merge_prompt


from enum import Enum


class ParamsTypeError(Exception):
    """Raised when a request parameter has an unsupported type or form."""


class AudioFormat(str, Enum):
    # Audio format names. Members also subclass ``str``, so each one
    # compares equal to its plain string value (e.g. ``"mp3"``).
    mp3 = "mp3"
    wav = "wav"


class BaseResponse(BaseModel):
    # Response envelope: a message string plus a payload of any type.
    message: str
    data: Any

    class Config:
        # Custom JSON encoders for payload values: tensors are converted
        # with ``tolist()`` and Speaker objects with ``to_json()``.
        json_encoders = {
            torch.Tensor: lambda v: v.tolist(),
            Speaker: lambda v: v.to_json(),
        }


def success_response(data: Any, message: str = "Success") -> BaseResponse:
    """Wrap ``data`` in a ``BaseResponse`` carrying ``message``.

    Args:
        data: Payload to place in the response's ``data`` field.
        message: Text for the ``message`` field; defaults to ``"Success"``.

    Returns:
        The newly constructed ``BaseResponse``.
    """
    response = BaseResponse(data=data, message=message)
    return response


def wav_to_mp3(wav_data, bitrate="48k"):
    """Re-encode WAV audio as MP3.

    Args:
        wav_data: WAV input handed straight to ``AudioSegment.from_wav``
            (presumably a path or file-like object; confirm against pydub).
        bitrate: Bitrate string forwarded to the export call.

    Returns:
        Whatever ``AudioSegment.export`` returns when no output target is
        given (presumably a file-like object holding the MP3 data; confirm
        against pydub).
    """
    segment = AudioSegment.from_wav(wav_data)
    mp3_output = segment.export(format="mp3", bitrate=bitrate)
    return mp3_output


def to_number(value, t, default=0):
    """Convert ``value`` with the callable ``t``, falling back to ``default``.

    Args:
        value: The raw value to convert (e.g. a query-string argument).
        t: Conversion callable such as ``int`` or ``float``.
        default: Value returned when the conversion fails.

    Returns:
        ``t(value)`` on success, otherwise ``default``.
    """
    try:
        return t(value)
    except (ValueError, TypeError, OverflowError):
        # OverflowError covers cases such as int(float("inf")), which would
        # otherwise escape the "return default on bad input" contract.
        return default


def calc_spk_style(spk: Union[str, int], style: Union[str, int]):
    """Combine a speaker reference and a named style into voice attributes.

    ``spk`` may be an int (used as-is), a digit-only string (converted to
    int), or any other string, which is looked up through
    ``speaker_mgr.get_speaker``; a falsy lookup result leaves the speaker
    unset. Values of any other type are ignored.

    ``style`` must be a non-numeric name. Its parameters are fetched with
    ``styles_mgr.find_params_by_name`` and, where present, override the
    speaker-derived values. An empty string or a non-string, non-numeric
    style is skipped.

    Returns:
        A dict of voice attributes with every ``None``-valued entry removed.

    Raises:
        ParamsTypeError: If ``style`` is an int, a float, or a digit-only
            string.
    """
    voice_attrs = {}
    params = {}

    # Resolve the speaker first; this happens before the style is validated.
    spk_type = type(spk)
    if spk_type is int:
        voice_attrs["spk"] = spk
    elif spk_type is str:
        if spk.isdigit():
            voice_attrs["spk"] = int(spk)
        else:
            found_speaker = speaker_mgr.get_speaker(spk)
            if found_speaker:
                voice_attrs["spk"] = found_speaker

    # Reject numeric styles, whether given as a number or as a digit string.
    style_type = type(style)
    style_is_str = style_type is str
    if style_type in (int, float) or (style_is_str and style.isdigit()):
        raise ParamsTypeError("The style parameter cannot be a number.")
    if style_is_str and style != "":
        params.update(styles_mgr.find_params_by_name(style).items())

    # merge_prompt comes from modules.ssml; presumably it folds prompt-related
    # entries of ``params`` into ``voice_attrs`` -- confirm against that module.
    merge_prompt(voice_attrs, params)

    # (voice attribute name, style parameter name): the style value wins when
    # present, otherwise the current voice attribute (or None) is kept.
    overrides = (
        ("spk", "spk"),
        ("seed", "seed"),
        ("temperature", "temp"),
        ("prefix", "prefix"),
        ("prompt1", "prompt1"),
        ("prompt2", "prompt2"),
    )
    for attr_name, param_name in overrides:
        voice_attrs[attr_name] = params.get(param_name, voice_attrs.get(attr_name, None))

    # Translate the symbolic temperature keywords into numeric values.
    temperature = voice_attrs["temperature"]
    if temperature == "min":
        # ref: https://github.com/2noise/ChatTTS/issues/123#issue-2326908144
        voice_attrs["temperature"] = 1e-12
    elif temperature == "max":
        voice_attrs["temperature"] = 1

    # Drop everything that is still unset.
    return {key: value for key, value in voice_attrs.items() if value is not None}