Spaces:
Sleeping
Sleeping
File size: 3,008 Bytes
01e655b 02e90e4 01e655b 02e90e4 01e655b 02e90e4 01e655b |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 |
from pydantic import BaseModel
from typing import Any, Union
import torch
from modules.speaker import Speaker, speaker_mgr
from modules.data import styles_mgr
from pydub import AudioSegment
from modules.ssml import merge_prompt
from enum import Enum
class ParamsTypeError(Exception):
pass
class AudioFormat(str, Enum):
mp3 = "mp3"
wav = "wav"
class BaseResponse(BaseModel):
message: str
data: Any
class Config:
json_encoders = {
torch.Tensor: lambda v: v.tolist(),
Speaker: lambda v: v.to_json(),
}
def success_response(data: Any, message: str = "Success") -> BaseResponse:
return BaseResponse(message=message, data=data)
def wav_to_mp3(wav_data, bitrate="48k"):
audio = AudioSegment.from_wav(
wav_data,
)
return audio.export(format="mp3", bitrate=bitrate)
def to_number(value, t, default=0):
try:
number = t(value)
return number
except (ValueError, TypeError) as e:
return default
def calc_spk_style(spk: Union[str, int], style: Union[str, int]):
voice_attrs = {
"spk": None,
"seed": None,
"prompt1": None,
"prompt2": None,
"prefix": None,
"temperature": None,
}
params = {}
if type(spk) == int:
voice_attrs["spk"] = spk
elif type(spk) == str:
if spk.isdigit():
voice_attrs["spk"] = int(spk)
else:
spker = speaker_mgr.get_speaker(spk)
if spker:
voice_attrs["spk"] = spker
if type(style) == int or type(style) == float:
raise ParamsTypeError("The style parameter cannot be a number.")
elif type(style) == str and style != "":
if style.isdigit():
raise ParamsTypeError("The style parameter cannot be a number.")
else:
style_params = styles_mgr.find_params_by_name(style)
for k, v in style_params.items():
params[k] = v
voice_attrs = {k: v for k, v in voice_attrs.items() if v is not None}
merge_prompt(voice_attrs, params)
voice_attrs["spk"] = params.get("spk", voice_attrs.get("spk", None))
voice_attrs["seed"] = params.get("seed", voice_attrs.get("seed", None))
voice_attrs["temperature"] = params.get(
"temp", voice_attrs.get("temperature", None)
)
voice_attrs["prefix"] = params.get("prefix", voice_attrs.get("prefix", None))
voice_attrs["prompt1"] = params.get("prompt1", voice_attrs.get("prompt1", None))
voice_attrs["prompt2"] = params.get("prompt2", voice_attrs.get("prompt2", None))
if voice_attrs.get("temperature", "") == "min":
# ref: https://github.com/2noise/ChatTTS/issues/123#issue-2326908144
voice_attrs["temperature"] = 0.000000000001
if voice_attrs.get("temperature", "") == "max":
voice_attrs["temperature"] = 1
voice_attrs = {k: v for k, v in voice_attrs.items() if v is not None}
# print(voice_attrs)
return voice_attrs
|