Spaces:
Running
on
Zero
Running
on
Zero
init
Browse files- README.md +1 -1
- app.py +65 -0
- requirements.txt +3 -0
README.md
CHANGED
@@ -1,5 +1,5 @@
|
|
1 |
---
|
2 |
-
title: Japanese Parler
|
3 |
emoji: 🏢
|
4 |
colorFrom: gray
|
5 |
colorTo: pink
|
|
|
1 |
---
|
2 |
+
title: Japanese Parler-TTS Large Demo
|
3 |
emoji: 🏢
|
4 |
colorFrom: gray
|
5 |
colorTo: pink
|
app.py
ADDED
@@ -0,0 +1,65 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
import gradio as gr
import spaces
import torch
from loguru import logger
from parler_tts import ParlerTTSForConditionalGeneration
from rubyinserter import add_ruby
from transformers import AutoTokenizer

# Run inference on the first GPU when available, otherwise fall back to CPU.
device = "cuda:0" if torch.cuda.is_available() else "cpu"
# NOTE(review): "bate" looks like a typo for "beta", but this is the literal
# Hugging Face repo id the model is published under — do not "correct" it.
repo_id = "2121-8/japanese-parler-tts-large-bate"

logger.info(f"Using device: {device}")
logger.info(f"Loading model from: {repo_id}")
# Module-level load: the model weights are downloaded/initialised once at
# Space startup, not per request.
model = ParlerTTSForConditionalGeneration.from_pretrained(repo_id).to(device)
logger.success("Model loaded successfully")
model.eval()  # inference only — disable dropout/training behaviour
tokenizer = AutoTokenizer.from_pretrained(repo_id)
@spaces.GPU
def parler_tts(prompt: str, description: str):
    """Synthesize Japanese speech for *prompt*, conditioned on *description*.

    Returns a ``(status_message, audio)`` tuple: ``audio`` is ``None`` when
    validation fails, otherwise ``(sampling_rate, waveform_ndarray)`` as
    expected by ``gr.Audio``.
    """
    logger.info(f"Prompt: {prompt}")
    logger.info(f"Description: {description}")

    # Guard clauses: reject over-long inputs before touching the model.
    if len(prompt) > 150:
        return "Text is too long. Please keep it under 150 characters.", None
    if len(description) > 300:
        return "Description is too long. Please keep it under 300 characters.", None

    # Annotate the prompt with ruby (furigana) so the TTS model reads kanji correctly.
    prompt = add_ruby(prompt)
    logger.info(f"Prompt with ruby: {prompt}")

    desc_ids = tokenizer(description, return_tensors="pt").input_ids.to(device)
    text_ids = tokenizer(prompt, return_tensors="pt").input_ids.to(device)

    # Inference only — no gradients needed.
    with torch.no_grad():
        waveform = model.generate(input_ids=desc_ids, prompt_input_ids=text_ids)

    audio_arr = waveform.cpu().numpy().squeeze()
    return "Success", (model.config.sampling_rate, audio_arr)
# Usage notes shown at the top of the demo page.
md = """
# Japanese Parler-TTS Large (β版) デモ

第三者による [Japanese Parler-TTS Large (β版)](https://huggingface.co/2121-8/japanese-parler-tts-large-bate) の音声合成デモです。

- 入力文章: 150文字以内の文章を入力してください。
- 説明文章: 300文字以内の文章を入力してください。音声の特徴を説明する文章を入力します(多分)。
"""

with gr.Blocks() as app:
    # FIX: `md` was defined but never rendered — display the usage notes
    # (title + input limits) at the top of the page.
    gr.Markdown(md)
    prompt = gr.Textbox(label="入力文章")
    description = gr.Textbox(
        label="説明文章",
        value="A female speaker with a slightly high-pitched voice delivers her words at a moderate speed with a quite monotone tone in a confined environment, resulting in a quite clear audio recording.",
    )
    btn = gr.Button("生成")
    info_text = gr.Textbox(label="情報")
    audio = gr.Audio()

    # Wire the button to the synthesis function: status text + audio out.
    btn.click(
        fn=parler_tts,
        inputs=[prompt, description],
        outputs=[info_text, audio],
    )

app.launch()
requirements.txt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
git+https://github.com/huggingface/parler-tts.git
|
2 |
+
git+https://github.com/getuka/RubyInserter.git
|
3 |
+
loguru
|