litagin committed on
Commit
f8672c1
1 Parent(s): e41623e
Files changed (3) hide show
  1. README.md +1 -1
  2. app.py +65 -0
  3. requirements.txt +3 -0
README.md CHANGED
@@ -1,5 +1,5 @@
1
  ---
2
- title: Japanese Parler Tts Large Demo
3
  emoji: 🏢
4
  colorFrom: gray
5
  colorTo: pink
 
1
  ---
2
+ title: Japanese Parler-TTS Large Demo
3
  emoji: 🏢
4
  colorFrom: gray
5
  colorTo: pink
app.py ADDED
@@ -0,0 +1,65 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import spaces
3
+ import torch
4
+ from loguru import logger
5
+ from parler_tts import ParlerTTSForConditionalGeneration
6
+ from rubyinserter import add_ruby
7
+ from transformers import AutoTokenizer
8
+
9
# --- Model bootstrap: executed once when the module is imported ---

# Use the first CUDA device when torch reports one; otherwise fall back to CPU.
if torch.cuda.is_available():
    device = "cuda:0"
else:
    device = "cpu"

# Repository id from which both the model weights and the tokenizer are loaded.
repo_id = "2121-8/japanese-parler-tts-large-bate"

logger.info(f"Using device: {device}")
logger.info(f"Loading model from: {repo_id}")

# Load the checkpoint first, then move it onto the selected device.
model = ParlerTTSForConditionalGeneration.from_pretrained(repo_id)
model = model.to(device)
logger.success("Model loaded successfully")

# This app only runs generation, so switch the module to evaluation mode.
model.eval()

# The tokenizer comes from the same repository as the model.
tokenizer = AutoTokenizer.from_pretrained(repo_id)
18
+
19
+
20
@spaces.GPU
def parler_tts(prompt: str, description: str):
    """Synthesize speech for ``prompt`` using the voice given by ``description``.

    Args:
        prompt: Text to be spoken. Must not be blank and must be at most
            150 characters long.
        description: Text describing the desired voice; it is tokenized and
            passed to the model as ``input_ids``. Must be at most 300
            characters long.

    Returns:
        A ``(message, audio)`` pair matching the two Gradio outputs. On
        success ``message`` is ``"Success"`` and ``audio`` is
        ``(sampling_rate, waveform)`` where ``waveform`` is a NumPy array.
        When the input is rejected, ``message`` explains why and ``audio``
        is ``None``.
    """
    logger.info(f"Prompt: {prompt}")
    logger.info(f"Description: {description}")

    # Reject blank input before touching the model: there is nothing to speak,
    # and generating from an empty prompt would only waste a GPU call.
    if not prompt.strip():
        return "Text is empty. Please enter some text.", None
    if len(prompt) > 150:
        return "Text is too long. Please keep it under 150 characters.", None
    if len(description) > 300:
        return "Description is too long. Please keep it under 300 characters.", None

    # Pre-process the text with rubyinserter's add_ruby before tokenizing.
    prompt = add_ruby(prompt)
    logger.info(f"Prompt with ruby: {prompt}")

    # The description conditions the voice; the prompt is the text to speak.
    input_ids = tokenizer(description, return_tensors="pt").input_ids.to(device)
    prompt_input_ids = tokenizer(prompt, return_tensors="pt").input_ids.to(device)

    # Inference only, so gradient tracking is disabled.
    with torch.no_grad():
        generation = model.generate(
            input_ids=input_ids, prompt_input_ids=prompt_input_ids
        )

    # Move the result to the CPU and drop singleton dimensions for gr.Audio.
    audio_arr = generation.cpu().numpy().squeeze()
    return "Success", (model.config.sampling_rate, audio_arr)
38
+
39
+
40
# Usage notes rendered at the top of the demo page (user-facing, in Japanese):
# title, link to the model, and the 150/300 character limits for the inputs.
md = """
# Japanese Parler-TTS Large (β版) デモ

第三者による [Japanese Parler-TTS Large (β版)](https://huggingface.co/2121-8/japanese-parler-tts-large-bate) の音声合成デモです。

- 入力文章: 150文字以内の文章を入力してください。
- 説明文章: 300文字以内の文章を入力してください。音声の特徴を説明する文章を入力します(多分)。
"""

with gr.Blocks() as app:
    # Show the usage notes above the inputs so users see the length limits.
    gr.Markdown(md)

    # Text to synthesize.
    prompt = gr.Textbox(label="入力文章")
    # Voice description, pre-filled with a default English description.
    description = gr.Textbox(
        label="説明文章",
        value="A female speaker with a slightly high-pitched voice delivers her words at a moderate speed with a quite monotone tone in a confined environment, resulting in a quite clear audio recording.",
    )
    btn = gr.Button("生成")
    # Status / validation message returned by parler_tts.
    info_text = gr.Textbox(label="情報")
    audio = gr.Audio()

    # parler_tts returns (message, audio), mapped onto the two outputs in order.
    btn.click(
        fn=parler_tts,
        inputs=[prompt, description],
        outputs=[info_text, audio],
    )

app.launch()
requirements.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ git+https://github.com/huggingface/parler-tts.git
2
+ git+https://github.com/getuka/RubyInserter.git
3
+ loguru