Spaces:
Running
on
Zero
Running
on
Zero
Add .gitignore, update requirements, and implement book processing utilities
- Added .gitignore to exclude specific files and directories.
- Updated SDK version in README.md.
- Created new utility functions for processing book XML files and retrieving chapter information.
- Added mock TTS implementation for local development.
- Updated UI content with additional information and warnings.
- .gitignore +3 -0
- README.md +3 -5
- app.py +168 -15
- lib/book_utils.py +66 -0
- lib/mock_tts.py +41 -0
- lib/ui_content.py +17 -4
- parse_chapters.py +32 -0
- requirements.txt +1 -1
- texts/processed/dorian_grey.xml +0 -0
- texts/processed/time_machine.xml +0 -0
- texts/processor.py +80 -0
- voices/voices/mock_voice.pt +3 -0
.gitignore
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
dorian_grey.txt
|
2 |
+
texts/time_machine.txt
|
3 |
+
*.pyc
|
README.md
CHANGED
@@ -4,10 +4,10 @@ emoji: 🎴
|
|
4 |
colorFrom: gray
|
5 |
colorTo: purple
|
6 |
sdk: gradio
|
7 |
-
sdk_version: 5.
|
8 |
app_file: app.py
|
9 |
pinned: true
|
10 |
-
short_description: Accelerated Text-To-Speech on Kokoro-82M
|
11 |
models:
|
12 |
- hexgrad/Kokoro-82M
|
13 |
---
|
@@ -42,6 +42,4 @@ Main dependencies:
|
|
42 |
- Transformers 4.47.1
|
43 |
- HuggingFace Hub ≥0.25.1
|
44 |
|
45 |
-
For a complete list, see requirements.txt.
|
46 |
-
|
47 |
-
|
|
|
4 |
colorFrom: gray
|
5 |
colorTo: purple
|
6 |
sdk: gradio
|
7 |
+
sdk_version: 5.10.0
|
8 |
app_file: app.py
|
9 |
pinned: true
|
10 |
+
short_description: Accelerated Text-To-Speech on Kokoro-82M
|
11 |
models:
|
12 |
- hexgrad/Kokoro-82M
|
13 |
---
|
|
|
42 |
- Transformers 4.47.1
|
43 |
- HuggingFace Hub ≥0.25.1
|
44 |
|
45 |
+
For a complete list, see requirements.txt.
|
|
|
|
app.py
CHANGED
@@ -1,14 +1,16 @@
|
|
1 |
import os
|
2 |
import gradio as gr
|
3 |
-
import spaces
|
4 |
import time
|
|
|
|
|
5 |
import matplotlib.pyplot as plt
|
6 |
import numpy as np
|
7 |
-
import
|
8 |
-
import os
|
9 |
-
from tts_model import TTSModel
|
10 |
from lib import format_audio_output
|
11 |
from lib.ui_content import header_html, demo_text_info
|
|
|
|
|
|
|
12 |
|
13 |
# Set HF_HOME for faster restarts with cached models/voices
|
14 |
os.environ["HF_HOME"] = "/data/.huggingface"
|
@@ -16,6 +18,16 @@ os.environ["HF_HOME"] = "/data/.huggingface"
|
|
16 |
# Create TTS model instance
|
17 |
model = TTSModel()
|
18 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
19 |
def initialize_model():
|
20 |
"""Initialize model and get voices"""
|
21 |
if model.model is None:
|
@@ -163,6 +175,7 @@ def create_performance_plot(metrics, voice_names):
|
|
163 |
|
164 |
return fig, metrics_text
|
165 |
|
|
|
166 |
# Create Gradio interface
|
167 |
with gr.Blocks(title="Kokoro TTS Demo", css="""
|
168 |
.equal-height {
|
@@ -170,20 +183,157 @@ with gr.Blocks(title="Kokoro TTS Demo", css="""
|
|
170 |
display: flex;
|
171 |
flex-direction: column;
|
172 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
173 |
""") as demo:
|
174 |
gr.HTML(header_html)
|
175 |
|
176 |
with gr.Row():
|
177 |
-
# Column 1: Text Input
|
178 |
-
with open("the_time_machine_hgwells.txt") as f:
|
179 |
-
text = f.readlines()[:200]
|
180 |
-
text = "".join(text)
|
181 |
with gr.Column(elem_classes="equal-height"):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
182 |
text_input = gr.TextArea(
|
183 |
-
label=
|
184 |
-
placeholder="Enter text here or upload a .txt file",
|
|
|
185 |
lines=10,
|
186 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
187 |
)
|
188 |
|
189 |
# Column 2: Controls
|
@@ -196,16 +346,19 @@ with gr.Blocks(title="Kokoro TTS Demo", css="""
|
|
196 |
|
197 |
def load_text_from_file(file_bytes):
|
198 |
if file_bytes is None:
|
199 |
-
return None
|
200 |
try:
|
201 |
-
|
|
|
|
|
|
|
202 |
except Exception as e:
|
203 |
raise gr.Error(f"Failed to read file: {str(e)}")
|
204 |
|
205 |
file_input.change(
|
206 |
fn=load_text_from_file,
|
207 |
inputs=[file_input],
|
208 |
-
outputs=[text_input]
|
209 |
)
|
210 |
|
211 |
with gr.Group():
|
@@ -231,7 +384,7 @@ with gr.Blocks(title="Kokoro TTS Demo", css="""
|
|
231 |
label="GPU Timeout (seconds)",
|
232 |
minimum=15,
|
233 |
maximum=120,
|
234 |
-
value=
|
235 |
step=1,
|
236 |
info="Maximum time allowed for GPU processing"
|
237 |
)
|
|
|
1 |
import os
|
2 |
import gradio as gr
|
|
|
3 |
import time
|
4 |
+
import math
|
5 |
+
import logging
|
6 |
import matplotlib.pyplot as plt
|
7 |
import numpy as np
|
8 |
+
# from lib.mock_tts import MockTTSModel
|
|
|
|
|
9 |
from lib import format_audio_output
|
10 |
from lib.ui_content import header_html, demo_text_info
|
11 |
+
from lib.book_utils import get_available_books, get_book_info, get_chapter_text
|
12 |
+
from lib.text_utils import count_tokens
|
13 |
+
from tts_model import TTSModel
|
14 |
|
15 |
# Set HF_HOME for faster restarts with cached models/voices
|
16 |
os.environ["HF_HOME"] = "/data/.huggingface"
|
|
|
18 |
# Create TTS model instance
|
19 |
model = TTSModel()
|
20 |
|
21 |
+
# Configure logging: DEBUG for the application, WARNING for matplotlib so its
# debug messages are suppressed.
logging.basicConfig(level=logging.DEBUG)
logging.getLogger('matplotlib').setLevel(logging.WARNING)
logger = logging.getLogger(__name__)
logger.debug("Starting app initialization...")

# The TTS model instance is created once by the `model = TTSModel()` statement
# above; it must not be instantiated a second time here.
|
30 |
+
|
31 |
def initialize_model():
|
32 |
"""Initialize model and get voices"""
|
33 |
if model.model is None:
|
|
|
175 |
|
176 |
return fig, metrics_text
|
177 |
|
178 |
+
|
179 |
# Create Gradio interface
|
180 |
with gr.Blocks(title="Kokoro TTS Demo", css="""
|
181 |
.equal-height {
|
|
|
183 |
display: flex;
|
184 |
flex-direction: column;
|
185 |
}
|
186 |
+
.token-label {
|
187 |
+
font-size: 1rem;
|
188 |
+
margin-bottom: 0.5rem;
|
189 |
+
}
|
190 |
+
.token-count {
|
191 |
+
color: #4169e1;
|
192 |
+
}
|
193 |
+
.centered-label {
|
194 |
+
display: flex;
|
195 |
+
justify-content: center;
|
196 |
+
align-items: center;
|
197 |
+
text-align: center;
|
198 |
+
margin: 10px 0;
|
199 |
+
}
|
200 |
""") as demo:
|
201 |
gr.HTML(header_html)
|
202 |
|
203 |
with gr.Row():
|
204 |
+
# Column 1: Text Input and Book Selection
|
|
|
|
|
|
|
205 |
with gr.Column(elem_classes="equal-height"):
|
206 |
+
# Book selection
|
207 |
+
books = get_available_books()
|
208 |
+
book_dropdown = gr.Dropdown(
|
209 |
+
label="Select Book",
|
210 |
+
choices=[book['label'] for book in books],
|
211 |
+
value=books[0]['label'] if books else None,
|
212 |
+
type="value",
|
213 |
+
allow_custom_value=True
|
214 |
+
)
|
215 |
+
|
216 |
+
# Initialize chapters for first book
|
217 |
+
initial_book = books[0]['value'] if books else None
|
218 |
+
initial_chapters = []
|
219 |
+
if initial_book:
|
220 |
+
book_path = os.path.join("texts/processed", initial_book)
|
221 |
+
_, chapters = get_book_info(book_path)
|
222 |
+
initial_chapters = [ch['title'] for ch in chapters]
|
223 |
+
|
224 |
+
# Chapter selection with initial chapters
|
225 |
+
chapter_dropdown = gr.Dropdown(
|
226 |
+
label="Select Chapter",
|
227 |
+
choices=initial_chapters,
|
228 |
+
value=initial_chapters[0] if initial_chapters else None,
|
229 |
+
type="value",
|
230 |
+
allow_custom_value=True
|
231 |
+
)
|
232 |
+
lab_tps = 175
|
233 |
+
lab_rts = 50
|
234 |
+
# Text input area with initial chapter text
|
235 |
+
initial_text = ""
|
236 |
+
if initial_chapters and initial_book:
|
237 |
+
book_path = os.path.join("texts/processed", initial_book)
|
238 |
+
_, chapters = get_book_info(book_path)
|
239 |
+
if chapters:
|
240 |
+
initial_text = get_chapter_text(book_path, chapters[0]['id'])
|
241 |
+
tokens = count_tokens(initial_text)
|
242 |
+
time_estimate = math.ceil(tokens / lab_tps)
|
243 |
+
output_estimate = (time_estimate * lab_rts)//60
|
244 |
+
initial_label = f'<div class="token-label">Text to speak <span class="token-count">Estimated {output_estimate} minutes in ~{time_estimate}s</span></div>'
|
245 |
+
else:
|
246 |
+
initial_label = '<div class="token-label">Text to speak</div>'
|
247 |
+
else:
|
248 |
+
initial_label = '<div class="token-label">Text to speak</div>'
|
249 |
+
|
250 |
+
def update_text_label(text):
    """Build the HTML label displayed for the text box.

    Returns the plain "Text to speak" label when ``text`` is empty. Otherwise
    the label also carries an estimate derived from ``count_tokens(text)`` and
    the ``lab_tps`` / ``lab_rts`` values defined earlier in this file.
    """
    plain_label = '<div class="token-label">Text to speak</div>'
    if text:
        n_tokens = count_tokens(text)
        # Seconds estimate: token count divided by lab_tps, rounded up.
        seconds = math.ceil(n_tokens / lab_tps)
        # Minutes estimate: seconds scaled by lab_rts, floored to whole minutes.
        minutes = (seconds * lab_rts) // 60
        return f'<div class="token-label">Text to speak <span class="token-count">Estimated {minutes} minutes in ~{seconds}s</span></div>'
    return plain_label
|
257 |
+
|
258 |
+
|
259 |
text_input = gr.TextArea(
|
260 |
+
label=None,
|
261 |
+
placeholder="Enter text here, select a chapter, or upload a .txt file",
|
262 |
+
value=initial_text,
|
263 |
lines=10,
|
264 |
+
show_label=False,
|
265 |
+
show_copy_button=True # Add copy button for convenience
|
266 |
+
)
|
267 |
+
with gr.Row(equal_height=True):
|
268 |
+
with gr.Column():
|
269 |
+
label_html = gr.HTML(initial_label, elem_classes="centered-label")
|
270 |
+
# Update label whenever text changes
|
271 |
+
text_input.change(
|
272 |
+
fn=update_text_label,
|
273 |
+
inputs=[text_input],
|
274 |
+
outputs=[label_html],
|
275 |
+
trigger_mode="always_last"
|
276 |
+
)
|
277 |
+
clear_btn = gr.Button("Clear Text", variant="secondary")
|
278 |
+
|
279 |
+
def clear_text():
    """Return an empty text value together with the plain default label."""
    empty_text = ""
    default_label = '<div class="token-label">Text to speak</div>'
    return empty_text, default_label
|
281 |
+
|
282 |
+
clear_btn.click(
|
283 |
+
fn=clear_text,
|
284 |
+
outputs=[text_input, label_html]
|
285 |
+
)
|
286 |
+
|
287 |
+
def update_chapters(book_name):
    """Refresh the chapter dropdown, text box and label for a selected book.

    Args:
        book_name: Display label of the book, matched against the ``label``
            entries of the module-level ``books`` list.

    Returns:
        A 3-tuple ``(chapter_dropdown_update, text, label_html)``. When no
        book is selected, or the label matches no known book, the dropdown is
        emptied, the text is "" and the plain label is returned.
    """
    def _nothing_selected():
        # update_text_label("") yields the plain "Text to speak" label.
        return gr.update(choices=[], value=None), "", update_text_label("")

    if not book_name:
        return _nothing_selected()

    # Map the display label back to the XML filename.
    book_file = next((book['value'] for book in books if book['label'] == book_name), None)
    if not book_file:
        return _nothing_selected()

    book_path = os.path.join("texts/processed", book_file)
    _, chapters = get_book_info(book_path)
    chapter_choices = [ch['title'] for ch in chapters]

    # get_book_info already returns every chapter's text, so use the first
    # chapter's entry directly instead of parsing the XML a second time.
    initial_text = chapters[0]['text'] if chapters else ""

    # Build the label with the shared helper so every code path shows the same
    # estimate format (previously this path used its own 150 tokens/s formula
    # and wording, which disagreed with update_text_label).
    label = update_text_label(initial_text)

    first_choice = chapter_choices[0] if chapter_choices else None
    return gr.update(choices=chapter_choices, value=first_choice), initial_text, label
|
307 |
+
|
308 |
+
def load_chapter_text(book_name, chapter_title):
    """Load the text and label for the selected chapter of the selected book.

    Args:
        book_name: Display label of the book, matched against the ``label``
            entries of the module-level ``books`` list.
        chapter_title: Title of the chapter, matched against the ``title``
            of the chapters returned by ``get_book_info``.

    Returns:
        A 2-tuple ``(text, label_html)``. When either argument is empty, or
        the book or chapter cannot be found, the text is "" and the plain
        label is returned.
    """
    # update_text_label("") yields the plain "Text to speak" label.
    not_found = ("", update_text_label(""))

    if not book_name or not chapter_title:
        return not_found

    # Map the display label back to the XML filename.
    book_file = next((book['value'] for book in books if book['label'] == book_name), None)
    if not book_file:
        return not_found

    book_path = os.path.join("texts/processed", book_file)
    _, chapters = get_book_info(book_path)

    # First chapter whose title matches; its text is already part of the
    # get_book_info result, so no second parse of the XML is needed.
    chapter = next((ch for ch in chapters if ch['title'] == chapter_title), None)
    if chapter is None:
        return not_found

    text = chapter['text']
    # Shared helper keeps the estimate format identical across all code paths.
    return text, update_text_label(text)
|
325 |
+
|
326 |
+
# Set up event handlers for book/chapter selection
|
327 |
+
book_dropdown.change(
|
328 |
+
fn=update_chapters,
|
329 |
+
inputs=[book_dropdown],
|
330 |
+
outputs=[chapter_dropdown, text_input, label_html]
|
331 |
+
)
|
332 |
+
|
333 |
+
chapter_dropdown.change(
|
334 |
+
fn=load_chapter_text,
|
335 |
+
inputs=[book_dropdown, chapter_dropdown],
|
336 |
+
outputs=[text_input, label_html]
|
337 |
)
|
338 |
|
339 |
# Column 2: Controls
|
|
|
346 |
|
347 |
def load_text_from_file(file_bytes):
    """Decode an uploaded file as UTF-8 and build the matching label.

    Args:
        file_bytes: Raw content of the uploaded file, or None when no file
            is present.

    Returns:
        A 2-tuple ``(text, label_html)``; ``(None, plain_label)`` when
        ``file_bytes`` is None.

    Raises:
        gr.Error: If the content cannot be decoded or the label cannot be
            built; the original exception is attached as the cause.
    """
    if file_bytes is None:
        # update_text_label("") yields the plain "Text to speak" label.
        return None, update_text_label("")
    try:
        text = file_bytes.decode('utf-8')
        # Shared helper keeps the estimate format identical to the one shown
        # for typed or chapter-loaded text.
        return text, update_text_label(text)
    except Exception as e:
        # Chain the cause so the underlying failure stays visible in logs.
        raise gr.Error(f"Failed to read file: {str(e)}") from e
|
357 |
|
358 |
file_input.change(
|
359 |
fn=load_text_from_file,
|
360 |
inputs=[file_input],
|
361 |
+
outputs=[text_input, label_html]
|
362 |
)
|
363 |
|
364 |
with gr.Group():
|
|
|
384 |
label="GPU Timeout (seconds)",
|
385 |
minimum=15,
|
386 |
maximum=120,
|
387 |
+
value=90,
|
388 |
step=1,
|
389 |
info="Maximum time allowed for GPU processing"
|
390 |
)
|
lib/book_utils.py
ADDED
@@ -0,0 +1,66 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import xml.etree.ElementTree as ET
|
2 |
+
import os
|
3 |
+
from typing import Dict, List, Tuple
|
4 |
+
from .text_utils import count_tokens
|
5 |
+
import logging
|
6 |
+
|
7 |
+
logger = logging.getLogger(__name__)
|
8 |
+
|
9 |
+
def get_available_books(processed_dir: str = "texts/processed") -> List[Dict[str, str]]:
    """Get list of available book XML files

    Args:
        processed_dir: Directory to scan for ``.xml`` files. Defaults to
            "texts/processed".

    Returns:
        List of dicts, sorted by filename, with keys:
            - value: filename with extension (for internal use)
            - label: display name without extension
        An empty list is returned when the directory does not exist.
    """
    # Same logger the module-level `logger` refers to (both use __name__).
    log = logging.getLogger(__name__)
    log.info("Checking directory: %s", processed_dir)

    try:
        # os.listdir returns entries in arbitrary order; sort so that the
        # first book (used as the default selection by callers) is stable.
        filenames = sorted(os.listdir(processed_dir))
    except FileNotFoundError:
        log.warning("Book directory not found: %s", processed_dir)
        return []

    books = []
    for filename in filenames:
        log.info("Found file: %s", filename)
        if filename.endswith('.xml'):
            books.append({
                'value': filename,
                'label': filename[:-4],  # Remove .xml extension for display
            })
    return books
|
28 |
+
|
29 |
+
def get_book_info(xml_path: str) -> Tuple[str, List[Dict]]:
    """Get book title and chapter information from XML file

    Args:
        xml_path: Path to an XML file whose root element carries a ``title``
            attribute and has ``chapter`` child elements.

    Returns:
        Tuple containing:
            - Book title (str); "" when the root has no ``title`` attribute
            - List of chapter dicts with keys: id, title, text

        A chapter without an ``id`` attribute gets ``chapter_<index>`` (its
        0-based position); a chapter without a ``title`` uses its id as title.
        ``text`` is the chapter text with its first line removed when the
        text spans more than one line.

    Raises:
        Whatever ``xml.etree.ElementTree.parse`` raises for a missing or
        malformed file.
    """
    root = ET.parse(xml_path).getroot()

    book_title = root.get('title')
    if book_title is None:
        # Keep the declared str return type even for a title-less root.
        book_title = ""

    chapters = []
    for index, chapter in enumerate(root.findall('chapter')):
        chapter_id = chapter.get('id')
        if chapter_id is None:
            chapter_id = f"chapter_{index}"

        chapter_title = chapter.get('title')
        if chapter_title is None:
            chapter_title = chapter_id

        raw_text = chapter.text.strip() if chapter.text else ""
        # Remove the first line; single-line text is kept as it is.
        _, newline, remainder = raw_text.partition("\n")
        body = remainder.strip() if newline else raw_text

        chapters.append({
            'id': chapter_id,
            'title': chapter_title,
            'text': body,
        })

    return book_title, chapters
|
54 |
+
|
55 |
+
def get_chapter_text(xml_path: str, chapter_id: str) -> str:
    """Return the text of the first chapter whose id equals ``chapter_id``.

    The chapters are read with ``get_book_info``; "" is returned when no
    chapter has the requested id.
    """
    chapter_entries = get_book_info(xml_path)[1]
    matching_texts = (
        entry['text'] for entry in chapter_entries if entry['id'] == chapter_id
    )
    return next(matching_texts, "")
|
62 |
+
|
63 |
+
def get_book_chapters(xml_path: str) -> List[Dict]:
    """Return one ``{'id', 'title'}`` dict per chapter, for use in a dropdown.

    The chapters are read with ``get_book_info``; the chapter text is left out.
    """
    summaries = []
    for entry in get_book_info(xml_path)[1]:
        summaries.append({'id': entry['id'], 'title': entry['title']})
    return summaries
|
lib/mock_tts.py
ADDED
@@ -0,0 +1,41 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# """Mock TTS implementation for local development"""
|
2 |
+
# import numpy as np
|
3 |
+
|
4 |
+
# class MockTTSModel:
|
5 |
+
# def __init__(self):
|
6 |
+
# self.model = None
|
7 |
+
|
8 |
+
# def initialize(self):
|
9 |
+
# """Mock initialization"""
|
10 |
+
# self.model = "mock_model"
|
11 |
+
# return True
|
12 |
+
|
13 |
+
# def list_voices(self):
|
14 |
+
# """Return mock list of voices"""
|
15 |
+
# return ["mock_voice_1", "mock_voice_2"]
|
16 |
+
|
17 |
+
# def generate_speech(self, text, voice_names, speed, gpu_timeout=90, progress_callback=None, progress_state=None, progress=None):
|
18 |
+
# """Generate mock audio data"""
|
19 |
+
# # Create mock audio data (1 second of silence)
|
20 |
+
# sample_rate = 22050
|
21 |
+
# duration = 1.0
|
22 |
+
# t = np.linspace(0, duration, int(sample_rate * duration))
|
23 |
+
# audio_array = np.zeros_like(t)
|
24 |
+
|
25 |
+
# # Mock metrics
|
26 |
+
# metrics = {
|
27 |
+
# "tokens_per_sec": [10.5, 11.2, 10.8],
|
28 |
+
# "rtf": [0.5, 0.48, 0.52],
|
29 |
+
# "total_time": 3,
|
30 |
+
# "total_tokens": 100
|
31 |
+
# }
|
32 |
+
|
33 |
+
# # Simulate progress updates
|
34 |
+
# if progress_callback and progress_state and progress:
|
35 |
+
# for i in range(3):
|
36 |
+
# progress_callback(i+1, 3, metrics["tokens_per_sec"][i],
|
37 |
+
# metrics["rtf"][i], progress_state,
|
38 |
+
# progress_state.get("start_time", 0),
|
39 |
+
# gpu_timeout, progress)
|
40 |
+
|
41 |
+
# return audio_array, duration, metrics
|
lib/ui_content.py
CHANGED
@@ -1,5 +1,18 @@
|
|
1 |
# HTML content for the header section
|
2 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
3 |
<div>
|
4 |
<!-- Top badges bar -->
|
5 |
<div style="display: flex; justify-content: flex-end; padding: 4px; gap: 8px; height: 32px; align-items: center;">
|
@@ -13,16 +26,16 @@ header_html = """
|
|
13 |
|
14 |
<div style="text-align: center; margin-bottom: 1rem;">
|
15 |
<h1 style="font-size: 1.75rem; font-weight: bold; color: #ffffff; margin-bottom: 0.5rem;">Kokoro TTS Demo</h1>
|
16 |
-
<p style="color: #d1d5db;">
|
17 |
</div>
|
18 |
|
19 |
<div style="display: flex; gap: 1rem;">
|
20 |
<div style="flex: 1; background: rgba(30, 58, 138, 0.3); border: 1px solid rgba(59, 130, 246, 0.3); padding: 0.5rem 1rem; border-radius: 6px; display: flex; align-items: center; justify-content: center;">
|
21 |
-
<span style="font-weight: 500; color: #60a5fa; text-align: center;"
|
22 |
</div>
|
23 |
|
24 |
<div style="flex: 1; background: rgba(147, 51, 234, 0.3); border: 1px solid rgba(168, 85, 247, 0.3); padding: 0.5rem 1rem; border-radius: 6px; display: flex; align-items: center; justify-content: center;">
|
25 |
-
<span style="font-weight: 500; color: #e879f9; text-align: center;"
|
26 |
</div>
|
27 |
</div>
|
28 |
</div>
|
|
|
1 |
# HTML content for the header section
|
2 |
+
|
3 |
+
header_title = """
|
4 |
+
Generates about 1 hour of audio per minute, with unexpected quality
|
5 |
+
""".strip()
|
6 |
+
|
7 |
+
time_button = """
|
8 |
+
⏱️ Small requests/Initial chunks can be slower due to warm-up
|
9 |
+
"""
|
10 |
+
|
11 |
+
warning_button = """
|
12 |
+
⚠️ 120-second maximum timeout per request
|
13 |
+
"""
|
14 |
+
|
15 |
+
header_html = f"""
|
16 |
<div>
|
17 |
<!-- Top badges bar -->
|
18 |
<div style="display: flex; justify-content: flex-end; padding: 4px; gap: 8px; height: 32px; align-items: center;">
|
|
|
26 |
|
27 |
<div style="text-align: center; margin-bottom: 1rem;">
|
28 |
<h1 style="font-size: 1.75rem; font-weight: bold; color: #ffffff; margin-bottom: 0.5rem;">Kokoro TTS Demo</h1>
|
29 |
+
<p style="color: #d1d5db;">{header_title}</p>
|
30 |
</div>
|
31 |
|
32 |
<div style="display: flex; gap: 1rem;">
|
33 |
<div style="flex: 1; background: rgba(30, 58, 138, 0.3); border: 1px solid rgba(59, 130, 246, 0.3); padding: 0.5rem 1rem; border-radius: 6px; display: flex; align-items: center; justify-content: center;">
|
34 |
+
<span style="font-weight: 500; color: #60a5fa; text-align: center;">{time_button}</span>
|
35 |
</div>
|
36 |
|
37 |
<div style="flex: 1; background: rgba(147, 51, 234, 0.3); border: 1px solid rgba(168, 85, 247, 0.3); padding: 0.5rem 1rem; border-radius: 6px; display: flex; align-items: center; justify-content: center;">
|
38 |
+
<span style="font-weight: 500; color: #e879f9; text-align: center;">{warning_button}</span>
|
39 |
</div>
|
40 |
</div>
|
41 |
</div>
|
parse_chapters.py
ADDED
@@ -0,0 +1,32 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import xml.etree.ElementTree as ET
|
2 |
+
|
3 |
+
def parse_chapters(xml_path, preview_chars=100):
    """Print the book title and a short preview of every chapter.

    Args:
        xml_path: Path to an XML file whose root element carries a ``title``
            attribute and has ``chapter`` child elements.
        preview_chars: Maximum number of characters of each chapter shown
            before the preview is cut and "..." is appended. Defaults to 100.

    Returns:
        None. All output goes to stdout.
    """
    root = ET.parse(xml_path).getroot()

    book_title = root.get('title')
    print(f"\nBook: {book_title}\n")

    for chapter in root.findall('chapter'):
        chapter_id = chapter.get('id')
        chapter_title = chapter.get('title')

        chapter_text = chapter.text.strip() if chapter.text else ""
        # Cut off the top line; single-line text is kept as it is.
        _, newline, remainder = chapter_text.partition("\n")
        if newline:
            chapter_text = remainder.strip()

        # Truncate only when the text is longer than the preview limit.
        if len(chapter_text) > preview_chars:
            preview = chapter_text[:preview_chars] + "..."
        else:
            preview = chapter_text

        print(f"=== {chapter_title} ({chapter_id}) ===")
        print(f"{preview}\n")
|
29 |
+
|
30 |
+
if __name__ == "__main__":
    # Script entry point: print the chapter previews of the Dorian Gray XML.
    parse_chapters("texts/processed/dorian_grey.xml")
|
requirements.txt
CHANGED
@@ -9,4 +9,4 @@ regex==2024.11.6
|
|
9 |
tiktoken==0.8.0
|
10 |
transformers==4.47.1
|
11 |
munch==4.0.0
|
12 |
-
matplotlib==3.4.3
|
|
|
9 |
tiktoken==0.8.0
|
10 |
transformers==4.47.1
|
11 |
munch==4.0.0
|
12 |
+
matplotlib==3.4.3
|
texts/processed/dorian_grey.xml
ADDED
The diff for this file is too large to render.
See raw diff
|
|
texts/processed/time_machine.xml
ADDED
The diff for this file is too large to render.
See raw diff
|
|
texts/processor.py
ADDED
@@ -0,0 +1,80 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# import re
|
2 |
+
# import os
|
3 |
+
# from xml.etree import ElementTree as ET
|
4 |
+
# from xml.dom import minidom
|
5 |
+
|
6 |
+
# def process_dorian_grey():
|
7 |
+
# # Create processed directory if it doesn't exist
|
8 |
+
# os.makedirs('texts/processed', exist_ok=True)
|
9 |
+
|
10 |
+
# # Read the file
|
11 |
+
# with open('texts/dorian_grey.txt', 'r', encoding='utf-8') as f:
|
12 |
+
# text = f.read()
|
13 |
+
|
14 |
+
# # Create root XML element
|
15 |
+
# root = ET.Element("book")
|
16 |
+
# root.set("title", "The Picture of Dorian Gray")
|
17 |
+
|
18 |
+
# # Split into chapters using regex
|
19 |
+
# # Look for chapter markers and keep them with the content
|
20 |
+
# chapter_pattern = r'(CHAPTER [IVXLC\d]+\..*?)(?=CHAPTER [IVXLC\d]+\.|$)'
|
21 |
+
# chapters = re.findall(chapter_pattern, text, re.DOTALL)
|
22 |
+
|
23 |
+
# # Process chapters
|
24 |
+
# for i, content in enumerate(chapters):
|
25 |
+
# # Create chapter element
|
26 |
+
# chapter = ET.SubElement(root, "chapter")
|
27 |
+
# chapter.set("id", f"chapter_{i}")
|
28 |
+
# chapter.set("title", f"Chapter {i}")
|
29 |
+
# chapter.text = content.strip()
|
30 |
+
|
31 |
+
# # Pretty print XML
|
32 |
+
# xml_str = minidom.parseString(ET.tostring(root)).toprettyxml(indent=" ")
|
33 |
+
|
34 |
+
# # Save as XML
|
35 |
+
# output_path = 'texts/processed/dorian_grey.xml'
|
36 |
+
# with open(output_path, 'w', encoding='utf-8') as f:
|
37 |
+
# f.write(xml_str)
|
38 |
+
|
39 |
+
# print(f"Processed and saved to {output_path}")
|
40 |
+
|
41 |
+
# def process_time_machine():
|
42 |
+
# # Create processed directory if it doesn't exist
|
43 |
+
# os.makedirs('texts/processed', exist_ok=True)
|
44 |
+
|
45 |
+
# # Read the file
|
46 |
+
# with open('texts/time_machine.txt', 'r', encoding='utf-8') as f:
|
47 |
+
# text = f.read()
|
48 |
+
|
49 |
+
# # Create root XML element
|
50 |
+
# root = ET.Element("book")
|
51 |
+
# root.set("title", "The Time Machine")
|
52 |
+
|
53 |
+
# # Split into chapters using 4 or more newlines as separator
|
54 |
+
# chapters = re.split(r'\n{4,}', text)
|
55 |
+
|
56 |
+
# # Track actual chapter number (no skipping)
|
57 |
+
# chapter_num = 1
|
58 |
+
|
59 |
+
# # Process chapters
|
60 |
+
# for content in chapters:
|
61 |
+
# if content.strip(): # Only process non-empty chapters
|
62 |
+
# # Create chapter element
|
63 |
+
# chapter = ET.SubElement(root, "chapter")
|
64 |
+
# chapter.set("id", f"chapter_{chapter_num-1}") # Keep 0-based ids
|
65 |
+
# chapter.set("title", f"Chapter {chapter_num}")
|
66 |
+
# chapter.text = content.strip()
|
67 |
+
# chapter_num += 1
|
68 |
+
|
69 |
+
# # Pretty print XML
|
70 |
+
# xml_str = minidom.parseString(ET.tostring(root)).toprettyxml(indent=" ")
|
71 |
+
|
72 |
+
# # Save as XML
|
73 |
+
# output_path = 'texts/processed/time_machine.xml'
|
74 |
+
# with open(output_path, 'w', encoding='utf-8') as f:
|
75 |
+
# f.write(xml_str)
|
76 |
+
|
77 |
+
# print(f"Processed and saved to {output_path}")
|
78 |
+
|
79 |
+
# if __name__ == "__main__":
|
80 |
+
# process_time_machine()
|
voices/voices/mock_voice.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:912f5af0b31abadd4c60aae1d295f9f2b05bf925b35bb1bdc8b928fbf0dc052b
|
3 |
+
size 15
|