Spaces:
Runtime error
Runtime error
Matthijs Hollemans
committed on
Commit
·
1aa521f
1
Parent(s):
f539e6d
draw words one-by-one
Browse files
app.py
CHANGED
@@ -21,16 +21,18 @@ font = ImageFont.truetype("Lato-Regular.ttf", 40)
|
|
21 |
text_color = (255, 200, 200)
|
22 |
highlight_color = (255, 255, 255)
|
23 |
|
24 |
-
|
25 |
# checkpoint = "openai/whisper-base"
|
26 |
-
checkpoint = "openai/whisper-small"
|
27 |
pipe = pipeline(model=checkpoint)
|
28 |
|
29 |
# TODO: no longer need to set these manually once the models have been updated on the Hub
|
|
|
|
|
30 |
# whisper-base
|
31 |
-
# pipe.model.
|
32 |
# whisper-small
|
33 |
-
pipe.model.
|
34 |
|
35 |
chunks = []
|
36 |
|
@@ -53,7 +55,6 @@ def make_frame(t):
|
|
53 |
# for debugging: draw frame time
|
54 |
#draw.text((20, 20), str(t), fill=text_color, font=font)
|
55 |
|
56 |
-
space_length = draw.textlength(" ", font)
|
57 |
x = margin_left
|
58 |
y = margin_top
|
59 |
|
@@ -63,24 +64,13 @@ def make_frame(t):
|
|
63 |
if chunk_end is None: chunk_end = max_duration
|
64 |
|
65 |
if chunk_start <= t <= chunk_end:
|
66 |
-
|
67 |
-
|
68 |
|
69 |
-
|
70 |
-
|
71 |
-
if x + word_length >= video_width - margin_right:
|
72 |
-
x = margin_left
|
73 |
-
y += line_height
|
74 |
-
|
75 |
-
if times[0] <= t <= times[1]:
|
76 |
-
color = highlight_color
|
77 |
-
draw.rectangle([x, y + line_height, x + word_length, y + line_height + 4], fill=color)
|
78 |
-
else:
|
79 |
-
color = text_color
|
80 |
-
|
81 |
-
draw.text((x, y), word, fill=color, font=font)
|
82 |
-
x += word_length + space_length
|
83 |
|
|
|
84 |
break
|
85 |
|
86 |
return np.array(image)
|
|
|
21 |
text_color = (255, 200, 200)
|
22 |
highlight_color = (255, 255, 255)
|
23 |
|
24 |
+
checkpoint = "openai/whisper-tiny"
|
25 |
# checkpoint = "openai/whisper-base"
|
26 |
+
# checkpoint = "openai/whisper-small"
|
27 |
pipe = pipeline(model=checkpoint)
|
28 |
|
29 |
# TODO: no longer need to set these manually once the models have been updated on the Hub
|
30 |
+
# whisper-tiny
|
31 |
+
pipe.model.generation_config.alignment_heads = [[2, 2], [3, 0], [3, 2], [3, 3], [3, 4], [3, 5]]
|
32 |
# whisper-base
|
33 |
+
# pipe.model.generation_config.alignment_heads = [[3, 1], [4, 2], [4, 3], [4, 7], [5, 1], [5, 2], [5, 4], [5, 6]]
|
34 |
# whisper-small
|
35 |
+
# pipe.model.generation_config.alignment_heads = [[5, 3], [5, 9], [8, 0], [8, 4], [8, 7], [8, 8], [9, 0], [9, 7], [9, 9], [10, 5]]
|
36 |
|
37 |
chunks = []
|
38 |
|
|
|
55 |
# for debugging: draw frame time
|
56 |
#draw.text((20, 20), str(t), fill=text_color, font=font)
|
57 |
|
|
|
58 |
x = margin_left
|
59 |
y = margin_top
|
60 |
|
|
|
64 |
if chunk_end is None: chunk_end = max_duration
|
65 |
|
66 |
if chunk_start <= t <= chunk_end:
|
67 |
+
word = chunk["text"]
|
68 |
+
word_length = draw.textlength(word, font)
|
69 |
|
70 |
+
x = (video_width - word_length) / 2
|
71 |
+
y = video_height / 2 - 20
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
72 |
|
73 |
+
draw.text((x, y), word, fill=highlight_color, font=font)
|
74 |
break
|
75 |
|
76 |
return np.array(image)
|