Matthijs Hollemans committed on
Commit
1aa521f
·
1 Parent(s): f539e6d

draw words one-by-one

Browse files
Files changed (1) hide show
  1. app.py +11 -21
app.py CHANGED
@@ -21,16 +21,18 @@ font = ImageFont.truetype("Lato-Regular.ttf", 40)
21
  text_color = (255, 200, 200)
22
  highlight_color = (255, 255, 255)
23
 
24
- # checkpoint = "openai/whisper-tiny"
25
  # checkpoint = "openai/whisper-base"
26
- checkpoint = "openai/whisper-small"
27
  pipe = pipeline(model=checkpoint)
28
 
29
  # TODO: no longer need to set these manually once the models have been updated on the Hub
 
 
30
  # whisper-base
31
- # pipe.model.config.alignment_heads = [[3, 1], [4, 2], [4, 3], [4, 7], [5, 1], [5, 2], [5, 4], [5, 6]]
32
  # whisper-small
33
- pipe.model.config.alignment_heads = [[5, 3], [5, 9], [8, 0], [8, 4], [8, 7], [8, 8], [9, 0], [9, 7], [9, 9], [10, 5]]
34
 
35
  chunks = []
36
 
@@ -53,7 +55,6 @@ def make_frame(t):
53
  # for debugging: draw frame time
54
  #draw.text((20, 20), str(t), fill=text_color, font=font)
55
 
56
- space_length = draw.textlength(" ", font)
57
  x = margin_left
58
  y = margin_top
59
 
@@ -63,24 +64,13 @@ def make_frame(t):
63
  if chunk_end is None: chunk_end = max_duration
64
 
65
  if chunk_start <= t <= chunk_end:
66
- words = [x["text"] for x in chunk["words"]]
67
- word_times = [x["timestamp"] for x in chunk["words"]]
68
 
69
- for (word, times) in zip(words, word_times):
70
- word_length = draw.textlength(word + " ", font) - space_length
71
- if x + word_length >= video_width - margin_right:
72
- x = margin_left
73
- y += line_height
74
-
75
- if times[0] <= t <= times[1]:
76
- color = highlight_color
77
- draw.rectangle([x, y + line_height, x + word_length, y + line_height + 4], fill=color)
78
- else:
79
- color = text_color
80
-
81
- draw.text((x, y), word, fill=color, font=font)
82
- x += word_length + space_length
83
 
 
84
  break
85
 
86
  return np.array(image)
 
21
  text_color = (255, 200, 200)
22
  highlight_color = (255, 255, 255)
23
 
24
+ checkpoint = "openai/whisper-tiny"
25
  # checkpoint = "openai/whisper-base"
26
+ # checkpoint = "openai/whisper-small"
27
  pipe = pipeline(model=checkpoint)
28
 
29
  # TODO: no longer need to set these manually once the models have been updated on the Hub
30
+ # whisper-tiny
31
+ pipe.model.generation_config.alignment_heads = [[2, 2], [3, 0], [3, 2], [3, 3], [3, 4], [3, 5]]
32
  # whisper-base
33
+ # pipe.model.generation_config.alignment_heads = [[3, 1], [4, 2], [4, 3], [4, 7], [5, 1], [5, 2], [5, 4], [5, 6]]
34
  # whisper-small
35
+ # pipe.model.generation_config.alignment_heads = [[5, 3], [5, 9], [8, 0], [8, 4], [8, 7], [8, 8], [9, 0], [9, 7], [9, 9], [10, 5]]
36
 
37
  chunks = []
38
 
 
55
  # for debugging: draw frame time
56
  #draw.text((20, 20), str(t), fill=text_color, font=font)
57
 
 
58
  x = margin_left
59
  y = margin_top
60
 
 
64
  if chunk_end is None: chunk_end = max_duration
65
 
66
  if chunk_start <= t <= chunk_end:
67
+ word = chunk["text"]
68
+ word_length = draw.textlength(word, font)
69
 
70
+ x = (video_width - word_length) / 2
71
+ y = video_height / 2 - 20
 
 
 
 
 
 
 
 
 
 
 
 
72
 
73
+ draw.text((x, y), word, fill=highlight_color, font=font)
74
  break
75
 
76
  return np.array(image)