Matthijs Hollemans committed on
Commit
1aa521f
·
1 Parent(s): f539e6d

draw words one-by-one

Browse files
Files changed (1) hide show
  1. app.py +11 -21
app.py CHANGED
@@ -21,16 +21,18 @@ font = ImageFont.truetype("Lato-Regular.ttf", 40)
21
  text_color = (255, 200, 200)
22
  highlight_color = (255, 255, 255)
23
 
24
- # checkpoint = "openai/whisper-tiny"
25
  # checkpoint = "openai/whisper-base"
26
- checkpoint = "openai/whisper-small"
27
  pipe = pipeline(model=checkpoint)
28
 
29
  # TODO: no longer need to set these manually once the models have been updated on the Hub
 
 
30
  # whisper-base
31
- # pipe.model.config.alignment_heads = [[3, 1], [4, 2], [4, 3], [4, 7], [5, 1], [5, 2], [5, 4], [5, 6]]
32
  # whisper-small
33
- pipe.model.config.alignment_heads = [[5, 3], [5, 9], [8, 0], [8, 4], [8, 7], [8, 8], [9, 0], [9, 7], [9, 9], [10, 5]]
34
 
35
  chunks = []
36
 
@@ -53,7 +55,6 @@ def make_frame(t):
53
  # for debugging: draw frame time
54
  #draw.text((20, 20), str(t), fill=text_color, font=font)
55
 
56
- space_length = draw.textlength(" ", font)
57
  x = margin_left
58
  y = margin_top
59
 
@@ -63,24 +64,13 @@ def make_frame(t):
63
  if chunk_end is None: chunk_end = max_duration
64
 
65
  if chunk_start <= t <= chunk_end:
66
- words = [x["text"] for x in chunk["words"]]
67
- word_times = [x["timestamp"] for x in chunk["words"]]
68
 
69
- for (word, times) in zip(words, word_times):
70
- word_length = draw.textlength(word + " ", font) - space_length
71
- if x + word_length >= video_width - margin_right:
72
- x = margin_left
73
- y += line_height
74
-
75
- if times[0] <= t <= times[1]:
76
- color = highlight_color
77
- draw.rectangle([x, y + line_height, x + word_length, y + line_height + 4], fill=color)
78
- else:
79
- color = text_color
80
-
81
- draw.text((x, y), word, fill=color, font=font)
82
- x += word_length + space_length
83
 
 
84
  break
85
 
86
  return np.array(image)
 
21
  text_color = (255, 200, 200)
22
  highlight_color = (255, 255, 255)
23
 
24
+ checkpoint = "openai/whisper-tiny"
25
  # checkpoint = "openai/whisper-base"
26
+ # checkpoint = "openai/whisper-small"
27
  pipe = pipeline(model=checkpoint)
28
 
29
  # TODO: no longer need to set these manually once the models have been updated on the Hub
30
+ # whisper-tiny
31
+ pipe.model.generation_config.alignment_heads = [[2, 2], [3, 0], [3, 2], [3, 3], [3, 4], [3, 5]]
32
  # whisper-base
33
+ # pipe.model.generation_config.alignment_heads = [[3, 1], [4, 2], [4, 3], [4, 7], [5, 1], [5, 2], [5, 4], [5, 6]]
34
  # whisper-small
35
+ # pipe.model.generation_config.alignment_heads = [[5, 3], [5, 9], [8, 0], [8, 4], [8, 7], [8, 8], [9, 0], [9, 7], [9, 9], [10, 5]]
36
 
37
  chunks = []
38
 
 
55
  # for debugging: draw frame time
56
  #draw.text((20, 20), str(t), fill=text_color, font=font)
57
 
 
58
  x = margin_left
59
  y = margin_top
60
 
 
64
  if chunk_end is None: chunk_end = max_duration
65
 
66
  if chunk_start <= t <= chunk_end:
67
+ word = chunk["text"]
68
+ word_length = draw.textlength(word, font)
69
 
70
+ x = (video_width - word_length) / 2
71
+ y = video_height / 2 - 20
 
 
 
 
 
 
 
 
 
 
 
 
72
 
73
+ draw.text((x, y), word, fill=highlight_color, font=font)
74
  break
75
 
76
  return np.array(image)