Spaces:
Runtime error
Runtime error
stupidog04
committed on
Commit
•
1211632
1
Parent(s):
64387f8
accelerate easyocr for bbox
Browse files
app.py
CHANGED
@@ -50,32 +50,40 @@ def inference(video, lang, time_step):
|
|
50 |
frame_rate = vidcap.get(cv2.CAP_PROP_FPS)
|
51 |
output_frames = []
|
52 |
temporal_profiles = []
|
53 |
-
|
54 |
-
compress_mp4 = True
|
55 |
|
56 |
# Get the positions of the largest boxes in the first frame
|
57 |
-
|
58 |
-
|
59 |
-
|
60 |
-
|
61 |
-
|
62 |
-
|
63 |
-
positions = [box_position(b) for b in largest_boxes]
|
64 |
-
temporal_profiles = [[] for _ in range(len(largest_boxes))]
|
65 |
-
success, frame = vidcap.read()
|
66 |
-
count += 1
|
67 |
|
68 |
# Match bboxes to position and store the text read by OCR
|
69 |
while success:
|
70 |
if count % (int(frame_rate * time_step)) == 0:
|
71 |
-
|
72 |
-
|
|
|
|
|
|
|
|
|
73 |
bbox_pos = box_position(box)
|
74 |
-
|
75 |
-
|
76 |
-
|
77 |
-
|
78 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
79 |
im = PIL.Image.fromarray(frame)
|
80 |
im_with_boxes = draw_boxes(im, bounds)
|
81 |
output_frames.append(np.array(im_with_boxes))
|
@@ -129,7 +137,7 @@ def inference(video, lang, time_step):
|
|
129 |
|
130 |
|
131 |
title = '🖼️Video to Multilingual OCR👁️Gradio'
|
132 |
-
description = 'Multilingual OCR which works conveniently on all devices in multiple languages.
|
133 |
article = "<p style='text-align: center'></p>"
|
134 |
|
135 |
examples = [
|
|
|
50 |
frame_rate = vidcap.get(cv2.CAP_PROP_FPS)
|
51 |
output_frames = []
|
52 |
temporal_profiles = []
|
53 |
+
compress_mp4 = False
|
|
|
54 |
|
55 |
# Get the positions of the largest boxes in the first frame
|
56 |
+
bounds = reader.readtext(frame)
|
57 |
+
im = PIL.Image.fromarray(frame)
|
58 |
+
im_with_boxes = draw_boxes(im, bounds)
|
59 |
+
largest_boxes = sorted(bounds, key=lambda x: box_size(x), reverse=True)
|
60 |
+
positions = [box_position(b) for b in largest_boxes]
|
61 |
+
temporal_profiles = [[] for _ in range(len(largest_boxes))]
|
|
|
|
|
|
|
|
|
62 |
|
63 |
# Match bboxes to position and store the text read by OCR
|
64 |
while success:
|
65 |
if count % (int(frame_rate * time_step)) == 0:
|
66 |
+
if count % (int(frame_rate * time_step) * 30) == 0:
|
67 |
+
# update the largest boxes every 30 sampled frames, i.e. every 30 * int(frame_rate * time_step) raw frames
|
68 |
+
bounds = reader.readtext(frame)
|
69 |
+
largest_boxes = sorted(bounds, key=lambda x: box_size(x), reverse=True)
|
70 |
+
positions = [box_position(b) for b in largest_boxes]
|
71 |
+
for i, box in enumerate(largest_boxes):
|
72 |
bbox_pos = box_position(box)
|
73 |
+
if np.linalg.norm(np.array(bbox_pos) - np.array(positions[i])) < 50:
|
74 |
+
x1, y1 = box[0][0]
|
75 |
+
x2, y2 = box[0][2]
|
76 |
+
box_width = x2 - x1
|
77 |
+
box_height = y2 - y1
|
78 |
+
ratio = 0.2
|
79 |
+
x1 = max(0, int(x1 - ratio * box_width))
|
80 |
+
x2 = min(frame.shape[1], int(x2 + ratio * box_width))
|
81 |
+
y1 = max(0, int(y1 - ratio * box_height))
|
82 |
+
y2 = min(frame.shape[0], int(y2 + ratio * box_height))
|
83 |
+
cropped_frame = frame[y1:y2, x1:x2]
|
84 |
+
text = reader.readtext(cropped_frame)
|
85 |
+
if text:
|
86 |
+
temporal_profiles[i].append((count / frame_rate, text[0][1]))
|
87 |
im = PIL.Image.fromarray(frame)
|
88 |
im_with_boxes = draw_boxes(im, bounds)
|
89 |
output_frames.append(np.array(im_with_boxes))
|
|
|
137 |
|
138 |
|
139 |
title = '🖼️Video to Multilingual OCR👁️Gradio'
|
140 |
+
description = 'Multilingual OCR which works conveniently on all devices in multiple languages.'
|
141 |
article = "<p style='text-align: center'></p>"
|
142 |
|
143 |
examples = [
|