Spaces: Runtime error
stupidog04 committed
Commit 38e090f • 1 Parent(s): 18bc0db
Update app.py
app.py CHANGED
@@ -1,6 +1,6 @@
 import numpy as np
 import PIL
-from PIL import Image, ImageDraw
+from PIL import Image, ImageDraw, ImageFont
 import gradio as gr
 import torch
 import easyocr
@@ -50,8 +50,9 @@ def inference(video, lang, time_step):
     frame_rate = vidcap.get(cv2.CAP_PROP_FPS)
     output_frames = []
     temporal_profiles = []
-    max_boxes =
-
+    max_boxes = 6
+    compress_mp4 = True
+
     # Get the positions of the largest boxes in the first frame
     while success and not bounds:
         if count == 0:
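The hunk above sits inside inference(video, lang, time_step), whose frame-reading loop is not shown. As a point of reference, here is a minimal sketch of how one frame per time_step seconds can be sampled with OpenCV; sample_frames is a hypothetical helper built only from the visible names (vidcap, frame_rate, time_step), not a function in app.py.

import cv2

def sample_frames(video_path, time_step=1.0):
    # Keep one frame every `time_step` seconds, derived from the video's FPS.
    vidcap = cv2.VideoCapture(video_path)
    frame_rate = vidcap.get(cv2.CAP_PROP_FPS)
    stride = max(1, int(round(frame_rate * time_step)))  # frames between samples
    frames, count = [], 0
    success, image = vidcap.read()
    while success:
        if count % stride == 0:
            frames.append(image)
        success, image = vidcap.read()
        count += 1
    vidcap.release()
    return frames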
@@ -89,30 +90,42 @@ def inference(video, lang, time_step):
     frames_total = int(vidcap.get(cv2.CAP_PROP_FRAME_COUNT))
 
     # Define the codec and create VideoWriter object.
-
-
-
-
-
+    if compress_mp4:
+        temp = f"{Path(output).stem}_temp{Path(output).suffix}"
+        output_video = cv2.VideoWriter(
+            temp, cv2.VideoWriter_fourcc(*"mp4v"), fps, (width, height)
+        )
+    else:
+        output_video = cv2.VideoWriter(output, cv2.VideoWriter_fourcc(*"mp4v"), fps, (width, height))
     for frame in output_frames:
         output_video.write(frame)
+
+    # Draw boxes with box indices in the first frame of the output video
+    im = Image.fromarray(output_frames[0])
+    draw = ImageDraw.Draw(im)
+    font_size = 30
+    font_path = "/usr/share/fonts/truetype/dejavu/DejaVuSans.ttf"
+    for i, box in enumerate(largest_boxes):
+        draw.text((box_position(box)), f"Box {i+1}", fill='red', font=ImageFont.truetype(font_path, font_size))
+
     output_video.release()
     vidcap.release()
 
-
-
-
-
-
+    if compress_mp4:
+        # Compressing the video for smaller size and web compatibility.
+        os.system(
+            f"ffmpeg -y -i {temp} -c:v libx264 -b:v 5000k -minrate 1000k -maxrate 8000k -pass 1 -c:a aac -f mp4 /dev/null && ffmpeg -y -i {temp} -c:v libx264 -b:v 5000k -minrate 1000k -maxrate 8000k -pass 2 -c:a aac -movflags faststart {output}"
+        )
+        os.system(f"rm -rf {temp} ffmpeg2pass-0.log ffmpeg2pass-0.log.mbtree")
 
     # Format temporal profiles as a DataFrame
-
+    df_list = []
     for i, profile in enumerate(temporal_profiles):
         for t, text in profile:
-
-
-
-
+            df_list.append({"Box": f"Box {i+1}", "Time (s)": t, "Text": text})
+        df_list.append({"Box": f"", "Time (s)": "", "Text": ""})
+    df = pd.concat([pd.DataFrame(df_list)])
+    return output, im, df
 
 
 title = '🖼️Video to Multilingual OCR👁️Gradio'
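The os.system call added above chains a two-pass libx264 encode: pass 1 writes only a rate-statistics log (its video output is discarded to /dev/null), pass 2 re-encodes using those statistics, and -movflags faststart puts the MP4 moov atom at the front so browsers can start playback before the file finishes downloading. Below is a minimal sketch of the same encode with subprocess.run instead of os.system, which avoids shell interpolation of the file names; this is a hypothetical variant, not what the commit uses.

import subprocess
from pathlib import Path

def compress_two_pass(temp, output, bitrate="5000k"):
    common = ["-c:v", "libx264", "-b:v", bitrate,
              "-minrate", "1000k", "-maxrate", "8000k", "-c:a", "aac"]
    # Pass 1: collect rate statistics only; the encoded video is discarded.
    subprocess.run(["ffmpeg", "-y", "-i", temp, *common,
                    "-pass", "1", "-f", "mp4", "/dev/null"], check=True)
    # Pass 2: encode for real, with the moov atom up front for web playback.
    subprocess.run(["ffmpeg", "-y", "-i", temp, *common,
                    "-pass", "2", "-movflags", "faststart", output], check=True)
    # Remove the intermediate file and ffmpeg's pass-log files.
    for leftover in (temp, "ffmpeg2pass-0.log", "ffmpeg2pass-0.log.mbtree"):
        Path(leftover).unlink(missing_ok=True)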
@@ -120,8 +133,7 @@ description = 'Multilingual OCR which works conveniently on all devices in multi
 article = "<p style='text-align: center'></p>"
 
 examples = [
-
-    ['20-Books.jpg',['en']],['COVID.png',['en']],['chinese.jpg',['ch_sim', 'en']],['japanese.jpg',['ja', 'en']],['Hindi.jpeg',['hi', 'en']]
+    ['test.mp4',['en']]
 ]
 
 css = ".output_image, .input_image {height: 40rem !important; width: 100% !important;}"
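One note on the DataFrame assembly in the inference() hunk above: pd.concat([pd.DataFrame(df_list)]) concatenates a single frame, so a direct constructor produces the same table. A sketch of the equivalent construction, with invented sample data for illustration:

import pandas as pd

temporal_profiles = [[(0.0, "STOP"), (1.0, "STOP")],  # Box 1 readings (example data)
                     [(0.0, "EXIT")]]                 # Box 2 readings (example data)

rows = []
for i, profile in enumerate(temporal_profiles):
    for t, text in profile:
        rows.append({"Box": f"Box {i+1}", "Time (s)": t, "Text": text})
    rows.append({"Box": "", "Time (s)": "", "Text": ""})  # blank spacer row between boxes
df = pd.DataFrame(rows)  # same result as pd.concat([pd.DataFrame(rows)])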
@@ -140,19 +152,19 @@ choices = [
 gr.Interface(
     inference,
     [
-        # gr.inputs.Image(type='file', label='Input Image'),
         gr.inputs.Video(label='Input Video'),
         gr.inputs.CheckboxGroup(choices, type="value", default=['en'], label='Language'),
         gr.inputs.Number(label='Time Step (in seconds)', default=1.0)
     ],
     [
         gr.outputs.Video(label='Output Video'),
-        gr.outputs.
+        gr.outputs.Image(label='Output Preview', type='numpy'),
+        gr.outputs.Dataframe(headers=['Box', 'Time (s)', 'Text'], type='pandas')
     ],
     title=title,
     description=description,
     article=article,
-
+    examples=examples,
     css=css,
     enable_queue=True
 ).launch(debug=True)
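The two output components added here must line up positionally with the tuple the new code returns (return output, im, df). Below is a self-contained sketch of that contract with a stub in place of the real pipeline; gr.inputs/gr.outputs is the legacy Gradio 2.x-era API this Space targets, and stub_inference is invented for illustration.

import numpy as np
import pandas as pd
import gradio as gr

def stub_inference(video, lang, time_step):
    # Stand-ins for the real pipeline's (video path, preview image, table).
    preview = np.zeros((64, 64, 3), dtype=np.uint8)
    df = pd.DataFrame([{"Box": "Box 1", "Time (s)": 0.0, "Text": "hello"}])
    return video, preview, df

gr.Interface(
    stub_inference,
    [
        gr.inputs.Video(label='Input Video'),
        gr.inputs.CheckboxGroup(['en'], type="value", default=['en'], label='Language'),
        gr.inputs.Number(label='Time Step (in seconds)', default=1.0),
    ],
    [
        gr.outputs.Video(label='Output Video'),                                   # <- output
        gr.outputs.Image(label='Output Preview', type='numpy'),                   # <- im
        gr.outputs.Dataframe(headers=['Box', 'Time (s)', 'Text'], type='pandas')  # <- df
    ],
).launch()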
|