stupidog04 committed on
Commit 38e090f
1 Parent(s): 18bc0db

Update app.py

Files changed (1)
  1. app.py +35 -23
app.py CHANGED
@@ -1,6 +1,6 @@
 import numpy as np
 import PIL
-from PIL import Image, ImageDraw
+from PIL import Image, ImageDraw, ImageFont
 import gradio as gr
 import torch
 import easyocr
@@ -50,8 +50,9 @@ def inference(video, lang, time_step):
     frame_rate = vidcap.get(cv2.CAP_PROP_FPS)
     output_frames = []
     temporal_profiles = []
-    max_boxes = 10
-
+    max_boxes = 6
+    compress_mp4 = True
+
     # Get the positions of the largest boxes in the first frame
     while success and not bounds:
         if count == 0:
@@ -89,30 +90,42 @@ def inference(video, lang, time_step):
     frames_total = int(vidcap.get(cv2.CAP_PROP_FRAME_COUNT))

     # Define the codec and create VideoWriter object.
-    temp = f"{Path(output).stem}_temp{Path(output).suffix}"
-    output_video = cv2.VideoWriter(
-        temp, cv2.VideoWriter_fourcc(*"mp4v"), fps, (width, height)
-    )
-    # output_video = cv2.VideoWriter(output, cv2.VideoWriter_fourcc(*"mp4v"), fps, (width, height))
+    if compress_mp4:
+        temp = f"{Path(output).stem}_temp{Path(output).suffix}"
+        output_video = cv2.VideoWriter(
+            temp, cv2.VideoWriter_fourcc(*"mp4v"), fps, (width, height)
+        )
+    else:
+        output_video = cv2.VideoWriter(output, cv2.VideoWriter_fourcc(*"mp4v"), fps, (width, height))
     for frame in output_frames:
         output_video.write(frame)
+
+    # Draw boxes with box indices in the first frame of the output video
+    im = Image.fromarray(output_frames[0])
+    draw = ImageDraw.Draw(im)
+    font_size = 30
+    font_path = "/usr/share/fonts/truetype/dejavu/DejaVuSans.ttf"
+    for i, box in enumerate(largest_boxes):
+        draw.text((box_position(box)), f"Box {i+1}", fill='red', font=ImageFont.truetype(font_path, font_size))
+
     output_video.release()
     vidcap.release()

-    # Compressing the video for smaller size and web compatibility.
-    os.system(
-        f"ffmpeg -y -i {temp} -c:v libx264 -b:v 5000k -minrate 1000k -maxrate 8000k -pass 1 -c:a aac -f mp4 /dev/null && ffmpeg -y -i {temp} -c:v libx264 -b:v 5000k -minrate 1000k -maxrate 8000k -pass 2 -c:a aac -movflags faststart {output}"
-    )
-    os.system(f"rm -rf {temp} ffmpeg2pass-0.log ffmpeg2pass-0.log.mbtree")
+    if compress_mp4:
+        # Compressing the video for smaller size and web compatibility.
+        os.system(
+            f"ffmpeg -y -i {temp} -c:v libx264 -b:v 5000k -minrate 1000k -maxrate 8000k -pass 1 -c:a aac -f mp4 /dev/null && ffmpeg -y -i {temp} -c:v libx264 -b:v 5000k -minrate 1000k -maxrate 8000k -pass 2 -c:a aac -movflags faststart {output}"
+        )
+        os.system(f"rm -rf {temp} ffmpeg2pass-0.log ffmpeg2pass-0.log.mbtree")

     # Format temporal profiles as a DataFrame
-    df = pd.DataFrame(columns=["Box", "Time (s)", "Text"])
+    df_list = []
     for i, profile in enumerate(temporal_profiles):
         for t, text in profile:
-            df = df.append({"Box": f"Box {i+1}", "Time (s)": t, "Text": text}, ignore_index=True)
-
-    return output, df
-
+            df_list.append({"Box": f"Box {i+1}", "Time (s)": t, "Text": text})
+        df_list.append({"Box": f"", "Time (s)": "", "Text": ""})
+    df = pd.concat([pd.DataFrame(df_list)])
+    return output, im, df


 title = '🖼️Video to Multilingual OCR👁️Gradio'
@@ -120,8 +133,7 @@ description = 'Multilingual OCR which works conveniently on all devices in multi
 article = "<p style='text-align: center'></p>"

 examples = [
-    #['PleaseRepeatLouder.jpg',['ja']],['ProhibitedInWhiteHouse.JPG',['en']],['BeautyIsTruthTruthisBeauty.JPG',['en']],
-    ['20-Books.jpg',['en']],['COVID.png',['en']],['chinese.jpg',['ch_sim', 'en']],['japanese.jpg',['ja', 'en']],['Hindi.jpeg',['hi', 'en']]
+    ['test.mp4',['en']]
 ]

 css = ".output_image, .input_image {height: 40rem !important; width: 100% !important;}"
@@ -140,19 +152,19 @@ choices = [
 gr.Interface(
     inference,
     [
-        # gr.inputs.Image(type='file', label='Input Image'),
         gr.inputs.Video(label='Input Video'),
         gr.inputs.CheckboxGroup(choices, type="value", default=['en'], label='Language'),
         gr.inputs.Number(label='Time Step (in seconds)', default=1.0)
     ],
     [
         gr.outputs.Video(label='Output Video'),
-        gr.outputs.Dataframe(headers=['Box', 'Time (s)', 'Text'])
+        gr.outputs.Image(label='Output Preview', type='numpy'),
+        gr.outputs.Dataframe(headers=['Box', 'Time (s)', 'Text'], type='pandas')
     ],
     title=title,
     description=description,
     article=article,
-    # examples=examples,
+    examples=examples,
     css=css,
     enable_queue=True
 ).launch(debug=True)
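A few notes on the main changes, with small illustrative sketches in Python.

The new 'Output Preview' image is built by converting the first processed frame to a PIL image and stamping a red "Box N" label on each tracked region; the commit relies on largest_boxes and a box_position() helper defined elsewhere in app.py. The sketch below is only an approximation of the same idea, assuming EasyOCR-style four-corner boxes labelled at their top-left corner. It falls back to PIL's built-in font because the hard-coded DejaVuSans path only exists on typical Debian/Ubuntu images, and it converts the OpenCV BGR frame to RGB first (the commit passes the frame to Image.fromarray unchanged).

import os
import cv2
import numpy as np
from PIL import Image, ImageDraw, ImageFont

def label_boxes(frame_bgr, boxes, font_size=30):
    """Return a PIL image with 'Box N' drawn at each box's top-left corner.

    frame_bgr: HxWx3 uint8 array as produced by cv2.VideoCapture.read().
    boxes: iterable of four-point polygons, e.g. EasyOCR bounding boxes.
    """
    # OpenCV frames are BGR; convert so the preview colours are not swapped.
    im = Image.fromarray(cv2.cvtColor(frame_bgr, cv2.COLOR_BGR2RGB))
    draw = ImageDraw.Draw(im)

    font_path = "/usr/share/fonts/truetype/dejavu/DejaVuSans.ttf"  # assumption: Debian/Ubuntu font layout
    font = (ImageFont.truetype(font_path, font_size)
            if os.path.exists(font_path) else ImageFont.load_default())

    for i, box in enumerate(boxes):
        x, y = box[0]  # top-left corner of the quadrilateral
        draw.text((x, y), f"Box {i+1}", fill="red", font=font)
    return im

# Hypothetical usage with a dummy frame and a single box.
frame = np.zeros((240, 320, 3), dtype=np.uint8)
preview = label_boxes(frame, [[(10, 10), (100, 10), (100, 40), (10, 40)]])
preview.save("preview.png")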
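When compress_mp4 is enabled, the frames are first written to a temporary mp4v file and then re-encoded with two ffmpeg passes: pass 1 only gathers rate statistics (its output is discarded to /dev/null) and pass 2 writes the final H.264 file with -movflags faststart for web playback, after which the temporary file and the ffmpeg2pass logs are removed. A rough equivalent, sketched with subprocess instead of os.system and with placeholder file names; the bitrate settings are copied from the commit:

import subprocess
from pathlib import Path

def compress_for_web(temp_path, output_path):
    """Two-pass H.264 encode of temp_path into output_path (settings as in the commit)."""
    common = ["-c:v", "libx264", "-b:v", "5000k", "-minrate", "1000k",
              "-maxrate", "8000k", "-c:a", "aac"]
    # Pass 1: analysis only, discard the encoded output.
    subprocess.run(["ffmpeg", "-y", "-i", temp_path, *common,
                    "-pass", "1", "-f", "mp4", "/dev/null"], check=True)
    # Pass 2: real encode, moov atom up front for fast web start.
    subprocess.run(["ffmpeg", "-y", "-i", temp_path, *common,
                    "-pass", "2", "-movflags", "faststart", output_path], check=True)
    # Clean up the intermediate file and the ffmpeg pass logs.
    for leftover in (temp_path, "ffmpeg2pass-0.log", "ffmpeg2pass-0.log.mbtree"):
        Path(leftover).unlink(missing_ok=True)

# Hypothetical usage:
# compress_for_web("result_temp.mp4", "result.mp4")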
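The per-box text timelines are now collected into a plain list of row dicts and converted to a DataFrame once, replacing the old DataFrame.append calls (deprecated since pandas 1.4 and removed in 2.0). A small sketch with made-up profile data; note that pd.concat([pd.DataFrame(df_list)]), as written in the commit, produces the same result as pd.DataFrame(df_list):

import pandas as pd

# Made-up temporal profiles: one list of (time, text) samples per box.
temporal_profiles = [
    [(0.0, "STOP"), (1.0, "STOP")],
    [(0.0, "ONE WAY")],
]

df_list = []
for i, profile in enumerate(temporal_profiles):
    for t, text in profile:
        df_list.append({"Box": f"Box {i+1}", "Time (s)": t, "Text": text})
    # Blank separator row between boxes, mirroring the commit's formatting.
    df_list.append({"Box": "", "Time (s)": "", "Text": ""})

df = pd.DataFrame(df_list)  # equivalent to pd.concat([pd.DataFrame(df_list)])
print(df.to_string(index=False))

With these changes inference() returns three values (the output video path, the labelled preview image, and this DataFrame), which line up with the Video, Image, and Dataframe output components now declared in gr.Interface.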