stupidog04 committed on
Commit
32f9f47
1 Parent(s): 5017f0e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +49 -6
app.py CHANGED
@@ -7,6 +7,7 @@ import easyocr
7
  import os
8
  from pathlib import Path
9
  import cv2
 
10
 
11
 
12
  #torch.hub.download_url_to_file('https://github.com/AaronCWacker/Yggdrasil/blob/main/images/BeautyIsTruthTruthisBeauty.JPG', 'BeautyIsTruthTruthisBeauty.JPG')
@@ -26,10 +27,21 @@ def draw_boxes(image, bounds, color='yellow', width=2):
26
  draw.line([*p0, *p1, *p2, *p3, *p0], fill=color, width=width)
27
  return image
28
 
 
 
 
 
 
 
 
 
 
 
 
 
 
29
  def inference(video, lang, time_step):
30
- # output = f"{Path(video).stem}_detected{Path(src).suffix}"
31
  output = 'results.mp4'
32
-
33
  reader = easyocr.Reader(lang)
34
  bounds = []
35
  vidcap = cv2.VideoCapture(video)
@@ -37,12 +49,35 @@ def inference(video, lang, time_step):
37
  count = 0
38
  frame_rate = vidcap.get(cv2.CAP_PROP_FPS)
39
  output_frames = []
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
40
  while success:
41
  if count % (int(frame_rate * time_step)) == 0:
42
  bounds = reader.readtext(frame)
 
 
 
 
 
 
 
43
  im = PIL.Image.fromarray(frame)
44
- draw_boxes(im, bounds)
45
- output_frames.append(np.array(im))
46
  success, frame = vidcap.read()
47
  count += 1
48
 
@@ -69,7 +104,15 @@ def inference(video, lang, time_step):
69
  f"ffmpeg -y -i {temp} -c:v libx264 -b:v 5000k -minrate 1000k -maxrate 8000k -pass 1 -c:a aac -f mp4 /dev/null && ffmpeg -y -i {temp} -c:v libx264 -b:v 5000k -minrate 1000k -maxrate 8000k -pass 2 -c:a aac -movflags faststart {output}"
70
  )
71
  os.system(f"rm -rf {temp} ffmpeg2pass-0.log ffmpeg2pass-0.log.mbtree")
72
- return output
 
 
 
 
 
 
 
 
73
 
74
 
75
  title = '🖼️Video to Multilingual OCR👁️Gradio'
@@ -104,7 +147,7 @@ gr.Interface(
104
  ],
105
  [
106
  gr.outputs.Video(label='Output Video'),
107
- # gr.outputs.Dataframe(headers=['Text', 'Confidence'])
108
  ],
109
  title=title,
110
  description=description,
 
7
  import os
8
  from pathlib import Path
9
  import cv2
10
+ import pandas as pd
11
 
12
 
13
  #torch.hub.download_url_to_file('https://github.com/AaronCWacker/Yggdrasil/blob/main/images/BeautyIsTruthTruthisBeauty.JPG', 'BeautyIsTruthTruthisBeauty.JPG')
 
27
  draw.line([*p0, *p1, *p2, *p3, *p0], fill=color, width=width)
28
  return image
29
 
30
def box_size(box):
    """Return the area enclosed by an OCR detection's four corner points.

    Args:
        box: A detection whose first element is the sequence of corner
            points, each an ``(x, y)`` pair. Presumably an EasyOCR
            ``readtext`` result of the form ``(points, text, confidence)``
            -- confirm against the caller.

    Returns:
        The polygon area when exactly four corner points are present,
        otherwise 0.
    """
    points = box[0]
    if len(points) != 4:
        return 0

    # Shoelace formula: unlike multiplying the x/y extents of two opposite
    # corners, this stays correct for rotated or skewed quadrilaterals
    # (a diamond-shaped box would otherwise score 0). For axis-aligned
    # rectangles it yields the same value as width * height.
    # Assumes the corners are listed in order around the perimeter.
    twice_area = 0
    for (x1, y1), (x2, y2) in zip(points, [*points[1:], points[0]]):
        twice_area += x1 * y2 - x2 * y1
    return abs(twice_area) / 2
38
+
39
def box_position(box):
    """Return the midpoint between the first and third corner of a box.

    Args:
        box: A detection whose first element is the sequence of corner
            points, each indexable as ``point[0]`` (x) and ``point[1]`` (y).

    Returns:
        A ``(x, y)`` tuple: the midpoint of the diagonal joining corner 0
        and corner 2.
    """
    corners = box[0]
    first_corner = corners[0]
    third_corner = corners[2]
    mid_x = (first_corner[0] + third_corner[0]) / 2
    mid_y = (first_corner[1] + third_corner[1]) / 2
    return mid_x, mid_y
41
+
42
+
43
  def inference(video, lang, time_step):
 
44
  output = 'results.mp4'
 
45
  reader = easyocr.Reader(lang)
46
  bounds = []
47
  vidcap = cv2.VideoCapture(video)
 
49
  count = 0
50
  frame_rate = vidcap.get(cv2.CAP_PROP_FPS)
51
  output_frames = []
52
+ temporal_profiles = []
53
+ max_boxes = 10
54
+
55
+ # Get the positions of the largest boxes in the first frame
56
+ while success and not bounds:
57
+ if count == 0:
58
+ bounds = reader.readtext(frame)
59
+ im = PIL.Image.fromarray(frame)
60
+ im_with_boxes = draw_boxes(im, bounds)
61
+ largest_boxes = sorted(bounds, key=lambda x: box_size(x), reverse=True)[:max_boxes]
62
+ positions = [box_position(b) for b in largest_boxes]
63
+ temporal_profiles = [[] for _ in range(len(largest_boxes))]
64
+ success, frame = vidcap.read()
65
+ count += 1
66
+
67
+ # Match bboxes to position and store the text read by OCR
68
  while success:
69
  if count % (int(frame_rate * time_step)) == 0:
70
  bounds = reader.readtext(frame)
71
+ for box in bounds:
72
+ bbox_pos = box_position(box)
73
+ for i, position in enumerate(positions):
74
+ distance = np.linalg.norm(np.array(bbox_pos) - np.array(position))
75
+ if distance < 50:
76
+ temporal_profiles[i].append((count / frame_rate, box[1]))
77
+ break
78
  im = PIL.Image.fromarray(frame)
79
+ im_with_boxes = draw_boxes(im, bounds)
80
+ output_frames.append(np.array(im_with_boxes))
81
  success, frame = vidcap.read()
82
  count += 1
83
 
 
104
  f"ffmpeg -y -i {temp} -c:v libx264 -b:v 5000k -minrate 1000k -maxrate 8000k -pass 1 -c:a aac -f mp4 /dev/null && ffmpeg -y -i {temp} -c:v libx264 -b:v 5000k -minrate 1000k -maxrate 8000k -pass 2 -c:a aac -movflags faststart {output}"
105
  )
106
  os.system(f"rm -rf {temp} ffmpeg2pass-0.log ffmpeg2pass-0.log.mbtree")
107
+
108
+ # Format temporal profiles as a DataFrame
109
+ df = pd.DataFrame(columns=["Box", "Time (s)", "Text"])
110
+ for i, profile in enumerate(temporal_profiles):
111
+ for t, text in profile:
112
+ df = df.append({"Box": f"Box {i+1}", "Time (s)": t, "Text": text}, ignore_index=True)
113
+
114
+ return output, df
115
+
116
 
117
 
118
  title = '🖼️Video to Multilingual OCR👁️Gradio'
 
147
  ],
148
  [
149
  gr.outputs.Video(label='Output Video'),
150
+ gr.outputs.Dataframe(headers=['Box', 'Time (s)', 'Text'])
151
  ],
152
  title=title,
153
  description=description,