Spaces:
Runtime error
Runtime error
dragonSwing
committed on
Commit
•
e0cedf5
1
Parent(s):
54696a3
Use tqdm for processing
Browse files
- app.py +3 -3
- bg_modeling.py +10 -13
- convert_to_pdf.py +22 -0
- download_video.py +4 -4
- frame_differencing.py +8 -13
- post_process.py +25 -25
- requirements.txt +3 -3
app.py
CHANGED
@@ -114,8 +114,8 @@ with gr.Blocks(css="style.css") as demo:
|
|
114 |
You can browse your video from the local file system, or enter a video URL/YouTube video link to start processing.
|
115 |
|
116 |
**Note**:
|
117 |
-
- It will take
|
118 |
-
- If the YouTube
|
119 |
- Remember to press Enter if you are using an external URL
|
120 |
""",
|
121 |
elem_id="container",
|
@@ -176,7 +176,7 @@ with gr.Blocks(css="style.css") as demo:
|
|
176 |
file_url = gr.Textbox(
|
177 |
value="",
|
178 |
label="Upload your file",
|
179 |
-
placeholder="Enter a video url or YouTube link",
|
180 |
show_label=False,
|
181 |
)
|
182 |
with gr.Column(scale=1, min_width=160):
|
|
|
114 |
You can browse your video from the local file system, or enter a video URL/YouTube video link to start processing.
|
115 |
|
116 |
**Note**:
|
117 |
+
- It will take some time to complete (~ half of the original video length), so stay tuned!
|
118 |
+
- If the YouTube video link doesn't work, you can try again later or download video to your computer and then upload it to the app
|
119 |
- Remember to press Enter if you are using an external URL
|
120 |
""",
|
121 |
elem_id="container",
|
|
|
176 |
file_url = gr.Textbox(
|
177 |
value="",
|
178 |
label="Upload your file",
|
179 |
+
placeholder="Enter a video url or YouTube video link",
|
180 |
show_label=False,
|
181 |
)
|
182 |
with gr.Column(scale=1, min_width=160):
|
bg_modeling.py
CHANGED
@@ -1,7 +1,7 @@
|
|
|
|
1 |
import os
|
2 |
-
import time
|
3 |
import sys
|
4 |
-
import cv2
|
5 |
from utils import resize_image_frame
|
6 |
|
7 |
|
@@ -38,7 +38,9 @@ def capture_slides_bg_modeling(
|
|
38 |
print("Unable to open video file: ", video_path)
|
39 |
sys.exit()
|
40 |
|
41 |
-
|
|
|
|
|
42 |
# Loop over subsequent frames.
|
43 |
while cap.isOpened():
|
44 |
ret, frame = cap.read()
|
@@ -66,21 +68,16 @@ def capture_slides_bg_modeling(
|
|
66 |
|
67 |
png_filename = f"{screenshots_count:03}.jpg"
|
68 |
out_file_path = os.path.join(output_dir_path, png_filename)
|
69 |
-
print(f"Saving file at: {out_file_path}")
|
70 |
cv2.imwrite(out_file_path, orig_frame, [cv2.IMWRITE_JPEG_QUALITY, 75])
|
|
|
71 |
|
72 |
# p_non_zero >= MIN_PERCENT_THRESH, indicates motion/animations.
|
73 |
# Hence wait till the motion across subsequent frames has settled down.
|
74 |
elif capture_frame and p_non_zero >= MIN_PERCENT_THRESH:
|
75 |
capture_frame = False
|
|
|
|
|
76 |
|
77 |
-
|
78 |
-
|
79 |
-
print("Statistics:")
|
80 |
-
print("---" * 10)
|
81 |
-
print(f"Total Time taken: {round(end_time-start, 3)} secs")
|
82 |
-
print(f"Total Screenshots captured: {screenshots_count}")
|
83 |
-
print("---" * 10, "\n")
|
84 |
-
|
85 |
-
# Release Video Capture object.
|
86 |
cap.release()
|
|
|
1 |
+
import cv2
|
2 |
import os
|
|
|
3 |
import sys
|
4 |
+
from tqdm import tqdm
|
5 |
from utils import resize_image_frame
|
6 |
|
7 |
|
|
|
38 |
print("Unable to open video file: ", video_path)
|
39 |
sys.exit()
|
40 |
|
41 |
+
num_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
|
42 |
+
prog_bar = tqdm(total=num_frames)
|
43 |
+
|
44 |
# Loop over subsequent frames.
|
45 |
while cap.isOpened():
|
46 |
ret, frame = cap.read()
|
|
|
68 |
|
69 |
png_filename = f"{screenshots_count:03}.jpg"
|
70 |
out_file_path = os.path.join(output_dir_path, png_filename)
|
|
|
71 |
cv2.imwrite(out_file_path, orig_frame, [cv2.IMWRITE_JPEG_QUALITY, 75])
|
72 |
+
prog_bar.set_postfix_str(f"Total Screenshots: {screenshots_count}")
|
73 |
|
74 |
# p_non_zero >= MIN_PERCENT_THRESH, indicates motion/animations.
|
75 |
# Hence wait till the motion across subsequent frames has settled down.
|
76 |
elif capture_frame and p_non_zero >= MIN_PERCENT_THRESH:
|
77 |
capture_frame = False
|
78 |
+
|
79 |
+
prog_bar.update(1)
|
80 |
|
81 |
+
# Release progress bar and video capture object.
|
82 |
+
prog_bar.close()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
83 |
cap.release()
|
convert_to_pdf.py
ADDED
@@ -0,0 +1,22 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import argparse
|
2 |
+
from utils import convert_slides_to_pdf
|
3 |
+
|
4 |
+
if __name__ == "__main__":
|
5 |
+
parser = argparse.ArgumentParser(
|
6 |
+
description="This script is used to convert video frames into slide PDFs."
|
7 |
+
)
|
8 |
+
parser.add_argument(
|
9 |
+
"-f", "--folder", help="Path to the image folder", type=str
|
10 |
+
)
|
11 |
+
parser.add_argument(
|
12 |
+
"-o",
|
13 |
+
"--out_path",
|
14 |
+
help="Path to the output PDF file. If None, the image directory will be used to store the output file.",
|
15 |
+
type=str,
|
16 |
+
)
|
17 |
+
args = parser.parse_args()
|
18 |
+
|
19 |
+
img_dir = args.folder
|
20 |
+
output_path = args.out_path
|
21 |
+
|
22 |
+
convert_slides_to_pdf(img_dir, output_path)
|
download_video.py
CHANGED
@@ -15,8 +15,8 @@ def download_video_from_url(url, output_dir=DOWNLOAD_DIR):
|
|
15 |
|
16 |
content_type = response.headers.get("content-type")
|
17 |
if "video" not in content_type:
|
18 |
-
print("The given URL is not a valid video")
|
19 |
-
return
|
20 |
file_extension = mimetypes.guess_extension(content_type)
|
21 |
|
22 |
os.makedirs(output_dir, exist_ok=True)
|
@@ -32,7 +32,7 @@ def download_video_from_url(url, output_dir=DOWNLOAD_DIR):
|
|
32 |
|
33 |
except requests.exceptions.RequestException as e:
|
34 |
print("An error occurred while downloading the video:", str(e))
|
35 |
-
return
|
36 |
|
37 |
|
38 |
def download_video_from_youtube(url, output_dir=DOWNLOAD_DIR):
|
@@ -52,7 +52,7 @@ def download_video_from_youtube(url, output_dir=DOWNLOAD_DIR):
|
|
52 |
|
53 |
except Exception as e:
|
54 |
print("An error occurred while downloading the video:", str(e))
|
55 |
-
return
|
56 |
|
57 |
|
58 |
def download_video(url, output_dir=DOWNLOAD_DIR):
|
|
|
15 |
|
16 |
content_type = response.headers.get("content-type")
|
17 |
if "video" not in content_type:
|
18 |
+
print("The given URL is not a valid video URL")
|
19 |
+
return
|
20 |
file_extension = mimetypes.guess_extension(content_type)
|
21 |
|
22 |
os.makedirs(output_dir, exist_ok=True)
|
|
|
32 |
|
33 |
except requests.exceptions.RequestException as e:
|
34 |
print("An error occurred while downloading the video:", str(e))
|
35 |
+
return
|
36 |
|
37 |
|
38 |
def download_video_from_youtube(url, output_dir=DOWNLOAD_DIR):
|
|
|
52 |
|
53 |
except Exception as e:
|
54 |
print("An error occurred while downloading the video:", str(e))
|
55 |
+
return
|
56 |
|
57 |
|
58 |
def download_video(url, output_dir=DOWNLOAD_DIR):
|
frame_differencing.py
CHANGED
@@ -1,7 +1,7 @@
|
|
1 |
import cv2
|
2 |
import os
|
3 |
-
import time
|
4 |
import sys
|
|
|
5 |
|
6 |
|
7 |
def capture_slides_frame_diff(
|
@@ -24,12 +24,12 @@ def capture_slides_frame_diff(
|
|
24 |
sys.exit()
|
25 |
|
26 |
success, first_frame = cap.read()
|
|
|
|
|
27 |
|
28 |
print("Using frame differencing for Background Subtraction...")
|
29 |
print("---" * 10)
|
30 |
|
31 |
-
start = time.time()
|
32 |
-
|
33 |
# The 1st frame should always be present in the output directory.
|
34 |
# Hence capture and save the 1st frame.
|
35 |
if success:
|
@@ -42,10 +42,10 @@ def capture_slides_frame_diff(
|
|
42 |
|
43 |
filename = f"{screenshots_count:03}.jpg"
|
44 |
out_file_path = os.path.join(output_dir_path, filename)
|
45 |
-
print(f"Saving file at: {out_file_path}")
|
46 |
|
47 |
# Save frame.
|
48 |
cv2.imwrite(out_file_path, first_frame, [cv2.IMWRITE_JPEG_QUALITY, 75])
|
|
|
49 |
|
50 |
# Loop over subsequent frames.
|
51 |
while cap.isOpened():
|
@@ -79,18 +79,13 @@ def capture_slides_frame_diff(
|
|
79 |
|
80 |
filename = f"{screenshots_count:03}.jpg"
|
81 |
out_file_path = os.path.join(output_dir_path, filename)
|
82 |
-
print(f"Saving file at: {out_file_path}")
|
83 |
|
84 |
cv2.imwrite(out_file_path, frame, [cv2.IMWRITE_JPEG_QUALITY, 75])
|
|
|
85 |
|
86 |
prev_frame = curr_frame
|
|
|
87 |
|
88 |
-
|
89 |
-
|
90 |
-
print("Statistics:")
|
91 |
-
print("---" * 5)
|
92 |
-
print(f"Total Time taken: {round(end_time-start, 3)} secs")
|
93 |
-
print(f"Total Screenshots captured: {screenshots_count}")
|
94 |
-
print("---" * 10, "\n")
|
95 |
-
|
96 |
cap.release()
|
|
|
1 |
import cv2
|
2 |
import os
|
|
|
3 |
import sys
|
4 |
+
from tqdm import tqdm
|
5 |
|
6 |
|
7 |
def capture_slides_frame_diff(
|
|
|
24 |
sys.exit()
|
25 |
|
26 |
success, first_frame = cap.read()
|
27 |
+
num_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
|
28 |
+
prog_bar = tqdm(total=num_frames)
|
29 |
|
30 |
print("Using frame differencing for Background Subtraction...")
|
31 |
print("---" * 10)
|
32 |
|
|
|
|
|
33 |
# The 1st frame should always be present in the output directory.
|
34 |
# Hence capture and save the 1st frame.
|
35 |
if success:
|
|
|
42 |
|
43 |
filename = f"{screenshots_count:03}.jpg"
|
44 |
out_file_path = os.path.join(output_dir_path, filename)
|
|
|
45 |
|
46 |
# Save frame.
|
47 |
cv2.imwrite(out_file_path, first_frame, [cv2.IMWRITE_JPEG_QUALITY, 75])
|
48 |
+
prog_bar.update(1)
|
49 |
|
50 |
# Loop over subsequent frames.
|
51 |
while cap.isOpened():
|
|
|
79 |
|
80 |
filename = f"{screenshots_count:03}.jpg"
|
81 |
out_file_path = os.path.join(output_dir_path, filename)
|
|
|
82 |
|
83 |
cv2.imwrite(out_file_path, frame, [cv2.IMWRITE_JPEG_QUALITY, 75])
|
84 |
+
prog_bar.set_postfix_str(f"Total Screenshots: {screenshots_count}")
|
85 |
|
86 |
prev_frame = curr_frame
|
87 |
+
prog_bar.update(1)
|
88 |
|
89 |
+
# Release progress bar and video capture object.
|
90 |
+
prog_bar.close()
|
|
|
|
|
|
|
|
|
|
|
|
|
91 |
cap.release()
|
post_process.py
CHANGED
@@ -2,6 +2,7 @@ import imagehash
|
|
2 |
import os
|
3 |
from collections import deque
|
4 |
from PIL import Image
|
|
|
5 |
|
6 |
|
7 |
def find_similar_images(
|
@@ -16,31 +17,30 @@ def find_similar_images(
|
|
16 |
|
17 |
print("---" * 5, "Finding similar files", "---" * 5)
|
18 |
|
19 |
-
|
20 |
-
|
21 |
-
|
22 |
-
|
23 |
-
|
24 |
-
|
25 |
-
|
26 |
-
|
27 |
-
|
28 |
-
|
29 |
-
|
30 |
-
|
31 |
-
|
32 |
-
|
33 |
-
|
34 |
-
|
35 |
-
|
36 |
-
|
37 |
-
|
38 |
-
|
39 |
-
|
40 |
-
|
41 |
-
|
42 |
-
|
43 |
-
print("-----" * 10)
|
44 |
return hash_dict, duplicates
|
45 |
|
46 |
|
|
|
2 |
import os
|
3 |
from collections import deque
|
4 |
from PIL import Image
|
5 |
+
from tqdm import tqdm
|
6 |
|
7 |
|
8 |
def find_similar_images(
|
|
|
17 |
|
18 |
print("---" * 5, "Finding similar files", "---" * 5)
|
19 |
|
20 |
+
with tqdm(snapshots_files) as t:
|
21 |
+
for file in t:
|
22 |
+
read_file = Image.open(os.path.join(base_dir, file))
|
23 |
+
comp_hash = hashfunc(read_file, hash_size=hash_size)
|
24 |
+
duplicate = False
|
25 |
+
|
26 |
+
if comp_hash not in hash_dict:
|
27 |
+
hash_dict[comp_hash] = file
|
28 |
+
# Compare with hash queue to find out potential duplicates
|
29 |
+
for img_hash in hash_queue:
|
30 |
+
if img_hash - comp_hash <= threshold:
|
31 |
+
duplicate = True
|
32 |
+
break
|
33 |
+
|
34 |
+
if not duplicate:
|
35 |
+
hash_queue.append(comp_hash)
|
36 |
+
else:
|
37 |
+
duplicate = True
|
38 |
+
|
39 |
+
if duplicate:
|
40 |
+
duplicates.append(file)
|
41 |
+
num_duplicates += 1
|
42 |
+
t.set_postfix_str(f"Duplicate files: {num_duplicates}")
|
43 |
+
|
|
|
44 |
return hash_dict, duplicates
|
45 |
|
46 |
|
requirements.txt
CHANGED
@@ -1,11 +1,11 @@
|
|
1 |
-
opencv-contrib-python==4.7.0.72
|
2 |
numpy
|
3 |
Pillow
|
4 |
scipy
|
5 |
six
|
6 |
ImageHash
|
7 |
-
imutils
|
8 |
img2pdf
|
|
|
9 |
pytube
|
10 |
validators
|
11 |
-
requests
|
|
|
|
|
|
1 |
numpy
|
2 |
Pillow
|
3 |
scipy
|
4 |
six
|
5 |
ImageHash
|
|
|
6 |
img2pdf
|
7 |
+
imutils
|
8 |
pytube
|
9 |
validators
|
10 |
+
requests
|
11 |
+
tqdm
|