|
import cv2 |
|
import gradio as gr |
|
import numpy as np |
|
import os |
|
import datetime |
|
|
|
|
|
# Load the YOLOv3 network definition and pretrained weights.
# Both files are expected in the current working directory.
net = cv2.dnn.readNetFromDarknet('yolov3.cfg', 'yolov3.weights')


# COCO class labels, one per line; list index corresponds to the
# network's class id.
with open('coco.names', 'r', encoding='utf-8') as f:
    classes = [line.strip() for line in f]
|
|
|
|
|
def detect_birds(video_file):
    """Run YOLOv3 detection on every frame of a video.

    Draws labelled bounding boxes on each frame, re-encodes the annotated
    frames into ``output.mp4`` and tallies, per class label, how many
    detections occurred and how much video time contained that label.

    Parameters
    ----------
    video_file : str
        Path to the input video (anything ``cv2.VideoCapture`` accepts).

    Returns
    -------
    tuple[str | None, str, str]
        ``(output_video_path, count_text, duration_text)``.
        ``output_video_path`` is ``None`` when no frame could be processed.
    """
    cap = cv2.VideoCapture(video_file)

    # Use the source frame rate for both duration accounting and the
    # output writer; fall back to 30 fps if the container reports none.
    fps = cap.get(cv2.CAP_PROP_FPS) or 30.0
    # Video time represented by a single frame.  The original code measured
    # wall-clock processing time with datetime.now(), which reflects how
    # slow the detector runs, not how long an object was on screen.
    frame_time = datetime.timedelta(seconds=1.0 / fps)

    # cv2.imwrite fails silently when the target directory is missing,
    # which would make every frame (and the output video) disappear.
    os.makedirs('output_frames', exist_ok=True)

    # The network topology never changes between frames, so resolve the
    # output layers once instead of once per frame.
    layer_names = net.getLayerNames()
    output_layers = [layer_names[i - 1] for i in net.getUnconnectedOutLayers()]

    frame_count = 0
    output_frames = []
    object_counts = {class_name: 0 for class_name in classes}
    object_durations = {class_name: datetime.timedelta() for class_name in classes}

    while True:
        ret, frame = cap.read()
        if not ret or frame is None:
            break

        height, width, _ = frame.shape

        # YOLOv3 expects a 416x416 RGB blob scaled to [0, 1].
        blob = cv2.dnn.blobFromImage(frame, 1 / 255.0, (416, 416), swapRB=True, crop=False)
        net.setInput(blob)
        detections = net.forward(output_layers)

        boxes = []
        confidences = []
        class_ids = []
        for layer_output in detections:
            for detection_result in layer_output:
                # Layout: [cx, cy, w, h, objectness, per-class scores...]
                scores = detection_result[5:]
                class_id = int(np.argmax(scores))
                confidence = scores[class_id]

                if confidence > 0.5:
                    # Coordinates are normalized; convert to pixel-space
                    # top-left corner plus width/height.
                    center_x = int(detection_result[0] * width)
                    center_y = int(detection_result[1] * height)
                    w = int(detection_result[2] * width)
                    h = int(detection_result[3] * height)
                    x = int(center_x - w / 2)
                    y = int(center_y - h / 2)

                    boxes.append([x, y, w, h])
                    confidences.append(float(confidence))
                    class_ids.append(class_id)

        # Non-maximum suppression to drop overlapping duplicates.
        indices = cv2.dnn.NMSBoxes(boxes, confidences, 0.5, 0.4)

        if len(indices) > 0:
            # Depending on the OpenCV version, NMSBoxes returns an ndarray
            # of shape (k,) or (k, 1), or a tuple; normalize to a flat array.
            for i in np.array(indices).flatten():
                x, y, w, h = boxes[i]
                label = classes[class_ids[i]]
                confidence = confidences[i]

                cv2.rectangle(frame, (x, y), (x + w, y + h), (0, 255, 0), 2)
                cv2.putText(frame, f'{label} {confidence:.2f}', (x, y - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5,
                            (0, 255, 0), 2)

                object_counts[label] += 1
                # Attribute one frame's worth of video time to this label.
                object_durations[label] += frame_time

        output_frame_path = f'output_frames/frame_{frame_count:04d}.jpg'
        cv2.imwrite(output_frame_path, frame)
        output_frames.append(output_frame_path)
        frame_count += 1

    cap.release()

    # Re-assemble the annotated frames into a video at the source fps
    # (the original hard-coded 30, distorting playback speed).
    output_video_path = None
    if output_frames:
        first_frame = cv2.imread(output_frames[0])
        if first_frame is not None:
            height, width, _ = first_frame.shape
            fourcc = cv2.VideoWriter_fourcc(*'mp4v')
            writer = cv2.VideoWriter('output.mp4', fourcc, fps, (width, height))
            for frame_path in output_frames:
                frame = cv2.imread(frame_path)
                if frame is not None:
                    writer.write(frame)
            writer.release()
            output_video_path = 'output.mp4'

    cv2.destroyAllWindows()

    # Clean up the intermediate per-frame JPEGs.
    for frame_path in output_frames:
        os.remove(frame_path)

    count_text = '\n'.join([f'{label}: {count}' for label, count in object_counts.items() if count > 0])
    # str(timedelta) -> "H:MM:SS.ffffff"; drop the fractional seconds.
    duration_text = '\n'.join([f'{label}: {str(duration).split(".")[0]}' for label, duration in object_durations.items() if duration.total_seconds() > 0])

    return output_video_path, count_text, duration_text
|
|
|
|
|
# Gradio 3+ removed the gr.inputs / gr.outputs namespaces (components are
# now top-level), dropped the `capture_session` kwarg, and moved `share`
# from Interface() to launch().
inputs = gr.Video(label='Input Video')
outputs = [
    gr.Video(label='Output Video'),
    gr.Textbox(label='Object Count'),
    gr.Textbox(label='Duration'),
]

gr.Interface(fn=detect_birds, inputs=inputs, outputs=outputs).launch(share=True)
|
|