# Scarecrow (app.py): YOLOv3 detection demo served with Gradio
import cv2
import gradio as gr
import numpy as np
import os
import datetime
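# The model files are assumed to sit next to app.py: yolov3.cfg and
# yolov3.weights from the Darknet release (https://pjreddie.com/darknet/yolo/),
# plus the matching coco.names class list.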
# Load YOLO model
net = cv2.dnn.readNetFromDarknet('yolov3.cfg', 'yolov3.weights')
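# Optional, untested here: if the local OpenCV build was compiled with CUDA,
# the forward pass can run on the GPU. Left commented out because the default
# pip wheel is CPU-only and rejects these targets.
# net.setPreferableBackend(cv2.dnn.DNN_BACKEND_CUDA)
# net.setPreferableTarget(cv2.dnn.DNN_TARGET_CUDA)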
# Load the COCO class names
with open('coco.names', 'r') as f:
    classes = [line.strip() for line in f]
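# COCO's 80 classes include 'bird', but the function below reports every class
# it sees; restricting it to birds would be a filter on classes[class_id]
# (hypothetical, not part of this app's behaviour).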
# Detect and annotate objects in every frame of the uploaded video
def detect_birds(video_file):
    cap = cv2.VideoCapture(video_file)
    # Fall back to 30 fps if the container does not report a frame rate
    fps = cap.get(cv2.CAP_PROP_FPS) or 30
    frame_count = 0
    output_frames = []
    os.makedirs('output_frames', exist_ok=True)  # imwrite fails silently if the directory is missing
    # Per-class detection counts and on-screen durations
    object_counts = {class_name: 0 for class_name in classes}
    object_durations = {class_name: datetime.timedelta() for class_name in classes}
    # The output layer names never change, so look them up once, not per frame
    layer_names = net.getLayerNames()
    output_layers = [layer_names[i - 1] for i in net.getUnconnectedOutLayers()]

    while True:
        ret, frame = cap.read()
        if not ret:
            break

        height, width, _ = frame.shape

        # Create a 416x416 blob from the frame and run a forward pass
        blob = cv2.dnn.blobFromImage(frame, 1 / 255.0, (416, 416), swapRB=True, crop=False)
        net.setInput(blob)
        detections = net.forward(output_layers)
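        # Each output row is an 85-vector: box centre x/y, width, height,
        # objectness, then one score per COCO class. Coordinates are
        # normalised to the blob, hence the rescaling below.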
        # Collect every box whose best class score clears the threshold
        boxes = []
        confidences = []
        class_ids = []
        for detection in detections:
            for detection_result in detection:
                scores = detection_result[5:]
                class_id = np.argmax(scores)
                confidence = scores[class_id]
                if confidence > 0.5:
                    # Convert the normalised centre/size box to pixel corner coordinates
                    center_x = int(detection_result[0] * width)
                    center_y = int(detection_result[1] * height)
                    w = int(detection_result[2] * width)
                    h = int(detection_result[3] * height)
                    x = int(center_x - w / 2)
                    y = int(center_y - h / 2)
                    boxes.append([x, y, w, h])
                    confidences.append(float(confidence))
                    class_ids.append(class_id)
        # Non-maximum suppression drops redundant overlapping boxes
        indices = cv2.dnn.NMSBoxes(boxes, confidences, 0.5, 0.4)

        # Draw the surviving boxes and update the per-class statistics
        if len(indices) > 0:
            for i in indices.flatten():
                x, y, w, h = boxes[i]
                label = classes[class_ids[i]]
                confidence = confidences[i]
                cv2.rectangle(frame, (x, y), (x + w, y + h), (0, 255, 0), 2)
                cv2.putText(frame, f'{label} {confidence:.2f}', (x, y - 10),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)
                object_counts[label] += 1
                # Each detection contributes one frame's worth of video time
                object_durations[label] += datetime.timedelta(seconds=1 / fps)
        # Save the annotated frame to disk; the frames are stitched into a video below
        output_frame_path = f'output_frames/frame_{frame_count:04d}.jpg'
        cv2.imwrite(output_frame_path, frame)
        output_frames.append(output_frame_path)
        frame_count += 1
    cap.release()

    # Stitch the annotated frames back into a video at the source frame rate
    output_video_path = 'output.mp4'
    if frame_count > 0:
        frame = cv2.imread(output_frames[0])
        if frame is not None:
            height, width, _ = frame.shape
            fourcc = cv2.VideoWriter_fourcc(*'mp4v')
            writer = cv2.VideoWriter(output_video_path, fourcc, fps, (width, height))
            for frame_path in output_frames:
                frame = cv2.imread(frame_path)
                if frame is not None:
                    writer.write(frame)
            writer.release()
        else:
            output_video_path = None
    else:
        output_video_path = None

    # Delete the intermediate frame images
    for frame_path in output_frames:
        os.remove(frame_path)
    # Report only the classes that were actually detected
    count_text = '\n'.join(f'{label}: {count}' for label, count in object_counts.items() if count > 0)
    duration_text = '\n'.join(f'{label}: {str(duration).split(".")[0]}'
                              for label, duration in object_durations.items()
                              if duration.total_seconds() > 0)
    return output_video_path, count_text, duration_text
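# Hypothetical local smoke test (assumes a clip named sample.mp4 on disk):
#     video_path, counts, durations = detect_birds('sample.mp4')
#     print(counts, durations, sep='\n')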
# Create the Gradio interface (Gradio 3+ component API)
inputs = gr.Video(label='Input Video')
outputs = [
    gr.Video(label='Output Video'),
    gr.Textbox(label='Object Count'),
    gr.Textbox(label='Duration'),
]

gr.Interface(fn=detect_birds, inputs=inputs, outputs=outputs).launch()
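# For long clips the default HTTP request can time out; chaining .queue()
# before .launch() is the usual mitigation on Gradio 3+ (an assumption here,
# not something this app configures).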