import cv2
import datetime
import gradio as gr
import numpy as np
import os

# Load the YOLOv3 model from its Darknet config and weights
net = cv2.dnn.readNetFromDarknet('yolov3.cfg', 'yolov3.weights')

# Load the COCO class names, one per line
with open('coco.names', 'r') as f:
    classes = [line.strip() for line in f]

# Resolve the output layer names once; they do not change between frames
layer_names = net.getLayerNames()
output_layers = [layer_names[i - 1] for i in net.getUnconnectedOutLayers()]


# Detect objects in every frame of a video, annotate them, and report
# per-class counts and on-screen durations
def detect_birds(video_file):
    cap = cv2.VideoCapture(video_file)
    fps = cap.get(cv2.CAP_PROP_FPS) or 30  # fall back to 30 if FPS is unknown
    frame_duration = datetime.timedelta(seconds=1 / fps)
    frame_count = 0
    output_frames = []

    # Per-class detection counts and durations, measured in video time
    object_counts = {class_name: 0 for class_name in classes}
    object_durations = {class_name: datetime.timedelta() for class_name in classes}

    # The intermediate frames are written to disk, so the directory must exist
    os.makedirs('output_frames', exist_ok=True)

    while True:
        ret, frame = cap.read()
        if not ret or frame is None:
            break

        height, width, _ = frame.shape

        # Create a blob from the frame and pass it through the network
        blob = cv2.dnn.blobFromImage(frame, 1 / 255.0, (416, 416), swapRB=True, crop=False)
        net.setInput(blob)
        detections = net.forward(output_layers)

        # Collect candidate boxes above the confidence threshold
        boxes = []
        confidences = []
        class_ids = []
        for detection in detections:
            for detection_result in detection:
                scores = detection_result[5:]
                class_id = np.argmax(scores)
                confidence = scores[class_id]
                if confidence > 0.5:
                    # YOLO returns centre/size relative to the frame;
                    # convert to a top-left corner in pixels
                    center_x = int(detection_result[0] * width)
                    center_y = int(detection_result[1] * height)
                    w = int(detection_result[2] * width)
                    h = int(detection_result[3] * height)
                    x = int(center_x - w / 2)
                    y = int(center_y - h / 2)
                    boxes.append([x, y, w, h])
                    confidences.append(float(confidence))
                    class_ids.append(class_id)

        # Apply non-maxima suppression to eliminate redundant overlapping boxes
        indices = cv2.dnn.NMSBoxes(boxes, confidences, 0.5, 0.4)

        # Draw bounding boxes and labels
        labels_in_frame = set()
        if len(indices) > 0:
            for i in np.array(indices).flatten():
                x, y, w, h = boxes[i]
                label = classes[class_ids[i]]
                confidence = confidences[i]
                cv2.rectangle(frame, (x, y), (x + w, y + h), (0, 255, 0), 2)
                cv2.putText(frame, f'{label} {confidence:.2f}', (x, y - 10),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)
                object_counts[label] += 1
                labels_in_frame.add(label)

        # Credit one frame's worth of video time to each class seen this frame
        for label in labels_in_frame:
            object_durations[label] += frame_duration

        # Save the annotated frame as an image
        output_frame_path = f'output_frames/frame_{frame_count:04d}.jpg'
        cv2.imwrite(output_frame_path, frame)
        output_frames.append(output_frame_path)
        frame_count += 1

    cap.release()

    # Combine the saved frames into a video file at the source frame rate
    output_video_path = 'output.mp4'
    if output_frames:
        first_frame = cv2.imread(output_frames[0])
        if first_frame is not None:
            height, width, _ = first_frame.shape
            fourcc = cv2.VideoWriter_fourcc(*'mp4v')
            writer = cv2.VideoWriter(output_video_path, fourcc, fps, (width, height))
            for frame_path in output_frames:
                frame = cv2.imread(frame_path)
                if frame is not None:
                    writer.write(frame)
            writer.release()
        else:
            output_video_path = None
    else:
        output_video_path = None

    # Clean up the intermediate frame images
    for frame_path in output_frames:
        os.remove(frame_path)

    # Format object counts and durations as text, one class per line
    count_text = '\n'.join(f'{label}: {count}'
                           for label, count in object_counts.items()
                           if count > 0)
    duration_text = '\n'.join(f'{label}: {str(duration).split(".")[0]}'
                              for label, duration in object_durations.items()
                              if duration.total_seconds() > 0)

    return output_video_path, count_text, duration_text
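
# Optional (not in the original script): despite its name, detect_birds
# reports every COCO class it sees. A minimal, hypothetical filter that
# restricts the report to birds could look like the helper below; to apply
# it, skip a detection inside the inner loop when it returns False, right
# after `class_id = np.argmax(scores)`.
def is_bird(class_id):
    """Return True when the COCO class for this detection is 'bird'."""
    return classes[class_id] == 'bird'
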

# Create a Gradio interface around the detector
demo = gr.Interface(
    fn=detect_birds,
    inputs=gr.Video(label='Input Video'),
    outputs=[
        gr.Video(label='Output Video'),
        gr.Textbox(label='Object Count'),
        gr.Textbox(label='Duration'),
    ],
)
demo.launch(share=True)
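
# Assumed prerequisites (not bundled here): yolov3.cfg, yolov3.weights, and
# coco.names must sit next to this script. The canonical sources are the
# Darknet project: the weights from https://pjreddie.com/media/files/yolov3.weights,
# the cfg and names files from https://github.com/pjreddie/darknet. After
# launching, open the local URL Gradio prints; share=True additionally
# creates a temporary public link.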