From 7671c10434c337db162953fbca2671b0e947a759 Mon Sep 17 00:00:00 2001 From: ishan raskar <99751979+Killmonger2002@users.noreply.github.com> Date: Thu, 24 Apr 2025 14:50:25 +0530 Subject: [PATCH] added a mini code that helps track humans (ssd300) --- miniproject.py | 124 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 124 insertions(+) create mode 100644 miniproject.py diff --git a/miniproject.py b/miniproject.py new file mode 100644 index 0000000..f0e7f3b --- /dev/null +++ b/miniproject.py @@ -0,0 +1,124 @@ +import torch +import cv2 +import torchvision.transforms as T +from torchvision.models.detection import ssd300_vgg16 +import numpy as np +import time # Added for time tracking + +# Load the pre-trained SSD model with VGG16 backbone +model = ssd300_vgg16(pretrained=False) +model.load_state_dict(torch.load('ssd300_vgg16_pretrained.pth')) +model.eval() +print("Model weights loaded successfully.") + +# Set the device +device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') +model.to(device) + +# Define a transform to resize the frame for model input +input_size = (300, 300) # SSD300 expects 300x300 input +transform = T.Compose([ + T.ToPILImage(), + T.Resize(input_size), + T.ToTensor() +]) + +# Function to get detections +def get_detections(frame, original_width, original_height, threshold=0.3): + frame_transformed = transform(frame).unsqueeze(0).to(device) + with torch.no_grad(): + predictions = model(frame_transformed)[0] + + boxes = predictions['boxes'].cpu().numpy() + scores = predictions['scores'].cpu().numpy() + + high_conf_indices = scores > threshold + boxes = boxes[high_conf_indices] + + rescaled_boxes = [] + for box in boxes: + xmin = int(box[0] * original_width / input_size[0]) + ymin = int(box[1] * original_height / input_size[1]) + xmax = int(box[2] * original_width / input_size[0]) + ymax = int(box[3] * original_height / input_size[1]) + rescaled_boxes.append((xmin, ymin, xmax, ymax)) + + return rescaled_boxes, scores[high_conf_indices] + +# MultiTracker setup +multi_tracker = cv2.legacy.MultiTracker_create() +tracking = False +detected_boxes = [] # Store detected bounding boxes +tracking_data = {} # Stores entry time of tracked objects + +# Mouse callback to select bounding boxes for tracking +def select_bbox(event, x, y, flags, param): + global tracking, detected_boxes + if event == cv2.EVENT_LBUTTONDOWN: + for box in detected_boxes: + x_min, y_min, x_max, y_max = box + if x_min <= x <= x_max and y_min <= y <= y_max: + selected_box = (x_min, y_min, x_max - x_min, y_max - y_min) + tracker = cv2.legacy.TrackerCSRT_create() + multi_tracker.add(tracker, frame, selected_box) + tracking_data[selected_box] = {"start_time": time.time()} # Initialize time tracking + tracking = True + break + +# Set up video capture +video_path = 'myvideo1.mp4' +cap = cv2.VideoCapture(video_path) +cv2.namedWindow("Video") + +frame_count = 0 +while cap.isOpened(): + ret, frame = cap.read() + if not ret: + break + + if frame_count % 4 != 0: # Process every 4th frame + frame_count += 1 + continue + + original_height, original_width = frame.shape[:2] + + # Update tracking + active_boxes = [] + if tracking: + success, tracked_boxes = multi_tracker.update(frame) + for box in tracked_boxes: + x, y, w, h = [int(v) for v in box] + current_time = time.time() + box_tuple = (x, y, x + w, y + h) + + # Check if object is still within screen & time limit + if ( + x > 0 and y > 0 and (x + w) < original_width and (y + h) < original_height + and (current_time - tracking_data.get(box_tuple, {"start_time": current_time})["start_time"]) < 12 + ): + active_boxes.append(box_tuple) + cv2.rectangle(frame, (x, y), (x + w, y + h), (255, 0, 255), 2) # Purple for tracking + cv2.putText(frame, "Tracking", (x, y - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 0, 255), 2) + + # Remove objects that have exceeded 12 sec or exited screen + tracking_data = {box: tracking_data.get(box, {"start_time": time.time()}) for box in active_boxes} + + # Perform detection for new objects + if not tracking or frame_count % 12 == 0: # Refresh detections periodically + detected_boxes, scores = get_detections(frame, original_width, original_height, threshold=0.3) + for box in detected_boxes: + xmin, ymin, xmax, ymax = box + cv2.rectangle(frame, (xmin, ymin), (xmax, ymax), (0, 255, 0), 2) # Green for detection + cv2.putText(frame, "Detection", (xmin, ymin - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2) + + # Reset mouse callback to select new boxes + cv2.setMouseCallback("Video", select_bbox, detected_boxes) + + cv2.imshow("Video", frame) + if cv2.waitKey(1) & 0xFF == ord('q'): + break + + frame_count += 1 + +cap.release() +cv2.destroyAllWindows() \ No newline at end of file