diff --git a/4-Object_Detection/SSD/core/backbone.py b/4-Object_Detection/SSD/core/backbone.py
new file mode 100644
index 00000000..5115df8a
--- /dev/null
+++ b/4-Object_Detection/SSD/core/backbone.py
@@ -0,0 +1,50 @@
+#! /usr/bin/env python
+# coding=utf-8
+#================================================================
+#   Copyright (C) 2019 * Ltd. All rights reserved.
+#
+#   Editor      : VIM
+#   File name   : backbone.py
+#   Author      : YunYang1994
+#   Created date: 2019-07-11 23:37:51
+#   Description :
+#
+#================================================================
+
+import tensorflow as tf
+
+def vgg16(input_data):
+
+#======================================VGG16_start===================================================
+    # conv1
+    conv = tf.keras.layers.Conv2D(64, 3, activation='relu', padding='same')(input_data)  # conv1_1
+    conv = tf.keras.layers.Conv2D(64, 3, activation='relu', padding='same')(conv)        # conv1_2
+    conv = tf.keras.layers.MaxPooling2D(2, strides=2, padding='same')(conv)              # pool1
+
+    # conv2
+    conv = tf.keras.layers.Conv2D(128, 3, activation='relu', padding='same')(conv)       # conv2_1
+    conv = tf.keras.layers.Conv2D(128, 3, activation='relu', padding='same')(conv)       # conv2_2
+    conv = tf.keras.layers.MaxPooling2D(2, strides=2, padding='same')(conv)              # pool2
+
+    # conv3
+    conv = tf.keras.layers.Conv2D(256, 3, activation='relu', padding='same')(conv)       # conv3_1
+    conv = tf.keras.layers.Conv2D(256, 3, activation='relu', padding='same')(conv)       # conv3_2
+    conv = tf.keras.layers.Conv2D(256, 3, activation='relu', padding='same')(conv)       # conv3_3
+    conv = tf.keras.layers.MaxPooling2D(2, strides=2, padding='same')(conv)              # pool3
+
+    # conv4
+    conv = tf.keras.layers.Conv2D(512, 3, activation='relu', padding='same')(conv)
+    conv = tf.keras.layers.Conv2D(512, 3, activation='relu', padding='same')(conv)
+    conv = tf.keras.layers.Conv2D(512, 3, activation='relu', padding='same')(conv)
+    conv4 = conv
+    conv = tf.keras.layers.MaxPooling2D(2, strides=2, padding='same')(conv)
+
+    # conv5
+    conv = tf.keras.layers.Conv2D(512, 3, activation='relu', padding='same')(conv)
+    conv = tf.keras.layers.Conv2D(512, 3, activation='relu', padding='same')(conv)
+    conv = tf.keras.layers.Conv2D(512, 3, activation='relu', padding='same')(conv)
+    conv = tf.keras.layers.MaxPooling2D(3, strides=1, padding='same')(conv)
+
+    return conv4, conv
+
+
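For a 300x300 SSD input, `vgg16` hands back two tensors: `conv4` is the conv4_3 feature map (stride 8, taken before pool4) that feeds the first detection head, and the second return value is the pool5 output (stride 16, since pool5 uses stride 1) that fc6/fc7 build on in ssd.py. A minimal shape check, a sketch using the standard Keras functional API:

    import tensorflow as tf
    from core.backbone import vgg16

    inputs = tf.keras.layers.Input([300, 300, 3])
    conv4, conv5 = vgg16(inputs)
    print(conv4.shape)  # (None, 38, 38, 512): 300 -> 150 -> 75 -> 38 after three stride-2 pools
    print(conv5.shape)  # (None, 19, 19, 512): pool4 halves again, pool5 keeps stride 1
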
diff --git a/4-Object_Detection/SSD/core/common.py b/4-Object_Detection/SSD/core/common.py
new file mode 100644
index 00000000..88cc8884
--- /dev/null
+++ b/4-Object_Detection/SSD/core/common.py
@@ -0,0 +1,58 @@
+#! /usr/bin/env python
+# coding=utf-8
+#================================================================
+#   Copyright (C) 2019 * Ltd. All rights reserved.
+#
+#   Editor      : VIM
+#   File name   : common.py
+#   Author      : YunYang1994
+#   Created date: 2019-07-11 23:12:53
+#   Description :
+#
+#================================================================
+
+import tensorflow as tf
+
+class BatchNormalization(tf.keras.layers.BatchNormalization):
+    """
+    "Frozen state" and "inference mode" are two separate concepts.
+    `layer.trainable = False` freezes the layer, so it runs in "inference mode"
+    and uses the stored moving `mean` and `var`; neither `gamma` nor `beta`
+    is updated.
+    """
+    def call(self, x, training=False):
+        if not training:
+            training = tf.constant(False)
+        training = tf.logical_and(training, self.trainable)
+        return super().call(x, training)
+
+def convolutional(input_layer, filters_shape, downsample=False, activate=True, bn=True):
+    if downsample:
+        input_layer = tf.keras.layers.ZeroPadding2D(((1, 0), (1, 0)))(input_layer)
+        padding = 'valid'
+        strides = 2
+    else:
+        strides = 1
+        padding = 'same'
+
+    conv = tf.keras.layers.Conv2D(filters=filters_shape[-1], kernel_size=filters_shape[0], strides=strides, padding=padding,
+                                  use_bias=not bn, kernel_regularizer=tf.keras.regularizers.l2(0.0005),
+                                  kernel_initializer=tf.random_normal_initializer(stddev=0.01),
+                                  bias_initializer=tf.constant_initializer(0.))(input_layer)
+
+    if bn: conv = BatchNormalization()(conv)
+    if activate: conv = tf.nn.leaky_relu(conv, alpha=0.1)
+
+    return conv
+
+def residual_block(input_layer, input_channel, filter_num1, filter_num2):
+    short_cut = input_layer
+    conv = convolutional(input_layer, filters_shape=(1, 1, input_channel, filter_num1))
+    conv = convolutional(conv       , filters_shape=(3, 3, filter_num1,   filter_num2))
+
+    residual_output = short_cut + conv
+    return residual_output
+
+def upsample(input_layer):
+    return tf.image.resize(input_layer, (input_layer.shape[1] * 2, input_layer.shape[2] * 2), method='nearest')
+
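The distinction the docstring draws can be checked directly: with `trainable = False`, the `logical_and` forces inference mode even when `training=True` is passed, so the moving statistics stay put. A small demo sketch (variable names are illustrative):

    import numpy as np
    import tensorflow as tf
    from core.common import BatchNormalization

    bn = BatchNormalization()
    x = tf.random.normal([4, 8, 8, 16])

    bn.trainable = False
    _ = bn(x, training=True)   # builds the layer; trainable=False gates training off
    before = [v.numpy().copy() for v in bn.non_trainable_variables]
    _ = bn(x, training=True)
    after = [v.numpy() for v in bn.non_trainable_variables]
    print(all(np.allclose(b, a) for b, a in zip(before, after)))  # True: moving mean/var frozen
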
diff --git a/4-Object_Detection/SSD/core/config.py b/4-Object_Detection/SSD/core/config.py
new file mode 100644
index 00000000..f23265ca
--- /dev/null
+++ b/4-Object_Detection/SSD/core/config.py
@@ -0,0 +1,58 @@
+#! /usr/bin/env python
+# coding=utf-8
+#================================================================
+#   Copyright (C) 2019 * Ltd. All rights reserved.
+#
+#   Editor      : VIM
+#   File name   : config.py
+#   Author      : YunYang1994
+#   Created date: 2019-02-28 13:06:54
+#   Description :
+#
+#================================================================
+
+from easydict import EasyDict as edict
+
+
+__C = edict()
+# Consumers can get config by: from core.config import cfg
+cfg = __C
+
+# YOLO options
+__C.YOLO = edict()
+
+# Set the class name
+__C.YOLO.CLASSES              = "./data/classes/coco.names"
+__C.YOLO.ANCHORS              = "./data/anchors/basline_anchors.txt"
+__C.YOLO.STRIDES              = [8, 16, 32]
+__C.YOLO.ANCHOR_PER_SCALE     = 3
+__C.YOLO.IOU_LOSS_THRESH      = 0.5
+
+# Train options
+__C.TRAIN = edict()
+
+__C.TRAIN.ANNOT_PATH          = "./data/dataset/yymnist_train.txt"
+__C.TRAIN.BATCH_SIZE          = 4
+# __C.TRAIN.INPUT_SIZE        = [320, 352, 384, 416, 448, 480, 512, 544, 576, 608]
+__C.TRAIN.INPUT_SIZE          = [416]
+__C.TRAIN.DATA_AUG            = True
+__C.TRAIN.LR_INIT             = 1e-3
+__C.TRAIN.LR_END              = 1e-6
+__C.TRAIN.WARMUP_EPOCHS       = 2
+__C.TRAIN.EPOCHS              = 30
+
+
+# TEST options
+__C.TEST = edict()
+
+__C.TEST.ANNOT_PATH           = "./data/dataset/yymnist_test.txt"
+__C.TEST.BATCH_SIZE           = 2
+__C.TEST.INPUT_SIZE           = 544
+__C.TEST.DATA_AUG             = False
+__C.TEST.DECTECTED_IMAGE_PATH = "./data/detection/"
+__C.TEST.SCORE_THRESHOLD      = 0.3
+__C.TEST.IOU_THRESHOLD        = 0.45
+
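config.py follows the usual easydict pattern: a single module-level `cfg` object that callers read (or override) attribute-style. Minimal usage:

    from core.config import cfg

    print(cfg.YOLO.STRIDES)      # [8, 16, 32]
    print(cfg.TRAIN.BATCH_SIZE)  # 4
    cfg.TRAIN.BATCH_SIZE = 8     # edict also allows attribute-style overrides
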
diff --git a/4-Object_Detection/SSD/core/dataset.py b/4-Object_Detection/SSD/core/dataset.py
new file mode 100644
index 00000000..f2708d8b
--- /dev/null
+++ b/4-Object_Detection/SSD/core/dataset.py
@@ -0,0 +1,265 @@
+#! /usr/bin/env python
+# coding=utf-8
+#================================================================
+#   Copyright (C) 2019 * Ltd. All rights reserved.
+#
+#   Editor      : VIM
+#   File name   : dataset.py
+#   Author      : YunYang1994
+#   Created date: 2019-03-15 18:05:03
+#   Description :
+#
+#================================================================
+
+import os
+import cv2
+import random
+import numpy as np
+import tensorflow as tf
+import core.utils as utils
+from core.config import cfg
+
+
+class Dataset(object):
+    """implement Dataset here"""
+    def __init__(self, dataset_type):
+        self.annot_path  = cfg.TRAIN.ANNOT_PATH if dataset_type == 'train' else cfg.TEST.ANNOT_PATH
+        self.input_sizes = cfg.TRAIN.INPUT_SIZE if dataset_type == 'train' else cfg.TEST.INPUT_SIZE
+        self.batch_size  = cfg.TRAIN.BATCH_SIZE if dataset_type == 'train' else cfg.TEST.BATCH_SIZE
+        self.data_aug    = cfg.TRAIN.DATA_AUG   if dataset_type == 'train' else cfg.TEST.DATA_AUG
+
+        self.train_input_sizes = cfg.TRAIN.INPUT_SIZE
+        self.strides = np.array(cfg.YOLO.STRIDES)
+        self.classes = utils.read_class_names(cfg.YOLO.CLASSES)
+        self.num_classes = len(self.classes)
+        self.anchors = np.array(utils.get_anchors(cfg.YOLO.ANCHORS))
+        self.anchor_per_scale = cfg.YOLO.ANCHOR_PER_SCALE
+        self.max_bbox_per_scale = 150
+
+        self.annotations = self.load_annotations(dataset_type)
+        self.num_samples = len(self.annotations)
+        self.num_batchs = int(np.ceil(self.num_samples / self.batch_size))
+        self.batch_count = 0
+
+    def load_annotations(self, dataset_type):
+        with open(self.annot_path, 'r') as f:
+            txt = f.readlines()
+            annotations = [line.strip() for line in txt if len(line.strip().split()[1:]) != 0]
+        np.random.shuffle(annotations)
+        return annotations
+
+    def __iter__(self):
+        return self
+
+    def __next__(self):
+
+        with tf.device('/cpu:0'):
+            self.train_input_size = random.choice(self.train_input_sizes)
+            self.train_output_sizes = self.train_input_size // self.strides
+
+            batch_image = np.zeros((self.batch_size, self.train_input_size, self.train_input_size, 3), dtype=np.float32)
+
+            batch_label_sbbox = np.zeros((self.batch_size, self.train_output_sizes[0], self.train_output_sizes[0],
+                                          self.anchor_per_scale, 5 + self.num_classes), dtype=np.float32)
+            batch_label_mbbox = np.zeros((self.batch_size, self.train_output_sizes[1], self.train_output_sizes[1],
+                                          self.anchor_per_scale, 5 + self.num_classes), dtype=np.float32)
+            batch_label_lbbox = np.zeros((self.batch_size, self.train_output_sizes[2], self.train_output_sizes[2],
+                                          self.anchor_per_scale, 5 + self.num_classes), dtype=np.float32)
+
+            batch_sbboxes = np.zeros((self.batch_size, self.max_bbox_per_scale, 4), dtype=np.float32)
+            batch_mbboxes = np.zeros((self.batch_size, self.max_bbox_per_scale, 4), dtype=np.float32)
+            batch_lbboxes = np.zeros((self.batch_size, self.max_bbox_per_scale, 4), dtype=np.float32)
+
+            num = 0
+            if self.batch_count < self.num_batchs:
+                while num < self.batch_size:
+                    index = self.batch_count * self.batch_size + num
+                    if index >= self.num_samples: index -= self.num_samples
+                    annotation = self.annotations[index]
+                    image, bboxes = self.parse_annotation(annotation)
+                    label_sbbox, label_mbbox, label_lbbox, sbboxes, mbboxes, lbboxes = self.preprocess_true_boxes(bboxes)
+
+                    batch_image[num, :, :, :] = image
+                    batch_label_sbbox[num, :, :, :, :] = label_sbbox
+                    batch_label_mbbox[num, :, :, :, :] = label_mbbox
+                    batch_label_lbbox[num, :, :, :, :] = label_lbbox
+                    batch_sbboxes[num, :, :] = sbboxes
+                    batch_mbboxes[num, :, :] = mbboxes
+                    batch_lbboxes[num, :, :] = lbboxes
+                    num += 1
+                self.batch_count += 1
+                batch_smaller_target = batch_label_sbbox, batch_sbboxes
+                batch_medium_target  = batch_label_mbbox, batch_mbboxes
+                batch_larger_target  = batch_label_lbbox, batch_lbboxes
+
+                return batch_image, (batch_smaller_target, batch_medium_target, batch_larger_target)
+            else:
+                self.batch_count = 0
+                np.random.shuffle(self.annotations)
+                raise StopIteration
+
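Because the class implements `__iter__`, `__next__` and `__len__`, it can be driven by a plain `for` loop; `StopIteration` resets the batch counter and reshuffles, so the same object serves every epoch. A usage sketch, assuming the annotation files named in config.py exist on disk:

    from core.dataset import Dataset

    trainset = Dataset('train')
    print(len(trainset))  # number of batches per epoch
    for batch_image, batch_targets in trainset:
        smaller, medium, larger = batch_targets  # (label tensor, raw xywh boxes) per scale
        print(batch_image.shape, smaller[0].shape)
        break
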
" %image_path) + image = cv2.imread(image_path) + bboxes = np.array([list(map(int, box.split(','))) for box in line[1:]]) + + if self.data_aug: + image, bboxes = self.random_horizontal_flip(np.copy(image), np.copy(bboxes)) + image, bboxes = self.random_crop(np.copy(image), np.copy(bboxes)) + image, bboxes = self.random_translate(np.copy(image), np.copy(bboxes)) + + image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) + image, bboxes = utils.image_preporcess(np.copy(image), [self.train_input_size, self.train_input_size], np.copy(bboxes)) + return image, bboxes + + def bbox_iou(self, boxes1, boxes2): + + boxes1 = np.array(boxes1) + boxes2 = np.array(boxes2) + + boxes1_area = boxes1[..., 2] * boxes1[..., 3] + boxes2_area = boxes2[..., 2] * boxes2[..., 3] + + boxes1 = np.concatenate([boxes1[..., :2] - boxes1[..., 2:] * 0.5, + boxes1[..., :2] + boxes1[..., 2:] * 0.5], axis=-1) + boxes2 = np.concatenate([boxes2[..., :2] - boxes2[..., 2:] * 0.5, + boxes2[..., :2] + boxes2[..., 2:] * 0.5], axis=-1) + + left_up = np.maximum(boxes1[..., :2], boxes2[..., :2]) + right_down = np.minimum(boxes1[..., 2:], boxes2[..., 2:]) + + inter_section = np.maximum(right_down - left_up, 0.0) + inter_area = inter_section[..., 0] * inter_section[..., 1] + union_area = boxes1_area + boxes2_area - inter_area + + return inter_area / union_area + + def preprocess_true_boxes(self, bboxes): + + label = [np.zeros((self.train_output_sizes[i], self.train_output_sizes[i], self.anchor_per_scale, + 5 + self.num_classes)) for i in range(3)] + bboxes_xywh = [np.zeros((self.max_bbox_per_scale, 4)) for _ in range(3)] + bbox_count = np.zeros((3,)) + + for bbox in bboxes: + bbox_coor = bbox[:4] + bbox_class_ind = bbox[4] + + onehot = np.zeros(self.num_classes, dtype=np.float) + onehot[bbox_class_ind] = 1.0 + uniform_distribution = np.full(self.num_classes, 1.0 / self.num_classes) + deta = 0.01 + smooth_onehot = onehot * (1 - deta) + deta * uniform_distribution + + bbox_xywh = np.concatenate([(bbox_coor[2:] + bbox_coor[:2]) * 0.5, bbox_coor[2:] - bbox_coor[:2]], axis=-1) + bbox_xywh_scaled = 1.0 * bbox_xywh[np.newaxis, :] / self.strides[:, np.newaxis] + + iou = [] + exist_positive = False + for i in range(3): + anchors_xywh = np.zeros((self.anchor_per_scale, 4)) + anchors_xywh[:, 0:2] = np.floor(bbox_xywh_scaled[i, 0:2]).astype(np.int32) + 0.5 + anchors_xywh[:, 2:4] = self.anchors[i] + + iou_scale = self.bbox_iou(bbox_xywh_scaled[i][np.newaxis, :], anchors_xywh) + iou.append(iou_scale) + iou_mask = iou_scale > 0.3 + + if np.any(iou_mask): + xind, yind = np.floor(bbox_xywh_scaled[i, 0:2]).astype(np.int32) + + label[i][yind, xind, iou_mask, :] = 0 + label[i][yind, xind, iou_mask, 0:4] = bbox_xywh + label[i][yind, xind, iou_mask, 4:5] = 1.0 + label[i][yind, xind, iou_mask, 5:] = smooth_onehot + + bbox_ind = int(bbox_count[i] % self.max_bbox_per_scale) + bboxes_xywh[i][bbox_ind, :4] = bbox_xywh + bbox_count[i] += 1 + + exist_positive = True + + if not exist_positive: + best_anchor_ind = np.argmax(np.array(iou).reshape(-1), axis=-1) + best_detect = int(best_anchor_ind / self.anchor_per_scale) + best_anchor = int(best_anchor_ind % self.anchor_per_scale) + xind, yind = np.floor(bbox_xywh_scaled[best_detect, 0:2]).astype(np.int32) + + label[best_detect][yind, xind, best_anchor, :] = 0 + label[best_detect][yind, xind, best_anchor, 0:4] = bbox_xywh + label[best_detect][yind, xind, best_anchor, 4:5] = 1.0 + label[best_detect][yind, xind, best_anchor, 5:] = smooth_onehot + + bbox_ind = int(bbox_count[best_detect] % self.max_bbox_per_scale) + 
+    def preprocess_true_boxes(self, bboxes):
+
+        label = [np.zeros((self.train_output_sizes[i], self.train_output_sizes[i], self.anchor_per_scale,
+                           5 + self.num_classes)) for i in range(3)]
+        bboxes_xywh = [np.zeros((self.max_bbox_per_scale, 4)) for _ in range(3)]
+        bbox_count = np.zeros((3,))
+
+        for bbox in bboxes:
+            bbox_coor = bbox[:4]
+            bbox_class_ind = bbox[4]
+
+            onehot = np.zeros(self.num_classes, dtype=np.float32)
+            onehot[bbox_class_ind] = 1.0
+            uniform_distribution = np.full(self.num_classes, 1.0 / self.num_classes)
+            delta = 0.01
+            smooth_onehot = onehot * (1 - delta) + delta * uniform_distribution
+
+            bbox_xywh = np.concatenate([(bbox_coor[2:] + bbox_coor[:2]) * 0.5, bbox_coor[2:] - bbox_coor[:2]], axis=-1)
+            bbox_xywh_scaled = 1.0 * bbox_xywh[np.newaxis, :] / self.strides[:, np.newaxis]
+
+            iou = []
+            exist_positive = False
+            for i in range(3):
+                anchors_xywh = np.zeros((self.anchor_per_scale, 4))
+                anchors_xywh[:, 0:2] = np.floor(bbox_xywh_scaled[i, 0:2]).astype(np.int32) + 0.5
+                anchors_xywh[:, 2:4] = self.anchors[i]
+
+                iou_scale = self.bbox_iou(bbox_xywh_scaled[i][np.newaxis, :], anchors_xywh)
+                iou.append(iou_scale)
+                iou_mask = iou_scale > 0.3
+
+                if np.any(iou_mask):
+                    xind, yind = np.floor(bbox_xywh_scaled[i, 0:2]).astype(np.int32)
+
+                    label[i][yind, xind, iou_mask, :] = 0
+                    label[i][yind, xind, iou_mask, 0:4] = bbox_xywh
+                    label[i][yind, xind, iou_mask, 4:5] = 1.0
+                    label[i][yind, xind, iou_mask, 5:] = smooth_onehot
+
+                    bbox_ind = int(bbox_count[i] % self.max_bbox_per_scale)
+                    bboxes_xywh[i][bbox_ind, :4] = bbox_xywh
+                    bbox_count[i] += 1
+
+                    exist_positive = True
+
+            if not exist_positive:
+                best_anchor_ind = np.argmax(np.array(iou).reshape(-1), axis=-1)
+                best_detect = int(best_anchor_ind / self.anchor_per_scale)
+                best_anchor = int(best_anchor_ind % self.anchor_per_scale)
+                xind, yind = np.floor(bbox_xywh_scaled[best_detect, 0:2]).astype(np.int32)
+
+                label[best_detect][yind, xind, best_anchor, :] = 0
+                label[best_detect][yind, xind, best_anchor, 0:4] = bbox_xywh
+                label[best_detect][yind, xind, best_anchor, 4:5] = 1.0
+                label[best_detect][yind, xind, best_anchor, 5:] = smooth_onehot
+
+                bbox_ind = int(bbox_count[best_detect] % self.max_bbox_per_scale)
+                bboxes_xywh[best_detect][bbox_ind, :4] = bbox_xywh
+                bbox_count[best_detect] += 1
+
+        label_sbbox, label_mbbox, label_lbbox = label
+        sbboxes, mbboxes, lbboxes = bboxes_xywh
+        return label_sbbox, label_mbbox, label_lbbox, sbboxes, mbboxes, lbboxes
+
+    def __len__(self):
+        return self.num_batchs
+
+
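The matching in `preprocess_true_boxes` works in grid units: the ground-truth box is converted to (cx, cy, w, h), divided by each stride, and compared with the scale's three anchors placed at the centre of the cell the box falls in (IoU > 0.3 marks a positive; otherwise the single best anchor across all scales is used). The indexing arithmetic in isolation, for a box with pixel centre (100, 100):

    import numpy as np

    strides = np.array([8, 16, 32])
    bbox_coor = np.array([90, 90, 110, 110])  # xmin, ymin, xmax, ymax
    bbox_xywh = np.concatenate([(bbox_coor[2:] + bbox_coor[:2]) * 0.5,
                                bbox_coor[2:] - bbox_coor[:2]])  # [100, 100, 20, 20]
    bbox_xywh_scaled = bbox_xywh[np.newaxis, :] / strides[:, np.newaxis]
    print(np.floor(bbox_xywh_scaled[:, 0:2]).astype(np.int32))
    # [[12 12], [6 6], [3 3]]: the (x, y) cell that owns the box at each scale
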
diff --git a/4-Object_Detection/SSD/core/utils.py b/4-Object_Detection/SSD/core/utils.py
new file mode 100644
index 00000000..4e4b6762
--- /dev/null
+++ b/4-Object_Detection/SSD/core/utils.py
@@ -0,0 +1,240 @@
+#! /usr/bin/env python
+# coding=utf-8
+#================================================================
+#   Copyright (C) 2019 * Ltd. All rights reserved.
+#
+#   Editor      : VIM
+#   File name   : utils.py
+#   Author      : YunYang1994
+#   Created date: 2019-07-12 01:33:38
+#   Description :
+#
+#================================================================
+
+import cv2
+import random
+import colorsys
+import numpy as np
+from core.config import cfg
+
+def load_weights(model, weights_file):
+    """
+    I agree that this code is very ugly, but I don't know any better way of doing it.
+    """
+    wf = open(weights_file, 'rb')
+    major, minor, revision, seen, _ = np.fromfile(wf, dtype=np.int32, count=5)
+
+    j = 0
+    for i in range(75):
+        conv_layer_name = 'conv2d_%d' % i if i > 0 else 'conv2d'
+        bn_layer_name = 'batch_normalization_%d' % j if j > 0 else 'batch_normalization'
+
+        conv_layer = model.get_layer(conv_layer_name)
+        filters = conv_layer.filters
+        k_size = conv_layer.kernel_size[0]
+        in_dim = conv_layer.input_shape[-1]
+
+        if i not in [58, 66, 74]:
+            # darknet weights: [beta, gamma, mean, variance]
+            bn_weights = np.fromfile(wf, dtype=np.float32, count=4 * filters)
+            # tf weights: [gamma, beta, mean, variance]
+            bn_weights = bn_weights.reshape((4, filters))[[1, 0, 2, 3]]
+            bn_layer = model.get_layer(bn_layer_name)
+            j += 1
+        else:
+            conv_bias = np.fromfile(wf, dtype=np.float32, count=filters)
+
+        # darknet shape (out_dim, in_dim, height, width)
+        conv_shape = (filters, in_dim, k_size, k_size)
+        conv_weights = np.fromfile(wf, dtype=np.float32, count=np.prod(conv_shape))
+        # tf shape (height, width, in_dim, out_dim)
+        conv_weights = conv_weights.reshape(conv_shape).transpose([2, 3, 1, 0])
+
+        if i not in [58, 66, 74]:
+            conv_layer.set_weights([conv_weights])
+            bn_layer.set_weights(bn_weights)
+        else:
+            conv_layer.set_weights([conv_weights, conv_bias])
+
+    assert len(wf.read()) == 0, 'failed to read all data'
+    wf.close()
+
+
+def read_class_names(class_file_name):
+    '''loads class name from a file'''
+    names = {}
+    with open(class_file_name, 'r') as data:
+        for ID, name in enumerate(data):
+            names[ID] = name.strip('\n')
+    return names
+
+
+def get_anchors(anchors_path):
+    '''loads the anchors from a file'''
+    with open(anchors_path) as f:
+        anchors = f.readline()
+    anchors = np.array(anchors.split(','), dtype=np.float32)
+    return anchors.reshape(3, 3, 2)
+
+
+def image_preporcess(image, target_size, gt_boxes=None):
+
+    ih, iw = target_size
+    h, w, _ = image.shape
+
+    scale = min(iw/w, ih/h)
+    nw, nh = int(scale * w), int(scale * h)
+    image_resized = cv2.resize(image, (nw, nh))
+
+    image_paded = np.full(shape=[ih, iw, 3], fill_value=128.0)
+    dw, dh = (iw - nw) // 2, (ih - nh) // 2
+    image_paded[dh:nh+dh, dw:nw+dw, :] = image_resized
+    image_paded = image_paded / 255.
+
+    if gt_boxes is None:
+        return image_paded
+
+    else:
+        gt_boxes[:, [0, 2]] = gt_boxes[:, [0, 2]] * scale + dw
+        gt_boxes[:, [1, 3]] = gt_boxes[:, [1, 3]] * scale + dh
+        return image_paded, gt_boxes
+
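`image_preporcess` letterboxes: it scales by min(iw/w, ih/h) so the image fits the target without distortion, centres it on a grey (128) canvas, and maps the boxes with the same scale-plus-offset; `postprocess_boxes` further down inverts exactly this transform. The numbers for a 640x480 source and a 416x416 target:

    import numpy as np

    w, h, iw, ih = 640, 480, 416, 416
    scale = min(iw / w, ih / h)               # 0.65
    nw, nh = int(scale * w), int(scale * h)   # 416, 312
    dw, dh = (iw - nw) // 2, (ih - nh) // 2   # 0, 52: grey bars on top and bottom
    box = np.array([100., 100., 200., 200.])  # xmin, ymin, xmax, ymax
    print(box[[0, 2]] * scale + dw)  # [ 65. 130.]
    print(box[[1, 3]] * scale + dh)  # [117. 182.]
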
+def draw_bbox(image, bboxes, classes=read_class_names(cfg.YOLO.CLASSES), show_label=True):
+    """
+    bboxes: [x_min, y_min, x_max, y_max, probability, cls_id] format coordinates.
+    """
+
+    num_classes = len(classes)
+    image_h, image_w, _ = image.shape
+    hsv_tuples = [(1.0 * x / num_classes, 1., 1.) for x in range(num_classes)]
+    colors = list(map(lambda x: colorsys.hsv_to_rgb(*x), hsv_tuples))
+    colors = list(map(lambda x: (int(x[0] * 255), int(x[1] * 255), int(x[2] * 255)), colors))
+
+    random.seed(0)
+    random.shuffle(colors)
+    random.seed(None)
+
+    for i, bbox in enumerate(bboxes):
+        coor = np.array(bbox[:4], dtype=np.int32)
+        fontScale = 0.5
+        score = bbox[4]
+        class_ind = int(bbox[5])
+        bbox_color = colors[class_ind]
+        bbox_thick = int(0.6 * (image_h + image_w) / 600)
+        c1, c2 = (coor[0], coor[1]), (coor[2], coor[3])
+        cv2.rectangle(image, c1, c2, bbox_color, bbox_thick)
+
+        if show_label:
+            bbox_mess = '%s: %.2f' % (classes[class_ind], score)
+            t_size = cv2.getTextSize(bbox_mess, 0, fontScale, thickness=bbox_thick//2)[0]
+            cv2.rectangle(image, c1, (c1[0] + t_size[0], c1[1] - t_size[1] - 3), bbox_color, -1)  # filled
+
+            cv2.putText(image, bbox_mess, (c1[0], c1[1]-2), cv2.FONT_HERSHEY_SIMPLEX,
+                        fontScale, (0, 0, 0), bbox_thick//2, lineType=cv2.LINE_AA)
+
+    return image
+
+
+def bboxes_iou(boxes1, boxes2):
+
+    boxes1 = np.array(boxes1)
+    boxes2 = np.array(boxes2)
+
+    boxes1_area = (boxes1[..., 2] - boxes1[..., 0]) * (boxes1[..., 3] - boxes1[..., 1])
+    boxes2_area = (boxes2[..., 2] - boxes2[..., 0]) * (boxes2[..., 3] - boxes2[..., 1])
+
+    left_up    = np.maximum(boxes1[..., :2], boxes2[..., :2])
+    right_down = np.minimum(boxes1[..., 2:], boxes2[..., 2:])
+
+    inter_section = np.maximum(right_down - left_up, 0.0)
+    inter_area = inter_section[..., 0] * inter_section[..., 1]
+    union_area = boxes1_area + boxes2_area - inter_area
+    ious = np.maximum(1.0 * inter_area / union_area, np.finfo(np.float32).eps)
+
+    return ious
+
+
+def nms(bboxes, iou_threshold, sigma=0.3, method='nms'):
+    """
+    :param bboxes: (xmin, ymin, xmax, ymax, score, class)
+
+    Note: soft-nms, https://arxiv.org/pdf/1704.04503.pdf
+          https://github.com/bharatsingh430/soft-nms
+    """
+    classes_in_img = list(set(bboxes[:, 5]))
+    best_bboxes = []
+
+    for cls in classes_in_img:
+        cls_mask = (bboxes[:, 5] == cls)
+        cls_bboxes = bboxes[cls_mask]
+
+        while len(cls_bboxes) > 0:
+            max_ind = np.argmax(cls_bboxes[:, 4])
+            best_bbox = cls_bboxes[max_ind]
+            best_bboxes.append(best_bbox)
+            cls_bboxes = np.concatenate([cls_bboxes[: max_ind], cls_bboxes[max_ind + 1:]])
+            iou = bboxes_iou(best_bbox[np.newaxis, :4], cls_bboxes[:, :4])
+            weight = np.ones((len(iou),), dtype=np.float32)
+
+            assert method in ['nms', 'soft-nms']
+
+            if method == 'nms':
+                iou_mask = iou > iou_threshold
+                weight[iou_mask] = 0.0
+
+            if method == 'soft-nms':
+                weight = np.exp(-(1.0 * iou ** 2 / sigma))
+
+            cls_bboxes[:, 4] = cls_bboxes[:, 4] * weight
+            score_mask = cls_bboxes[:, 4] > 0.
+            cls_bboxes = cls_bboxes[score_mask]
+
+    return best_bboxes
+
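Hard NMS zeroes the score of every same-class box whose IoU with the current best exceeds the threshold; soft-NMS only decays scores by exp(-IoU^2 / sigma), so overlapping boxes survive with reduced confidence. A quick check with two heavily overlapping boxes (IoU about 0.68) of one class:

    import numpy as np
    from core.utils import nms

    boxes = np.array([[0., 0., 10., 10., 0.9, 0.],
                      [1., 1., 11., 11., 0.8, 0.]])
    print(len(nms(boxes, iou_threshold=0.45)))                     # 1: second box suppressed
    print(len(nms(boxes, iou_threshold=0.45, method='soft-nms')))  # 2: kept with a decayed score
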
+def postprocess_boxes(pred_bbox, org_img_shape, input_size, score_threshold):
+
+    valid_scale = [0, np.inf]
+    pred_bbox = np.array(pred_bbox)
+
+    pred_xywh = pred_bbox[:, 0:4]
+    pred_conf = pred_bbox[:, 4]
+    pred_prob = pred_bbox[:, 5:]
+
+    # (1) (x, y, w, h) --> (xmin, ymin, xmax, ymax)
+    pred_coor = np.concatenate([pred_xywh[:, :2] - pred_xywh[:, 2:] * 0.5,
+                                pred_xywh[:, :2] + pred_xywh[:, 2:] * 0.5], axis=-1)
+    # (2) (xmin, ymin, xmax, ymax) -> (xmin_org, ymin_org, xmax_org, ymax_org)
+    org_h, org_w = org_img_shape
+    resize_ratio = min(input_size / org_w, input_size / org_h)
+
+    dw = (input_size - resize_ratio * org_w) / 2
+    dh = (input_size - resize_ratio * org_h) / 2
+
+    pred_coor[:, 0::2] = 1.0 * (pred_coor[:, 0::2] - dw) / resize_ratio
+    pred_coor[:, 1::2] = 1.0 * (pred_coor[:, 1::2] - dh) / resize_ratio
+
+    # (3) clip boxes that reach outside the image
+    pred_coor = np.concatenate([np.maximum(pred_coor[:, :2], [0, 0]),
+                                np.minimum(pred_coor[:, 2:], [org_w - 1, org_h - 1])], axis=-1)
+    invalid_mask = np.logical_or((pred_coor[:, 0] > pred_coor[:, 2]), (pred_coor[:, 1] > pred_coor[:, 3]))
+    pred_coor[invalid_mask] = 0
+
+    # (4) discard boxes outside the valid scale range
+    bboxes_scale = np.sqrt(np.multiply.reduce(pred_coor[:, 2:4] - pred_coor[:, 0:2], axis=-1))
+    scale_mask = np.logical_and((valid_scale[0] < bboxes_scale), (bboxes_scale < valid_scale[1]))
+
+    # (5) discard boxes with low scores
+    classes = np.argmax(pred_prob, axis=-1)
+    scores = pred_conf * pred_prob[np.arange(len(pred_coor)), classes]
+    score_mask = scores > score_threshold
+    mask = np.logical_and(scale_mask, score_mask)
+    coors, scores, classes = pred_coor[mask], scores[mask], classes[mask]
+
+    return np.concatenate([coors, scores[:, np.newaxis], classes[:, np.newaxis]], axis=-1)
+
+
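The usual inference tail chains the two functions: undo the letterbox and filter by score with `postprocess_boxes`, then prune per-class duplicates with `nms`. A glue sketch; `pred_bbox` is a placeholder for a model's flattened predictions, rows of (x, y, w, h, conf, per-class probs):

    import numpy as np
    from core.utils import postprocess_boxes, nms

    org_img_shape = (480, 640)               # (h, w) of the source image
    pred_bbox = np.random.rand(100, 5 + 20)  # dummy predictions with 20 classes
    bboxes = postprocess_boxes(pred_bbox, org_img_shape, input_size=416, score_threshold=0.3)
    bboxes = nms(np.array(bboxes), iou_threshold=0.45, method='nms')
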
diff --git a/4-Object_Detection/SSD/ssd.py b/4-Object_Detection/SSD/ssd.py
index a2360580..f049b17c 100644
--- a/4-Object_Detection/SSD/ssd.py
+++ b/4-Object_Detection/SSD/ssd.py
@@ -12,107 +12,48 @@
 #================================================================
 
 import tensorflow as tf
-
+from core.backbone import vgg16
 
 class SSD(tf.keras.Model):
-    def __init__(self, num_class=21):
+    def __init__(self, input_data, num_class=21):
         super(SSD, self).__init__()
         # conv1
-        self.conv1_1 = tf.keras.layers.Conv2D(64, 3, activation='relu', padding='same')
-        self.conv1_2 = tf.keras.layers.Conv2D(64, 3, activation='relu', padding='same')
-        self.pool1 = tf.keras.layers.MaxPooling2D(2, strides=2, padding='same')
-
-        # conv2
-        self.conv2_1 = tf.keras.layers.Conv2D(128, 3, activation='relu', padding='same')
-        self.conv2_2 = tf.keras.layers.Conv2D(128, 3, activation='relu', padding='same')
-        self.pool2 = tf.keras.layers.MaxPooling2D(2, strides=2, padding='same')
-
-        # conv3
-        self.conv3_1 = tf.keras.layers.Conv2D(256, 3, activation='relu', padding='same')
-        self.conv3_2 = tf.keras.layers.Conv2D(256, 3, activation='relu', padding='same')
-        self.conv3_3 = tf.keras.layers.Conv2D(256, 3, activation='relu', padding='same')
-        self.pool3 = tf.keras.layers.MaxPooling2D(2, strides=2, padding='same')
-
-        # conv4
-        self.conv4_1 = tf.keras.layers.Conv2D(512, 3, activation='relu', padding='same')
-        self.conv4_2 = tf.keras.layers.Conv2D(512, 3, activation='relu', padding='same')
-        self.conv4_3 = tf.keras.layers.Conv2D(512, 3, activation='relu', padding='same')
-        self.pool4 = tf.keras.layers.MaxPooling2D(2, strides=2, padding='same')
-
-        # conv5
-        self.conv5_1 = tf.keras.layers.Conv2D(512, 3, activation='relu', padding='same')
-        self.conv5_2 = tf.keras.layers.Conv2D(512, 3, activation='relu', padding='same')
-        self.conv5_3 = tf.keras.layers.Conv2D(512, 3, activation='relu', padding='same')
-        self.pool5 = tf.keras.layers.MaxPooling2D(3, strides=1, padding='same')
-
-        # fc6, => vgg backbone is finished. now they are all SSD blocks
-        self.fc6 = tf.keras.layers.Conv2D(1024, 3, dilation_rate=6, activation='relu', padding='same')
+        conv4, conv = vgg16(input_data)
+        self.conv4 = tf.keras.layers.Conv2D(4 * (num_class + 5), 3, padding='same')(conv4)
+        # fc6, from now on they are all SSD blocks
+        conv = tf.keras.layers.Conv2D(1024, 3, dilation_rate=6, activation='relu', padding='same')(conv)  # fc6
         # fc7
-        self.fc7 = tf.keras.layers.Conv2D(1024, 1, activation='relu', padding='same')
-        # Block 8/9/10/11: 1x1 and 3x3 convolutions strides 2 (except lasts)
+        conv = tf.keras.layers.Conv2D(1024, 1, activation='relu', padding='same')(conv)  # fc7
+        self.conv7 = tf.keras.layers.Conv2D(6 * (num_class + 5), 3, padding='same')(conv)
+        # Block 8/9/10/11: 1x1 and 3x3 convolutions, stride 2 (except the last two blocks)
         # conv8
-        self.conv8_1 = tf.keras.layers.Conv2D(256, 1, activation='relu', padding='same')
-        self.conv8_2 = tf.keras.layers.Conv2D(512, 3, strides=2, activation='relu', padding='same')
+        conv = tf.keras.layers.Conv2D(256, 1, activation='relu', padding='same')(conv)
+        conv = tf.keras.layers.Conv2D(512, 3, strides=2, activation='relu', padding='same')(conv)
+        self.conv8 = tf.keras.layers.Conv2D(6 * (num_class + 5), 3, padding='same')(conv)
         # conv9
-        self.conv9_1 = tf.keras.layers.Conv2D(128, 1, activation='relu', padding='same')
-        self.conv9_2 = tf.keras.layers.Conv2D(256, 3, strides=2, activation='relu', padding='same')
+        conv = tf.keras.layers.Conv2D(128, 1, activation='relu', padding='same')(conv)
+        conv = tf.keras.layers.Conv2D(256, 3, strides=2, activation='relu', padding='same')(conv)
+        self.conv9 = tf.keras.layers.Conv2D(6 * (num_class + 5), 3, padding='same')(conv)
         # conv10
-        self.conv10_1 = tf.keras.layers.Conv2D(128, 1, activation='relu', padding='same')
-        self.conv10_2 = tf.keras.layers.Conv2D(256, 3, activation='relu', padding='valid')
+        conv = tf.keras.layers.Conv2D(128, 1, activation='relu', padding='same')(conv)
+        conv = tf.keras.layers.Conv2D(256, 3, activation='relu', padding='valid')(conv)
+        self.conv10 = tf.keras.layers.Conv2D(4 * (num_class + 5), 3, padding='same')(conv)
         # conv11
-        self.conv11_1 = tf.keras.layers.Conv2D(128, 1, activation='relu', padding='same')
-        self.conv11_2 = tf.keras.layers.Conv2D(256, 3, activation='relu', padding='valid')
-
-
-
-    def call(self, x, training=False):
-        h = self.conv1_1(x)
-        h = self.conv1_2(h)
-        h = self.pool1(h)
-
-        h = self.conv2_1(h)
-        h = self.conv2_2(h)
-        h = self.pool2(h)
-
-        h = self.conv3_1(h)
-        h = self.conv3_2(h)
-        h = self.conv3_3(h)
-        h = self.pool3(h)
-
-        h = self.conv4_1(h)
-        h = self.conv4_2(h)
-        h = self.conv4_3(h)
-        print(h.shape)
-        h = self.pool4(h)
-
-        h = self.conv5_1(h)
-        h = self.conv5_2(h)
-        h = self.conv5_3(h)
-        h = self.pool5(h)
-
-        h = self.fc6(h) # [1,19,19,1024]
-        h = self.fc7(h) # [1,19,19,1024]
-        print(h.shape)
-
-        h = self.conv8_1(h)
-        h = self.conv8_2(h) # [1,10,10, 512]
-        print(h.shape)
-
-        h = self.conv9_1(h)
-        h = self.conv9_2(h) # [1, 5, 5, 256]
-        print(h.shape)
-
-        h = self.conv10_1(h)
-        h = self.conv10_2(h) # [1, 3, 3, 256]
-        print(h.shape)
-
-        h = self.conv11_1(h)
-        h = self.conv11_2(h) # [1, 1, 1, 256]
-        print(h.shape)
-        return h
-
-model = SSD(21)
-x = model(tf.ones(shape=[1,300,300,3]))
+        conv = tf.keras.layers.Conv2D(128, 1, activation='relu', padding='same')(conv)
+        conv = tf.keras.layers.Conv2D(256, 3, activation='relu', padding='valid')(conv)
+        self.conv11 = tf.keras.layers.Conv2D(4 * (num_class + 5), 3, padding='same')(conv)
+
+    def display(self):
+        print(self.conv4.shape)
+        print(self.conv7.shape)
+        print(self.conv8.shape)
+        print(self.conv9.shape)
+        print(self.conv10.shape)
+        print(self.conv11.shape)
+        return self.conv4, self.conv7, self.conv8, self.conv9, self.conv10, self.conv11
+
+model = SSD(tf.ones(shape=[1,300,300,3]), 21)
+model.display()
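For the 300x300 input above, the six heads land on the classic SSD grids, matching the shape comments deleted from the old `call`: 38x38, 19x19, 10x10, 5x5, 3x3 and 1x1 cells, each predicting 4 or 6 boxes times (num_class + 5) channels as written here. A sanity-check sketch, assuming the model builds eagerly exactly as defined:

    import tensorflow as tf
    from ssd import SSD

    model = SSD(tf.ones(shape=[1, 300, 300, 3]), 21)
    conv4, conv7, conv8, conv9, conv10, conv11 = model.display()
    assert conv4.shape  == (1, 38, 38, 4 * 26)  # 4 default boxes per cell, 21 + 5 channels each
    assert conv7.shape  == (1, 19, 19, 6 * 26)
    assert conv11.shape == (1,  1,  1, 4 * 26)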