From f8d9ddb962e3f02a22fd2d30abd6a6724b509024 Mon Sep 17 00:00:00 2001
From: WeikangWang
Date: Sat, 14 Dec 2019 16:23:03 -0500
Subject: [PATCH 1/5] adapt the darknet53 model to a much simpler model to fit
 the easy data for the power system

---
 .../YOLOV3/core/backbone_fnet.py              |  50 +++++
 4-Object_Detection/YOLOV3/core/yolov3_fnet.py | 195 ++++++++++++++++++
 2 files changed, 245 insertions(+)
 create mode 100644 4-Object_Detection/YOLOV3/core/backbone_fnet.py
 create mode 100644 4-Object_Detection/YOLOV3/core/yolov3_fnet.py

diff --git a/4-Object_Detection/YOLOV3/core/backbone_fnet.py b/4-Object_Detection/YOLOV3/core/backbone_fnet.py
new file mode 100644
index 00000000..c56d4ce7
--- /dev/null
+++ b/4-Object_Detection/YOLOV3/core/backbone_fnet.py
@@ -0,0 +1,50 @@
+#! /usr/bin/env python
+# coding=utf-8
+#================================================================
+# Copyright (C) 2019 * Ltd. All rights reserved.
+#
+# Editor : VIM
+# File name : backbone_fnet.py
+# Author : YunYang1994
+# Created date: 2019-07-11 23:37:51
+# Description :
+#
+#================================================================
+
+import tensorflow as tf
+import core.common as common
+
+
+def grid_eye_net_18(input_data):
+
+    input_data = common.convolutional(input_data, (3, 3, 3, 32))
+    input_data = common.convolutional(input_data, (3, 3, 32, 64), downsample=True)
+
+    for i in range(1):
+        input_data = common.residual_block(input_data, 64, 32, 64)
+
+    input_data = common.convolutional(input_data, (3, 3, 64, 128), downsample=True)
+
+    for i in range(1):
+        input_data = common.residual_block(input_data, 128, 64, 128)
+
+    input_data = common.convolutional(input_data, (3, 3, 128, 256), downsample=True)
+
+    for i in range(2):
+        input_data = common.residual_block(input_data, 256, 128, 256)
+
+    route_1 = input_data
+    input_data = common.convolutional(input_data, (3, 3, 256, 512), downsample=True)
+
+    for i in range(2):
+        input_data = common.residual_block(input_data, 512, 256, 512)
+
+    route_2 = input_data
+    input_data = common.convolutional(input_data, (3, 3, 512, 512), downsample=True)
+
+    for i in range(1):
+        input_data = common.residual_block(input_data, 512, 256, 512)
+
+    return route_1, route_2, input_data
+
+
diff --git a/4-Object_Detection/YOLOV3/core/yolov3_fnet.py b/4-Object_Detection/YOLOV3/core/yolov3_fnet.py
new file mode 100644
index 00000000..3066cb0e
--- /dev/null
+++ b/4-Object_Detection/YOLOV3/core/yolov3_fnet.py
@@ -0,0 +1,195 @@
+#! /usr/bin/env python
+# coding=utf-8
+#================================================================
+# Copyright (C) 2019 * Ltd. All rights reserved.
+# +# Editor : VIM +# File name : yolov3.py +# Author : YunYang1994 +# Created date: 2019-07-12 13:47:10 +# Description : +# +#================================================================ + +import numpy as np +import tensorflow as tf +import core.utils as utils +import core.common as common +import core.backbone as backbone +from core.config import cfg + + +NUM_CLASS = len(utils.read_class_names(cfg.YOLO.CLASSES)) +ANCHORS = utils.get_anchors(cfg.YOLO.ANCHORS) +STRIDES = np.array(cfg.YOLO.STRIDES) +IOU_LOSS_THRESH = cfg.YOLO.IOU_LOSS_THRESH + +def YOLOv3(input_layer): + route_1, route_2, conv = backbone.darknet53(input_layer) + + conv = common.convolutional(conv, (1, 1, 512, 256)) + conv = common.convolutional(conv, (3, 3, 256, 512)) + conv = common.convolutional(conv, (1, 1, 512, 256)) + + conv_lobj_branch = common.convolutional(conv, (3, 3, 256, 512)) + conv_lbbox = common.convolutional(conv_lobj_branch, (1, 1, 512, 3*(NUM_CLASS + 5)), activate=False, bn=False) + + conv = common.convolutional(conv, (1, 1, 256, 128)) + conv = common.upsample(conv) + + conv = tf.concat([conv, route_2], axis=-1) + + conv = common.convolutional(conv, (1, 1, 768, 256))#512+256 + conv = common.convolutional(conv, (3, 3, 256, 512)) + conv = common.convolutional(conv, (1, 1, 512, 256)) + + conv_mobj_branch = common.convolutional(conv, (3, 3, 256, 512)) + conv_mbbox = common.convolutional(conv_mobj_branch, (1, 1, 512, 3*(NUM_CLASS + 5)), activate=False, bn=False) + + conv = common.convolutional(conv, (1, 1, 256, 128)) + conv = common.upsample(conv) + + conv = tf.concat([conv, route_1], axis=-1) + + conv = common.convolutional(conv, (1, 1, 512, 128))#256+256 + conv = common.convolutional(conv, (3, 3, 128, 256)) + conv = common.convolutional(conv, (1, 1, 256, 128)) + + conv_sobj_branch = common.convolutional(conv, (3, 3, 128, 256)) + conv_sbbox = common.convolutional(conv_sobj_branch, (1, 1, 256, 3*(NUM_CLASS +5)), activate=False, bn=False) + + return [conv_sbbox, conv_mbbox, conv_lbbox] + +def decode(conv_output, i=0): + """ + return tensor of shape [batch_size, output_size, output_size, anchor_per_scale, 5 + num_classes] + contains (x, y, w, h, score, probability) + """ + + conv_shape = tf.shape(conv_output) + batch_size = conv_shape[0] + output_size = conv_shape[1] + + conv_output = tf.reshape(conv_output, (batch_size, output_size, output_size, 3, 5 + NUM_CLASS)) + + conv_raw_dxdy = conv_output[:, :, :, :, 0:2] + conv_raw_dwdh = conv_output[:, :, :, :, 2:4] + conv_raw_conf = conv_output[:, :, :, :, 4:5] + conv_raw_prob = conv_output[:, :, :, :, 5: ] + + y = tf.tile(tf.range(output_size, dtype=tf.int32)[:, tf.newaxis], [1, output_size]) + x = tf.tile(tf.range(output_size, dtype=tf.int32)[tf.newaxis, :], [output_size, 1]) + + xy_grid = tf.concat([x[:, :, tf.newaxis], y[:, :, tf.newaxis]], axis=-1) + xy_grid = tf.tile(xy_grid[tf.newaxis, :, :, tf.newaxis, :], [batch_size, 1, 1, 3, 1]) + xy_grid = tf.cast(xy_grid, tf.float32) + + pred_xy = (tf.sigmoid(conv_raw_dxdy) + xy_grid) * STRIDES[i] + pred_wh = (tf.exp(conv_raw_dwdh) * ANCHORS[i]) * STRIDES[i] + pred_xywh = tf.concat([pred_xy, pred_wh], axis=-1) + + pred_conf = tf.sigmoid(conv_raw_conf) + pred_prob = tf.sigmoid(conv_raw_prob) + + return tf.concat([pred_xywh, pred_conf, pred_prob], axis=-1) + +def bbox_iou(boxes1, boxes2): + + boxes1_area = boxes1[..., 2] * boxes1[..., 3] + boxes2_area = boxes2[..., 2] * boxes2[..., 3] + + boxes1 = tf.concat([boxes1[..., :2] - boxes1[..., 2:] * 0.5, + boxes1[..., :2] + boxes1[..., 2:] * 0.5], axis=-1) + boxes2 = 
tf.concat([boxes2[..., :2] - boxes2[..., 2:] * 0.5, + boxes2[..., :2] + boxes2[..., 2:] * 0.5], axis=-1) + + left_up = tf.maximum(boxes1[..., :2], boxes2[..., :2]) + right_down = tf.minimum(boxes1[..., 2:], boxes2[..., 2:]) + + inter_section = tf.maximum(right_down - left_up, 0.0) + inter_area = inter_section[..., 0] * inter_section[..., 1] + union_area = boxes1_area + boxes2_area - inter_area + + return 1.0 * inter_area / union_area + +def bbox_giou(boxes1, boxes2): + + boxes1 = tf.concat([boxes1[..., :2] - boxes1[..., 2:] * 0.5, + boxes1[..., :2] + boxes1[..., 2:] * 0.5], axis=-1) + boxes2 = tf.concat([boxes2[..., :2] - boxes2[..., 2:] * 0.5, + boxes2[..., :2] + boxes2[..., 2:] * 0.5], axis=-1) + + boxes1 = tf.concat([tf.minimum(boxes1[..., :2], boxes1[..., 2:]), + tf.maximum(boxes1[..., :2], boxes1[..., 2:])], axis=-1) + boxes2 = tf.concat([tf.minimum(boxes2[..., :2], boxes2[..., 2:]), + tf.maximum(boxes2[..., :2], boxes2[..., 2:])], axis=-1) + + boxes1_area = (boxes1[..., 2] - boxes1[..., 0]) * (boxes1[..., 3] - boxes1[..., 1]) + boxes2_area = (boxes2[..., 2] - boxes2[..., 0]) * (boxes2[..., 3] - boxes2[..., 1]) + + left_up = tf.maximum(boxes1[..., :2], boxes2[..., :2]) + right_down = tf.minimum(boxes1[..., 2:], boxes2[..., 2:]) + + inter_section = tf.maximum(right_down - left_up, 0.0) + inter_area = inter_section[..., 0] * inter_section[..., 1] + union_area = boxes1_area + boxes2_area - inter_area + iou = inter_area / union_area + + enclose_left_up = tf.minimum(boxes1[..., :2], boxes2[..., :2]) + enclose_right_down = tf.maximum(boxes1[..., 2:], boxes2[..., 2:]) + enclose = tf.maximum(enclose_right_down - enclose_left_up, 0.0) + enclose_area = enclose[..., 0] * enclose[..., 1] + giou = iou - 1.0 * (enclose_area - union_area) / enclose_area + + return giou + + +def compute_loss(pred, conv, label, bboxes, i=0): + + conv_shape = tf.shape(conv) + batch_size = conv_shape[0] + output_size = conv_shape[1] + input_size = STRIDES[i] * output_size + conv = tf.reshape(conv, (batch_size, output_size, output_size, 3, 5 + NUM_CLASS)) + + conv_raw_conf = conv[:, :, :, :, 4:5] + conv_raw_prob = conv[:, :, :, :, 5:] + + pred_xywh = pred[:, :, :, :, 0:4] + pred_conf = pred[:, :, :, :, 4:5] + + label_xywh = label[:, :, :, :, 0:4] + respond_bbox = label[:, :, :, :, 4:5] + label_prob = label[:, :, :, :, 5:] + + giou = tf.expand_dims(bbox_giou(pred_xywh, label_xywh), axis=-1) + input_size = tf.cast(input_size, tf.float32) + + bbox_loss_scale = 2.0 - 1.0 * label_xywh[:, :, :, :, 2:3] * label_xywh[:, :, :, :, 3:4] / (input_size ** 2) + giou_loss = respond_bbox * bbox_loss_scale * (1- giou) + + iou = bbox_iou(pred_xywh[:, :, :, :, np.newaxis, :], bboxes[:, np.newaxis, np.newaxis, np.newaxis, :, :]) + max_iou = tf.expand_dims(tf.reduce_max(iou, axis=-1), axis=-1) + + respond_bgd = (1.0 - respond_bbox) * tf.cast( max_iou < IOU_LOSS_THRESH, tf.float32 ) + + conf_focal = tf.pow(respond_bbox - pred_conf, 2) + + conf_loss = conf_focal * ( + respond_bbox * tf.nn.sigmoid_cross_entropy_with_logits(labels=respond_bbox, logits=conv_raw_conf) + + + respond_bgd * tf.nn.sigmoid_cross_entropy_with_logits(labels=respond_bbox, logits=conv_raw_conf) + ) + + prob_loss = respond_bbox * tf.nn.sigmoid_cross_entropy_with_logits(labels=label_prob, logits=conv_raw_prob) + + giou_loss = tf.reduce_mean(tf.reduce_sum(giou_loss, axis=[1,2,3,4])) + conf_loss = tf.reduce_mean(tf.reduce_sum(conf_loss, axis=[1,2,3,4])) + prob_loss = tf.reduce_mean(tf.reduce_sum(prob_loss, axis=[1,2,3,4])) + + return giou_loss, conf_loss, prob_loss + + + + + 
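Before moving on, a minimal usage sketch for the two new files (not part of the patch itself). It assumes the class-name and anchor paths in core/config.py resolve, and uses only the YOLOv3() and decode() functions defined above; the 416x416 input and [8, 16, 32] strides follow the repo's YOLOv3 defaults. Note that the backbone now tops out at 512 channels, which keeps the residual shortcut consistent and lets the detection head stay small.

    import tensorflow as tf
    import core.yolov3_fnet as yolo

    # three detection scales come back smallest-first: conv_sbbox, conv_mbbox, conv_lbbox
    input_layer  = tf.keras.layers.Input([416, 416, 3])
    feature_maps = yolo.YOLOv3(input_layer)

    # decode() maps raw logits to (x, y, w, h, score, class probabilities) per scale
    outputs = [yolo.decode(fm, i) for i, fm in enumerate(feature_maps)]
    model   = tf.keras.Model(input_layer, outputs)
    model.summary()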
From 688cf9c6329ac06d24450519c9d3728cae2fee82 Mon Sep 17 00:00:00 2001
From: WeikangWang
Date: Sat, 14 Dec 2019 20:04:32 -0500
Subject: [PATCH 2/5] extract feature maps per the paper

---
 4-Object_Detection/SSD/ssd.py | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/4-Object_Detection/SSD/ssd.py b/4-Object_Detection/SSD/ssd.py
index a2360580..48b8ec0b 100644
--- a/4-Object_Detection/SSD/ssd.py
+++ b/4-Object_Detection/SSD/ssd.py
@@ -82,6 +82,7 @@ def call(self, x, training=False):
         h = self.conv4_1(h)
         h = self.conv4_2(h)
         h = self.conv4_3(h)
+        conv4 = h
         print(h.shape)
         h = self.pool4(h)
 
@@ -92,27 +93,32 @@
         h = self.fc6(h)  # [1,19,19,1024]
         h = self.fc7(h)  # [1,19,19,1024]
+        conv7 = h
         print(h.shape)
 
         h = self.conv8_1(h)
         h = self.conv8_2(h)  # [1,10,10, 512]
+        conv8 = h
         print(h.shape)
 
         h = self.conv9_1(h)
         h = self.conv9_2(h)  # [1, 5, 5, 256]
+        conv9 = h
         print(h.shape)
 
         h = self.conv10_1(h)
         h = self.conv10_2(h)  # [1, 3, 3, 256]
+        conv10 = h
         print(h.shape)
 
         h = self.conv11_1(h)
         h = self.conv11_2(h)  # [1, 1, 1, 256]
+        conv11 = h
         print(h.shape)
-        return h
+        return conv4, conv7, conv8, conv9, conv10, conv11
 
 model = SSD(21)
-x = model(tf.ones(shape=[1,300,300,3]))
+conv4, conv7, conv8, conv9, conv10, conv11 = model(tf.ones(shape=[1,300,300,3]))

From a6af75aa4d66e61187463657e27bb6430ce49edb Mon Sep 17 00:00:00 2001
From: WeikangWang
Date: Sat, 14 Dec 2019 20:08:05 -0500
Subject: [PATCH 3/5] Revert "adapt the darknet53 model to a much simpler
 model to fit the easy data for the power system"

This reverts commit f8d9ddb962e3f02a22fd2d30abd6a6724b509024.
---
 .../YOLOV3/core/backbone_fnet.py              |  50 -----
 4-Object_Detection/YOLOV3/core/yolov3_fnet.py | 195 ------------------
 2 files changed, 245 deletions(-)
 delete mode 100644 4-Object_Detection/YOLOV3/core/backbone_fnet.py
 delete mode 100644 4-Object_Detection/YOLOV3/core/yolov3_fnet.py

diff --git a/4-Object_Detection/YOLOV3/core/backbone_fnet.py b/4-Object_Detection/YOLOV3/core/backbone_fnet.py
deleted file mode 100644
index c56d4ce7..00000000
--- a/4-Object_Detection/YOLOV3/core/backbone_fnet.py
+++ /dev/null
@@ -1,50 +0,0 @@
-#! /usr/bin/env python
-# coding=utf-8
-#================================================================
-# Copyright (C) 2019 * Ltd. All rights reserved.
-#
-# Editor : VIM
-# File name : backbone_fnet.py
-# Author : YunYang1994
-# Created date: 2019-07-11 23:37:51
-# Description :
-#
-#================================================================
-
-import tensorflow as tf
-import core.common as common
-
-
-def grid_eye_net_18(input_data):
-
-    input_data = common.convolutional(input_data, (3, 3, 3, 32))
-    input_data = common.convolutional(input_data, (3, 3, 32, 64), downsample=True)
-
-    for i in range(1):
-        input_data = common.residual_block(input_data, 64, 32, 64)
-
-    input_data = common.convolutional(input_data, (3, 3, 64, 128), downsample=True)
-
-    for i in range(1):
-        input_data = common.residual_block(input_data, 128, 64, 128)
-
-    input_data = common.convolutional(input_data, (3, 3, 128, 256), downsample=True)
-
-    for i in range(2):
-        input_data = common.residual_block(input_data, 256, 128, 256)
-
-    route_1 = input_data
-    input_data = common.convolutional(input_data, (3, 3, 256, 512), downsample=True)
-
-    for i in range(2):
-        input_data = common.residual_block(input_data, 512, 256, 512)
-
-    route_2 = input_data
-    input_data = common.convolutional(input_data, (3, 3, 512, 512), downsample=True)
-
-    for i in range(1):
-        input_data = common.residual_block(input_data, 512, 256, 512)
-
-    return route_1, route_2, input_data
-
-
diff --git a/4-Object_Detection/YOLOV3/core/yolov3_fnet.py b/4-Object_Detection/YOLOV3/core/yolov3_fnet.py
deleted file mode 100644
index 3066cb0e..00000000
--- a/4-Object_Detection/YOLOV3/core/yolov3_fnet.py
+++ /dev/null
@@ -1,195 +0,0 @@
-#! /usr/bin/env python
-# coding=utf-8
-#================================================================
-# Copyright (C) 2019 * Ltd. All rights reserved.
-#
-# Editor : VIM
-# File name : yolov3.py
-# Author : YunYang1994
-# Created date: 2019-07-12 13:47:10
-# Description :
-#
-#================================================================
-
-import numpy as np
-import tensorflow as tf
-import core.utils as utils
-import core.common as common
-import core.backbone as backbone
-from core.config import cfg
-
-
-NUM_CLASS = len(utils.read_class_names(cfg.YOLO.CLASSES))
-ANCHORS = utils.get_anchors(cfg.YOLO.ANCHORS)
-STRIDES = np.array(cfg.YOLO.STRIDES)
-IOU_LOSS_THRESH = cfg.YOLO.IOU_LOSS_THRESH
-
-def YOLOv3(input_layer):
-    route_1, route_2, conv = backbone.darknet53(input_layer)
-
-    conv = common.convolutional(conv, (1, 1, 512, 256))
-    conv = common.convolutional(conv, (3, 3, 256, 512))
-    conv = common.convolutional(conv, (1, 1, 512, 256))
-
-    conv_lobj_branch = common.convolutional(conv, (3, 3, 256, 512))
-    conv_lbbox = common.convolutional(conv_lobj_branch, (1, 1, 512, 3*(NUM_CLASS + 5)), activate=False, bn=False)
-
-    conv = common.convolutional(conv, (1, 1, 256, 128))
-    conv = common.upsample(conv)
-
-    conv = tf.concat([conv, route_2], axis=-1)
-
-    conv = common.convolutional(conv, (1, 1, 768, 256))#512+256
-    conv = common.convolutional(conv, (3, 3, 256, 512))
-    conv = common.convolutional(conv, (1, 1, 512, 256))
-
-    conv_mobj_branch = common.convolutional(conv, (3, 3, 256, 512))
-    conv_mbbox = common.convolutional(conv_mobj_branch, (1, 1, 512, 3*(NUM_CLASS + 5)), activate=False, bn=False)
-
-    conv = common.convolutional(conv, (1, 1, 256, 128))
-    conv = common.upsample(conv)
-
-    conv = tf.concat([conv, route_1], axis=-1)
-
-    conv = common.convolutional(conv, (1, 1, 512, 128))#256+256
-    conv = common.convolutional(conv, (3, 3, 128, 256))
-    conv = common.convolutional(conv, (1, 1, 256, 128))
-
-    conv_sobj_branch = common.convolutional(conv, (3, 3, 128, 256))
-    conv_sbbox = 
common.convolutional(conv_sobj_branch, (1, 1, 256, 3*(NUM_CLASS +5)), activate=False, bn=False) - - return [conv_sbbox, conv_mbbox, conv_lbbox] - -def decode(conv_output, i=0): - """ - return tensor of shape [batch_size, output_size, output_size, anchor_per_scale, 5 + num_classes] - contains (x, y, w, h, score, probability) - """ - - conv_shape = tf.shape(conv_output) - batch_size = conv_shape[0] - output_size = conv_shape[1] - - conv_output = tf.reshape(conv_output, (batch_size, output_size, output_size, 3, 5 + NUM_CLASS)) - - conv_raw_dxdy = conv_output[:, :, :, :, 0:2] - conv_raw_dwdh = conv_output[:, :, :, :, 2:4] - conv_raw_conf = conv_output[:, :, :, :, 4:5] - conv_raw_prob = conv_output[:, :, :, :, 5: ] - - y = tf.tile(tf.range(output_size, dtype=tf.int32)[:, tf.newaxis], [1, output_size]) - x = tf.tile(tf.range(output_size, dtype=tf.int32)[tf.newaxis, :], [output_size, 1]) - - xy_grid = tf.concat([x[:, :, tf.newaxis], y[:, :, tf.newaxis]], axis=-1) - xy_grid = tf.tile(xy_grid[tf.newaxis, :, :, tf.newaxis, :], [batch_size, 1, 1, 3, 1]) - xy_grid = tf.cast(xy_grid, tf.float32) - - pred_xy = (tf.sigmoid(conv_raw_dxdy) + xy_grid) * STRIDES[i] - pred_wh = (tf.exp(conv_raw_dwdh) * ANCHORS[i]) * STRIDES[i] - pred_xywh = tf.concat([pred_xy, pred_wh], axis=-1) - - pred_conf = tf.sigmoid(conv_raw_conf) - pred_prob = tf.sigmoid(conv_raw_prob) - - return tf.concat([pred_xywh, pred_conf, pred_prob], axis=-1) - -def bbox_iou(boxes1, boxes2): - - boxes1_area = boxes1[..., 2] * boxes1[..., 3] - boxes2_area = boxes2[..., 2] * boxes2[..., 3] - - boxes1 = tf.concat([boxes1[..., :2] - boxes1[..., 2:] * 0.5, - boxes1[..., :2] + boxes1[..., 2:] * 0.5], axis=-1) - boxes2 = tf.concat([boxes2[..., :2] - boxes2[..., 2:] * 0.5, - boxes2[..., :2] + boxes2[..., 2:] * 0.5], axis=-1) - - left_up = tf.maximum(boxes1[..., :2], boxes2[..., :2]) - right_down = tf.minimum(boxes1[..., 2:], boxes2[..., 2:]) - - inter_section = tf.maximum(right_down - left_up, 0.0) - inter_area = inter_section[..., 0] * inter_section[..., 1] - union_area = boxes1_area + boxes2_area - inter_area - - return 1.0 * inter_area / union_area - -def bbox_giou(boxes1, boxes2): - - boxes1 = tf.concat([boxes1[..., :2] - boxes1[..., 2:] * 0.5, - boxes1[..., :2] + boxes1[..., 2:] * 0.5], axis=-1) - boxes2 = tf.concat([boxes2[..., :2] - boxes2[..., 2:] * 0.5, - boxes2[..., :2] + boxes2[..., 2:] * 0.5], axis=-1) - - boxes1 = tf.concat([tf.minimum(boxes1[..., :2], boxes1[..., 2:]), - tf.maximum(boxes1[..., :2], boxes1[..., 2:])], axis=-1) - boxes2 = tf.concat([tf.minimum(boxes2[..., :2], boxes2[..., 2:]), - tf.maximum(boxes2[..., :2], boxes2[..., 2:])], axis=-1) - - boxes1_area = (boxes1[..., 2] - boxes1[..., 0]) * (boxes1[..., 3] - boxes1[..., 1]) - boxes2_area = (boxes2[..., 2] - boxes2[..., 0]) * (boxes2[..., 3] - boxes2[..., 1]) - - left_up = tf.maximum(boxes1[..., :2], boxes2[..., :2]) - right_down = tf.minimum(boxes1[..., 2:], boxes2[..., 2:]) - - inter_section = tf.maximum(right_down - left_up, 0.0) - inter_area = inter_section[..., 0] * inter_section[..., 1] - union_area = boxes1_area + boxes2_area - inter_area - iou = inter_area / union_area - - enclose_left_up = tf.minimum(boxes1[..., :2], boxes2[..., :2]) - enclose_right_down = tf.maximum(boxes1[..., 2:], boxes2[..., 2:]) - enclose = tf.maximum(enclose_right_down - enclose_left_up, 0.0) - enclose_area = enclose[..., 0] * enclose[..., 1] - giou = iou - 1.0 * (enclose_area - union_area) / enclose_area - - return giou - - -def compute_loss(pred, conv, label, bboxes, i=0): - - conv_shape = 
tf.shape(conv)
-    batch_size = conv_shape[0]
-    output_size = conv_shape[1]
-    input_size = STRIDES[i] * output_size
-    conv = tf.reshape(conv, (batch_size, output_size, output_size, 3, 5 + NUM_CLASS))
-
-    conv_raw_conf = conv[:, :, :, :, 4:5]
-    conv_raw_prob = conv[:, :, :, :, 5:]
-
-    pred_xywh = pred[:, :, :, :, 0:4]
-    pred_conf = pred[:, :, :, :, 4:5]
-
-    label_xywh = label[:, :, :, :, 0:4]
-    respond_bbox = label[:, :, :, :, 4:5]
-    label_prob = label[:, :, :, :, 5:]
-
-    giou = tf.expand_dims(bbox_giou(pred_xywh, label_xywh), axis=-1)
-    input_size = tf.cast(input_size, tf.float32)
-
-    bbox_loss_scale = 2.0 - 1.0 * label_xywh[:, :, :, :, 2:3] * label_xywh[:, :, :, :, 3:4] / (input_size ** 2)
-    giou_loss = respond_bbox * bbox_loss_scale * (1- giou)
-
-    iou = bbox_iou(pred_xywh[:, :, :, :, np.newaxis, :], bboxes[:, np.newaxis, np.newaxis, np.newaxis, :, :])
-    max_iou = tf.expand_dims(tf.reduce_max(iou, axis=-1), axis=-1)
-
-    respond_bgd = (1.0 - respond_bbox) * tf.cast( max_iou < IOU_LOSS_THRESH, tf.float32 )
-
-    conf_focal = tf.pow(respond_bbox - pred_conf, 2)
-
-    conf_loss = conf_focal * (
-            respond_bbox * tf.nn.sigmoid_cross_entropy_with_logits(labels=respond_bbox, logits=conv_raw_conf)
-            +
-            respond_bgd * tf.nn.sigmoid_cross_entropy_with_logits(labels=respond_bbox, logits=conv_raw_conf)
-    )
-
-    prob_loss = respond_bbox * tf.nn.sigmoid_cross_entropy_with_logits(labels=label_prob, logits=conv_raw_prob)
-
-    giou_loss = tf.reduce_mean(tf.reduce_sum(giou_loss, axis=[1,2,3,4]))
-    conf_loss = tf.reduce_mean(tf.reduce_sum(conf_loss, axis=[1,2,3,4]))
-    prob_loss = tf.reduce_mean(tf.reduce_sum(prob_loss, axis=[1,2,3,4]))
-
-    return giou_loss, conf_loss, prob_loss
-
-
-
-
-

From c250f649590fa31b885e82b5cf64bf97a217e097 Mon Sep 17 00:00:00 2001
From: WeikangWang
Date: Sat, 14 Dec 2019 21:16:55 -0500
Subject: [PATCH 4/5] 1. organized the SSD class 2. extracted the vgg16 block
 3. put vgg16 into a backbone per the style from YOLO_v3 4. added some other
 modules for future use

---
 4-Object_Detection/SSD/core/backbone.py |  50 +++++
 4-Object_Detection/SSD/core/config.py   |  58 ++++++
 4-Object_Detection/SSD/core/dataset.py  | 265 ++++++++++++++++++++++++
 4-Object_Detection/SSD/core/utils.py    | 240 +++++++++++++++++++++
 4-Object_Detection/SSD/ssd.py           | 132 +++---------
 5 files changed, 647 insertions(+), 98 deletions(-)
 create mode 100644 4-Object_Detection/SSD/core/backbone.py
 create mode 100644 4-Object_Detection/SSD/core/config.py
 create mode 100644 4-Object_Detection/SSD/core/dataset.py
 create mode 100644 4-Object_Detection/SSD/core/utils.py

diff --git a/4-Object_Detection/SSD/core/backbone.py b/4-Object_Detection/SSD/core/backbone.py
new file mode 100644
index 00000000..5115df8a
--- /dev/null
+++ b/4-Object_Detection/SSD/core/backbone.py
@@ -0,0 +1,50 @@
+#! /usr/bin/env python
+# coding=utf-8
+#================================================================
+# Copyright (C) 2019 * Ltd. All rights reserved.
+#
+# Editor : VIM
+# File name : backbone.py
+# Author : YunYang1994
+# Created date: 2019-07-11 23:37:51
+# Description :
+#
+#================================================================
+
+import tensorflow as tf
+
+def vgg16(input_data):
+
+#======================================VGG16_start===================================================
+    # conv1
+    conv = tf.keras.layers.Conv2D(64, 3, activation='relu', padding='same')(input_data) #conv1_1
+    conv = tf.keras.layers.Conv2D(64, 3, activation='relu', padding='same')(conv)       #conv1_2
+    conv = tf.keras.layers.MaxPooling2D(2, strides=2, padding='same')(conv)             #pool1
+
+    # conv2
+    conv = tf.keras.layers.Conv2D(128, 3, activation='relu', padding='same')(conv)#conv2_1
+    conv = tf.keras.layers.Conv2D(128, 3, activation='relu', padding='same')(conv)#conv2_2
+    conv = tf.keras.layers.MaxPooling2D(2, strides=2, padding='same')(conv)#pool2
+
+    # conv3
+    conv = tf.keras.layers.Conv2D(256, 3, activation='relu', padding='same')(conv)#conv3_1
+    conv = tf.keras.layers.Conv2D(256, 3, activation='relu', padding='same')(conv)#conv3_2
+    conv = tf.keras.layers.Conv2D(256, 3, activation='relu', padding='same')(conv)#conv3_3
+    conv = tf.keras.layers.MaxPooling2D(2, strides=2, padding='same')(conv)#pool3
+
+    # conv4
+    conv = tf.keras.layers.Conv2D(512, 3, activation='relu', padding='same')(conv)
+    conv = tf.keras.layers.Conv2D(512, 3, activation='relu', padding='same')(conv)
+    conv = tf.keras.layers.Conv2D(512, 3, activation='relu', padding='same')(conv)
+    conv4 = conv
+    conv = tf.keras.layers.MaxPooling2D(2, strides=2, padding='same')(conv)
+
+    # conv5
+    conv = tf.keras.layers.Conv2D(512, 3, activation='relu', padding='same')(conv)
+    conv = tf.keras.layers.Conv2D(512, 3, activation='relu', padding='same')(conv)
+    conv = tf.keras.layers.Conv2D(512, 3, activation='relu', padding='same')(conv)
+    conv = tf.keras.layers.MaxPooling2D(3, strides=1, padding='same')(conv)
+
+    return conv4, conv
+
+
diff --git a/4-Object_Detection/SSD/core/config.py b/4-Object_Detection/SSD/core/config.py
new file mode 100644
index 00000000..f23265ca
--- /dev/null
+++ b/4-Object_Detection/SSD/core/config.py
@@ -0,0 +1,58 @@
+#! /usr/bin/env python
+# coding=utf-8
+#================================================================
+# Copyright (C) 2019 * Ltd. All rights reserved.
+# +# Editor : VIM +# File name : config.py +# Author : YunYang1994 +# Created date: 2019-02-28 13:06:54 +# Description : +# +#================================================================ + +from easydict import EasyDict as edict + + +__C = edict() +# Consumers can get config by: from config import cfg + +cfg = __C + +# YOLO options +__C.YOLO = edict() + +# Set the class name +__C.YOLO.CLASSES = "./data/classes/coco.names" +__C.YOLO.ANCHORS = "./data/anchors/basline_anchors.txt" +__C.YOLO.STRIDES = [8, 16, 32] +__C.YOLO.ANCHOR_PER_SCALE = 3 +__C.YOLO.IOU_LOSS_THRESH = 0.5 + +# Train options +__C.TRAIN = edict() + +__C.TRAIN.ANNOT_PATH = "./data/dataset/yymnist_train.txt" +__C.TRAIN.BATCH_SIZE = 4 +# __C.TRAIN.INPUT_SIZE = [320, 352, 384, 416, 448, 480, 512, 544, 576, 608] +__C.TRAIN.INPUT_SIZE = [416] +__C.TRAIN.DATA_AUG = True +__C.TRAIN.LR_INIT = 1e-3 +__C.TRAIN.LR_END = 1e-6 +__C.TRAIN.WARMUP_EPOCHS = 2 +__C.TRAIN.EPOCHS = 30 + + + +# TEST options +__C.TEST = edict() + +__C.TEST.ANNOT_PATH = "./data/dataset/yymnist_test.txt" +__C.TEST.BATCH_SIZE = 2 +__C.TEST.INPUT_SIZE = 544 +__C.TEST.DATA_AUG = False +__C.TEST.DECTECTED_IMAGE_PATH = "./data/detection/" +__C.TEST.SCORE_THRESHOLD = 0.3 +__C.TEST.IOU_THRESHOLD = 0.45 + + diff --git a/4-Object_Detection/SSD/core/dataset.py b/4-Object_Detection/SSD/core/dataset.py new file mode 100644 index 00000000..f2708d8b --- /dev/null +++ b/4-Object_Detection/SSD/core/dataset.py @@ -0,0 +1,265 @@ +#! /usr/bin/env python +# coding=utf-8 +#================================================================ +# Copyright (C) 2019 * Ltd. All rights reserved. +# +# Editor : VIM +# File name : dataset.py +# Author : YunYang1994 +# Created date: 2019-03-15 18:05:03 +# Description : +# +#================================================================ + +import os +import cv2 +import random +import numpy as np +import tensorflow as tf +import core.utils as utils +from core.config import cfg + + + +class Dataset(object): + """implement Dataset here""" + def __init__(self, dataset_type): + self.annot_path = cfg.TRAIN.ANNOT_PATH if dataset_type == 'train' else cfg.TEST.ANNOT_PATH + self.input_sizes = cfg.TRAIN.INPUT_SIZE if dataset_type == 'train' else cfg.TEST.INPUT_SIZE + self.batch_size = cfg.TRAIN.BATCH_SIZE if dataset_type == 'train' else cfg.TEST.BATCH_SIZE + self.data_aug = cfg.TRAIN.DATA_AUG if dataset_type == 'train' else cfg.TEST.DATA_AUG + + self.train_input_sizes = cfg.TRAIN.INPUT_SIZE + self.strides = np.array(cfg.YOLO.STRIDES) + self.classes = utils.read_class_names(cfg.YOLO.CLASSES) + self.num_classes = len(self.classes) + self.anchors = np.array(utils.get_anchors(cfg.YOLO.ANCHORS)) + self.anchor_per_scale = cfg.YOLO.ANCHOR_PER_SCALE + self.max_bbox_per_scale = 150 + + self.annotations = self.load_annotations(dataset_type) + self.num_samples = len(self.annotations) + self.num_batchs = int(np.ceil(self.num_samples / self.batch_size)) + self.batch_count = 0 + + + def load_annotations(self, dataset_type): + with open(self.annot_path, 'r') as f: + txt = f.readlines() + annotations = [line.strip() for line in txt if len(line.strip().split()[1:]) != 0] + np.random.shuffle(annotations) + return annotations + + def __iter__(self): + return self + + def __next__(self): + + with tf.device('/cpu:0'): + self.train_input_size = random.choice(self.train_input_sizes) + self.train_output_sizes = self.train_input_size // self.strides + + batch_image = np.zeros((self.batch_size, self.train_input_size, self.train_input_size, 3), dtype=np.float32) + + 
batch_label_sbbox = np.zeros((self.batch_size, self.train_output_sizes[0], self.train_output_sizes[0], + self.anchor_per_scale, 5 + self.num_classes), dtype=np.float32) + batch_label_mbbox = np.zeros((self.batch_size, self.train_output_sizes[1], self.train_output_sizes[1], + self.anchor_per_scale, 5 + self.num_classes), dtype=np.float32) + batch_label_lbbox = np.zeros((self.batch_size, self.train_output_sizes[2], self.train_output_sizes[2], + self.anchor_per_scale, 5 + self.num_classes), dtype=np.float32) + + batch_sbboxes = np.zeros((self.batch_size, self.max_bbox_per_scale, 4), dtype=np.float32) + batch_mbboxes = np.zeros((self.batch_size, self.max_bbox_per_scale, 4), dtype=np.float32) + batch_lbboxes = np.zeros((self.batch_size, self.max_bbox_per_scale, 4), dtype=np.float32) + + num = 0 + if self.batch_count < self.num_batchs: + while num < self.batch_size: + index = self.batch_count * self.batch_size + num + if index >= self.num_samples: index -= self.num_samples + annotation = self.annotations[index] + image, bboxes = self.parse_annotation(annotation) + label_sbbox, label_mbbox, label_lbbox, sbboxes, mbboxes, lbboxes = self.preprocess_true_boxes(bboxes) + + batch_image[num, :, :, :] = image + batch_label_sbbox[num, :, :, :, :] = label_sbbox + batch_label_mbbox[num, :, :, :, :] = label_mbbox + batch_label_lbbox[num, :, :, :, :] = label_lbbox + batch_sbboxes[num, :, :] = sbboxes + batch_mbboxes[num, :, :] = mbboxes + batch_lbboxes[num, :, :] = lbboxes + num += 1 + self.batch_count += 1 + batch_smaller_target = batch_label_sbbox, batch_sbboxes + batch_medium_target = batch_label_mbbox, batch_mbboxes + batch_larger_target = batch_label_lbbox, batch_lbboxes + + return batch_image, (batch_smaller_target, batch_medium_target, batch_larger_target) + else: + self.batch_count = 0 + np.random.shuffle(self.annotations) + raise StopIteration + + def random_horizontal_flip(self, image, bboxes): + + if random.random() < 0.5: + _, w, _ = image.shape + image = image[:, ::-1, :] + bboxes[:, [0,2]] = w - bboxes[:, [2,0]] + + return image, bboxes + + def random_crop(self, image, bboxes): + + if random.random() < 0.5: + h, w, _ = image.shape + max_bbox = np.concatenate([np.min(bboxes[:, 0:2], axis=0), np.max(bboxes[:, 2:4], axis=0)], axis=-1) + + max_l_trans = max_bbox[0] + max_u_trans = max_bbox[1] + max_r_trans = w - max_bbox[2] + max_d_trans = h - max_bbox[3] + + crop_xmin = max(0, int(max_bbox[0] - random.uniform(0, max_l_trans))) + crop_ymin = max(0, int(max_bbox[1] - random.uniform(0, max_u_trans))) + crop_xmax = max(w, int(max_bbox[2] + random.uniform(0, max_r_trans))) + crop_ymax = max(h, int(max_bbox[3] + random.uniform(0, max_d_trans))) + + image = image[crop_ymin : crop_ymax, crop_xmin : crop_xmax] + + bboxes[:, [0, 2]] = bboxes[:, [0, 2]] - crop_xmin + bboxes[:, [1, 3]] = bboxes[:, [1, 3]] - crop_ymin + + return image, bboxes + + def random_translate(self, image, bboxes): + + if random.random() < 0.5: + h, w, _ = image.shape + max_bbox = np.concatenate([np.min(bboxes[:, 0:2], axis=0), np.max(bboxes[:, 2:4], axis=0)], axis=-1) + + max_l_trans = max_bbox[0] + max_u_trans = max_bbox[1] + max_r_trans = w - max_bbox[2] + max_d_trans = h - max_bbox[3] + + tx = random.uniform(-(max_l_trans - 1), (max_r_trans - 1)) + ty = random.uniform(-(max_u_trans - 1), (max_d_trans - 1)) + + M = np.array([[1, 0, tx], [0, 1, ty]]) + image = cv2.warpAffine(image, M, (w, h)) + + bboxes[:, [0, 2]] = bboxes[:, [0, 2]] + tx + bboxes[:, [1, 3]] = bboxes[:, [1, 3]] + ty + + return image, bboxes + + def 
parse_annotation(self, annotation): + + line = annotation.split() + image_path = line[0] + if not os.path.exists(image_path): + raise KeyError("%s does not exist ... " %image_path) + image = cv2.imread(image_path) + bboxes = np.array([list(map(int, box.split(','))) for box in line[1:]]) + + if self.data_aug: + image, bboxes = self.random_horizontal_flip(np.copy(image), np.copy(bboxes)) + image, bboxes = self.random_crop(np.copy(image), np.copy(bboxes)) + image, bboxes = self.random_translate(np.copy(image), np.copy(bboxes)) + + image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) + image, bboxes = utils.image_preporcess(np.copy(image), [self.train_input_size, self.train_input_size], np.copy(bboxes)) + return image, bboxes + + def bbox_iou(self, boxes1, boxes2): + + boxes1 = np.array(boxes1) + boxes2 = np.array(boxes2) + + boxes1_area = boxes1[..., 2] * boxes1[..., 3] + boxes2_area = boxes2[..., 2] * boxes2[..., 3] + + boxes1 = np.concatenate([boxes1[..., :2] - boxes1[..., 2:] * 0.5, + boxes1[..., :2] + boxes1[..., 2:] * 0.5], axis=-1) + boxes2 = np.concatenate([boxes2[..., :2] - boxes2[..., 2:] * 0.5, + boxes2[..., :2] + boxes2[..., 2:] * 0.5], axis=-1) + + left_up = np.maximum(boxes1[..., :2], boxes2[..., :2]) + right_down = np.minimum(boxes1[..., 2:], boxes2[..., 2:]) + + inter_section = np.maximum(right_down - left_up, 0.0) + inter_area = inter_section[..., 0] * inter_section[..., 1] + union_area = boxes1_area + boxes2_area - inter_area + + return inter_area / union_area + + def preprocess_true_boxes(self, bboxes): + + label = [np.zeros((self.train_output_sizes[i], self.train_output_sizes[i], self.anchor_per_scale, + 5 + self.num_classes)) for i in range(3)] + bboxes_xywh = [np.zeros((self.max_bbox_per_scale, 4)) for _ in range(3)] + bbox_count = np.zeros((3,)) + + for bbox in bboxes: + bbox_coor = bbox[:4] + bbox_class_ind = bbox[4] + + onehot = np.zeros(self.num_classes, dtype=np.float) + onehot[bbox_class_ind] = 1.0 + uniform_distribution = np.full(self.num_classes, 1.0 / self.num_classes) + deta = 0.01 + smooth_onehot = onehot * (1 - deta) + deta * uniform_distribution + + bbox_xywh = np.concatenate([(bbox_coor[2:] + bbox_coor[:2]) * 0.5, bbox_coor[2:] - bbox_coor[:2]], axis=-1) + bbox_xywh_scaled = 1.0 * bbox_xywh[np.newaxis, :] / self.strides[:, np.newaxis] + + iou = [] + exist_positive = False + for i in range(3): + anchors_xywh = np.zeros((self.anchor_per_scale, 4)) + anchors_xywh[:, 0:2] = np.floor(bbox_xywh_scaled[i, 0:2]).astype(np.int32) + 0.5 + anchors_xywh[:, 2:4] = self.anchors[i] + + iou_scale = self.bbox_iou(bbox_xywh_scaled[i][np.newaxis, :], anchors_xywh) + iou.append(iou_scale) + iou_mask = iou_scale > 0.3 + + if np.any(iou_mask): + xind, yind = np.floor(bbox_xywh_scaled[i, 0:2]).astype(np.int32) + + label[i][yind, xind, iou_mask, :] = 0 + label[i][yind, xind, iou_mask, 0:4] = bbox_xywh + label[i][yind, xind, iou_mask, 4:5] = 1.0 + label[i][yind, xind, iou_mask, 5:] = smooth_onehot + + bbox_ind = int(bbox_count[i] % self.max_bbox_per_scale) + bboxes_xywh[i][bbox_ind, :4] = bbox_xywh + bbox_count[i] += 1 + + exist_positive = True + + if not exist_positive: + best_anchor_ind = np.argmax(np.array(iou).reshape(-1), axis=-1) + best_detect = int(best_anchor_ind / self.anchor_per_scale) + best_anchor = int(best_anchor_ind % self.anchor_per_scale) + xind, yind = np.floor(bbox_xywh_scaled[best_detect, 0:2]).astype(np.int32) + + label[best_detect][yind, xind, best_anchor, :] = 0 + label[best_detect][yind, xind, best_anchor, 0:4] = bbox_xywh + label[best_detect][yind, xind, 
best_anchor, 4:5] = 1.0
+                label[best_detect][yind, xind, best_anchor, 5:] = smooth_onehot
+
+                bbox_ind = int(bbox_count[best_detect] % self.max_bbox_per_scale)
+                bboxes_xywh[best_detect][bbox_ind, :4] = bbox_xywh
+                bbox_count[best_detect] += 1
+        label_sbbox, label_mbbox, label_lbbox = label
+        sbboxes, mbboxes, lbboxes = bboxes_xywh
+        return label_sbbox, label_mbbox, label_lbbox, sbboxes, mbboxes, lbboxes
+
+    def __len__(self):
+        return self.num_batchs
+
+
+
+
diff --git a/4-Object_Detection/SSD/core/utils.py b/4-Object_Detection/SSD/core/utils.py
new file mode 100644
index 00000000..4e4b6762
--- /dev/null
+++ b/4-Object_Detection/SSD/core/utils.py
@@ -0,0 +1,240 @@
+#! /usr/bin/env python
+# coding=utf-8
+#================================================================
+# Copyright (C) 2019 * Ltd. All rights reserved.
+#
+# Editor : VIM
+# File name : utils.py
+# Author : YunYang1994
+# Created date: 2019-07-12 01:33:38
+# Description :
+#
+#================================================================
+
+import cv2
+import random
+import colorsys
+import numpy as np
+from core.config import cfg
+
+def load_weights(model, weights_file):
+    """
+    I agree that this code is very ugly, but I don’t know any better way of doing it.
+    """
+    wf = open(weights_file, 'rb')
+    major, minor, revision, seen, _ = np.fromfile(wf, dtype=np.int32, count=5)
+
+    j = 0
+    for i in range(75):
+        conv_layer_name = 'conv2d_%d' %i if i > 0 else 'conv2d'
+        bn_layer_name = 'batch_normalization_%d' %j if j > 0 else 'batch_normalization'
+
+        conv_layer = model.get_layer(conv_layer_name)
+        filters = conv_layer.filters
+        k_size = conv_layer.kernel_size[0]
+        in_dim = conv_layer.input_shape[-1]
+
+        if i not in [58, 66, 74]:
+            # darknet weights: [beta, gamma, mean, variance]
+            bn_weights = np.fromfile(wf, dtype=np.float32, count=4 * filters)
+            # tf weights: [gamma, beta, mean, variance]
+            bn_weights = bn_weights.reshape((4, filters))[[1, 0, 2, 3]]
+            bn_layer = model.get_layer(bn_layer_name)
+            j += 1
+        else:
+            conv_bias = np.fromfile(wf, dtype=np.float32, count=filters)
+
+        # darknet shape (out_dim, in_dim, height, width)
+        conv_shape = (filters, in_dim, k_size, k_size)
+        conv_weights = np.fromfile(wf, dtype=np.float32, count=np.product(conv_shape))
+        # tf shape (height, width, in_dim, out_dim)
+        conv_weights = conv_weights.reshape(conv_shape).transpose([2, 3, 1, 0])
+
+        if i not in [58, 66, 74]:
+            conv_layer.set_weights([conv_weights])
+            bn_layer.set_weights(bn_weights)
+        else:
+            conv_layer.set_weights([conv_weights, conv_bias])
+
+    assert len(wf.read()) == 0, 'failed to read all data'
+    wf.close()
+
+
+def read_class_names(class_file_name):
+    '''loads class name from a file'''
+    names = {}
+    with open(class_file_name, 'r') as data:
+        for ID, name in enumerate(data):
+            names[ID] = name.strip('\n')
+    return names
+
+
+def get_anchors(anchors_path):
+    '''loads the anchors from a file'''
+    with open(anchors_path) as f:
+        anchors = f.readline()
+    anchors = np.array(anchors.split(','), dtype=np.float32)
+    return anchors.reshape(3, 3, 2)
+
+
+def image_preporcess(image, target_size, gt_boxes=None):
+
+    ih, iw = target_size
+    h, w, _ = image.shape
+
+    scale = min(iw/w, ih/h)
+    nw, nh = int(scale * w), int(scale * h)
+    image_resized = cv2.resize(image, (nw, nh))
+
+    image_paded = np.full(shape=[ih, iw, 3], fill_value=128.0)
+    dw, dh = (iw - nw) // 2, (ih-nh) // 2
+    image_paded[dh:nh+dh, dw:nw+dw, :] = image_resized
+    image_paded = image_paded / 255.
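+    # pixels are now in [0, 1]; any ground-truth boxes are shifted below into
+    # the same letterboxed frame (scaled by the resize ratio, then offset by the padding)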
+
+    if gt_boxes is None:
+        return image_paded
+
+    else:
+        gt_boxes[:, [0, 2]] = gt_boxes[:, [0, 2]] * scale + dw
+        gt_boxes[:, [1, 3]] = gt_boxes[:, [1, 3]] * scale + dh
+        return image_paded, gt_boxes
+
+
+def draw_bbox(image, bboxes, classes=read_class_names(cfg.YOLO.CLASSES), show_label=True):
+    """
+    bboxes: [x_min, y_min, x_max, y_max, probability, cls_id] format coordinates.
+    """
+
+    num_classes = len(classes)
+    image_h, image_w, _ = image.shape
+    hsv_tuples = [(1.0 * x / num_classes, 1., 1.) for x in range(num_classes)]
+    colors = list(map(lambda x: colorsys.hsv_to_rgb(*x), hsv_tuples))
+    colors = list(map(lambda x: (int(x[0] * 255), int(x[1] * 255), int(x[2] * 255)), colors))
+
+    random.seed(0)
+    random.shuffle(colors)
+    random.seed(None)
+
+    for i, bbox in enumerate(bboxes):
+        coor = np.array(bbox[:4], dtype=np.int32)
+        fontScale = 0.5
+        score = bbox[4]
+        class_ind = int(bbox[5])
+        bbox_color = colors[class_ind]
+        bbox_thick = int(0.6 * (image_h + image_w) / 600)
+        c1, c2 = (coor[0], coor[1]), (coor[2], coor[3])
+        cv2.rectangle(image, c1, c2, bbox_color, bbox_thick)
+
+        if show_label:
+            bbox_mess = '%s: %.2f' % (classes[class_ind], score)
+            t_size = cv2.getTextSize(bbox_mess, 0, fontScale, thickness=bbox_thick//2)[0]
+            cv2.rectangle(image, c1, (c1[0] + t_size[0], c1[1] - t_size[1] - 3), bbox_color, -1)  # filled
+
+            cv2.putText(image, bbox_mess, (c1[0], c1[1]-2), cv2.FONT_HERSHEY_SIMPLEX,
+                        fontScale, (0, 0, 0), bbox_thick//2, lineType=cv2.LINE_AA)
+
+    return image
+
+
+
+def bboxes_iou(boxes1, boxes2):
+
+    boxes1 = np.array(boxes1)
+    boxes2 = np.array(boxes2)
+
+    boxes1_area = (boxes1[..., 2] - boxes1[..., 0]) * (boxes1[..., 3] - boxes1[..., 1])
+    boxes2_area = (boxes2[..., 2] - boxes2[..., 0]) * (boxes2[..., 3] - boxes2[..., 1])
+
+    left_up = np.maximum(boxes1[..., :2], boxes2[..., :2])
+    right_down = np.minimum(boxes1[..., 2:], boxes2[..., 2:])
+
+    inter_section = np.maximum(right_down - left_up, 0.0)
+    inter_area = inter_section[..., 0] * inter_section[..., 1]
+    union_area = boxes1_area + boxes2_area - inter_area
+    ious = np.maximum(1.0 * inter_area / union_area, np.finfo(np.float32).eps)
+
+    return ious
+
+
+def nms(bboxes, iou_threshold, sigma=0.3, method='nms'):
+    """
+    :param bboxes: (xmin, ymin, xmax, ymax, score, class)
+
+    Note: soft-nms, https://arxiv.org/pdf/1704.04503.pdf
+          https://github.com/bharatsingh430/soft-nms
+    """
+    classes_in_img = list(set(bboxes[:, 5]))
+    best_bboxes = []
+
+    for cls in classes_in_img:
+        cls_mask = (bboxes[:, 5] == cls)
+        cls_bboxes = bboxes[cls_mask]
+
+        while len(cls_bboxes) > 0:
+            max_ind = np.argmax(cls_bboxes[:, 4])
+            best_bbox = cls_bboxes[max_ind]
+            best_bboxes.append(best_bbox)
+            cls_bboxes = np.concatenate([cls_bboxes[: max_ind], cls_bboxes[max_ind + 1:]])
+            iou = bboxes_iou(best_bbox[np.newaxis, :4], cls_bboxes[:, :4])
+            weight = np.ones((len(iou),), dtype=np.float32)
+
+            assert method in ['nms', 'soft-nms']
+
+            if method == 'nms':
+                iou_mask = iou > iou_threshold
+                weight[iou_mask] = 0.0
+
+            if method == 'soft-nms':
+                weight = np.exp(-(1.0 * iou ** 2 / sigma))
+
+            cls_bboxes[:, 4] = cls_bboxes[:, 4] * weight
+            score_mask = cls_bboxes[:, 4] > 0.
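+            # hard NMS zeroes the scores of boxes above the IoU threshold, while
+            # soft-NMS only decays them; boxes whose score reached zero drop out here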
+ cls_bboxes = cls_bboxes[score_mask] + + return best_bboxes + + +def postprocess_boxes(pred_bbox, org_img_shape, input_size, score_threshold): + + valid_scale=[0, np.inf] + pred_bbox = np.array(pred_bbox) + + pred_xywh = pred_bbox[:, 0:4] + pred_conf = pred_bbox[:, 4] + pred_prob = pred_bbox[:, 5:] + + # # (1) (x, y, w, h) --> (xmin, ymin, xmax, ymax) + pred_coor = np.concatenate([pred_xywh[:, :2] - pred_xywh[:, 2:] * 0.5, + pred_xywh[:, :2] + pred_xywh[:, 2:] * 0.5], axis=-1) + # # (2) (xmin, ymin, xmax, ymax) -> (xmin_org, ymin_org, xmax_org, ymax_org) + org_h, org_w = org_img_shape + resize_ratio = min(input_size / org_w, input_size / org_h) + + dw = (input_size - resize_ratio * org_w) / 2 + dh = (input_size - resize_ratio * org_h) / 2 + + pred_coor[:, 0::2] = 1.0 * (pred_coor[:, 0::2] - dw) / resize_ratio + pred_coor[:, 1::2] = 1.0 * (pred_coor[:, 1::2] - dh) / resize_ratio + + # # (3) clip some boxes those are out of range + pred_coor = np.concatenate([np.maximum(pred_coor[:, :2], [0, 0]), + np.minimum(pred_coor[:, 2:], [org_w - 1, org_h - 1])], axis=-1) + invalid_mask = np.logical_or((pred_coor[:, 0] > pred_coor[:, 2]), (pred_coor[:, 1] > pred_coor[:, 3])) + pred_coor[invalid_mask] = 0 + + # # (4) discard some invalid boxes + bboxes_scale = np.sqrt(np.multiply.reduce(pred_coor[:, 2:4] - pred_coor[:, 0:2], axis=-1)) + scale_mask = np.logical_and((valid_scale[0] < bboxes_scale), (bboxes_scale < valid_scale[1])) + + # # (5) discard some boxes with low scores + classes = np.argmax(pred_prob, axis=-1) + scores = pred_conf * pred_prob[np.arange(len(pred_coor)), classes] + score_mask = scores > score_threshold + mask = np.logical_and(scale_mask, score_mask) + coors, scores, classes = pred_coor[mask], scores[mask], classes[mask] + + return np.concatenate([coors, scores[:, np.newaxis], classes[:, np.newaxis]], axis=-1) + + + + diff --git a/4-Object_Detection/SSD/ssd.py b/4-Object_Detection/SSD/ssd.py index 48b8ec0b..1cdc08a6 100644 --- a/4-Object_Detection/SSD/ssd.py +++ b/4-Object_Detection/SSD/ssd.py @@ -12,113 +12,49 @@ #================================================================ import tensorflow as tf - +from core.backbone import vgg16 class SSD(tf.keras.Model): - def __init__(self, num_class=21): + def __init__(self, input_data, num_class=21): super(SSD, self).__init__() # conv1 - self.conv1_1 = tf.keras.layers.Conv2D(64, 3, activation='relu', padding='same') - self.conv1_2 = tf.keras.layers.Conv2D(64, 3, activation='relu', padding='same') - self.pool1 = tf.keras.layers.MaxPooling2D(2, strides=2, padding='same') - - # conv2 - self.conv2_1 = tf.keras.layers.Conv2D(128, 3, activation='relu', padding='same') - self.conv2_2 = tf.keras.layers.Conv2D(128, 3, activation='relu', padding='same') - self.pool2 = tf.keras.layers.MaxPooling2D(2, strides=2, padding='same') - - # conv3 - self.conv3_1 = tf.keras.layers.Conv2D(256, 3, activation='relu', padding='same') - self.conv3_2 = tf.keras.layers.Conv2D(256, 3, activation='relu', padding='same') - self.conv3_3 = tf.keras.layers.Conv2D(256, 3, activation='relu', padding='same') - self.pool3 = tf.keras.layers.MaxPooling2D(2, strides=2, padding='same') - - # conv4 - self.conv4_1 = tf.keras.layers.Conv2D(512, 3, activation='relu', padding='same') - self.conv4_2 = tf.keras.layers.Conv2D(512, 3, activation='relu', padding='same') - self.conv4_3 = tf.keras.layers.Conv2D(512, 3, activation='relu', padding='same') - self.pool4 = tf.keras.layers.MaxPooling2D(2, strides=2, padding='same') - - # conv5 - self.conv5_1 = tf.keras.layers.Conv2D(512, 3, 
activation='relu', padding='same')
-        self.conv5_2 = tf.keras.layers.Conv2D(512, 3, activation='relu', padding='same')
-        self.conv5_3 = tf.keras.layers.Conv2D(512, 3, activation='relu', padding='same')
-        self.pool5   = tf.keras.layers.MaxPooling2D(3, strides=1, padding='same')
-
-        # fc6, => vgg backbone is finished. now they are all SSD blocks
-        self.fc6 = tf.keras.layers.Conv2D(1024, 3, dilation_rate=6, activation='relu', padding='same')
+        conv4, conv = vgg16(input_data)
+        self.conv4 = conv4
+        # fc6, from now they are all SSD blocks
+        conv = tf.keras.layers.Conv2D(1024, 3, dilation_rate=6, activation='relu', padding='same')(conv)#fc6
         # fc7
-        self.fc7 = tf.keras.layers.Conv2D(1024, 1, activation='relu', padding='same')
-        # Block 8/9/10/11: 1x1 and 3x3 convolutions strides 2 (except lasts)
+        conv = tf.keras.layers.Conv2D(1024, 1, activation='relu', padding='same')(conv)#fc7
+        self.conv7 = conv
+        # Block 8/9/10/11: 1x1 and 3x3 convolutions strides 2 (except the last 2 layers)
+
         # conv8
-        self.conv8_1 = tf.keras.layers.Conv2D(256, 1, activation='relu', padding='same')
-        self.conv8_2 = tf.keras.layers.Conv2D(512, 3, strides=2, activation='relu', padding='same')
+        conv = tf.keras.layers.Conv2D(256, 1, activation='relu', padding='same')(conv)
+        conv = tf.keras.layers.Conv2D(512, 3, strides=2, activation='relu', padding='same')(conv)
+        self.conv8 = conv
         # conv9
-        self.conv9_1 = tf.keras.layers.Conv2D(128, 1, activation='relu', padding='same')
-        self.conv9_2 = tf.keras.layers.Conv2D(256, 3, strides=2, activation='relu', padding='same')
+        conv = tf.keras.layers.Conv2D(128, 1, activation='relu', padding='same')(conv)
+        conv = tf.keras.layers.Conv2D(256, 3, strides=2, activation='relu', padding='same')(conv)
+        self.conv9 = conv
         # conv10
-        self.conv10_1 = tf.keras.layers.Conv2D(128, 1, activation='relu', padding='same')
-        self.conv10_2 = tf.keras.layers.Conv2D(256, 3, activation='relu', padding='valid')
+        conv = tf.keras.layers.Conv2D(128, 1, activation='relu', padding='same')(conv)
+        conv = tf.keras.layers.Conv2D(256, 3, activation='relu', padding='valid')(conv)
+        self.conv10 = conv
         # conv11
-        self.conv11_1 = tf.keras.layers.Conv2D(128, 1, activation='relu', padding='same')
-        self.conv11_2 = tf.keras.layers.Conv2D(256, 3, activation='relu', padding='valid')
-
-
-
-    def call(self, x, training=False):
-        h = self.conv1_1(x)
-        h = self.conv1_2(h)
-        h = self.pool1(h)
-
-        h = self.conv2_1(h)
-        h = self.conv2_2(h)
-        h = self.pool2(h)
-
-        h = self.conv3_1(h)
-        h = self.conv3_2(h)
-        h = self.conv3_3(h)
-        h = self.pool3(h)
-
-        h = self.conv4_1(h)
-        h = self.conv4_2(h)
-        h = self.conv4_3(h)
-        conv4 = h
-        print(h.shape)
-        h = self.pool4(h)
-
-        h = self.conv5_1(h)
-        h = self.conv5_2(h)
-        h = self.conv5_3(h)
-        h = self.pool5(h)
-
-        h = self.fc6(h)  # [1,19,19,1024]
-        h = self.fc7(h)  # [1,19,19,1024]
-        conv7 = h
-        print(h.shape)
-
-        h = self.conv8_1(h)
-        h = self.conv8_2(h)  # [1,10,10, 512]
-        conv8 = h
-        print(h.shape)
-
-        h = self.conv9_1(h)
-        h = self.conv9_2(h)  # [1, 5, 5, 256]
-        conv9 = h
-        print(h.shape)
-
-        h = self.conv10_1(h)
-        h = self.conv10_2(h)  # [1, 3, 3, 256]
-        conv10 = h
-        print(h.shape)
-
-        h = self.conv11_1(h)
-        h = self.conv11_2(h)  # [1, 1, 1, 256]
-        conv11 = h
-        print(h.shape)
-        return conv4, conv7, conv8, conv9, conv10, conv11
-
-model = SSD(21)
-conv4, conv7, conv8, conv9, conv10, conv11 = model(tf.ones(shape=[1,300,300,3]))
+        conv = tf.keras.layers.Conv2D(128, 1, activation='relu', padding='same')(conv)
+        conv = tf.keras.layers.Conv2D(256, 3, activation='relu', padding='valid')(conv)
+        self.conv11 = conv
+
+    def display(self):
+        
print(self.conv4.shape)
+        print(self.conv7.shape)
+        print(self.conv8.shape)
+        print(self.conv9.shape)
+        print(self.conv10.shape)
+        print(self.conv11.shape)
+        return self.conv4, self.conv7, self.conv8, self.conv9, self.conv10, self.conv11
+
+model = SSD(tf.ones(shape=[1,300,300,3]),21)
+model.display()

From 19f9e694e4ba93fe1d1adca241d4d7991a277aee Mon Sep 17 00:00:00 2001
From: WeikangWang
Date: Sat, 14 Dec 2019 23:14:15 -0500
Subject: [PATCH 5/5] 1. adjusted the ssd model per the structure in the SSD
 paper (Fig. 2) 2. moved the common file from YOLO_V3 to the core folder

---
 4-Object_Detection/SSD/core/common.py | 58 +++++++++++++++++++++
 4-Object_Detection/SSD/ssd.py         | 13 +++---
 2 files changed, 64 insertions(+), 7 deletions(-)
 create mode 100644 4-Object_Detection/SSD/core/common.py

diff --git a/4-Object_Detection/SSD/core/common.py b/4-Object_Detection/SSD/core/common.py
new file mode 100644
index 00000000..88cc8884
--- /dev/null
+++ b/4-Object_Detection/SSD/core/common.py
@@ -0,0 +1,58 @@
+#! /usr/bin/env python
+# coding=utf-8
+#================================================================
+# Copyright (C) 2019 * Ltd. All rights reserved.
+#
+# Editor : VIM
+# File name : common.py
+# Author : YunYang1994
+# Created date: 2019-07-11 23:12:53
+# Description :
+#
+#================================================================
+
+import tensorflow as tf
+
+class BatchNormalization(tf.keras.layers.BatchNormalization):
+    """
+    "Frozen state" and "inference mode" are two separate concepts.
+    `layer.trainable = False` is to freeze the layer, so the layer will use
+    stored moving `var` and `mean` in the "inference mode", and both `gamma`
+    and `beta` will not be updated !
+    """
+    def call(self, x, training=False):
+        if not training:
+            training = tf.constant(False)
+        training = tf.logical_and(training, self.trainable)
+        return super().call(x, training)
+
+def convolutional(input_layer, filters_shape, downsample=False, activate=True, bn=True):
+    if downsample:
+        input_layer = tf.keras.layers.ZeroPadding2D(((1, 0), (1, 0)))(input_layer)
+        padding = 'valid'
+        strides = 2
+    else:
+        strides = 1
+        padding = 'same'
+
+    conv = tf.keras.layers.Conv2D(filters=filters_shape[-1], kernel_size = filters_shape[0], strides=strides, padding=padding,
+                                  use_bias=not bn, kernel_regularizer=tf.keras.regularizers.l2(0.0005),
+                                  kernel_initializer=tf.random_normal_initializer(stddev=0.01),
+                                  bias_initializer=tf.constant_initializer(0.))(input_layer)
+
+    if bn: conv = BatchNormalization()(conv)
+    if activate == True: conv = tf.nn.leaky_relu(conv, alpha=0.1)
+
+    return conv
+
+def residual_block(input_layer, input_channel, filter_num1, filter_num2):
+    short_cut = input_layer
+    conv = convolutional(input_layer, filters_shape=(1, 1, input_channel, filter_num1))
+    conv = convolutional(conv       , filters_shape=(3, 3, filter_num1,   filter_num2))
+
+    residual_output = short_cut + conv
+    return residual_output
+
+def upsample(input_layer):
+    return tf.image.resize(input_layer, (input_layer.shape[1] * 2, input_layer.shape[2] * 2), method='nearest')
+
diff --git a/4-Object_Detection/SSD/ssd.py b/4-Object_Detection/SSD/ssd.py
index 1cdc08a6..f049b17c 100644
--- a/4-Object_Detection/SSD/ssd.py
+++ b/4-Object_Detection/SSD/ssd.py
@@ -19,30 +19,29 @@ def __init__(self, input_data, num_class=21):
         super(SSD, self).__init__()
         # conv1
         conv4, conv = vgg16(input_data)
-        self.conv4 = conv4
+        self.conv4 = tf.keras.layers.Conv2D(4*(num_class + 5),3, padding='same')(conv4)
         # fc6, from now they are all SSD blocks
         conv = 
tf.keras.layers.Conv2D(1024, 3, dilation_rate=6, activation='relu', padding='same')(conv)#fc6 # fc7 conv = tf.keras.layers.Conv2D(1024, 1, activation='relu', padding='same')(conv)#fc7 - self.conv7 = conv + self.conv7 = tf.keras.layers.Conv2D(6*(num_class + 5), 3, padding='same')(conv) # Block 8/9/10/11: 1x1 and 3x3 convolutions strides 2 (except the last 2 layers) - # conv8 conv = tf.keras.layers.Conv2D(256, 1, activation='relu', padding='same')(conv) conv = tf.keras.layers.Conv2D(512, 3, strides=2, activation='relu', padding='same')(conv) - self.conv8 = conv + self.conv8 = tf.keras.layers.Conv2D(6*(num_class + 5),3, padding='same')(conv) # conv9 conv = tf.keras.layers.Conv2D(128, 1, activation='relu', padding='same')(conv) conv = tf.keras.layers.Conv2D(256, 3, strides=2, activation='relu', padding='same')(conv) - self.conv9 = conv + self.conv9 = tf.keras.layers.Conv2D(6*(num_class + 5),3, padding='same')(conv) # conv10 conv = tf.keras.layers.Conv2D(128, 1, activation='relu', padding='same')(conv) conv = tf.keras.layers.Conv2D(256, 3, activation='relu', padding='valid')(conv) - self.conv10 = conv + self.conv10 = tf.keras.layers.Conv2D(4*(num_class + 5),3, padding='same')(conv) # conv11 conv = tf.keras.layers.Conv2D(128, 1, activation='relu', padding='same')(conv) conv = tf.keras.layers.Conv2D(256, 3, activation='relu', padding='valid')(conv) - self.conv11 = conv + self.conv11 = tf.keras.layers.Conv2D(4*(num_class + 5),3, padding='same')(conv) def display(self): print(self.conv4.shape)
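With patch 5, each retained feature map already carries its per-anchor predictions (4 or 6 anchors per cell), so the remaining step toward Fig. 2 of the SSD paper is flattening and concatenating the six heads. A sketch of that step, with two caveats: it is not part of these patches, and the paper predicts num_class + 4 values per anchor (class scores plus 4 box offsets), whereas these heads keep the YOLO-style num_class + 5 layout.

    import tensorflow as tf
    from ssd import SSD  # the class built in 4-Object_Detection/SSD/ssd.py above

    num_class = 21
    model = SSD(tf.ones(shape=[1, 300, 300, 3]), num_class)
    heads = model.display()  # conv4, conv7, conv8, conv9, conv10, conv11

    # (batch, H, W, anchors*(num_class+5)) -> (batch, H*W*anchors, num_class+5)
    flat = [tf.reshape(h, (tf.shape(h)[0], -1, num_class + 5)) for h in heads]
    predictions = tf.concat(flat, axis=1)
    print(predictions.shape)  # (1, 8732, 26) -- the familiar 8732 default boxes of SSD300

The 4/6/6/6/4/4 anchor counts chosen for the six heads reproduce the paper's 8732-box total for a 300x300 input (38*38*4 + 19*19*6 + 10*10*6 + 5*5*6 + 3*3*4 + 1*1*4).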