From f8d9ddb962e3f02a22fd2d30abd6a6724b509024 Mon Sep 17 00:00:00 2001
From: WeikangWang
Date: Sat, 14 Dec 2019 16:23:03 -0500
Subject: [PATCH 1/5] adapt the darknet53 model to a much simpler model to fit
 the easy data for the power system

---
 .../YOLOV3/core/backbone_fnet.py              |  50 +++++
 4-Object_Detection/YOLOV3/core/yolov3_fnet.py | 195 ++++++++++++++++++
 2 files changed, 245 insertions(+)
 create mode 100644 4-Object_Detection/YOLOV3/core/backbone_fnet.py
 create mode 100644 4-Object_Detection/YOLOV3/core/yolov3_fnet.py

diff --git a/4-Object_Detection/YOLOV3/core/backbone_fnet.py b/4-Object_Detection/YOLOV3/core/backbone_fnet.py
new file mode 100644
index 00000000..c56d4ce7
--- /dev/null
+++ b/4-Object_Detection/YOLOV3/core/backbone_fnet.py
@@ -0,0 +1,50 @@
+#! /usr/bin/env python
+# coding=utf-8
+#================================================================
+# Copyright (C) 2019 * Ltd. All rights reserved.
+#
+# Editor : VIM
+# File name : backbone_fnet.py
+# Author : YunYang1994
+# Created date: 2019-07-11 23:37:51
+# Description :
+#
+#================================================================
+
+import tensorflow as tf
+import core.common as common
+
+
+def grid_eye_net_18(input_data):
+
+    input_data = common.convolutional(input_data, (3, 3, 3, 32))
+    input_data = common.convolutional(input_data, (3, 3, 32, 64), downsample=True)
+
+    for i in range(1):
+        input_data = common.residual_block(input_data, 64, 32, 64)
+
+    input_data = common.convolutional(input_data, (3, 3, 64, 128), downsample=True)
+
+    for i in range(1):
+        input_data = common.residual_block(input_data, 128, 64, 128)
+
+    input_data = common.convolutional(input_data, (3, 3, 128, 256), downsample=True)
+
+    for i in range(2):
+        input_data = common.residual_block(input_data, 256, 128, 256)
+
+    route_1 = input_data
+    input_data = common.convolutional(input_data, (3, 3, 256, 512), downsample=True)
+
+    for i in range(2):
+        input_data = common.residual_block(input_data, 512, 256, 512)
+
+    route_2 = input_data
+    input_data = common.convolutional(input_data, (3, 3, 512, 512), downsample=True)
+
+    for i in range(1):
+        input_data = common.residual_block(input_data, 512, 256, 512)
+
+    return route_1, route_2, input_data
+
+
diff --git a/4-Object_Detection/YOLOV3/core/yolov3_fnet.py b/4-Object_Detection/YOLOV3/core/yolov3_fnet.py
new file mode 100644
index 00000000..3066cb0e
--- /dev/null
+++ b/4-Object_Detection/YOLOV3/core/yolov3_fnet.py
@@ -0,0 +1,195 @@
+#! /usr/bin/env python
+# coding=utf-8
+#================================================================
+# Copyright (C) 2019 * Ltd. All rights reserved.
+# +# Editor : VIM +# File name : yolov3.py +# Author : YunYang1994 +# Created date: 2019-07-12 13:47:10 +# Description : +# +#================================================================ + +import numpy as np +import tensorflow as tf +import core.utils as utils +import core.common as common +import core.backbone as backbone +from core.config import cfg + + +NUM_CLASS = len(utils.read_class_names(cfg.YOLO.CLASSES)) +ANCHORS = utils.get_anchors(cfg.YOLO.ANCHORS) +STRIDES = np.array(cfg.YOLO.STRIDES) +IOU_LOSS_THRESH = cfg.YOLO.IOU_LOSS_THRESH + +def YOLOv3(input_layer): + route_1, route_2, conv = backbone.darknet53(input_layer) + + conv = common.convolutional(conv, (1, 1, 512, 256)) + conv = common.convolutional(conv, (3, 3, 256, 512)) + conv = common.convolutional(conv, (1, 1, 512, 256)) + + conv_lobj_branch = common.convolutional(conv, (3, 3, 256, 512)) + conv_lbbox = common.convolutional(conv_lobj_branch, (1, 1, 512, 3*(NUM_CLASS + 5)), activate=False, bn=False) + + conv = common.convolutional(conv, (1, 1, 256, 128)) + conv = common.upsample(conv) + + conv = tf.concat([conv, route_2], axis=-1) + + conv = common.convolutional(conv, (1, 1, 768, 256))#512+256 + conv = common.convolutional(conv, (3, 3, 256, 512)) + conv = common.convolutional(conv, (1, 1, 512, 256)) + + conv_mobj_branch = common.convolutional(conv, (3, 3, 256, 512)) + conv_mbbox = common.convolutional(conv_mobj_branch, (1, 1, 512, 3*(NUM_CLASS + 5)), activate=False, bn=False) + + conv = common.convolutional(conv, (1, 1, 256, 128)) + conv = common.upsample(conv) + + conv = tf.concat([conv, route_1], axis=-1) + + conv = common.convolutional(conv, (1, 1, 512, 128))#256+256 + conv = common.convolutional(conv, (3, 3, 128, 256)) + conv = common.convolutional(conv, (1, 1, 256, 128)) + + conv_sobj_branch = common.convolutional(conv, (3, 3, 128, 256)) + conv_sbbox = common.convolutional(conv_sobj_branch, (1, 1, 256, 3*(NUM_CLASS +5)), activate=False, bn=False) + + return [conv_sbbox, conv_mbbox, conv_lbbox] + +def decode(conv_output, i=0): + """ + return tensor of shape [batch_size, output_size, output_size, anchor_per_scale, 5 + num_classes] + contains (x, y, w, h, score, probability) + """ + + conv_shape = tf.shape(conv_output) + batch_size = conv_shape[0] + output_size = conv_shape[1] + + conv_output = tf.reshape(conv_output, (batch_size, output_size, output_size, 3, 5 + NUM_CLASS)) + + conv_raw_dxdy = conv_output[:, :, :, :, 0:2] + conv_raw_dwdh = conv_output[:, :, :, :, 2:4] + conv_raw_conf = conv_output[:, :, :, :, 4:5] + conv_raw_prob = conv_output[:, :, :, :, 5: ] + + y = tf.tile(tf.range(output_size, dtype=tf.int32)[:, tf.newaxis], [1, output_size]) + x = tf.tile(tf.range(output_size, dtype=tf.int32)[tf.newaxis, :], [output_size, 1]) + + xy_grid = tf.concat([x[:, :, tf.newaxis], y[:, :, tf.newaxis]], axis=-1) + xy_grid = tf.tile(xy_grid[tf.newaxis, :, :, tf.newaxis, :], [batch_size, 1, 1, 3, 1]) + xy_grid = tf.cast(xy_grid, tf.float32) + + pred_xy = (tf.sigmoid(conv_raw_dxdy) + xy_grid) * STRIDES[i] + pred_wh = (tf.exp(conv_raw_dwdh) * ANCHORS[i]) * STRIDES[i] + pred_xywh = tf.concat([pred_xy, pred_wh], axis=-1) + + pred_conf = tf.sigmoid(conv_raw_conf) + pred_prob = tf.sigmoid(conv_raw_prob) + + return tf.concat([pred_xywh, pred_conf, pred_prob], axis=-1) + +def bbox_iou(boxes1, boxes2): + + boxes1_area = boxes1[..., 2] * boxes1[..., 3] + boxes2_area = boxes2[..., 2] * boxes2[..., 3] + + boxes1 = tf.concat([boxes1[..., :2] - boxes1[..., 2:] * 0.5, + boxes1[..., :2] + boxes1[..., 2:] * 0.5], axis=-1) + boxes2 = 
tf.concat([boxes2[..., :2] - boxes2[..., 2:] * 0.5, + boxes2[..., :2] + boxes2[..., 2:] * 0.5], axis=-1) + + left_up = tf.maximum(boxes1[..., :2], boxes2[..., :2]) + right_down = tf.minimum(boxes1[..., 2:], boxes2[..., 2:]) + + inter_section = tf.maximum(right_down - left_up, 0.0) + inter_area = inter_section[..., 0] * inter_section[..., 1] + union_area = boxes1_area + boxes2_area - inter_area + + return 1.0 * inter_area / union_area + +def bbox_giou(boxes1, boxes2): + + boxes1 = tf.concat([boxes1[..., :2] - boxes1[..., 2:] * 0.5, + boxes1[..., :2] + boxes1[..., 2:] * 0.5], axis=-1) + boxes2 = tf.concat([boxes2[..., :2] - boxes2[..., 2:] * 0.5, + boxes2[..., :2] + boxes2[..., 2:] * 0.5], axis=-1) + + boxes1 = tf.concat([tf.minimum(boxes1[..., :2], boxes1[..., 2:]), + tf.maximum(boxes1[..., :2], boxes1[..., 2:])], axis=-1) + boxes2 = tf.concat([tf.minimum(boxes2[..., :2], boxes2[..., 2:]), + tf.maximum(boxes2[..., :2], boxes2[..., 2:])], axis=-1) + + boxes1_area = (boxes1[..., 2] - boxes1[..., 0]) * (boxes1[..., 3] - boxes1[..., 1]) + boxes2_area = (boxes2[..., 2] - boxes2[..., 0]) * (boxes2[..., 3] - boxes2[..., 1]) + + left_up = tf.maximum(boxes1[..., :2], boxes2[..., :2]) + right_down = tf.minimum(boxes1[..., 2:], boxes2[..., 2:]) + + inter_section = tf.maximum(right_down - left_up, 0.0) + inter_area = inter_section[..., 0] * inter_section[..., 1] + union_area = boxes1_area + boxes2_area - inter_area + iou = inter_area / union_area + + enclose_left_up = tf.minimum(boxes1[..., :2], boxes2[..., :2]) + enclose_right_down = tf.maximum(boxes1[..., 2:], boxes2[..., 2:]) + enclose = tf.maximum(enclose_right_down - enclose_left_up, 0.0) + enclose_area = enclose[..., 0] * enclose[..., 1] + giou = iou - 1.0 * (enclose_area - union_area) / enclose_area + + return giou + + +def compute_loss(pred, conv, label, bboxes, i=0): + + conv_shape = tf.shape(conv) + batch_size = conv_shape[0] + output_size = conv_shape[1] + input_size = STRIDES[i] * output_size + conv = tf.reshape(conv, (batch_size, output_size, output_size, 3, 5 + NUM_CLASS)) + + conv_raw_conf = conv[:, :, :, :, 4:5] + conv_raw_prob = conv[:, :, :, :, 5:] + + pred_xywh = pred[:, :, :, :, 0:4] + pred_conf = pred[:, :, :, :, 4:5] + + label_xywh = label[:, :, :, :, 0:4] + respond_bbox = label[:, :, :, :, 4:5] + label_prob = label[:, :, :, :, 5:] + + giou = tf.expand_dims(bbox_giou(pred_xywh, label_xywh), axis=-1) + input_size = tf.cast(input_size, tf.float32) + + bbox_loss_scale = 2.0 - 1.0 * label_xywh[:, :, :, :, 2:3] * label_xywh[:, :, :, :, 3:4] / (input_size ** 2) + giou_loss = respond_bbox * bbox_loss_scale * (1- giou) + + iou = bbox_iou(pred_xywh[:, :, :, :, np.newaxis, :], bboxes[:, np.newaxis, np.newaxis, np.newaxis, :, :]) + max_iou = tf.expand_dims(tf.reduce_max(iou, axis=-1), axis=-1) + + respond_bgd = (1.0 - respond_bbox) * tf.cast( max_iou < IOU_LOSS_THRESH, tf.float32 ) + + conf_focal = tf.pow(respond_bbox - pred_conf, 2) + + conf_loss = conf_focal * ( + respond_bbox * tf.nn.sigmoid_cross_entropy_with_logits(labels=respond_bbox, logits=conv_raw_conf) + + + respond_bgd * tf.nn.sigmoid_cross_entropy_with_logits(labels=respond_bbox, logits=conv_raw_conf) + ) + + prob_loss = respond_bbox * tf.nn.sigmoid_cross_entropy_with_logits(labels=label_prob, logits=conv_raw_prob) + + giou_loss = tf.reduce_mean(tf.reduce_sum(giou_loss, axis=[1,2,3,4])) + conf_loss = tf.reduce_mean(tf.reduce_sum(conf_loss, axis=[1,2,3,4])) + prob_loss = tf.reduce_mean(tf.reduce_sum(prob_loss, axis=[1,2,3,4])) + + return giou_loss, conf_loss, prob_loss + + + + + 
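Before moving on, a minimal usage sketch for the two new files (not part of the patch itself). It assumes the class-name and anchor paths in core/config.py resolve, and uses only the YOLOv3() and decode() functions defined above; the 416x416 input and [8, 16, 32] strides follow the repo's YOLOv3 defaults. Note that the backbone now tops out at 512 channels, which keeps the residual shortcut consistent and lets the detection head stay small.

    import tensorflow as tf
    import core.yolov3_fnet as yolo

    # three detection scales come back smallest-first: conv_sbbox, conv_mbbox, conv_lbbox
    input_layer  = tf.keras.layers.Input([416, 416, 3])
    feature_maps = yolo.YOLOv3(input_layer)

    # decode() maps raw logits to (x, y, w, h, score, class probabilities) per scale
    outputs = [yolo.decode(fm, i) for i, fm in enumerate(feature_maps)]
    model   = tf.keras.Model(input_layer, outputs)
    model.summary()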
From 688cf9c6329ac06d24450519c9d3728cae2fee82 Mon Sep 17 00:00:00 2001
From: WeikangWang
Date: Sat, 14 Dec 2019 20:04:32 -0500
Subject: [PATCH 2/5] extract feature maps per the paper

---
 4-Object_Detection/SSD/ssd.py | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/4-Object_Detection/SSD/ssd.py b/4-Object_Detection/SSD/ssd.py
index a2360580..48b8ec0b 100644
--- a/4-Object_Detection/SSD/ssd.py
+++ b/4-Object_Detection/SSD/ssd.py
@@ -82,6 +82,7 @@ def call(self, x, training=False):
         h = self.conv4_1(h)
         h = self.conv4_2(h)
         h = self.conv4_3(h)
+        conv4 = h
         print(h.shape)
         h = self.pool4(h)
 
@@ -92,27 +93,32 @@
         h = self.fc6(h)  # [1,19,19,1024]
         h = self.fc7(h)  # [1,19,19,1024]
+        conv7 = h
         print(h.shape)
 
         h = self.conv8_1(h)
         h = self.conv8_2(h)  # [1,10,10, 512]
+        conv8 = h
         print(h.shape)
 
         h = self.conv9_1(h)
         h = self.conv9_2(h)  # [1, 5, 5, 256]
+        conv9 = h
         print(h.shape)
 
         h = self.conv10_1(h)
         h = self.conv10_2(h)  # [1, 3, 3, 256]
+        conv10 = h
         print(h.shape)
 
         h = self.conv11_1(h)
         h = self.conv11_2(h)  # [1, 1, 1, 256]
+        conv11 = h
         print(h.shape)
-        return h
+        return conv4, conv7, conv8, conv9, conv10, conv11
 
 model = SSD(21)
-x = model(tf.ones(shape=[1,300,300,3]))
+conv4, conv7, conv8, conv9, conv10, conv11 = model(tf.ones(shape=[1,300,300,3]))

From a6af75aa4d66e61187463657e27bb6430ce49edb Mon Sep 17 00:00:00 2001
From: WeikangWang
Date: Sat, 14 Dec 2019 20:08:05 -0500
Subject: [PATCH 3/5] Revert "adapt the darknet53 model to a much simpler
 model to fit the easy data for the power system"

This reverts commit f8d9ddb962e3f02a22fd2d30abd6a6724b509024.
---
 .../YOLOV3/core/backbone_fnet.py              |  50 -----
 4-Object_Detection/YOLOV3/core/yolov3_fnet.py | 195 ------------------
 2 files changed, 245 deletions(-)
 delete mode 100644 4-Object_Detection/YOLOV3/core/backbone_fnet.py
 delete mode 100644 4-Object_Detection/YOLOV3/core/yolov3_fnet.py

diff --git a/4-Object_Detection/YOLOV3/core/backbone_fnet.py b/4-Object_Detection/YOLOV3/core/backbone_fnet.py
deleted file mode 100644
index c56d4ce7..00000000
--- a/4-Object_Detection/YOLOV3/core/backbone_fnet.py
+++ /dev/null
@@ -1,50 +0,0 @@
-#! /usr/bin/env python
-# coding=utf-8
-#================================================================
-# Copyright (C) 2019 * Ltd. All rights reserved.
-#
-# Editor : VIM
-# File name : backbone_fnet.py
-# Author : YunYang1994
-# Created date: 2019-07-11 23:37:51
-# Description :
-#
-#================================================================
-
-import tensorflow as tf
-import core.common as common
-
-
-def grid_eye_net_18(input_data):
-
-    input_data = common.convolutional(input_data, (3, 3, 3, 32))
-    input_data = common.convolutional(input_data, (3, 3, 32, 64), downsample=True)
-
-    for i in range(1):
-        input_data = common.residual_block(input_data, 64, 32, 64)
-
-    input_data = common.convolutional(input_data, (3, 3, 64, 128), downsample=True)
-
-    for i in range(1):
-        input_data = common.residual_block(input_data, 128, 64, 128)
-
-    input_data = common.convolutional(input_data, (3, 3, 128, 256), downsample=True)
-
-    for i in range(2):
-        input_data = common.residual_block(input_data, 256, 128, 256)
-
-    route_1 = input_data
-    input_data = common.convolutional(input_data, (3, 3, 256, 512), downsample=True)
-
-    for i in range(2):
-        input_data = common.residual_block(input_data, 512, 256, 512)
-
-    route_2 = input_data
-    input_data = common.convolutional(input_data, (3, 3, 512, 512), downsample=True)
-
-    for i in range(1):
-        input_data = common.residual_block(input_data, 512, 256, 512)
-
-    return route_1, route_2, input_data
-
-
diff --git a/4-Object_Detection/YOLOV3/core/yolov3_fnet.py b/4-Object_Detection/YOLOV3/core/yolov3_fnet.py
deleted file mode 100644
index 3066cb0e..00000000
--- a/4-Object_Detection/YOLOV3/core/yolov3_fnet.py
+++ /dev/null
@@ -1,195 +0,0 @@
-#! /usr/bin/env python
-# coding=utf-8
-#================================================================
-# Copyright (C) 2019 * Ltd. All rights reserved.
-#
-# Editor : VIM
-# File name : yolov3.py
-# Author : YunYang1994
-# Created date: 2019-07-12 13:47:10
-# Description :
-#
-#================================================================
-
-import numpy as np
-import tensorflow as tf
-import core.utils as utils
-import core.common as common
-import core.backbone as backbone
-from core.config import cfg
-
-
-NUM_CLASS = len(utils.read_class_names(cfg.YOLO.CLASSES))
-ANCHORS = utils.get_anchors(cfg.YOLO.ANCHORS)
-STRIDES = np.array(cfg.YOLO.STRIDES)
-IOU_LOSS_THRESH = cfg.YOLO.IOU_LOSS_THRESH
-
-def YOLOv3(input_layer):
-    route_1, route_2, conv = backbone.darknet53(input_layer)
-
-    conv = common.convolutional(conv, (1, 1, 512, 256))
-    conv = common.convolutional(conv, (3, 3, 256, 512))
-    conv = common.convolutional(conv, (1, 1, 512, 256))
-
-    conv_lobj_branch = common.convolutional(conv, (3, 3, 256, 512))
-    conv_lbbox = common.convolutional(conv_lobj_branch, (1, 1, 512, 3*(NUM_CLASS + 5)), activate=False, bn=False)
-
-    conv = common.convolutional(conv, (1, 1, 256, 128))
-    conv = common.upsample(conv)
-
-    conv = tf.concat([conv, route_2], axis=-1)
-
-    conv = common.convolutional(conv, (1, 1, 768, 256))#512+256
-    conv = common.convolutional(conv, (3, 3, 256, 512))
-    conv = common.convolutional(conv, (1, 1, 512, 256))
-
-    conv_mobj_branch = common.convolutional(conv, (3, 3, 256, 512))
-    conv_mbbox = common.convolutional(conv_mobj_branch, (1, 1, 512, 3*(NUM_CLASS + 5)), activate=False, bn=False)
-
-    conv = common.convolutional(conv, (1, 1, 256, 128))
-    conv = common.upsample(conv)
-
-    conv = tf.concat([conv, route_1], axis=-1)
-
-    conv = common.convolutional(conv, (1, 1, 512, 128))#256+256
-    conv = common.convolutional(conv, (3, 3, 128, 256))
-    conv = common.convolutional(conv, (1, 1, 256, 128))
-
-    conv_sobj_branch = common.convolutional(conv, (3, 3, 128, 256))
-    conv_sbbox = 
common.convolutional(conv_sobj_branch, (1, 1, 256, 3*(NUM_CLASS +5)), activate=False, bn=False) - - return [conv_sbbox, conv_mbbox, conv_lbbox] - -def decode(conv_output, i=0): - """ - return tensor of shape [batch_size, output_size, output_size, anchor_per_scale, 5 + num_classes] - contains (x, y, w, h, score, probability) - """ - - conv_shape = tf.shape(conv_output) - batch_size = conv_shape[0] - output_size = conv_shape[1] - - conv_output = tf.reshape(conv_output, (batch_size, output_size, output_size, 3, 5 + NUM_CLASS)) - - conv_raw_dxdy = conv_output[:, :, :, :, 0:2] - conv_raw_dwdh = conv_output[:, :, :, :, 2:4] - conv_raw_conf = conv_output[:, :, :, :, 4:5] - conv_raw_prob = conv_output[:, :, :, :, 5: ] - - y = tf.tile(tf.range(output_size, dtype=tf.int32)[:, tf.newaxis], [1, output_size]) - x = tf.tile(tf.range(output_size, dtype=tf.int32)[tf.newaxis, :], [output_size, 1]) - - xy_grid = tf.concat([x[:, :, tf.newaxis], y[:, :, tf.newaxis]], axis=-1) - xy_grid = tf.tile(xy_grid[tf.newaxis, :, :, tf.newaxis, :], [batch_size, 1, 1, 3, 1]) - xy_grid = tf.cast(xy_grid, tf.float32) - - pred_xy = (tf.sigmoid(conv_raw_dxdy) + xy_grid) * STRIDES[i] - pred_wh = (tf.exp(conv_raw_dwdh) * ANCHORS[i]) * STRIDES[i] - pred_xywh = tf.concat([pred_xy, pred_wh], axis=-1) - - pred_conf = tf.sigmoid(conv_raw_conf) - pred_prob = tf.sigmoid(conv_raw_prob) - - return tf.concat([pred_xywh, pred_conf, pred_prob], axis=-1) - -def bbox_iou(boxes1, boxes2): - - boxes1_area = boxes1[..., 2] * boxes1[..., 3] - boxes2_area = boxes2[..., 2] * boxes2[..., 3] - - boxes1 = tf.concat([boxes1[..., :2] - boxes1[..., 2:] * 0.5, - boxes1[..., :2] + boxes1[..., 2:] * 0.5], axis=-1) - boxes2 = tf.concat([boxes2[..., :2] - boxes2[..., 2:] * 0.5, - boxes2[..., :2] + boxes2[..., 2:] * 0.5], axis=-1) - - left_up = tf.maximum(boxes1[..., :2], boxes2[..., :2]) - right_down = tf.minimum(boxes1[..., 2:], boxes2[..., 2:]) - - inter_section = tf.maximum(right_down - left_up, 0.0) - inter_area = inter_section[..., 0] * inter_section[..., 1] - union_area = boxes1_area + boxes2_area - inter_area - - return 1.0 * inter_area / union_area - -def bbox_giou(boxes1, boxes2): - - boxes1 = tf.concat([boxes1[..., :2] - boxes1[..., 2:] * 0.5, - boxes1[..., :2] + boxes1[..., 2:] * 0.5], axis=-1) - boxes2 = tf.concat([boxes2[..., :2] - boxes2[..., 2:] * 0.5, - boxes2[..., :2] + boxes2[..., 2:] * 0.5], axis=-1) - - boxes1 = tf.concat([tf.minimum(boxes1[..., :2], boxes1[..., 2:]), - tf.maximum(boxes1[..., :2], boxes1[..., 2:])], axis=-1) - boxes2 = tf.concat([tf.minimum(boxes2[..., :2], boxes2[..., 2:]), - tf.maximum(boxes2[..., :2], boxes2[..., 2:])], axis=-1) - - boxes1_area = (boxes1[..., 2] - boxes1[..., 0]) * (boxes1[..., 3] - boxes1[..., 1]) - boxes2_area = (boxes2[..., 2] - boxes2[..., 0]) * (boxes2[..., 3] - boxes2[..., 1]) - - left_up = tf.maximum(boxes1[..., :2], boxes2[..., :2]) - right_down = tf.minimum(boxes1[..., 2:], boxes2[..., 2:]) - - inter_section = tf.maximum(right_down - left_up, 0.0) - inter_area = inter_section[..., 0] * inter_section[..., 1] - union_area = boxes1_area + boxes2_area - inter_area - iou = inter_area / union_area - - enclose_left_up = tf.minimum(boxes1[..., :2], boxes2[..., :2]) - enclose_right_down = tf.maximum(boxes1[..., 2:], boxes2[..., 2:]) - enclose = tf.maximum(enclose_right_down - enclose_left_up, 0.0) - enclose_area = enclose[..., 0] * enclose[..., 1] - giou = iou - 1.0 * (enclose_area - union_area) / enclose_area - - return giou - - -def compute_loss(pred, conv, label, bboxes, i=0): - - conv_shape = 
tf.shape(conv)
-    batch_size = conv_shape[0]
-    output_size = conv_shape[1]
-    input_size = STRIDES[i] * output_size
-    conv = tf.reshape(conv, (batch_size, output_size, output_size, 3, 5 + NUM_CLASS))
-
-    conv_raw_conf = conv[:, :, :, :, 4:5]
-    conv_raw_prob = conv[:, :, :, :, 5:]
-
-    pred_xywh = pred[:, :, :, :, 0:4]
-    pred_conf = pred[:, :, :, :, 4:5]
-
-    label_xywh = label[:, :, :, :, 0:4]
-    respond_bbox = label[:, :, :, :, 4:5]
-    label_prob = label[:, :, :, :, 5:]
-
-    giou = tf.expand_dims(bbox_giou(pred_xywh, label_xywh), axis=-1)
-    input_size = tf.cast(input_size, tf.float32)
-
-    bbox_loss_scale = 2.0 - 1.0 * label_xywh[:, :, :, :, 2:3] * label_xywh[:, :, :, :, 3:4] / (input_size ** 2)
-    giou_loss = respond_bbox * bbox_loss_scale * (1- giou)
-
-    iou = bbox_iou(pred_xywh[:, :, :, :, np.newaxis, :], bboxes[:, np.newaxis, np.newaxis, np.newaxis, :, :])
-    max_iou = tf.expand_dims(tf.reduce_max(iou, axis=-1), axis=-1)
-
-    respond_bgd = (1.0 - respond_bbox) * tf.cast( max_iou < IOU_LOSS_THRESH, tf.float32 )
-
-    conf_focal = tf.pow(respond_bbox - pred_conf, 2)
-
-    conf_loss = conf_focal * (
-            respond_bbox * tf.nn.sigmoid_cross_entropy_with_logits(labels=respond_bbox, logits=conv_raw_conf)
-            +
-            respond_bgd * tf.nn.sigmoid_cross_entropy_with_logits(labels=respond_bbox, logits=conv_raw_conf)
-    )
-
-    prob_loss = respond_bbox * tf.nn.sigmoid_cross_entropy_with_logits(labels=label_prob, logits=conv_raw_prob)
-
-    giou_loss = tf.reduce_mean(tf.reduce_sum(giou_loss, axis=[1,2,3,4]))
-    conf_loss = tf.reduce_mean(tf.reduce_sum(conf_loss, axis=[1,2,3,4]))
-    prob_loss = tf.reduce_mean(tf.reduce_sum(prob_loss, axis=[1,2,3,4]))
-
-    return giou_loss, conf_loss, prob_loss
-
-
-
-
-

From c250f649590fa31b885e82b5cf64bf97a217e097 Mon Sep 17 00:00:00 2001
From: WeikangWang
Date: Sat, 14 Dec 2019 21:16:55 -0500
Subject: [PATCH 4/5] 1. organized the SSD class 2. extracted the vgg16 block
 3. put vgg16 into a backbone per the style from YOLO_v3 4. added some other
 modules for future use

---
 4-Object_Detection/SSD/core/backbone.py |  50 +++++
 4-Object_Detection/SSD/core/config.py   |  58 ++++++
 4-Object_Detection/SSD/core/dataset.py  | 265 ++++++++++++++++++++++++
 4-Object_Detection/SSD/core/utils.py    | 240 +++++++++++++++++++++
 4-Object_Detection/SSD/ssd.py           | 132 +++---------
 5 files changed, 647 insertions(+), 98 deletions(-)
 create mode 100644 4-Object_Detection/SSD/core/backbone.py
 create mode 100644 4-Object_Detection/SSD/core/config.py
 create mode 100644 4-Object_Detection/SSD/core/dataset.py
 create mode 100644 4-Object_Detection/SSD/core/utils.py

diff --git a/4-Object_Detection/SSD/core/backbone.py b/4-Object_Detection/SSD/core/backbone.py
new file mode 100644
index 00000000..5115df8a
--- /dev/null
+++ b/4-Object_Detection/SSD/core/backbone.py
@@ -0,0 +1,50 @@
+#! /usr/bin/env python
+# coding=utf-8
+#================================================================
+# Copyright (C) 2019 * Ltd. All rights reserved.
+#
+# Editor : VIM
+# File name : backbone.py
+# Author : YunYang1994
+# Created date: 2019-07-11 23:37:51
+# Description :
+#
+#================================================================
+
+import tensorflow as tf
+
+def vgg16(input_data):
+
+#======================================VGG16_start===================================================
+    # conv1
+    conv = tf.keras.layers.Conv2D(64, 3, activation='relu', padding='same')(input_data) #conv1_1
+    conv = tf.keras.layers.Conv2D(64, 3, activation='relu', padding='same')(conv)       #conv1_2
+    conv = tf.keras.layers.MaxPooling2D(2, strides=2, padding='same')(conv)             #pool1
+
+    # conv2
+    conv = tf.keras.layers.Conv2D(128, 3, activation='relu', padding='same')(conv)#conv2_1
+    conv = tf.keras.layers.Conv2D(128, 3, activation='relu', padding='same')(conv)#conv2_2
+    conv = tf.keras.layers.MaxPooling2D(2, strides=2, padding='same')(conv)#pool2
+
+    # conv3
+    conv = tf.keras.layers.Conv2D(256, 3, activation='relu', padding='same')(conv)#conv3_1
+    conv = tf.keras.layers.Conv2D(256, 3, activation='relu', padding='same')(conv)#conv3_2
+    conv = tf.keras.layers.Conv2D(256, 3, activation='relu', padding='same')(conv)#conv3_3
+    conv = tf.keras.layers.MaxPooling2D(2, strides=2, padding='same')(conv)#pool3
+
+    # conv4
+    conv = tf.keras.layers.Conv2D(512, 3, activation='relu', padding='same')(conv)
+    conv = tf.keras.layers.Conv2D(512, 3, activation='relu', padding='same')(conv)
+    conv = tf.keras.layers.Conv2D(512, 3, activation='relu', padding='same')(conv)
+    conv4 = conv
+    conv = tf.keras.layers.MaxPooling2D(2, strides=2, padding='same')(conv)
+
+    # conv5
+    conv = tf.keras.layers.Conv2D(512, 3, activation='relu', padding='same')(conv)
+    conv = tf.keras.layers.Conv2D(512, 3, activation='relu', padding='same')(conv)
+    conv = tf.keras.layers.Conv2D(512, 3, activation='relu', padding='same')(conv)
+    conv = tf.keras.layers.MaxPooling2D(3, strides=1, padding='same')(conv)
+
+    return conv4, conv
+
+
diff --git a/4-Object_Detection/SSD/core/config.py b/4-Object_Detection/SSD/core/config.py
new file mode 100644
index 00000000..f23265ca
--- /dev/null
+++ b/4-Object_Detection/SSD/core/config.py
@@ -0,0 +1,58 @@
+#! /usr/bin/env python
+# coding=utf-8
+#================================================================
+# Copyright (C) 2019 * Ltd. All rights reserved.
+# +# Editor : VIM +# File name : config.py +# Author : YunYang1994 +# Created date: 2019-02-28 13:06:54 +# Description : +# +#================================================================ + +from easydict import EasyDict as edict + + +__C = edict() +# Consumers can get config by: from config import cfg + +cfg = __C + +# YOLO options +__C.YOLO = edict() + +# Set the class name +__C.YOLO.CLASSES = "./data/classes/coco.names" +__C.YOLO.ANCHORS = "./data/anchors/basline_anchors.txt" +__C.YOLO.STRIDES = [8, 16, 32] +__C.YOLO.ANCHOR_PER_SCALE = 3 +__C.YOLO.IOU_LOSS_THRESH = 0.5 + +# Train options +__C.TRAIN = edict() + +__C.TRAIN.ANNOT_PATH = "./data/dataset/yymnist_train.txt" +__C.TRAIN.BATCH_SIZE = 4 +# __C.TRAIN.INPUT_SIZE = [320, 352, 384, 416, 448, 480, 512, 544, 576, 608] +__C.TRAIN.INPUT_SIZE = [416] +__C.TRAIN.DATA_AUG = True +__C.TRAIN.LR_INIT = 1e-3 +__C.TRAIN.LR_END = 1e-6 +__C.TRAIN.WARMUP_EPOCHS = 2 +__C.TRAIN.EPOCHS = 30 + + + +# TEST options +__C.TEST = edict() + +__C.TEST.ANNOT_PATH = "./data/dataset/yymnist_test.txt" +__C.TEST.BATCH_SIZE = 2 +__C.TEST.INPUT_SIZE = 544 +__C.TEST.DATA_AUG = False +__C.TEST.DECTECTED_IMAGE_PATH = "./data/detection/" +__C.TEST.SCORE_THRESHOLD = 0.3 +__C.TEST.IOU_THRESHOLD = 0.45 + + diff --git a/4-Object_Detection/SSD/core/dataset.py b/4-Object_Detection/SSD/core/dataset.py new file mode 100644 index 00000000..f2708d8b --- /dev/null +++ b/4-Object_Detection/SSD/core/dataset.py @@ -0,0 +1,265 @@ +#! /usr/bin/env python +# coding=utf-8 +#================================================================ +# Copyright (C) 2019 * Ltd. All rights reserved. +# +# Editor : VIM +# File name : dataset.py +# Author : YunYang1994 +# Created date: 2019-03-15 18:05:03 +# Description : +# +#================================================================ + +import os +import cv2 +import random +import numpy as np +import tensorflow as tf +import core.utils as utils +from core.config import cfg + + + +class Dataset(object): + """implement Dataset here""" + def __init__(self, dataset_type): + self.annot_path = cfg.TRAIN.ANNOT_PATH if dataset_type == 'train' else cfg.TEST.ANNOT_PATH + self.input_sizes = cfg.TRAIN.INPUT_SIZE if dataset_type == 'train' else cfg.TEST.INPUT_SIZE + self.batch_size = cfg.TRAIN.BATCH_SIZE if dataset_type == 'train' else cfg.TEST.BATCH_SIZE + self.data_aug = cfg.TRAIN.DATA_AUG if dataset_type == 'train' else cfg.TEST.DATA_AUG + + self.train_input_sizes = cfg.TRAIN.INPUT_SIZE + self.strides = np.array(cfg.YOLO.STRIDES) + self.classes = utils.read_class_names(cfg.YOLO.CLASSES) + self.num_classes = len(self.classes) + self.anchors = np.array(utils.get_anchors(cfg.YOLO.ANCHORS)) + self.anchor_per_scale = cfg.YOLO.ANCHOR_PER_SCALE + self.max_bbox_per_scale = 150 + + self.annotations = self.load_annotations(dataset_type) + self.num_samples = len(self.annotations) + self.num_batchs = int(np.ceil(self.num_samples / self.batch_size)) + self.batch_count = 0 + + + def load_annotations(self, dataset_type): + with open(self.annot_path, 'r') as f: + txt = f.readlines() + annotations = [line.strip() for line in txt if len(line.strip().split()[1:]) != 0] + np.random.shuffle(annotations) + return annotations + + def __iter__(self): + return self + + def __next__(self): + + with tf.device('/cpu:0'): + self.train_input_size = random.choice(self.train_input_sizes) + self.train_output_sizes = self.train_input_size // self.strides + + batch_image = np.zeros((self.batch_size, self.train_input_size, self.train_input_size, 3), dtype=np.float32) + + 
batch_label_sbbox = np.zeros((self.batch_size, self.train_output_sizes[0], self.train_output_sizes[0], + self.anchor_per_scale, 5 + self.num_classes), dtype=np.float32) + batch_label_mbbox = np.zeros((self.batch_size, self.train_output_sizes[1], self.train_output_sizes[1], + self.anchor_per_scale, 5 + self.num_classes), dtype=np.float32) + batch_label_lbbox = np.zeros((self.batch_size, self.train_output_sizes[2], self.train_output_sizes[2], + self.anchor_per_scale, 5 + self.num_classes), dtype=np.float32) + + batch_sbboxes = np.zeros((self.batch_size, self.max_bbox_per_scale, 4), dtype=np.float32) + batch_mbboxes = np.zeros((self.batch_size, self.max_bbox_per_scale, 4), dtype=np.float32) + batch_lbboxes = np.zeros((self.batch_size, self.max_bbox_per_scale, 4), dtype=np.float32) + + num = 0 + if self.batch_count < self.num_batchs: + while num < self.batch_size: + index = self.batch_count * self.batch_size + num + if index >= self.num_samples: index -= self.num_samples + annotation = self.annotations[index] + image, bboxes = self.parse_annotation(annotation) + label_sbbox, label_mbbox, label_lbbox, sbboxes, mbboxes, lbboxes = self.preprocess_true_boxes(bboxes) + + batch_image[num, :, :, :] = image + batch_label_sbbox[num, :, :, :, :] = label_sbbox + batch_label_mbbox[num, :, :, :, :] = label_mbbox + batch_label_lbbox[num, :, :, :, :] = label_lbbox + batch_sbboxes[num, :, :] = sbboxes + batch_mbboxes[num, :, :] = mbboxes + batch_lbboxes[num, :, :] = lbboxes + num += 1 + self.batch_count += 1 + batch_smaller_target = batch_label_sbbox, batch_sbboxes + batch_medium_target = batch_label_mbbox, batch_mbboxes + batch_larger_target = batch_label_lbbox, batch_lbboxes + + return batch_image, (batch_smaller_target, batch_medium_target, batch_larger_target) + else: + self.batch_count = 0 + np.random.shuffle(self.annotations) + raise StopIteration + + def random_horizontal_flip(self, image, bboxes): + + if random.random() < 0.5: + _, w, _ = image.shape + image = image[:, ::-1, :] + bboxes[:, [0,2]] = w - bboxes[:, [2,0]] + + return image, bboxes + + def random_crop(self, image, bboxes): + + if random.random() < 0.5: + h, w, _ = image.shape + max_bbox = np.concatenate([np.min(bboxes[:, 0:2], axis=0), np.max(bboxes[:, 2:4], axis=0)], axis=-1) + + max_l_trans = max_bbox[0] + max_u_trans = max_bbox[1] + max_r_trans = w - max_bbox[2] + max_d_trans = h - max_bbox[3] + + crop_xmin = max(0, int(max_bbox[0] - random.uniform(0, max_l_trans))) + crop_ymin = max(0, int(max_bbox[1] - random.uniform(0, max_u_trans))) + crop_xmax = max(w, int(max_bbox[2] + random.uniform(0, max_r_trans))) + crop_ymax = max(h, int(max_bbox[3] + random.uniform(0, max_d_trans))) + + image = image[crop_ymin : crop_ymax, crop_xmin : crop_xmax] + + bboxes[:, [0, 2]] = bboxes[:, [0, 2]] - crop_xmin + bboxes[:, [1, 3]] = bboxes[:, [1, 3]] - crop_ymin + + return image, bboxes + + def random_translate(self, image, bboxes): + + if random.random() < 0.5: + h, w, _ = image.shape + max_bbox = np.concatenate([np.min(bboxes[:, 0:2], axis=0), np.max(bboxes[:, 2:4], axis=0)], axis=-1) + + max_l_trans = max_bbox[0] + max_u_trans = max_bbox[1] + max_r_trans = w - max_bbox[2] + max_d_trans = h - max_bbox[3] + + tx = random.uniform(-(max_l_trans - 1), (max_r_trans - 1)) + ty = random.uniform(-(max_u_trans - 1), (max_d_trans - 1)) + + M = np.array([[1, 0, tx], [0, 1, ty]]) + image = cv2.warpAffine(image, M, (w, h)) + + bboxes[:, [0, 2]] = bboxes[:, [0, 2]] + tx + bboxes[:, [1, 3]] = bboxes[:, [1, 3]] + ty + + return image, bboxes + + def 
parse_annotation(self, annotation): + + line = annotation.split() + image_path = line[0] + if not os.path.exists(image_path): + raise KeyError("%s does not exist ... " %image_path) + image = cv2.imread(image_path) + bboxes = np.array([list(map(int, box.split(','))) for box in line[1:]]) + + if self.data_aug: + image, bboxes = self.random_horizontal_flip(np.copy(image), np.copy(bboxes)) + image, bboxes = self.random_crop(np.copy(image), np.copy(bboxes)) + image, bboxes = self.random_translate(np.copy(image), np.copy(bboxes)) + + image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) + image, bboxes = utils.image_preporcess(np.copy(image), [self.train_input_size, self.train_input_size], np.copy(bboxes)) + return image, bboxes + + def bbox_iou(self, boxes1, boxes2): + + boxes1 = np.array(boxes1) + boxes2 = np.array(boxes2) + + boxes1_area = boxes1[..., 2] * boxes1[..., 3] + boxes2_area = boxes2[..., 2] * boxes2[..., 3] + + boxes1 = np.concatenate([boxes1[..., :2] - boxes1[..., 2:] * 0.5, + boxes1[..., :2] + boxes1[..., 2:] * 0.5], axis=-1) + boxes2 = np.concatenate([boxes2[..., :2] - boxes2[..., 2:] * 0.5, + boxes2[..., :2] + boxes2[..., 2:] * 0.5], axis=-1) + + left_up = np.maximum(boxes1[..., :2], boxes2[..., :2]) + right_down = np.minimum(boxes1[..., 2:], boxes2[..., 2:]) + + inter_section = np.maximum(right_down - left_up, 0.0) + inter_area = inter_section[..., 0] * inter_section[..., 1] + union_area = boxes1_area + boxes2_area - inter_area + + return inter_area / union_area + + def preprocess_true_boxes(self, bboxes): + + label = [np.zeros((self.train_output_sizes[i], self.train_output_sizes[i], self.anchor_per_scale, + 5 + self.num_classes)) for i in range(3)] + bboxes_xywh = [np.zeros((self.max_bbox_per_scale, 4)) for _ in range(3)] + bbox_count = np.zeros((3,)) + + for bbox in bboxes: + bbox_coor = bbox[:4] + bbox_class_ind = bbox[4] + + onehot = np.zeros(self.num_classes, dtype=np.float) + onehot[bbox_class_ind] = 1.0 + uniform_distribution = np.full(self.num_classes, 1.0 / self.num_classes) + deta = 0.01 + smooth_onehot = onehot * (1 - deta) + deta * uniform_distribution + + bbox_xywh = np.concatenate([(bbox_coor[2:] + bbox_coor[:2]) * 0.5, bbox_coor[2:] - bbox_coor[:2]], axis=-1) + bbox_xywh_scaled = 1.0 * bbox_xywh[np.newaxis, :] / self.strides[:, np.newaxis] + + iou = [] + exist_positive = False + for i in range(3): + anchors_xywh = np.zeros((self.anchor_per_scale, 4)) + anchors_xywh[:, 0:2] = np.floor(bbox_xywh_scaled[i, 0:2]).astype(np.int32) + 0.5 + anchors_xywh[:, 2:4] = self.anchors[i] + + iou_scale = self.bbox_iou(bbox_xywh_scaled[i][np.newaxis, :], anchors_xywh) + iou.append(iou_scale) + iou_mask = iou_scale > 0.3 + + if np.any(iou_mask): + xind, yind = np.floor(bbox_xywh_scaled[i, 0:2]).astype(np.int32) + + label[i][yind, xind, iou_mask, :] = 0 + label[i][yind, xind, iou_mask, 0:4] = bbox_xywh + label[i][yind, xind, iou_mask, 4:5] = 1.0 + label[i][yind, xind, iou_mask, 5:] = smooth_onehot + + bbox_ind = int(bbox_count[i] % self.max_bbox_per_scale) + bboxes_xywh[i][bbox_ind, :4] = bbox_xywh + bbox_count[i] += 1 + + exist_positive = True + + if not exist_positive: + best_anchor_ind = np.argmax(np.array(iou).reshape(-1), axis=-1) + best_detect = int(best_anchor_ind / self.anchor_per_scale) + best_anchor = int(best_anchor_ind % self.anchor_per_scale) + xind, yind = np.floor(bbox_xywh_scaled[best_detect, 0:2]).astype(np.int32) + + label[best_detect][yind, xind, best_anchor, :] = 0 + label[best_detect][yind, xind, best_anchor, 0:4] = bbox_xywh + label[best_detect][yind, xind, 
best_anchor, 4:5] = 1.0
+                label[best_detect][yind, xind, best_anchor, 5:] = smooth_onehot
+
+                bbox_ind = int(bbox_count[best_detect] % self.max_bbox_per_scale)
+                bboxes_xywh[best_detect][bbox_ind, :4] = bbox_xywh
+                bbox_count[best_detect] += 1
+        label_sbbox, label_mbbox, label_lbbox = label
+        sbboxes, mbboxes, lbboxes = bboxes_xywh
+        return label_sbbox, label_mbbox, label_lbbox, sbboxes, mbboxes, lbboxes
+
+    def __len__(self):
+        return self.num_batchs
+
+
+
+
diff --git a/4-Object_Detection/SSD/core/utils.py b/4-Object_Detection/SSD/core/utils.py
new file mode 100644
index 00000000..4e4b6762
--- /dev/null
+++ b/4-Object_Detection/SSD/core/utils.py
@@ -0,0 +1,240 @@
+#! /usr/bin/env python
+# coding=utf-8
+#================================================================
+# Copyright (C) 2019 * Ltd. All rights reserved.
+#
+# Editor : VIM
+# File name : utils.py
+# Author : YunYang1994
+# Created date: 2019-07-12 01:33:38
+# Description :
+#
+#================================================================
+
+import cv2
+import random
+import colorsys
+import numpy as np
+from core.config import cfg
+
+def load_weights(model, weights_file):
+    """
+    I agree that this code is very ugly, but I don’t know any better way of doing it.
+    """
+    wf = open(weights_file, 'rb')
+    major, minor, revision, seen, _ = np.fromfile(wf, dtype=np.int32, count=5)
+
+    j = 0
+    for i in range(75):
+        conv_layer_name = 'conv2d_%d' %i if i > 0 else 'conv2d'
+        bn_layer_name = 'batch_normalization_%d' %j if j > 0 else 'batch_normalization'
+
+        conv_layer = model.get_layer(conv_layer_name)
+        filters = conv_layer.filters
+        k_size = conv_layer.kernel_size[0]
+        in_dim = conv_layer.input_shape[-1]
+
+        if i not in [58, 66, 74]:
+            # darknet weights: [beta, gamma, mean, variance]
+            bn_weights = np.fromfile(wf, dtype=np.float32, count=4 * filters)
+            # tf weights: [gamma, beta, mean, variance]
+            bn_weights = bn_weights.reshape((4, filters))[[1, 0, 2, 3]]
+            bn_layer = model.get_layer(bn_layer_name)
+            j += 1
+        else:
+            conv_bias = np.fromfile(wf, dtype=np.float32, count=filters)
+
+        # darknet shape (out_dim, in_dim, height, width)
+        conv_shape = (filters, in_dim, k_size, k_size)
+        conv_weights = np.fromfile(wf, dtype=np.float32, count=np.product(conv_shape))
+        # tf shape (height, width, in_dim, out_dim)
+        conv_weights = conv_weights.reshape(conv_shape).transpose([2, 3, 1, 0])
+
+        if i not in [58, 66, 74]:
+            conv_layer.set_weights([conv_weights])
+            bn_layer.set_weights(bn_weights)
+        else:
+            conv_layer.set_weights([conv_weights, conv_bias])
+
+    assert len(wf.read()) == 0, 'failed to read all data'
+    wf.close()
+
+
+def read_class_names(class_file_name):
+    '''loads class name from a file'''
+    names = {}
+    with open(class_file_name, 'r') as data:
+        for ID, name in enumerate(data):
+            names[ID] = name.strip('\n')
+    return names
+
+
+def get_anchors(anchors_path):
+    '''loads the anchors from a file'''
+    with open(anchors_path) as f:
+        anchors = f.readline()
+    anchors = np.array(anchors.split(','), dtype=np.float32)
+    return anchors.reshape(3, 3, 2)
+
+
+def image_preporcess(image, target_size, gt_boxes=None):
+
+    ih, iw = target_size
+    h, w, _ = image.shape
+
+    scale = min(iw/w, ih/h)
+    nw, nh = int(scale * w), int(scale * h)
+    image_resized = cv2.resize(image, (nw, nh))
+
+    image_paded = np.full(shape=[ih, iw, 3], fill_value=128.0)
+    dw, dh = (iw - nw) // 2, (ih-nh) // 2
+    image_paded[dh:nh+dh, dw:nw+dw, :] = image_resized
+    image_paded = image_paded / 255.
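+    # pixels are now in [0, 1]; any ground-truth boxes are shifted below into
+    # the same letterboxed frame (scaled by the resize ratio, then offset by the padding)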
+
+    if gt_boxes is None:
+        return image_paded
+
+    else:
+        gt_boxes[:, [0, 2]] = gt_boxes[:, [0, 2]] * scale + dw
+        gt_boxes[:, [1, 3]] = gt_boxes[:, [1, 3]] * scale + dh
+        return image_paded, gt_boxes
+
+
+def draw_bbox(image, bboxes, classes=read_class_names(cfg.YOLO.CLASSES), show_label=True):
+    """
+    bboxes: [x_min, y_min, x_max, y_max, probability, cls_id] format coordinates.
+    """
+
+    num_classes = len(classes)
+    image_h, image_w, _ = image.shape
+    hsv_tuples = [(1.0 * x / num_classes, 1., 1.) for x in range(num_classes)]
+    colors = list(map(lambda x: colorsys.hsv_to_rgb(*x), hsv_tuples))
+    colors = list(map(lambda x: (int(x[0] * 255), int(x[1] * 255), int(x[2] * 255)), colors))
+
+    random.seed(0)
+    random.shuffle(colors)
+    random.seed(None)
+
+    for i, bbox in enumerate(bboxes):
+        coor = np.array(bbox[:4], dtype=np.int32)
+        fontScale = 0.5
+        score = bbox[4]
+        class_ind = int(bbox[5])
+        bbox_color = colors[class_ind]
+        bbox_thick = int(0.6 * (image_h + image_w) / 600)
+        c1, c2 = (coor[0], coor[1]), (coor[2], coor[3])
+        cv2.rectangle(image, c1, c2, bbox_color, bbox_thick)
+
+        if show_label:
+            bbox_mess = '%s: %.2f' % (classes[class_ind], score)
+            t_size = cv2.getTextSize(bbox_mess, 0, fontScale, thickness=bbox_thick//2)[0]
+            cv2.rectangle(image, c1, (c1[0] + t_size[0], c1[1] - t_size[1] - 3), bbox_color, -1)  # filled
+
+            cv2.putText(image, bbox_mess, (c1[0], c1[1]-2), cv2.FONT_HERSHEY_SIMPLEX,
+                        fontScale, (0, 0, 0), bbox_thick//2, lineType=cv2.LINE_AA)
+
+    return image
+
+
+
+def bboxes_iou(boxes1, boxes2):
+
+    boxes1 = np.array(boxes1)
+    boxes2 = np.array(boxes2)
+
+    boxes1_area = (boxes1[..., 2] - boxes1[..., 0]) * (boxes1[..., 3] - boxes1[..., 1])
+    boxes2_area = (boxes2[..., 2] - boxes2[..., 0]) * (boxes2[..., 3] - boxes2[..., 1])
+
+    left_up = np.maximum(boxes1[..., :2], boxes2[..., :2])
+    right_down = np.minimum(boxes1[..., 2:], boxes2[..., 2:])
+
+    inter_section = np.maximum(right_down - left_up, 0.0)
+    inter_area = inter_section[..., 0] * inter_section[..., 1]
+    union_area = boxes1_area + boxes2_area - inter_area
+    ious = np.maximum(1.0 * inter_area / union_area, np.finfo(np.float32).eps)
+
+    return ious
+
+
+def nms(bboxes, iou_threshold, sigma=0.3, method='nms'):
+    """
+    :param bboxes: (xmin, ymin, xmax, ymax, score, class)
+
+    Note: soft-nms, https://arxiv.org/pdf/1704.04503.pdf
+          https://github.com/bharatsingh430/soft-nms
+    """
+    classes_in_img = list(set(bboxes[:, 5]))
+    best_bboxes = []
+
+    for cls in classes_in_img:
+        cls_mask = (bboxes[:, 5] == cls)
+        cls_bboxes = bboxes[cls_mask]
+
+        while len(cls_bboxes) > 0:
+            max_ind = np.argmax(cls_bboxes[:, 4])
+            best_bbox = cls_bboxes[max_ind]
+            best_bboxes.append(best_bbox)
+            cls_bboxes = np.concatenate([cls_bboxes[: max_ind], cls_bboxes[max_ind + 1:]])
+            iou = bboxes_iou(best_bbox[np.newaxis, :4], cls_bboxes[:, :4])
+            weight = np.ones((len(iou),), dtype=np.float32)
+
+            assert method in ['nms', 'soft-nms']
+
+            if method == 'nms':
+                iou_mask = iou > iou_threshold
+                weight[iou_mask] = 0.0
+
+            if method == 'soft-nms':
+                weight = np.exp(-(1.0 * iou ** 2 / sigma))
+
+            cls_bboxes[:, 4] = cls_bboxes[:, 4] * weight
+            score_mask = cls_bboxes[:, 4] > 0.
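+            # hard NMS zeroes the scores of boxes above the IoU threshold, while
+            # soft-NMS only decays them; boxes whose score reached zero drop out here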
+ cls_bboxes = cls_bboxes[score_mask] + + return best_bboxes + + +def postprocess_boxes(pred_bbox, org_img_shape, input_size, score_threshold): + + valid_scale=[0, np.inf] + pred_bbox = np.array(pred_bbox) + + pred_xywh = pred_bbox[:, 0:4] + pred_conf = pred_bbox[:, 4] + pred_prob = pred_bbox[:, 5:] + + # # (1) (x, y, w, h) --> (xmin, ymin, xmax, ymax) + pred_coor = np.concatenate([pred_xywh[:, :2] - pred_xywh[:, 2:] * 0.5, + pred_xywh[:, :2] + pred_xywh[:, 2:] * 0.5], axis=-1) + # # (2) (xmin, ymin, xmax, ymax) -> (xmin_org, ymin_org, xmax_org, ymax_org) + org_h, org_w = org_img_shape + resize_ratio = min(input_size / org_w, input_size / org_h) + + dw = (input_size - resize_ratio * org_w) / 2 + dh = (input_size - resize_ratio * org_h) / 2 + + pred_coor[:, 0::2] = 1.0 * (pred_coor[:, 0::2] - dw) / resize_ratio + pred_coor[:, 1::2] = 1.0 * (pred_coor[:, 1::2] - dh) / resize_ratio + + # # (3) clip some boxes those are out of range + pred_coor = np.concatenate([np.maximum(pred_coor[:, :2], [0, 0]), + np.minimum(pred_coor[:, 2:], [org_w - 1, org_h - 1])], axis=-1) + invalid_mask = np.logical_or((pred_coor[:, 0] > pred_coor[:, 2]), (pred_coor[:, 1] > pred_coor[:, 3])) + pred_coor[invalid_mask] = 0 + + # # (4) discard some invalid boxes + bboxes_scale = np.sqrt(np.multiply.reduce(pred_coor[:, 2:4] - pred_coor[:, 0:2], axis=-1)) + scale_mask = np.logical_and((valid_scale[0] < bboxes_scale), (bboxes_scale < valid_scale[1])) + + # # (5) discard some boxes with low scores + classes = np.argmax(pred_prob, axis=-1) + scores = pred_conf * pred_prob[np.arange(len(pred_coor)), classes] + score_mask = scores > score_threshold + mask = np.logical_and(scale_mask, score_mask) + coors, scores, classes = pred_coor[mask], scores[mask], classes[mask] + + return np.concatenate([coors, scores[:, np.newaxis], classes[:, np.newaxis]], axis=-1) + + + + diff --git a/4-Object_Detection/SSD/ssd.py b/4-Object_Detection/SSD/ssd.py index 48b8ec0b..1cdc08a6 100644 --- a/4-Object_Detection/SSD/ssd.py +++ b/4-Object_Detection/SSD/ssd.py @@ -12,113 +12,49 @@ #================================================================ import tensorflow as tf - +from core.backbone import vgg16 class SSD(tf.keras.Model): - def __init__(self, num_class=21): + def __init__(self, input_data, num_class=21): super(SSD, self).__init__() # conv1 - self.conv1_1 = tf.keras.layers.Conv2D(64, 3, activation='relu', padding='same') - self.conv1_2 = tf.keras.layers.Conv2D(64, 3, activation='relu', padding='same') - self.pool1 = tf.keras.layers.MaxPooling2D(2, strides=2, padding='same') - - # conv2 - self.conv2_1 = tf.keras.layers.Conv2D(128, 3, activation='relu', padding='same') - self.conv2_2 = tf.keras.layers.Conv2D(128, 3, activation='relu', padding='same') - self.pool2 = tf.keras.layers.MaxPooling2D(2, strides=2, padding='same') - - # conv3 - self.conv3_1 = tf.keras.layers.Conv2D(256, 3, activation='relu', padding='same') - self.conv3_2 = tf.keras.layers.Conv2D(256, 3, activation='relu', padding='same') - self.conv3_3 = tf.keras.layers.Conv2D(256, 3, activation='relu', padding='same') - self.pool3 = tf.keras.layers.MaxPooling2D(2, strides=2, padding='same') - - # conv4 - self.conv4_1 = tf.keras.layers.Conv2D(512, 3, activation='relu', padding='same') - self.conv4_2 = tf.keras.layers.Conv2D(512, 3, activation='relu', padding='same') - self.conv4_3 = tf.keras.layers.Conv2D(512, 3, activation='relu', padding='same') - self.pool4 = tf.keras.layers.MaxPooling2D(2, strides=2, padding='same') - - # conv5 - self.conv5_1 = tf.keras.layers.Conv2D(512, 3, 
activation='relu', padding='same')
-        self.conv5_2 = tf.keras.layers.Conv2D(512, 3, activation='relu', padding='same')
-        self.conv5_3 = tf.keras.layers.Conv2D(512, 3, activation='relu', padding='same')
-        self.pool5   = tf.keras.layers.MaxPooling2D(3, strides=1, padding='same')
-
-        # fc6, => vgg backbone is finished. now they are all SSD blocks
-        self.fc6 = tf.keras.layers.Conv2D(1024, 3, dilation_rate=6, activation='relu', padding='same')
+        conv4, conv = vgg16(input_data)
+        self.conv4 = conv4
+        # fc6, from now they are all SSD blocks
+        conv = tf.keras.layers.Conv2D(1024, 3, dilation_rate=6, activation='relu', padding='same')(conv)#fc6
         # fc7
-        self.fc7 = tf.keras.layers.Conv2D(1024, 1, activation='relu', padding='same')
-        # Block 8/9/10/11: 1x1 and 3x3 convolutions strides 2 (except lasts)
+        conv = tf.keras.layers.Conv2D(1024, 1, activation='relu', padding='same')(conv)#fc7
+        self.conv7 = conv
+        # Block 8/9/10/11: 1x1 and 3x3 convolutions strides 2 (except the last 2 layers)
+
         # conv8
-        self.conv8_1 = tf.keras.layers.Conv2D(256, 1, activation='relu', padding='same')
-        self.conv8_2 = tf.keras.layers.Conv2D(512, 3, strides=2, activation='relu', padding='same')
+        conv = tf.keras.layers.Conv2D(256, 1, activation='relu', padding='same')(conv)
+        conv = tf.keras.layers.Conv2D(512, 3, strides=2, activation='relu', padding='same')(conv)
+        self.conv8 = conv
         # conv9
-        self.conv9_1 = tf.keras.layers.Conv2D(128, 1, activation='relu', padding='same')
-        self.conv9_2 = tf.keras.layers.Conv2D(256, 3, strides=2, activation='relu', padding='same')
+        conv = tf.keras.layers.Conv2D(128, 1, activation='relu', padding='same')(conv)
+        conv = tf.keras.layers.Conv2D(256, 3, strides=2, activation='relu', padding='same')(conv)
+        self.conv9 = conv
         # conv10
-        self.conv10_1 = tf.keras.layers.Conv2D(128, 1, activation='relu', padding='same')
-        self.conv10_2 = tf.keras.layers.Conv2D(256, 3, activation='relu', padding='valid')
+        conv = tf.keras.layers.Conv2D(128, 1, activation='relu', padding='same')(conv)
+        conv = tf.keras.layers.Conv2D(256, 3, activation='relu', padding='valid')(conv)
+        self.conv10 = conv
         # conv11
-        self.conv11_1 = tf.keras.layers.Conv2D(128, 1, activation='relu', padding='same')
-        self.conv11_2 = tf.keras.layers.Conv2D(256, 3, activation='relu', padding='valid')
-
-
-
-    def call(self, x, training=False):
-        h = self.conv1_1(x)
-        h = self.conv1_2(h)
-        h = self.pool1(h)
-
-        h = self.conv2_1(h)
-        h = self.conv2_2(h)
-        h = self.pool2(h)
-
-        h = self.conv3_1(h)
-        h = self.conv3_2(h)
-        h = self.conv3_3(h)
-        h = self.pool3(h)
-
-        h = self.conv4_1(h)
-        h = self.conv4_2(h)
-        h = self.conv4_3(h)
-        conv4 = h
-        print(h.shape)
-        h = self.pool4(h)
-
-        h = self.conv5_1(h)
-        h = self.conv5_2(h)
-        h = self.conv5_3(h)
-        h = self.pool5(h)
-
-        h = self.fc6(h)  # [1,19,19,1024]
-        h = self.fc7(h)  # [1,19,19,1024]
-        conv7 = h
-        print(h.shape)
-
-        h = self.conv8_1(h)
-        h = self.conv8_2(h)  # [1,10,10, 512]
-        conv8 = h
-        print(h.shape)
-
-        h = self.conv9_1(h)
-        h = self.conv9_2(h)  # [1, 5, 5, 256]
-        conv9 = h
-        print(h.shape)
-
-        h = self.conv10_1(h)
-        h = self.conv10_2(h)  # [1, 3, 3, 256]
-        conv10 = h
-        print(h.shape)
-
-        h = self.conv11_1(h)
-        h = self.conv11_2(h)  # [1, 1, 1, 256]
-        conv11 = h
-        print(h.shape)
-        return conv4, conv7, conv8, conv9, conv10, conv11
-
-model = SSD(21)
-conv4, conv7, conv8, conv9, conv10, conv11 = model(tf.ones(shape=[1,300,300,3]))
+        conv = tf.keras.layers.Conv2D(128, 1, activation='relu', padding='same')(conv)
+        conv = tf.keras.layers.Conv2D(256, 3, activation='relu', padding='valid')(conv)
+        self.conv11 = conv
+
+    def display(self):
+        
print(self.conv4.shape)
+        print(self.conv7.shape)
+        print(self.conv8.shape)
+        print(self.conv9.shape)
+        print(self.conv10.shape)
+        print(self.conv11.shape)
+        return self.conv4, self.conv7, self.conv8, self.conv9, self.conv10, self.conv11
+
+model = SSD(tf.ones(shape=[1,300,300,3]),21)
+model.display()

From 19f9e694e4ba93fe1d1adca241d4d7991a277aee Mon Sep 17 00:00:00 2001
From: WeikangWang
Date: Sat, 14 Dec 2019 23:14:15 -0500
Subject: [PATCH 5/5] 1. adjusted the ssd model per the structure in the SSD
 paper (Fig. 2) 2. moved the common file from YOLO_V3 to the core folder

---
 4-Object_Detection/SSD/core/common.py | 58 +++++++++++++++++++++
 4-Object_Detection/SSD/ssd.py         | 13 +++---
 2 files changed, 64 insertions(+), 7 deletions(-)
 create mode 100644 4-Object_Detection/SSD/core/common.py

diff --git a/4-Object_Detection/SSD/core/common.py b/4-Object_Detection/SSD/core/common.py
new file mode 100644
index 00000000..88cc8884
--- /dev/null
+++ b/4-Object_Detection/SSD/core/common.py
@@ -0,0 +1,58 @@
+#! /usr/bin/env python
+# coding=utf-8
+#================================================================
+# Copyright (C) 2019 * Ltd. All rights reserved.
+#
+# Editor : VIM
+# File name : common.py
+# Author : YunYang1994
+# Created date: 2019-07-11 23:12:53
+# Description :
+#
+#================================================================
+
+import tensorflow as tf
+
+class BatchNormalization(tf.keras.layers.BatchNormalization):
+    """
+    "Frozen state" and "inference mode" are two separate concepts.
+    `layer.trainable = False` is to freeze the layer, so the layer will use
+    stored moving `var` and `mean` in the "inference mode", and both `gamma`
+    and `beta` will not be updated !
+    """
+    def call(self, x, training=False):
+        if not training:
+            training = tf.constant(False)
+        training = tf.logical_and(training, self.trainable)
+        return super().call(x, training)
+
+def convolutional(input_layer, filters_shape, downsample=False, activate=True, bn=True):
+    if downsample:
+        input_layer = tf.keras.layers.ZeroPadding2D(((1, 0), (1, 0)))(input_layer)
+        padding = 'valid'
+        strides = 2
+    else:
+        strides = 1
+        padding = 'same'
+
+    conv = tf.keras.layers.Conv2D(filters=filters_shape[-1], kernel_size = filters_shape[0], strides=strides, padding=padding,
+                                  use_bias=not bn, kernel_regularizer=tf.keras.regularizers.l2(0.0005),
+                                  kernel_initializer=tf.random_normal_initializer(stddev=0.01),
+                                  bias_initializer=tf.constant_initializer(0.))(input_layer)
+
+    if bn: conv = BatchNormalization()(conv)
+    if activate == True: conv = tf.nn.leaky_relu(conv, alpha=0.1)
+
+    return conv
+
+def residual_block(input_layer, input_channel, filter_num1, filter_num2):
+    short_cut = input_layer
+    conv = convolutional(input_layer, filters_shape=(1, 1, input_channel, filter_num1))
+    conv = convolutional(conv       , filters_shape=(3, 3, filter_num1,   filter_num2))
+
+    residual_output = short_cut + conv
+    return residual_output
+
+def upsample(input_layer):
+    return tf.image.resize(input_layer, (input_layer.shape[1] * 2, input_layer.shape[2] * 2), method='nearest')
+
diff --git a/4-Object_Detection/SSD/ssd.py b/4-Object_Detection/SSD/ssd.py
index 1cdc08a6..f049b17c 100644
--- a/4-Object_Detection/SSD/ssd.py
+++ b/4-Object_Detection/SSD/ssd.py
@@ -19,30 +19,29 @@ def __init__(self, input_data, num_class=21):
         super(SSD, self).__init__()
         # conv1
         conv4, conv = vgg16(input_data)
-        self.conv4 = conv4
+        self.conv4 = tf.keras.layers.Conv2D(4*(num_class + 5),3, padding='same')(conv4)
         # fc6, from now they are all SSD blocks
         conv = 
tf.keras.layers.Conv2D(1024, 3, dilation_rate=6, activation='relu', padding='same')(conv)#fc6 # fc7 conv = tf.keras.layers.Conv2D(1024, 1, activation='relu', padding='same')(conv)#fc7 - self.conv7 = conv + self.conv7 = tf.keras.layers.Conv2D(6*(num_class + 5), 3, padding='same')(conv) # Block 8/9/10/11: 1x1 and 3x3 convolutions strides 2 (except the last 2 layers) - # conv8 conv = tf.keras.layers.Conv2D(256, 1, activation='relu', padding='same')(conv) conv = tf.keras.layers.Conv2D(512, 3, strides=2, activation='relu', padding='same')(conv) - self.conv8 = conv + self.conv8 = tf.keras.layers.Conv2D(6*(num_class + 5),3, padding='same')(conv) # conv9 conv = tf.keras.layers.Conv2D(128, 1, activation='relu', padding='same')(conv) conv = tf.keras.layers.Conv2D(256, 3, strides=2, activation='relu', padding='same')(conv) - self.conv9 = conv + self.conv9 = tf.keras.layers.Conv2D(6*(num_class + 5),3, padding='same')(conv) # conv10 conv = tf.keras.layers.Conv2D(128, 1, activation='relu', padding='same')(conv) conv = tf.keras.layers.Conv2D(256, 3, activation='relu', padding='valid')(conv) - self.conv10 = conv + self.conv10 = tf.keras.layers.Conv2D(4*(num_class + 5),3, padding='same')(conv) # conv11 conv = tf.keras.layers.Conv2D(128, 1, activation='relu', padding='same')(conv) conv = tf.keras.layers.Conv2D(256, 3, activation='relu', padding='valid')(conv) - self.conv11 = conv + self.conv11 = tf.keras.layers.Conv2D(4*(num_class + 5),3, padding='same')(conv) def display(self): print(self.conv4.shape)
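With patch 5, each retained feature map already carries its per-anchor predictions (4 or 6 anchors per cell), so the remaining step toward Fig. 2 of the SSD paper is flattening and concatenating the six heads. A sketch of that step, with two caveats: it is not part of these patches, and the paper predicts num_class + 4 values per anchor (class scores plus 4 box offsets), whereas these heads keep the YOLO-style num_class + 5 layout.

    import tensorflow as tf
    from ssd import SSD  # the class built in 4-Object_Detection/SSD/ssd.py above

    num_class = 21
    model = SSD(tf.ones(shape=[1, 300, 300, 3]), num_class)
    heads = model.display()  # conv4, conv7, conv8, conv9, conv10, conv11

    # (batch, H, W, anchors*(num_class+5)) -> (batch, H*W*anchors, num_class+5)
    flat = [tf.reshape(h, (tf.shape(h)[0], -1, num_class + 5)) for h in heads]
    predictions = tf.concat(flat, axis=1)
    print(predictions.shape)  # (1, 8732, 26) -- the familiar 8732 default boxes of SSD300

The 4/6/6/6/4/4 anchor counts chosen for the six heads reproduce the paper's 8732-box total for a 300x300 input (38*38*4 + 19*19*6 + 10*10*6 + 5*5*6 + 3*3*4 + 1*1*4).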