Darknet53 整体架构如下图所示:
def _conv2d(channel, kernel, padding, stride, norm_layer=BatchNorm, norm_kwargs=None):
    """Build a common conv-bn-leakyrelu cell.

    Returns a HybridSequential containing a bias-free Conv2D, a normalization
    layer (BatchNorm by default, eps=1e-5, momentum=0.9), and LeakyReLU(0.1).
    """
    block = nn.HybridSequential(prefix='')
    block.add(nn.Conv2D(channel, kernel_size=kernel,
                        strides=stride, padding=padding, use_bias=False))
    # Conv2D carries no bias because the following norm layer absorbs it.
    extra = {} if norm_kwargs is None else norm_kwargs
    block.add(norm_layer(epsilon=1e-5, momentum=0.9, **extra))
    block.add(nn.LeakyReLU(0.1))
    return block
- 就是返回 Conv2D + BatchNorm + LeakyReLU 这样一个 Cell
# default configurations
# Maps version string -> network class; 'v3' is the only version defined here.
darknet_versions = {'v3': DarknetV3}
# Per-version spec: num_layers -> (blocks per stage, channel widths).
# For Darknet-53: stages of [1, 2, 8, 8, 4] residual blocks; channels[0]
# (32) is the stem conv width, the remaining entries are stage widths.
darknet_spec = {
    'v3': {53: ([1, 2, 8, 8, 4], [32, 64, 128, 256, 512, 1024]),}
}
darknet_version 是 v3
specs = darknet_spec[darknet_version]
- specs 是
{53: ([1, 2, 8, 8, 4], [32, 64, 128, 256, 512, 1024]),}
这样一个 dict
layers, channels = specs[num_layers]
- num_layers 是 53, 因此 layers 是 [1, 2, 8, 8, 4], channels 是 [32, 64, 128, 256, 512, 1024]
- 这个 layers 用于指定每层 Layer 有几个 DarknetBasicBlockV3 搭建而成, 而 channels 则用于指定每层 Layer 相应的 DarknetBasicBlockV3 的 channel 是多少
darknet_class = darknet_versions[darknet_version]
因为 darknet_versions = {'v3': DarknetV3}, 因此 darknet_class 就是 DarknetV3
net = darknet_class(layers, channels, **kwargs)
- 其实就是在 call 如下代码
DarknetV3(layers=[1, 2, 8, 8, 4], channels=[32, 64, 128, 256, 512, 1024])
def __init__(self, channel, norm_layer=BatchNorm, norm_kwargs=None, **kwargs):
    """Residual cell: 1x1 conv down to `channel`, then 3x3 conv up to 2*channel.

    Both convs are _conv2d cells (Conv2D + norm + LeakyReLU); the output
    therefore has twice the channel count given here.
    """
    super(DarknetBasicBlockV3, self).__init__(**kwargs)
    self.body = nn.HybridSequential(prefix='')
    # (kernel, padding, out_channels): 1x1 reduce, then 3x3 expand.
    for ksize, pad, width in ((1, 0, channel), (3, 1, channel * 2)):
        self.body.add(_conv2d(width, ksize, pad, 1,
                              norm_layer=norm_layer, norm_kwargs=norm_kwargs))
- 两个 `_conv2d` cell, 每个 cell 都是 Conv2D + BatchNorm + LeakyReLU 这样的
- 第一个 `_conv2d` cell 是 kernel = 1, padding = 0, stride = 1, 看来只是用来做 channel 维数的变换的
- 第二个 `_conv2d` cell 的 channel 数是第一个 cell 的两倍, 是 kernel = 3, padding = 1, stride = 1 的 Cell
def hybrid_forward(self, F, x, *args):
    """Residual forward pass: add the block's input back onto its output.

    self.body preserves the channel count of x (1x1 reduce then 3x3 expand
    back), so the elementwise addition is shape-compatible.
    """
    return self.body(x) + x
- 看出这也是一个 residual block, 因此 DarknetBasicBlockV3 就是一个带两层 Conv + BN + LeakyReLU 的 Residual Block, 最后输出的 channel 数会是初始化函数指定的 channel 数的两倍
with self.name_scope():
    # Backbone feature extractor: stem conv followed by 5 downsampling stages.
    self.features = nn.HybridSequential()
    # first 3x3 conv (the stem); channels[0] is the stem width
    self.features.add(_conv2d(channels[0], 3, 1, 1,
                              norm_layer=norm_layer, norm_kwargs=norm_kwargs))
    # Each stage: one stride-2 conv to halve the spatial size, then
    # `nlayer` residual blocks at that width.
    for nlayer, channel in zip(layers, channels[1:]):
        assert channel % 2 == 0, "channel {} cannot be divided by 2".format(channel)
        # add downsample conv with stride=2
        self.features.add(_conv2d(channel, 3, 1, 2,
                                  norm_layer=norm_layer, norm_kwargs=norm_kwargs))
        # add nlayer basic blocks; channel // 2 because the block's second
        # conv doubles its channel argument back to `channel`
        for _ in range(nlayer):
            # NOTE(review): norm_layer/norm_kwargs are hardcoded to
            # BatchNorm/None here instead of forwarding this constructor's
            # parameters — confirm whether custom norm layers should apply.
            self.features.add(DarknetBasicBlockV3(channel // 2,
                                                  norm_layer=BatchNorm,
                                                  norm_kwargs=None))
    # output
    self.output = nn.Dense(classes)
网络结构已经在最开始的图里画出来了
def hybrid_forward(self, F, x):
    """Classifier forward: backbone features -> global avg pool -> dense output."""
    feat = self.features(x)
    # With global_pool=True the Pooling operator averages over the whole
    # spatial extent, ignoring the nominal kernel size.
    pooled = F.Pooling(feat, kernel=(7, 7), global_pool=True, pool_type='avg')
    return self.output(pooled)
这里传入的 kernel=(7, 7) 实际上不起作用: 一旦设置了 global_pool=True, Pooling 算子就会在整个特征图的空间维度上做全局平均池化, 并忽略传入的 kernel size。