Skip to content

Instantly share code, notes, and snippets.

@rowhanm
Created October 7, 2022 15:38
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save rowhanm/6380ccd79937c4f38c1d6e9fc255a95f to your computer and use it in GitHub Desktop.
Save rowhanm/6380ccd79937c4f38c1d6e9fc255a95f to your computer and use it in GitHub Desktop.
timm CoAtNet model definition (printed PyTorch module structure), as per the CoAtNet paper: https://arxiv.org/pdf/2106.04803v2.pdf
MaxxVit(
(stem): Stem(
(conv1): Conv2d(3, 64, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
(norm1): BatchNormAct2d(
64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
(drop): Identity()
(act): GELU()
)
(conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
)
(stages): Sequential(
(0): MaxxVitStage(
(blocks): Sequential(
(0): MbConvBlock(
(shortcut): Downsample2d(
(pool): AvgPool2d(kernel_size=2, stride=2, padding=0)
(expand): Conv2d(64, 96, kernel_size=(1, 1), stride=(1, 1))
)
(pre_norm): BatchNormAct2d(
64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
(drop): Identity()
(act): Identity()
)
(down): Identity()
(conv1_1x1): Conv2d(64, 384, kernel_size=(1, 1), stride=(1, 1), bias=False)
(norm1): BatchNormAct2d(
384, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
(drop): Identity()
(act): GELU()
)
(conv2_kxk): Conv2d(384, 384, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), groups=384, bias=False)
(norm2): BatchNormAct2d(
384, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
(drop): Identity()
(act): GELU()
)
(se): SEModule(
(fc1): Conv2d(384, 24, kernel_size=(1, 1), stride=(1, 1))
(bn): Identity()
(act): SiLU(inplace=True)
(fc2): Conv2d(24, 384, kernel_size=(1, 1), stride=(1, 1))
(gate): Sigmoid()
)
(conv3_1x1): Conv2d(384, 96, kernel_size=(1, 1), stride=(1, 1))
(drop_path): Identity()
)
(1): MbConvBlock(
(shortcut): Identity()
(pre_norm): BatchNormAct2d(
96, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
(drop): Identity()
(act): Identity()
)
(down): Identity()
(conv1_1x1): Conv2d(96, 384, kernel_size=(1, 1), stride=(1, 1), bias=False)
(norm1): BatchNormAct2d(
384, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
(drop): Identity()
(act): GELU()
)
(conv2_kxk): Conv2d(384, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=384, bias=False)
(norm2): BatchNormAct2d(
384, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
(drop): Identity()
(act): GELU()
)
(se): SEModule(
(fc1): Conv2d(384, 24, kernel_size=(1, 1), stride=(1, 1))
(bn): Identity()
(act): SiLU(inplace=True)
(fc2): Conv2d(24, 384, kernel_size=(1, 1), stride=(1, 1))
(gate): Sigmoid()
)
(conv3_1x1): Conv2d(384, 96, kernel_size=(1, 1), stride=(1, 1))
(drop_path): Identity()
)
)
)
(1): MaxxVitStage(
(blocks): Sequential(
(0): MbConvBlock(
(shortcut): Downsample2d(
(pool): AvgPool2d(kernel_size=2, stride=2, padding=0)
(expand): Conv2d(96, 192, kernel_size=(1, 1), stride=(1, 1))
)
(pre_norm): BatchNormAct2d(
96, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
(drop): Identity()
(act): Identity()
)
(down): Identity()
(conv1_1x1): Conv2d(96, 768, kernel_size=(1, 1), stride=(1, 1), bias=False)
(norm1): BatchNormAct2d(
768, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
(drop): Identity()
(act): GELU()
)
(conv2_kxk): Conv2d(768, 768, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), groups=768, bias=False)
(norm2): BatchNormAct2d(
768, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
(drop): Identity()
(act): GELU()
)
(se): SEModule(
(fc1): Conv2d(768, 48, kernel_size=(1, 1), stride=(1, 1))
(bn): Identity()
(act): SiLU(inplace=True)
(fc2): Conv2d(48, 768, kernel_size=(1, 1), stride=(1, 1))
(gate): Sigmoid()
)
(conv3_1x1): Conv2d(768, 192, kernel_size=(1, 1), stride=(1, 1))
(drop_path): Identity()
)
(1): MbConvBlock(
(shortcut): Identity()
(pre_norm): BatchNormAct2d(
192, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
(drop): Identity()
(act): Identity()
)
(down): Identity()
(conv1_1x1): Conv2d(192, 768, kernel_size=(1, 1), stride=(1, 1), bias=False)
(norm1): BatchNormAct2d(
768, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
(drop): Identity()
(act): GELU()
)
(conv2_kxk): Conv2d(768, 768, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=768, bias=False)
(norm2): BatchNormAct2d(
768, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
(drop): Identity()
(act): GELU()
)
(se): SEModule(
(fc1): Conv2d(768, 48, kernel_size=(1, 1), stride=(1, 1))
(bn): Identity()
(act): SiLU(inplace=True)
(fc2): Conv2d(48, 768, kernel_size=(1, 1), stride=(1, 1))
(gate): Sigmoid()
)
(conv3_1x1): Conv2d(768, 192, kernel_size=(1, 1), stride=(1, 1))
(drop_path): Identity()
)
(2): MbConvBlock(
(shortcut): Identity()
(pre_norm): BatchNormAct2d(
192, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
(drop): Identity()
(act): Identity()
)
(down): Identity()
(conv1_1x1): Conv2d(192, 768, kernel_size=(1, 1), stride=(1, 1), bias=False)
(norm1): BatchNormAct2d(
768, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
(drop): Identity()
(act): GELU()
)
(conv2_kxk): Conv2d(768, 768, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=768, bias=False)
(norm2): BatchNormAct2d(
768, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
(drop): Identity()
(act): GELU()
)
(se): SEModule(
(fc1): Conv2d(768, 48, kernel_size=(1, 1), stride=(1, 1))
(bn): Identity()
(act): SiLU(inplace=True)
(fc2): Conv2d(48, 768, kernel_size=(1, 1), stride=(1, 1))
(gate): Sigmoid()
)
(conv3_1x1): Conv2d(768, 192, kernel_size=(1, 1), stride=(1, 1))
(drop_path): Identity()
)
(3): MbConvBlock(
(shortcut): Identity()
(pre_norm): BatchNormAct2d(
192, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
(drop): Identity()
(act): Identity()
)
(down): Identity()
(conv1_1x1): Conv2d(192, 768, kernel_size=(1, 1), stride=(1, 1), bias=False)
(norm1): BatchNormAct2d(
768, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
(drop): Identity()
(act): GELU()
)
(conv2_kxk): Conv2d(768, 768, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=768, bias=False)
(norm2): BatchNormAct2d(
768, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
(drop): Identity()
(act): GELU()
)
(se): SEModule(
(fc1): Conv2d(768, 48, kernel_size=(1, 1), stride=(1, 1))
(bn): Identity()
(act): SiLU(inplace=True)
(fc2): Conv2d(48, 768, kernel_size=(1, 1), stride=(1, 1))
(gate): Sigmoid()
)
(conv3_1x1): Conv2d(768, 192, kernel_size=(1, 1), stride=(1, 1))
(drop_path): Identity()
)
(4): MbConvBlock(
(shortcut): Identity()
(pre_norm): BatchNormAct2d(
192, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
(drop): Identity()
(act): Identity()
)
(down): Identity()
(conv1_1x1): Conv2d(192, 768, kernel_size=(1, 1), stride=(1, 1), bias=False)
(norm1): BatchNormAct2d(
768, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
(drop): Identity()
(act): GELU()
)
(conv2_kxk): Conv2d(768, 768, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=768, bias=False)
(norm2): BatchNormAct2d(
768, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
(drop): Identity()
(act): GELU()
)
(se): SEModule(
(fc1): Conv2d(768, 48, kernel_size=(1, 1), stride=(1, 1))
(bn): Identity()
(act): SiLU(inplace=True)
(fc2): Conv2d(48, 768, kernel_size=(1, 1), stride=(1, 1))
(gate): Sigmoid()
)
(conv3_1x1): Conv2d(768, 192, kernel_size=(1, 1), stride=(1, 1))
(drop_path): Identity()
)
(5): MbConvBlock(
(shortcut): Identity()
(pre_norm): BatchNormAct2d(
192, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
(drop): Identity()
(act): Identity()
)
(down): Identity()
(conv1_1x1): Conv2d(192, 768, kernel_size=(1, 1), stride=(1, 1), bias=False)
(norm1): BatchNormAct2d(
768, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
(drop): Identity()
(act): GELU()
)
(conv2_kxk): Conv2d(768, 768, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=768, bias=False)
(norm2): BatchNormAct2d(
768, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
(drop): Identity()
(act): GELU()
)
(se): SEModule(
(fc1): Conv2d(768, 48, kernel_size=(1, 1), stride=(1, 1))
(bn): Identity()
(act): SiLU(inplace=True)
(fc2): Conv2d(48, 768, kernel_size=(1, 1), stride=(1, 1))
(gate): Sigmoid()
)
(conv3_1x1): Conv2d(768, 192, kernel_size=(1, 1), stride=(1, 1))
(drop_path): Identity()
)
)
)
(2): MaxxVitStage(
(blocks): Sequential(
(0): TransformerBlock2d(
(shortcut): Downsample2d(
(pool): AvgPool2d(kernel_size=2, stride=2, padding=0)
(expand): Conv2d(192, 384, kernel_size=(1, 1), stride=(1, 1))
)
(norm1): Sequential(
(norm): LayerNorm2d((192,), eps=1e-06, elementwise_affine=True)
(down): Downsample2d(
(pool): AvgPool2d(kernel_size=2, stride=2, padding=0)
(expand): Identity()
)
)
(attn): Attention2d(
(qkv): Conv2d(192, 1152, kernel_size=(1, 1), stride=(1, 1))
(rel_pos): RelPosBias()
(attn_drop): Dropout(p=0.0, inplace=False)
(proj): Conv2d(384, 384, kernel_size=(1, 1), stride=(1, 1))
(proj_drop): Dropout(p=0.0, inplace=False)
)
(ls1): Identity()
(drop_path1): Identity()
(norm2): LayerNorm2d((384,), eps=1e-06, elementwise_affine=True)
(mlp): ConvMlp(
(fc1): Conv2d(384, 1536, kernel_size=(1, 1), stride=(1, 1))
(norm): Identity()
(act): GELU()
(drop): Dropout(p=0.0, inplace=False)
(fc2): Conv2d(1536, 384, kernel_size=(1, 1), stride=(1, 1))
)
(ls2): Identity()
(drop_path2): Identity()
)
(1): TransformerBlock2d(
(shortcut): Identity()
(norm1): LayerNorm2d((384,), eps=1e-06, elementwise_affine=True)
(attn): Attention2d(
(qkv): Conv2d(384, 1152, kernel_size=(1, 1), stride=(1, 1))
(rel_pos): RelPosBias()
(attn_drop): Dropout(p=0.0, inplace=False)
(proj): Conv2d(384, 384, kernel_size=(1, 1), stride=(1, 1))
(proj_drop): Dropout(p=0.0, inplace=False)
)
(ls1): Identity()
(drop_path1): Identity()
(norm2): LayerNorm2d((384,), eps=1e-06, elementwise_affine=True)
(mlp): ConvMlp(
(fc1): Conv2d(384, 1536, kernel_size=(1, 1), stride=(1, 1))
(norm): Identity()
(act): GELU()
(drop): Dropout(p=0.0, inplace=False)
(fc2): Conv2d(1536, 384, kernel_size=(1, 1), stride=(1, 1))
)
(ls2): Identity()
(drop_path2): Identity()
)
(2): TransformerBlock2d(
(shortcut): Identity()
(norm1): LayerNorm2d((384,), eps=1e-06, elementwise_affine=True)
(attn): Attention2d(
(qkv): Conv2d(384, 1152, kernel_size=(1, 1), stride=(1, 1))
(rel_pos): RelPosBias()
(attn_drop): Dropout(p=0.0, inplace=False)
(proj): Conv2d(384, 384, kernel_size=(1, 1), stride=(1, 1))
(proj_drop): Dropout(p=0.0, inplace=False)
)
(ls1): Identity()
(drop_path1): Identity()
(norm2): LayerNorm2d((384,), eps=1e-06, elementwise_affine=True)
(mlp): ConvMlp(
(fc1): Conv2d(384, 1536, kernel_size=(1, 1), stride=(1, 1))
(norm): Identity()
(act): GELU()
(drop): Dropout(p=0.0, inplace=False)
(fc2): Conv2d(1536, 384, kernel_size=(1, 1), stride=(1, 1))
)
(ls2): Identity()
(drop_path2): Identity()
)
(3): TransformerBlock2d(
(shortcut): Identity()
(norm1): LayerNorm2d((384,), eps=1e-06, elementwise_affine=True)
(attn): Attention2d(
(qkv): Conv2d(384, 1152, kernel_size=(1, 1), stride=(1, 1))
(rel_pos): RelPosBias()
(attn_drop): Dropout(p=0.0, inplace=False)
(proj): Conv2d(384, 384, kernel_size=(1, 1), stride=(1, 1))
(proj_drop): Dropout(p=0.0, inplace=False)
)
(ls1): Identity()
(drop_path1): Identity()
(norm2): LayerNorm2d((384,), eps=1e-06, elementwise_affine=True)
(mlp): ConvMlp(
(fc1): Conv2d(384, 1536, kernel_size=(1, 1), stride=(1, 1))
(norm): Identity()
(act): GELU()
(drop): Dropout(p=0.0, inplace=False)
(fc2): Conv2d(1536, 384, kernel_size=(1, 1), stride=(1, 1))
)
(ls2): Identity()
(drop_path2): Identity()
)
(4): TransformerBlock2d(
(shortcut): Identity()
(norm1): LayerNorm2d((384,), eps=1e-06, elementwise_affine=True)
(attn): Attention2d(
(qkv): Conv2d(384, 1152, kernel_size=(1, 1), stride=(1, 1))
(rel_pos): RelPosBias()
(attn_drop): Dropout(p=0.0, inplace=False)
(proj): Conv2d(384, 384, kernel_size=(1, 1), stride=(1, 1))
(proj_drop): Dropout(p=0.0, inplace=False)
)
(ls1): Identity()
(drop_path1): Identity()
(norm2): LayerNorm2d((384,), eps=1e-06, elementwise_affine=True)
(mlp): ConvMlp(
(fc1): Conv2d(384, 1536, kernel_size=(1, 1), stride=(1, 1))
(norm): Identity()
(act): GELU()
(drop): Dropout(p=0.0, inplace=False)
(fc2): Conv2d(1536, 384, kernel_size=(1, 1), stride=(1, 1))
)
(ls2): Identity()
(drop_path2): Identity()
)
(5): TransformerBlock2d(
(shortcut): Identity()
(norm1): LayerNorm2d((384,), eps=1e-06, elementwise_affine=True)
(attn): Attention2d(
(qkv): Conv2d(384, 1152, kernel_size=(1, 1), stride=(1, 1))
(rel_pos): RelPosBias()
(attn_drop): Dropout(p=0.0, inplace=False)
(proj): Conv2d(384, 384, kernel_size=(1, 1), stride=(1, 1))
(proj_drop): Dropout(p=0.0, inplace=False)
)
(ls1): Identity()
(drop_path1): Identity()
(norm2): LayerNorm2d((384,), eps=1e-06, elementwise_affine=True)
(mlp): ConvMlp(
(fc1): Conv2d(384, 1536, kernel_size=(1, 1), stride=(1, 1))
(norm): Identity()
(act): GELU()
(drop): Dropout(p=0.0, inplace=False)
(fc2): Conv2d(1536, 384, kernel_size=(1, 1), stride=(1, 1))
)
(ls2): Identity()
(drop_path2): Identity()
)
(6): TransformerBlock2d(
(shortcut): Identity()
(norm1): LayerNorm2d((384,), eps=1e-06, elementwise_affine=True)
(attn): Attention2d(
(qkv): Conv2d(384, 1152, kernel_size=(1, 1), stride=(1, 1))
(rel_pos): RelPosBias()
(attn_drop): Dropout(p=0.0, inplace=False)
(proj): Conv2d(384, 384, kernel_size=(1, 1), stride=(1, 1))
(proj_drop): Dropout(p=0.0, inplace=False)
)
(ls1): Identity()
(drop_path1): Identity()
(norm2): LayerNorm2d((384,), eps=1e-06, elementwise_affine=True)
(mlp): ConvMlp(
(fc1): Conv2d(384, 1536, kernel_size=(1, 1), stride=(1, 1))
(norm): Identity()
(act): GELU()
(drop): Dropout(p=0.0, inplace=False)
(fc2): Conv2d(1536, 384, kernel_size=(1, 1), stride=(1, 1))
)
(ls2): Identity()
(drop_path2): Identity()
)
(7): TransformerBlock2d(
(shortcut): Identity()
(norm1): LayerNorm2d((384,), eps=1e-06, elementwise_affine=True)
(attn): Attention2d(
(qkv): Conv2d(384, 1152, kernel_size=(1, 1), stride=(1, 1))
(rel_pos): RelPosBias()
(attn_drop): Dropout(p=0.0, inplace=False)
(proj): Conv2d(384, 384, kernel_size=(1, 1), stride=(1, 1))
(proj_drop): Dropout(p=0.0, inplace=False)
)
(ls1): Identity()
(drop_path1): Identity()
(norm2): LayerNorm2d((384,), eps=1e-06, elementwise_affine=True)
(mlp): ConvMlp(
(fc1): Conv2d(384, 1536, kernel_size=(1, 1), stride=(1, 1))
(norm): Identity()
(act): GELU()
(drop): Dropout(p=0.0, inplace=False)
(fc2): Conv2d(1536, 384, kernel_size=(1, 1), stride=(1, 1))
)
(ls2): Identity()
(drop_path2): Identity()
)
(8): TransformerBlock2d(
(shortcut): Identity()
(norm1): LayerNorm2d((384,), eps=1e-06, elementwise_affine=True)
(attn): Attention2d(
(qkv): Conv2d(384, 1152, kernel_size=(1, 1), stride=(1, 1))
(rel_pos): RelPosBias()
(attn_drop): Dropout(p=0.0, inplace=False)
(proj): Conv2d(384, 384, kernel_size=(1, 1), stride=(1, 1))
(proj_drop): Dropout(p=0.0, inplace=False)
)
(ls1): Identity()
(drop_path1): Identity()
(norm2): LayerNorm2d((384,), eps=1e-06, elementwise_affine=True)
(mlp): ConvMlp(
(fc1): Conv2d(384, 1536, kernel_size=(1, 1), stride=(1, 1))
(norm): Identity()
(act): GELU()
(drop): Dropout(p=0.0, inplace=False)
(fc2): Conv2d(1536, 384, kernel_size=(1, 1), stride=(1, 1))
)
(ls2): Identity()
(drop_path2): Identity()
)
(9): TransformerBlock2d(
(shortcut): Identity()
(norm1): LayerNorm2d((384,), eps=1e-06, elementwise_affine=True)
(attn): Attention2d(
(qkv): Conv2d(384, 1152, kernel_size=(1, 1), stride=(1, 1))
(rel_pos): RelPosBias()
(attn_drop): Dropout(p=0.0, inplace=False)
(proj): Conv2d(384, 384, kernel_size=(1, 1), stride=(1, 1))
(proj_drop): Dropout(p=0.0, inplace=False)
)
(ls1): Identity()
(drop_path1): Identity()
(norm2): LayerNorm2d((384,), eps=1e-06, elementwise_affine=True)
(mlp): ConvMlp(
(fc1): Conv2d(384, 1536, kernel_size=(1, 1), stride=(1, 1))
(norm): Identity()
(act): GELU()
(drop): Dropout(p=0.0, inplace=False)
(fc2): Conv2d(1536, 384, kernel_size=(1, 1), stride=(1, 1))
)
(ls2): Identity()
(drop_path2): Identity()
)
(10): TransformerBlock2d(
(shortcut): Identity()
(norm1): LayerNorm2d((384,), eps=1e-06, elementwise_affine=True)
(attn): Attention2d(
(qkv): Conv2d(384, 1152, kernel_size=(1, 1), stride=(1, 1))
(rel_pos): RelPosBias()
(attn_drop): Dropout(p=0.0, inplace=False)
(proj): Conv2d(384, 384, kernel_size=(1, 1), stride=(1, 1))
(proj_drop): Dropout(p=0.0, inplace=False)
)
(ls1): Identity()
(drop_path1): Identity()
(norm2): LayerNorm2d((384,), eps=1e-06, elementwise_affine=True)
(mlp): ConvMlp(
(fc1): Conv2d(384, 1536, kernel_size=(1, 1), stride=(1, 1))
(norm): Identity()
(act): GELU()
(drop): Dropout(p=0.0, inplace=False)
(fc2): Conv2d(1536, 384, kernel_size=(1, 1), stride=(1, 1))
)
(ls2): Identity()
(drop_path2): Identity()
)
(11): TransformerBlock2d(
(shortcut): Identity()
(norm1): LayerNorm2d((384,), eps=1e-06, elementwise_affine=True)
(attn): Attention2d(
(qkv): Conv2d(384, 1152, kernel_size=(1, 1), stride=(1, 1))
(rel_pos): RelPosBias()
(attn_drop): Dropout(p=0.0, inplace=False)
(proj): Conv2d(384, 384, kernel_size=(1, 1), stride=(1, 1))
(proj_drop): Dropout(p=0.0, inplace=False)
)
(ls1): Identity()
(drop_path1): Identity()
(norm2): LayerNorm2d((384,), eps=1e-06, elementwise_affine=True)
(mlp): ConvMlp(
(fc1): Conv2d(384, 1536, kernel_size=(1, 1), stride=(1, 1))
(norm): Identity()
(act): GELU()
(drop): Dropout(p=0.0, inplace=False)
(fc2): Conv2d(1536, 384, kernel_size=(1, 1), stride=(1, 1))
)
(ls2): Identity()
(drop_path2): Identity()
)
(12): TransformerBlock2d(
(shortcut): Identity()
(norm1): LayerNorm2d((384,), eps=1e-06, elementwise_affine=True)
(attn): Attention2d(
(qkv): Conv2d(384, 1152, kernel_size=(1, 1), stride=(1, 1))
(rel_pos): RelPosBias()
(attn_drop): Dropout(p=0.0, inplace=False)
(proj): Conv2d(384, 384, kernel_size=(1, 1), stride=(1, 1))
(proj_drop): Dropout(p=0.0, inplace=False)
)
(ls1): Identity()
(drop_path1): Identity()
(norm2): LayerNorm2d((384,), eps=1e-06, elementwise_affine=True)
(mlp): ConvMlp(
(fc1): Conv2d(384, 1536, kernel_size=(1, 1), stride=(1, 1))
(norm): Identity()
(act): GELU()
(drop): Dropout(p=0.0, inplace=False)
(fc2): Conv2d(1536, 384, kernel_size=(1, 1), stride=(1, 1))
)
(ls2): Identity()
(drop_path2): Identity()
)
(13): TransformerBlock2d(
(shortcut): Identity()
(norm1): LayerNorm2d((384,), eps=1e-06, elementwise_affine=True)
(attn): Attention2d(
(qkv): Conv2d(384, 1152, kernel_size=(1, 1), stride=(1, 1))
(rel_pos): RelPosBias()
(attn_drop): Dropout(p=0.0, inplace=False)
(proj): Conv2d(384, 384, kernel_size=(1, 1), stride=(1, 1))
(proj_drop): Dropout(p=0.0, inplace=False)
)
(ls1): Identity()
(drop_path1): Identity()
(norm2): LayerNorm2d((384,), eps=1e-06, elementwise_affine=True)
(mlp): ConvMlp(
(fc1): Conv2d(384, 1536, kernel_size=(1, 1), stride=(1, 1))
(norm): Identity()
(act): GELU()
(drop): Dropout(p=0.0, inplace=False)
(fc2): Conv2d(1536, 384, kernel_size=(1, 1), stride=(1, 1))
)
(ls2): Identity()
(drop_path2): Identity()
)
)
)
(3): MaxxVitStage(
(blocks): Sequential(
(0): TransformerBlock2d(
(shortcut): Downsample2d(
(pool): AvgPool2d(kernel_size=2, stride=2, padding=0)
(expand): Conv2d(384, 768, kernel_size=(1, 1), stride=(1, 1))
)
(norm1): Sequential(
(norm): LayerNorm2d((384,), eps=1e-06, elementwise_affine=True)
(down): Downsample2d(
(pool): AvgPool2d(kernel_size=2, stride=2, padding=0)
(expand): Identity()
)
)
(attn): Attention2d(
(qkv): Conv2d(384, 2304, kernel_size=(1, 1), stride=(1, 1))
(rel_pos): RelPosBias()
(attn_drop): Dropout(p=0.0, inplace=False)
(proj): Conv2d(768, 768, kernel_size=(1, 1), stride=(1, 1))
(proj_drop): Dropout(p=0.0, inplace=False)
)
(ls1): Identity()
(drop_path1): Identity()
(norm2): LayerNorm2d((768,), eps=1e-06, elementwise_affine=True)
(mlp): ConvMlp(
(fc1): Conv2d(768, 3072, kernel_size=(1, 1), stride=(1, 1))
(norm): Identity()
(act): GELU()
(drop): Dropout(p=0.0, inplace=False)
(fc2): Conv2d(3072, 768, kernel_size=(1, 1), stride=(1, 1))
)
(ls2): Identity()
(drop_path2): Identity()
)
(1): TransformerBlock2d(
(shortcut): Identity()
(norm1): LayerNorm2d((768,), eps=1e-06, elementwise_affine=True)
(attn): Attention2d(
(qkv): Conv2d(768, 2304, kernel_size=(1, 1), stride=(1, 1))
(rel_pos): RelPosBias()
(attn_drop): Dropout(p=0.0, inplace=False)
(proj): Conv2d(768, 768, kernel_size=(1, 1), stride=(1, 1))
(proj_drop): Dropout(p=0.0, inplace=False)
)
(ls1): Identity()
(drop_path1): Identity()
(norm2): LayerNorm2d((768,), eps=1e-06, elementwise_affine=True)
(mlp): ConvMlp(
(fc1): Conv2d(768, 3072, kernel_size=(1, 1), stride=(1, 1))
(norm): Identity()
(act): GELU()
(drop): Dropout(p=0.0, inplace=False)
(fc2): Conv2d(3072, 768, kernel_size=(1, 1), stride=(1, 1))
)
(ls2): Identity()
(drop_path2): Identity()
)
)
)
)
(norm): LayerNorm2d((768,), eps=1e-06, elementwise_affine=True)
(head): ClassifierHead(
(global_pool): SelectAdaptivePool2d (pool_type=avg, flatten=Flatten(start_dim=1, end_dim=-1))
(fc): Linear(in_features=768, out_features=1000, bias=True)
(flatten): Identity()
)
)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment