rowhanm/coatnet_def.txt

## coatnet_def.txt
MaxxVit(
  (stem): Stem(
    (conv1): Conv2d(3, 64, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
    (norm1): BatchNormAct2d(
      64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
      (drop): Identity()
      (act): GELU()
    )
    (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
  )
  (stages): Sequential(
    (0): MaxxVitStage(
      (blocks): Sequential(
        (0): MbConvBlock(
          (shortcut): Downsample2d(
            (pool): AvgPool2d(kernel_size=2, stride=2, padding=0)
            (expand): Conv2d(64, 96, kernel_size=(1, 1), stride=(1, 1))
          )
          (pre_norm): BatchNormAct2d(
            64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
            (drop): Identity()
            (act): Identity()
          )
          (down): Identity()
          (conv1_1x1): Conv2d(64, 384, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (norm1): BatchNormAct2d(
            384, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
            (drop): Identity()
            (act): GELU()
          )
          (conv2_kxk): Conv2d(384, 384, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), groups=384, bias=False)
          (norm2): BatchNormAct2d(
            384, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
            (drop): Identity()
            (act): GELU()
          )
          (se): SEModule(
            (fc1): Conv2d(384, 24, kernel_size=(1, 1), stride=(1, 1))
            (bn): Identity()
            (act): SiLU(inplace=True)
            (fc2): Conv2d(24, 384, kernel_size=(1, 1), stride=(1, 1))
            (gate): Sigmoid()
          )
          (conv3_1x1): Conv2d(384, 96, kernel_size=(1, 1), stride=(1, 1))
          (drop_path): Identity()
        )
        (1): MbConvBlock(
          (shortcut): Identity()
          (pre_norm): BatchNormAct2d(
            96, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
            (drop): Identity()
            (act): Identity()
          )
          (down): Identity()
          (conv1_1x1): Conv2d(96, 384, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (norm1): BatchNormAct2d(
            384, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
            (drop): Identity()
            (act): GELU()
          )
          (conv2_kxk): Conv2d(384, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=384, bias=False)
          (norm2): BatchNormAct2d(
            384, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
            (drop): Identity()
            (act): GELU()
          )
          (se): SEModule(
            (fc1): Conv2d(384, 24, kernel_size=(1, 1), stride=(1, 1))
            (bn): Identity()
            (act): SiLU(inplace=True)
            (fc2): Conv2d(24, 384, kernel_size=(1, 1), stride=(1, 1))
            (gate): Sigmoid()
          )
          (conv3_1x1): Conv2d(384, 96, kernel_size=(1, 1), stride=(1, 1))
          (drop_path): Identity()
        )
      )
    )
    (1): MaxxVitStage(
      (blocks): Sequential(
        (0): MbConvBlock(
          (shortcut): Downsample2d(
            (pool): AvgPool2d(kernel_size=2, stride=2, padding=0)
            (expand): Conv2d(96, 192, kernel_size=(1, 1), stride=(1, 1))
          )
          (pre_norm): BatchNormAct2d(
            96, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
            (drop): Identity()
            (act): Identity()
          )
          (down): Identity()
          (conv1_1x1): Conv2d(96, 768, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (norm1): BatchNormAct2d(
            768, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
            (drop): Identity()
            (act): GELU()
          )
          (conv2_kxk): Conv2d(768, 768, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), groups=768, bias=False)
          (norm2): BatchNormAct2d(
            768, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
            (drop): Identity()
            (act): GELU()
          )
          (se): SEModule(
            (fc1): Conv2d(768, 48, kernel_size=(1, 1), stride=(1, 1))
            (bn): Identity()
            (act): SiLU(inplace=True)
            (fc2): Conv2d(48, 768, kernel_size=(1, 1), stride=(1, 1))
            (gate): Sigmoid()
          )
          (conv3_1x1): Conv2d(768, 192, kernel_size=(1, 1), stride=(1, 1))
          (drop_path): Identity()
        )
        (1): MbConvBlock(
          (shortcut): Identity()
          (pre_norm): BatchNormAct2d(
            192, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
            (drop): Identity()
            (act): Identity()
          )
          (down): Identity()
          (conv1_1x1): Conv2d(192, 768, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (norm1): BatchNormAct2d(
            768, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
            (drop): Identity()
            (act): GELU()
          )
          (conv2_kxk): Conv2d(768, 768, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=768, bias=False)
          (norm2): BatchNormAct2d(
            768, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
            (drop): Identity()
            (act): GELU()
          )
          (se): SEModule(
            (fc1): Conv2d(768, 48, kernel_size=(1, 1), stride=(1, 1))
            (bn): Identity()
            (act): SiLU(inplace=True)
            (fc2): Conv2d(48, 768, kernel_size=(1, 1), stride=(1, 1))
            (gate): Sigmoid()
          )
          (conv3_1x1): Conv2d(768, 192, kernel_size=(1, 1), stride=(1, 1))
          (drop_path): Identity()
        )
        (2): MbConvBlock(
          (shortcut): Identity()
          (pre_norm): BatchNormAct2d(
            192, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
            (drop): Identity()
            (act): Identity()
          )
          (down): Identity()
          (conv1_1x1): Conv2d(192, 768, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (norm1): BatchNormAct2d(
            768, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
            (drop): Identity()
            (act): GELU()
          )
          (conv2_kxk): Conv2d(768, 768, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=768, bias=False)
          (norm2): BatchNormAct2d(
            768, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
            (drop): Identity()
            (act): GELU()
          )
          (se): SEModule(
            (fc1): Conv2d(768, 48, kernel_size=(1, 1), stride=(1, 1))
            (bn): Identity()
            (act): SiLU(inplace=True)
            (fc2): Conv2d(48, 768, kernel_size=(1, 1), stride=(1, 1))
            (gate): Sigmoid()
          )
          (conv3_1x1): Conv2d(768, 192, kernel_size=(1, 1), stride=(1, 1))
          (drop_path): Identity()
        )
        (3): MbConvBlock(
          (shortcut): Identity()
          (pre_norm): BatchNormAct2d(
            192, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
            (drop): Identity()
            (act): Identity()
          )
          (down): Identity()
          (conv1_1x1): Conv2d(192, 768, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (norm1): BatchNormAct2d(
            768, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
            (drop): Identity()
            (act): GELU()
          )
          (conv2_kxk): Conv2d(768, 768, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=768, bias=False)
          (norm2): BatchNormAct2d(
            768, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
            (drop): Identity()
            (act): GELU()
          )
          (se): SEModule(
            (fc1): Conv2d(768, 48, kernel_size=(1, 1), stride=(1, 1))
            (bn): Identity()
            (act): SiLU(inplace=True)
            (fc2): Conv2d(48, 768, kernel_size=(1, 1), stride=(1, 1))
            (gate): Sigmoid()
          )
          (conv3_1x1): Conv2d(768, 192, kernel_size=(1, 1), stride=(1, 1))
          (drop_path): Identity()
        )
        (4): MbConvBlock(
          (shortcut): Identity()
          (pre_norm): BatchNormAct2d(
            192, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
            (drop): Identity()
            (act): Identity()
          )
          (down): Identity()
          (conv1_1x1): Conv2d(192, 768, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (norm1): BatchNormAct2d(
            768, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
            (drop): Identity()
            (act): GELU()
          )
          (conv2_kxk): Conv2d(768, 768, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=768, bias=False)
          (norm2): BatchNormAct2d(
            768, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
            (drop): Identity()
            (act): GELU()
          )
          (se): SEModule(
            (fc1): Conv2d(768, 48, kernel_size=(1, 1), stride=(1, 1))
            (bn): Identity()
            (act): SiLU(inplace=True)
            (fc2): Conv2d(48, 768, kernel_size=(1, 1), stride=(1, 1))
            (gate): Sigmoid()
          )
          (conv3_1x1): Conv2d(768, 192, kernel_size=(1, 1), stride=(1, 1))
          (drop_path): Identity()
        )
        (5): MbConvBlock(
          (shortcut): Identity()
          (pre_norm): BatchNormAct2d(
            192, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
            (drop): Identity()
            (act): Identity()
          )
          (down): Identity()
          (conv1_1x1): Conv2d(192, 768, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (norm1): BatchNormAct2d(
            768, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
            (drop): Identity()
            (act): GELU()
          )
          (conv2_kxk): Conv2d(768, 768, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=768, bias=False)
          (norm2): BatchNormAct2d(
            768, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
            (drop): Identity()
            (act): GELU()
          )
          (se): SEModule(
            (fc1): Conv2d(768, 48, kernel_size=(1, 1), stride=(1, 1))
            (bn): Identity()
            (act): SiLU(inplace=True)
            (fc2): Conv2d(48, 768, kernel_size=(1, 1), stride=(1, 1))
            (gate): Sigmoid()
          )
          (conv3_1x1): Conv2d(768, 192, kernel_size=(1, 1), stride=(1, 1))
          (drop_path): Identity()
        )
      )
    )
    (2): MaxxVitStage(
      (blocks): Sequential(
        (0): TransformerBlock2d(
          (shortcut): Downsample2d(
            (pool): AvgPool2d(kernel_size=2, stride=2, padding=0)
            (expand): Conv2d(192, 384, kernel_size=(1, 1), stride=(1, 1))
          )
          (norm1): Sequential(
            (norm): LayerNorm2d((192,), eps=1e-06, elementwise_affine=True)
            (down): Downsample2d(
              (pool): AvgPool2d(kernel_size=2, stride=2, padding=0)
              (expand): Identity()
            )
          )
          (attn): Attention2d(
            (qkv): Conv2d(192, 1152, kernel_size=(1, 1), stride=(1, 1))
            (rel_pos): RelPosBias()
            (attn_drop): Dropout(p=0.0, inplace=False)
            (proj): Conv2d(384, 384, kernel_size=(1, 1), stride=(1, 1))
            (proj_drop): Dropout(p=0.0, inplace=False)
          )
          (ls1): Identity()
          (drop_path1): Identity()
          (norm2): LayerNorm2d((384,), eps=1e-06, elementwise_affine=True)
          (mlp): ConvMlp(
            (fc1): Conv2d(384, 1536, kernel_size=(1, 1), stride=(1, 1))
            (norm): Identity()
            (act): GELU()
            (drop): Dropout(p=0.0, inplace=False)
            (fc2): Conv2d(1536, 384, kernel_size=(1, 1), stride=(1, 1))
          )
          (ls2): Identity()
          (drop_path2): Identity()
        )
        (1): TransformerBlock2d(
          (shortcut): Identity()
          (norm1): LayerNorm2d((384,), eps=1e-06, elementwise_affine=True)
          (attn): Attention2d(
            (qkv): Conv2d(384, 1152, kernel_size=(1, 1), stride=(1, 1))
            (rel_pos): RelPosBias()
            (attn_drop): Dropout(p=0.0, inplace=False)
            (proj): Conv2d(384, 384, kernel_size=(1, 1), stride=(1, 1))
            (proj_drop): Dropout(p=0.0, inplace=False)
          )
          (ls1): Identity()
          (drop_path1): Identity()
          (norm2): LayerNorm2d((384,), eps=1e-06, elementwise_affine=True)
          (mlp): ConvMlp(
            (fc1): Conv2d(384, 1536, kernel_size=(1, 1), stride=(1, 1))
            (norm): Identity()
            (act): GELU()
            (drop): Dropout(p=0.0, inplace=False)
            (fc2): Conv2d(1536, 384, kernel_size=(1, 1), stride=(1, 1))
          )
          (ls2): Identity()
          (drop_path2): Identity()
        )
        (2): TransformerBlock2d(
          (shortcut): Identity()
          (norm1): LayerNorm2d((384,), eps=1e-06, elementwise_affine=True)
          (attn): Attention2d(
            (qkv): Conv2d(384, 1152, kernel_size=(1, 1), stride=(1, 1))
            (rel_pos): RelPosBias()
            (attn_drop): Dropout(p=0.0, inplace=False)
            (proj): Conv2d(384, 384, kernel_size=(1, 1), stride=(1, 1))
            (proj_drop): Dropout(p=0.0, inplace=False)
          )
          (ls1): Identity()
          (drop_path1): Identity()
          (norm2): LayerNorm2d((384,), eps=1e-06, elementwise_affine=True)
          (mlp): ConvMlp(
            (fc1): Conv2d(384, 1536, kernel_size=(1, 1), stride=(1, 1))
            (norm): Identity()
            (act): GELU()
            (drop): Dropout(p=0.0, inplace=False)
            (fc2): Conv2d(1536, 384, kernel_size=(1, 1), stride=(1, 1))
          )
          (ls2): Identity()
          (drop_path2): Identity()
        )
        (3): TransformerBlock2d(
          (shortcut): Identity()
          (norm1): LayerNorm2d((384,), eps=1e-06, elementwise_affine=True)
          (attn): Attention2d(
            (qkv): Conv2d(384, 1152, kernel_size=(1, 1), stride=(1, 1))
            (rel_pos): RelPosBias()
            (attn_drop): Dropout(p=0.0, inplace=False)
            (proj): Conv2d(384, 384, kernel_size=(1, 1), stride=(1, 1))
            (proj_drop): Dropout(p=0.0, inplace=False)
          )
          (ls1): Identity()
          (drop_path1): Identity()
          (norm2): LayerNorm2d((384,), eps=1e-06, elementwise_affine=True)
          (mlp): ConvMlp(
            (fc1): Conv2d(384, 1536, kernel_size=(1, 1), stride=(1, 1))
            (norm): Identity()
            (act): GELU()
            (drop): Dropout(p=0.0, inplace=False)
            (fc2): Conv2d(1536, 384, kernel_size=(1, 1), stride=(1, 1))
          )
          (ls2): Identity()
          (drop_path2): Identity()
        )
        (4): TransformerBlock2d(
          (shortcut): Identity()
          (norm1): LayerNorm2d((384,), eps=1e-06, elementwise_affine=True)
          (attn): Attention2d(
            (qkv): Conv2d(384, 1152, kernel_size=(1, 1), stride=(1, 1))
            (rel_pos): RelPosBias()
            (attn_drop): Dropout(p=0.0, inplace=False)
            (proj): Conv2d(384, 384, kernel_size=(1, 1), stride=(1, 1))
            (proj_drop): Dropout(p=0.0, inplace=False)
          )
          (ls1): Identity()
          (drop_path1): Identity()
          (norm2): LayerNorm2d((384,), eps=1e-06, elementwise_affine=True)
          (mlp): ConvMlp(
            (fc1): Conv2d(384, 1536, kernel_size=(1, 1), stride=(1, 1))
            (norm): Identity()
            (act): GELU()
            (drop): Dropout(p=0.0, inplace=False)
            (fc2): Conv2d(1536, 384, kernel_size=(1, 1), stride=(1, 1))
          )
          (ls2): Identity()
          (drop_path2): Identity()
        )
        (5): TransformerBlock2d(
          (shortcut): Identity()
          (norm1): LayerNorm2d((384,), eps=1e-06, elementwise_affine=True)
          (attn): Attention2d(
            (qkv): Conv2d(384, 1152, kernel_size=(1, 1), stride=(1, 1))
            (rel_pos): RelPosBias()
            (attn_drop): Dropout(p=0.0, inplace=False)
            (proj): Conv2d(384, 384, kernel_size=(1, 1), stride=(1, 1))
            (proj_drop): Dropout(p=0.0, inplace=False)
          )
          (ls1): Identity()
          (drop_path1): Identity()
          (norm2): LayerNorm2d((384,), eps=1e-06, elementwise_affine=True)
          (mlp): ConvMlp(
            (fc1): Conv2d(384, 1536, kernel_size=(1, 1), stride=(1, 1))
            (norm): Identity()
            (act): GELU()
            (drop): Dropout(p=0.0, inplace=False)
            (fc2): Conv2d(1536, 384, kernel_size=(1, 1), stride=(1, 1))
          )
          (ls2): Identity()
          (drop_path2): Identity()
        )
        (6): TransformerBlock2d(
          (shortcut): Identity()
          (norm1): LayerNorm2d((384,), eps=1e-06, elementwise_affine=True)
          (attn): Attention2d(
            (qkv): Conv2d(384, 1152, kernel_size=(1, 1), stride=(1, 1))
            (rel_pos): RelPosBias()
            (attn_drop): Dropout(p=0.0, inplace=False)
            (proj): Conv2d(384, 384, kernel_size=(1, 1), stride=(1, 1))
            (proj_drop): Dropout(p=0.0, inplace=False)
          )
          (ls1): Identity()
          (drop_path1): Identity()
          (norm2): LayerNorm2d((384,), eps=1e-06, elementwise_affine=True)
          (mlp): ConvMlp(
            (fc1): Conv2d(384, 1536, kernel_size=(1, 1), stride=(1, 1))
            (norm): Identity()
            (act): GELU()
            (drop): Dropout(p=0.0, inplace=False)
            (fc2): Conv2d(1536, 384, kernel_size=(1, 1), stride=(1, 1))
          )
          (ls2): Identity()
          (drop_path2): Identity()
        )
        (7): TransformerBlock2d(
          (shortcut): Identity()
          (norm1): LayerNorm2d((384,), eps=1e-06, elementwise_affine=True)
          (attn): Attention2d(
            (qkv): Conv2d(384, 1152, kernel_size=(1, 1), stride=(1, 1))
            (rel_pos): RelPosBias()
            (attn_drop): Dropout(p=0.0, inplace=False)
            (proj): Conv2d(384, 384, kernel_size=(1, 1), stride=(1, 1))
            (proj_drop): Dropout(p=0.0, inplace=False)
          )
          (ls1): Identity()
          (drop_path1): Identity()
          (norm2): LayerNorm2d((384,), eps=1e-06, elementwise_affine=True)
          (mlp): ConvMlp(
            (fc1): Conv2d(384, 1536, kernel_size=(1, 1), stride=(1, 1))
            (norm): Identity()
            (act): GELU()
            (drop): Dropout(p=0.0, inplace=False)
            (fc2): Conv2d(1536, 384, kernel_size=(1, 1), stride=(1, 1))
          )
          (ls2): Identity()
          (drop_path2): Identity()
        )
        (8): TransformerBlock2d(
          (shortcut): Identity()
          (norm1): LayerNorm2d((384,), eps=1e-06, elementwise_affine=True)
          (attn): Attention2d(
            (qkv): Conv2d(384, 1152, kernel_size=(1, 1), stride=(1, 1))
            (rel_pos): RelPosBias()
            (attn_drop): Dropout(p=0.0, inplace=False)
            (proj): Conv2d(384, 384, kernel_size=(1, 1), stride=(1, 1))
            (proj_drop): Dropout(p=0.0, inplace=False)
          )
          (ls1): Identity()
          (drop_path1): Identity()
          (norm2): LayerNorm2d((384,), eps=1e-06, elementwise_affine=True)
          (mlp): ConvMlp(
            (fc1): Conv2d(384, 1536, kernel_size=(1, 1), stride=(1, 1))
            (norm): Identity()
            (act): GELU()
            (drop): Dropout(p=0.0, inplace=False)
            (fc2): Conv2d(1536, 384, kernel_size=(1, 1), stride=(1, 1))
          )
          (ls2): Identity()
          (drop_path2): Identity()
        )
        (9): TransformerBlock2d(
          (shortcut): Identity()
          (norm1): LayerNorm2d((384,), eps=1e-06, elementwise_affine=True)
          (attn): Attention2d(
            (qkv): Conv2d(384, 1152, kernel_size=(1, 1), stride=(1, 1))
            (rel_pos): RelPosBias()
            (attn_drop): Dropout(p=0.0, inplace=False)
            (proj): Conv2d(384, 384, kernel_size=(1, 1), stride=(1, 1))
            (proj_drop): Dropout(p=0.0, inplace=False)
          )
          (ls1): Identity()
          (drop_path1): Identity()
          (norm2): LayerNorm2d((384,), eps=1e-06, elementwise_affine=True)
          (mlp): ConvMlp(
            (fc1): Conv2d(384, 1536, kernel_size=(1, 1), stride=(1, 1))
            (norm): Identity()
            (act): GELU()
            (drop): Dropout(p=0.0, inplace=False)
            (fc2): Conv2d(1536, 384, kernel_size=(1, 1), stride=(1, 1))
          )
          (ls2): Identity()
          (drop_path2): Identity()
        )
        (10): TransformerBlock2d(
          (shortcut): Identity()
          (norm1): LayerNorm2d((384,), eps=1e-06, elementwise_affine=True)
          (attn): Attention2d(
            (qkv): Conv2d(384, 1152, kernel_size=(1, 1), stride=(1, 1))
            (rel_pos): RelPosBias()
            (attn_drop): Dropout(p=0.0, inplace=False)
            (proj): Conv2d(384, 384, kernel_size=(1, 1), stride=(1, 1))
            (proj_drop): Dropout(p=0.0, inplace=False)
          )
          (ls1): Identity()
          (drop_path1): Identity()
          (norm2): LayerNorm2d((384,), eps=1e-06, elementwise_affine=True)
          (mlp): ConvMlp(
            (fc1): Conv2d(384, 1536, kernel_size=(1, 1), stride=(1, 1))
            (norm): Identity()
            (act): GELU()
            (drop): Dropout(p=0.0, inplace=False)
            (fc2): Conv2d(1536, 384, kernel_size=(1, 1), stride=(1, 1))
          )
          (ls2): Identity()
          (drop_path2): Identity()
        )
        (11): TransformerBlock2d(
          (shortcut): Identity()
          (norm1): LayerNorm2d((384,), eps=1e-06, elementwise_affine=True)
          (attn): Attention2d(
            (qkv): Conv2d(384, 1152, kernel_size=(1, 1), stride=(1, 1))
            (rel_pos): RelPosBias()
            (attn_drop): Dropout(p=0.0, inplace=False)
            (proj): Conv2d(384, 384, kernel_size=(1, 1), stride=(1, 1))
            (proj_drop): Dropout(p=0.0, inplace=False)
          )
          (ls1): Identity()
          (drop_path1): Identity()
          (norm2): LayerNorm2d((384,), eps=1e-06, elementwise_affine=True)
          (mlp): ConvMlp(
            (fc1): Conv2d(384, 1536, kernel_size=(1, 1), stride=(1, 1))
            (norm): Identity()
            (act): GELU()
            (drop): Dropout(p=0.0, inplace=False)
            (fc2): Conv2d(1536, 384, kernel_size=(1, 1), stride=(1, 1))
          )
          (ls2): Identity()
          (drop_path2): Identity()
        )
        (12): TransformerBlock2d(
          (shortcut): Identity()
          (norm1): LayerNorm2d((384,), eps=1e-06, elementwise_affine=True)
          (attn): Attention2d(
            (qkv): Conv2d(384, 1152, kernel_size=(1, 1), stride=(1, 1))
            (rel_pos): RelPosBias()
            (attn_drop): Dropout(p=0.0, inplace=False)
            (proj): Conv2d(384, 384, kernel_size=(1, 1), stride=(1, 1))
            (proj_drop): Dropout(p=0.0, inplace=False)
          )
          (ls1): Identity()
          (drop_path1): Identity()
          (norm2): LayerNorm2d((384,), eps=1e-06, elementwise_affine=True)
          (mlp): ConvMlp(
            (fc1): Conv2d(384, 1536, kernel_size=(1, 1), stride=(1, 1))
            (norm): Identity()
            (act): GELU()
            (drop): Dropout(p=0.0, inplace=False)
            (fc2): Conv2d(1536, 384, kernel_size=(1, 1), stride=(1, 1))
          )
          (ls2): Identity()
          (drop_path2): Identity()
        )
        (13): TransformerBlock2d(
          (shortcut): Identity()
          (norm1): LayerNorm2d((384,), eps=1e-06, elementwise_affine=True)
          (attn): Attention2d(
            (qkv): Conv2d(384, 1152, kernel_size=(1, 1), stride=(1, 1))
            (rel_pos): RelPosBias()
            (attn_drop): Dropout(p=0.0, inplace=False)
            (proj): Conv2d(384, 384, kernel_size=(1, 1), stride=(1, 1))
            (proj_drop): Dropout(p=0.0, inplace=False)
          )
          (ls1): Identity()
          (drop_path1): Identity()
          (norm2): LayerNorm2d((384,), eps=1e-06, elementwise_affine=True)
          (mlp): ConvMlp(
            (fc1): Conv2d(384, 1536, kernel_size=(1, 1), stride=(1, 1))
            (norm): Identity()
            (act): GELU()
            (drop): Dropout(p=0.0, inplace=False)
            (fc2): Conv2d(1536, 384, kernel_size=(1, 1), stride=(1, 1))
          )
          (ls2): Identity()
          (drop_path2): Identity()
        )
      )
    )
    (3): MaxxVitStage(
      (blocks): Sequential(
        (0): TransformerBlock2d(
          (shortcut): Downsample2d(
            (pool): AvgPool2d(kernel_size=2, stride=2, padding=0)
            (expand): Conv2d(384, 768, kernel_size=(1, 1), stride=(1, 1))
          )
          (norm1): Sequential(
            (norm): LayerNorm2d((384,), eps=1e-06, elementwise_affine=True)
            (down): Downsample2d(
              (pool): AvgPool2d(kernel_size=2, stride=2, padding=0)
              (expand): Identity()
            )
          )
          (attn): Attention2d(
            (qkv): Conv2d(384, 2304, kernel_size=(1, 1), stride=(1, 1))
            (rel_pos): RelPosBias()
            (attn_drop): Dropout(p=0.0, inplace=False)
            (proj): Conv2d(768, 768, kernel_size=(1, 1), stride=(1, 1))
            (proj_drop): Dropout(p=0.0, inplace=False)
          )
          (ls1): Identity()
          (drop_path1): Identity()
          (norm2): LayerNorm2d((768,), eps=1e-06, elementwise_affine=True)
          (mlp): ConvMlp(
            (fc1): Conv2d(768, 3072, kernel_size=(1, 1), stride=(1, 1))
            (norm): Identity()
            (act): GELU()
            (drop): Dropout(p=0.0, inplace=False)
            (fc2): Conv2d(3072, 768, kernel_size=(1, 1), stride=(1, 1))
          )
          (ls2): Identity()
          (drop_path2): Identity()
        )
        (1): TransformerBlock2d(
          (shortcut): Identity()
          (norm1): LayerNorm2d((768,), eps=1e-06, elementwise_affine=True)
          (attn): Attention2d(
            (qkv): Conv2d(768, 2304, kernel_size=(1, 1), stride=(1, 1))
            (rel_pos): RelPosBias()
            (attn_drop): Dropout(p=0.0, inplace=False)
            (proj): Conv2d(768, 768, kernel_size=(1, 1), stride=(1, 1))
            (proj_drop): Dropout(p=0.0, inplace=False)
          )
          (ls1): Identity()
          (drop_path1): Identity()
          (norm2): LayerNorm2d((768,), eps=1e-06, elementwise_affine=True)
          (mlp): ConvMlp(
            (fc1): Conv2d(768, 3072, kernel_size=(1, 1), stride=(1, 1))
            (norm): Identity()
            (act): GELU()
            (drop): Dropout(p=0.0, inplace=False)
            (fc2): Conv2d(3072, 768, kernel_size=(1, 1), stride=(1, 1))
          )
          (ls2): Identity()
          (drop_path2): Identity()
        )
      )
    )
  )
  (norm): LayerNorm2d((768,), eps=1e-06, elementwise_affine=True)
  (head): ClassifierHead(
    (global_pool): SelectAdaptivePool2d (pool_type=avg, flatten=Flatten(start_dim=1, end_dim=-1))
    (fc): Linear(in_features=768, out_features=1000, bias=True)
    (flatten): Identity()
  )
)
	MaxxVit(
	(stem): Stem(
	(conv1): Conv2d(3, 64, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
	(norm1): BatchNormAct2d(
	64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
	(drop): Identity()
	(act): GELU()
	)
	(conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
	)
	(stages): Sequential(
	(0): MaxxVitStage(
	(blocks): Sequential(
	(0): MbConvBlock(
	(shortcut): Downsample2d(
	(pool): AvgPool2d(kernel_size=2, stride=2, padding=0)
	(expand): Conv2d(64, 96, kernel_size=(1, 1), stride=(1, 1))
	)
	(pre_norm): BatchNormAct2d(
	64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
	(drop): Identity()
	(act): Identity()
	)
	(down): Identity()
	(conv1_1x1): Conv2d(64, 384, kernel_size=(1, 1), stride=(1, 1), bias=False)
	(norm1): BatchNormAct2d(
	384, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
	(drop): Identity()
	(act): GELU()
	)
	(conv2_kxk): Conv2d(384, 384, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), groups=384, bias=False)
	(norm2): BatchNormAct2d(
	384, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
	(drop): Identity()
	(act): GELU()
	)
	(se): SEModule(
	(fc1): Conv2d(384, 24, kernel_size=(1, 1), stride=(1, 1))
	(bn): Identity()
	(act): SiLU(inplace=True)
	(fc2): Conv2d(24, 384, kernel_size=(1, 1), stride=(1, 1))
	(gate): Sigmoid()
	)
	(conv3_1x1): Conv2d(384, 96, kernel_size=(1, 1), stride=(1, 1))
	(drop_path): Identity()
	)
	(1): MbConvBlock(
	(shortcut): Identity()
	(pre_norm): BatchNormAct2d(
	96, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
	(drop): Identity()
	(act): Identity()
	)
	(down): Identity()
	(conv1_1x1): Conv2d(96, 384, kernel_size=(1, 1), stride=(1, 1), bias=False)
	(norm1): BatchNormAct2d(
	384, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
	(drop): Identity()
	(act): GELU()
	)
	(conv2_kxk): Conv2d(384, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=384, bias=False)
	(norm2): BatchNormAct2d(
	384, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
	(drop): Identity()
	(act): GELU()
	)
	(se): SEModule(
	(fc1): Conv2d(384, 24, kernel_size=(1, 1), stride=(1, 1))
	(bn): Identity()
	(act): SiLU(inplace=True)
	(fc2): Conv2d(24, 384, kernel_size=(1, 1), stride=(1, 1))
	(gate): Sigmoid()
	)
	(conv3_1x1): Conv2d(384, 96, kernel_size=(1, 1), stride=(1, 1))
	(drop_path): Identity()
	)
	)
	)
	(1): MaxxVitStage(
	(blocks): Sequential(
	(0): MbConvBlock(
	(shortcut): Downsample2d(
	(pool): AvgPool2d(kernel_size=2, stride=2, padding=0)
	(expand): Conv2d(96, 192, kernel_size=(1, 1), stride=(1, 1))
	)
	(pre_norm): BatchNormAct2d(
	96, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
	(drop): Identity()
	(act): Identity()
	)
	(down): Identity()
	(conv1_1x1): Conv2d(96, 768, kernel_size=(1, 1), stride=(1, 1), bias=False)
	(norm1): BatchNormAct2d(
	768, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
	(drop): Identity()
	(act): GELU()
	)
	(conv2_kxk): Conv2d(768, 768, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), groups=768, bias=False)
	(norm2): BatchNormAct2d(
	768, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
	(drop): Identity()
	(act): GELU()
	)
	(se): SEModule(
	(fc1): Conv2d(768, 48, kernel_size=(1, 1), stride=(1, 1))
	(bn): Identity()
	(act): SiLU(inplace=True)
	(fc2): Conv2d(48, 768, kernel_size=(1, 1), stride=(1, 1))
	(gate): Sigmoid()
	)
	(conv3_1x1): Conv2d(768, 192, kernel_size=(1, 1), stride=(1, 1))
	(drop_path): Identity()
	)
	(1): MbConvBlock(
	(shortcut): Identity()
	(pre_norm): BatchNormAct2d(
	192, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
	(drop): Identity()
	(act): Identity()
	)
	(down): Identity()
	(conv1_1x1): Conv2d(192, 768, kernel_size=(1, 1), stride=(1, 1), bias=False)
	(norm1): BatchNormAct2d(
	768, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
	(drop): Identity()
	(act): GELU()
	)
	(conv2_kxk): Conv2d(768, 768, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=768, bias=False)
	(norm2): BatchNormAct2d(
	768, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
	(drop): Identity()
	(act): GELU()
	)
	(se): SEModule(
	(fc1): Conv2d(768, 48, kernel_size=(1, 1), stride=(1, 1))
	(bn): Identity()
	(act): SiLU(inplace=True)
	(fc2): Conv2d(48, 768, kernel_size=(1, 1), stride=(1, 1))
	(gate): Sigmoid()
	)
	(conv3_1x1): Conv2d(768, 192, kernel_size=(1, 1), stride=(1, 1))
	(drop_path): Identity()
	)
	(2): MbConvBlock(
	(shortcut): Identity()
	(pre_norm): BatchNormAct2d(
	192, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
	(drop): Identity()
	(act): Identity()
	)
	(down): Identity()
	(conv1_1x1): Conv2d(192, 768, kernel_size=(1, 1), stride=(1, 1), bias=False)
	(norm1): BatchNormAct2d(
	768, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
	(drop): Identity()
	(act): GELU()
	)
	(conv2_kxk): Conv2d(768, 768, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=768, bias=False)
	(norm2): BatchNormAct2d(
	768, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
	(drop): Identity()
	(act): GELU()
	)
	(se): SEModule(
	(fc1): Conv2d(768, 48, kernel_size=(1, 1), stride=(1, 1))
	(bn): Identity()
	(act): SiLU(inplace=True)
	(fc2): Conv2d(48, 768, kernel_size=(1, 1), stride=(1, 1))
	(gate): Sigmoid()
	)
	(conv3_1x1): Conv2d(768, 192, kernel_size=(1, 1), stride=(1, 1))
	(drop_path): Identity()
	)
	(3): MbConvBlock(
	(shortcut): Identity()
	(pre_norm): BatchNormAct2d(
	192, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
	(drop): Identity()
	(act): Identity()
	)
	(down): Identity()
	(conv1_1x1): Conv2d(192, 768, kernel_size=(1, 1), stride=(1, 1), bias=False)
	(norm1): BatchNormAct2d(
	768, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
	(drop): Identity()
	(act): GELU()
	)
	(conv2_kxk): Conv2d(768, 768, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=768, bias=False)
	(norm2): BatchNormAct2d(
	768, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
	(drop): Identity()
	(act): GELU()
	)
	(se): SEModule(
	(fc1): Conv2d(768, 48, kernel_size=(1, 1), stride=(1, 1))
	(bn): Identity()
	(act): SiLU(inplace=True)
	(fc2): Conv2d(48, 768, kernel_size=(1, 1), stride=(1, 1))
	(gate): Sigmoid()
	)
	(conv3_1x1): Conv2d(768, 192, kernel_size=(1, 1), stride=(1, 1))
	(drop_path): Identity()
	)
	(4): MbConvBlock(
	(shortcut): Identity()
	(pre_norm): BatchNormAct2d(
	192, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
	(drop): Identity()
	(act): Identity()
	)
	(down): Identity()
	(conv1_1x1): Conv2d(192, 768, kernel_size=(1, 1), stride=(1, 1), bias=False)
	(norm1): BatchNormAct2d(
	768, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
	(drop): Identity()
	(act): GELU()
	)
	(conv2_kxk): Conv2d(768, 768, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=768, bias=False)
	(norm2): BatchNormAct2d(
	768, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
	(drop): Identity()
	(act): GELU()
	)
	(se): SEModule(
	(fc1): Conv2d(768, 48, kernel_size=(1, 1), stride=(1, 1))
	(bn): Identity()
	(act): SiLU(inplace=True)
	(fc2): Conv2d(48, 768, kernel_size=(1, 1), stride=(1, 1))
	(gate): Sigmoid()
	)
	(conv3_1x1): Conv2d(768, 192, kernel_size=(1, 1), stride=(1, 1))
	(drop_path): Identity()
	)
	(5): MbConvBlock(
	(shortcut): Identity()
	(pre_norm): BatchNormAct2d(
	192, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
	(drop): Identity()
	(act): Identity()
	)
	(down): Identity()
	(conv1_1x1): Conv2d(192, 768, kernel_size=(1, 1), stride=(1, 1), bias=False)
	(norm1): BatchNormAct2d(
	768, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
	(drop): Identity()
	(act): GELU()
	)
	(conv2_kxk): Conv2d(768, 768, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=768, bias=False)
	(norm2): BatchNormAct2d(
	768, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
	(drop): Identity()
	(act): GELU()
	)
	(se): SEModule(
	(fc1): Conv2d(768, 48, kernel_size=(1, 1), stride=(1, 1))
	(bn): Identity()
	(act): SiLU(inplace=True)
	(fc2): Conv2d(48, 768, kernel_size=(1, 1), stride=(1, 1))
	(gate): Sigmoid()
	)
	(conv3_1x1): Conv2d(768, 192, kernel_size=(1, 1), stride=(1, 1))
	(drop_path): Identity()
	)
	)
	)
	(2): MaxxVitStage(
	(blocks): Sequential(
	(0): TransformerBlock2d(
	(shortcut): Downsample2d(
	(pool): AvgPool2d(kernel_size=2, stride=2, padding=0)
	(expand): Conv2d(192, 384, kernel_size=(1, 1), stride=(1, 1))
	)
	(norm1): Sequential(
	(norm): LayerNorm2d((192,), eps=1e-06, elementwise_affine=True)
	(down): Downsample2d(
	(pool): AvgPool2d(kernel_size=2, stride=2, padding=0)
	(expand): Identity()
	)
	)
	(attn): Attention2d(
	(qkv): Conv2d(192, 1152, kernel_size=(1, 1), stride=(1, 1))
	(rel_pos): RelPosBias()
	(attn_drop): Dropout(p=0.0, inplace=False)
	(proj): Conv2d(384, 384, kernel_size=(1, 1), stride=(1, 1))
	(proj_drop): Dropout(p=0.0, inplace=False)
	)
	(ls1): Identity()
	(drop_path1): Identity()
	(norm2): LayerNorm2d((384,), eps=1e-06, elementwise_affine=True)
	(mlp): ConvMlp(
	(fc1): Conv2d(384, 1536, kernel_size=(1, 1), stride=(1, 1))
	(norm): Identity()
	(act): GELU()
	(drop): Dropout(p=0.0, inplace=False)
	(fc2): Conv2d(1536, 384, kernel_size=(1, 1), stride=(1, 1))
	)
	(ls2): Identity()
	(drop_path2): Identity()
	)
	(1): TransformerBlock2d(
	(shortcut): Identity()
	(norm1): LayerNorm2d((384,), eps=1e-06, elementwise_affine=True)
	(attn): Attention2d(
	(qkv): Conv2d(384, 1152, kernel_size=(1, 1), stride=(1, 1))
	(rel_pos): RelPosBias()
	(attn_drop): Dropout(p=0.0, inplace=False)
	(proj): Conv2d(384, 384, kernel_size=(1, 1), stride=(1, 1))
	(proj_drop): Dropout(p=0.0, inplace=False)
	)
	(ls1): Identity()
	(drop_path1): Identity()
	(norm2): LayerNorm2d((384,), eps=1e-06, elementwise_affine=True)
	(mlp): ConvMlp(
	(fc1): Conv2d(384, 1536, kernel_size=(1, 1), stride=(1, 1))
	(norm): Identity()
	(act): GELU()
	(drop): Dropout(p=0.0, inplace=False)
	(fc2): Conv2d(1536, 384, kernel_size=(1, 1), stride=(1, 1))
	)
	(ls2): Identity()
	(drop_path2): Identity()
	)
	(2): TransformerBlock2d(
	(shortcut): Identity()
	(norm1): LayerNorm2d((384,), eps=1e-06, elementwise_affine=True)
	(attn): Attention2d(
	(qkv): Conv2d(384, 1152, kernel_size=(1, 1), stride=(1, 1))
	(rel_pos): RelPosBias()
	(attn_drop): Dropout(p=0.0, inplace=False)
	(proj): Conv2d(384, 384, kernel_size=(1, 1), stride=(1, 1))
	(proj_drop): Dropout(p=0.0, inplace=False)
	)
	(ls1): Identity()
	(drop_path1): Identity()
	(norm2): LayerNorm2d((384,), eps=1e-06, elementwise_affine=True)
	(mlp): ConvMlp(
	(fc1): Conv2d(384, 1536, kernel_size=(1, 1), stride=(1, 1))
	(norm): Identity()
	(act): GELU()
	(drop): Dropout(p=0.0, inplace=False)
	(fc2): Conv2d(1536, 384, kernel_size=(1, 1), stride=(1, 1))
	)
	(ls2): Identity()
	(drop_path2): Identity()
	)
	(3): TransformerBlock2d(
	(shortcut): Identity()
	(norm1): LayerNorm2d((384,), eps=1e-06, elementwise_affine=True)
	(attn): Attention2d(
	(qkv): Conv2d(384, 1152, kernel_size=(1, 1), stride=(1, 1))
	(rel_pos): RelPosBias()
	(attn_drop): Dropout(p=0.0, inplace=False)
	(proj): Conv2d(384, 384, kernel_size=(1, 1), stride=(1, 1))
	(proj_drop): Dropout(p=0.0, inplace=False)
	)
	(ls1): Identity()
	(drop_path1): Identity()
	(norm2): LayerNorm2d((384,), eps=1e-06, elementwise_affine=True)
	(mlp): ConvMlp(
	(fc1): Conv2d(384, 1536, kernel_size=(1, 1), stride=(1, 1))
	(norm): Identity()
	(act): GELU()
	(drop): Dropout(p=0.0, inplace=False)
	(fc2): Conv2d(1536, 384, kernel_size=(1, 1), stride=(1, 1))
	)
	(ls2): Identity()
	(drop_path2): Identity()
	)
	(4): TransformerBlock2d(
	(shortcut): Identity()
	(norm1): LayerNorm2d((384,), eps=1e-06, elementwise_affine=True)
	(attn): Attention2d(
	(qkv): Conv2d(384, 1152, kernel_size=(1, 1), stride=(1, 1))
	(rel_pos): RelPosBias()
	(attn_drop): Dropout(p=0.0, inplace=False)
	(proj): Conv2d(384, 384, kernel_size=(1, 1), stride=(1, 1))
	(proj_drop): Dropout(p=0.0, inplace=False)
	)
	(ls1): Identity()
	(drop_path1): Identity()
	(norm2): LayerNorm2d((384,), eps=1e-06, elementwise_affine=True)
	(mlp): ConvMlp(
	(fc1): Conv2d(384, 1536, kernel_size=(1, 1), stride=(1, 1))
	(norm): Identity()
	(act): GELU()
	(drop): Dropout(p=0.0, inplace=False)
	(fc2): Conv2d(1536, 384, kernel_size=(1, 1), stride=(1, 1))
	)
	(ls2): Identity()
	(drop_path2): Identity()
	)
	(5): TransformerBlock2d(
	(shortcut): Identity()
	(norm1): LayerNorm2d((384,), eps=1e-06, elementwise_affine=True)
	(attn): Attention2d(
	(qkv): Conv2d(384, 1152, kernel_size=(1, 1), stride=(1, 1))
	(rel_pos): RelPosBias()
	(attn_drop): Dropout(p=0.0, inplace=False)
	(proj): Conv2d(384, 384, kernel_size=(1, 1), stride=(1, 1))
	(proj_drop): Dropout(p=0.0, inplace=False)
	)
	(ls1): Identity()
	(drop_path1): Identity()
	(norm2): LayerNorm2d((384,), eps=1e-06, elementwise_affine=True)
	(mlp): ConvMlp(
	(fc1): Conv2d(384, 1536, kernel_size=(1, 1), stride=(1, 1))
	(norm): Identity()
	(act): GELU()
	(drop): Dropout(p=0.0, inplace=False)
	(fc2): Conv2d(1536, 384, kernel_size=(1, 1), stride=(1, 1))
	)
	(ls2): Identity()
	(drop_path2): Identity()
	)
	(6): TransformerBlock2d(
	(shortcut): Identity()
	(norm1): LayerNorm2d((384,), eps=1e-06, elementwise_affine=True)
	(attn): Attention2d(
	(qkv): Conv2d(384, 1152, kernel_size=(1, 1), stride=(1, 1))
	(rel_pos): RelPosBias()
	(attn_drop): Dropout(p=0.0, inplace=False)
	(proj): Conv2d(384, 384, kernel_size=(1, 1), stride=(1, 1))
	(proj_drop): Dropout(p=0.0, inplace=False)
	)
	(ls1): Identity()
	(drop_path1): Identity()
	(norm2): LayerNorm2d((384,), eps=1e-06, elementwise_affine=True)
	(mlp): ConvMlp(
	(fc1): Conv2d(384, 1536, kernel_size=(1, 1), stride=(1, 1))
	(norm): Identity()
	(act): GELU()
	(drop): Dropout(p=0.0, inplace=False)
	(fc2): Conv2d(1536, 384, kernel_size=(1, 1), stride=(1, 1))
	)
	(ls2): Identity()
	(drop_path2): Identity()
	)
	(7): TransformerBlock2d(
	(shortcut): Identity()
	(norm1): LayerNorm2d((384,), eps=1e-06, elementwise_affine=True)
	(attn): Attention2d(
	(qkv): Conv2d(384, 1152, kernel_size=(1, 1), stride=(1, 1))
	(rel_pos): RelPosBias()
	(attn_drop): Dropout(p=0.0, inplace=False)
	(proj): Conv2d(384, 384, kernel_size=(1, 1), stride=(1, 1))
	(proj_drop): Dropout(p=0.0, inplace=False)
	)
	(ls1): Identity()
	(drop_path1): Identity()
	(norm2): LayerNorm2d((384,), eps=1e-06, elementwise_affine=True)
	(mlp): ConvMlp(
	(fc1): Conv2d(384, 1536, kernel_size=(1, 1), stride=(1, 1))
	(norm): Identity()
	(act): GELU()
	(drop): Dropout(p=0.0, inplace=False)
	(fc2): Conv2d(1536, 384, kernel_size=(1, 1), stride=(1, 1))
	)
	(ls2): Identity()
	(drop_path2): Identity()
	)
	(8): TransformerBlock2d(
	(shortcut): Identity()
	(norm1): LayerNorm2d((384,), eps=1e-06, elementwise_affine=True)
	(attn): Attention2d(
	(qkv): Conv2d(384, 1152, kernel_size=(1, 1), stride=(1, 1))
	(rel_pos): RelPosBias()
	(attn_drop): Dropout(p=0.0, inplace=False)
	(proj): Conv2d(384, 384, kernel_size=(1, 1), stride=(1, 1))
	(proj_drop): Dropout(p=0.0, inplace=False)
	)
	(ls1): Identity()
	(drop_path1): Identity()
	(norm2): LayerNorm2d((384,), eps=1e-06, elementwise_affine=True)
	(mlp): ConvMlp(
	(fc1): Conv2d(384, 1536, kernel_size=(1, 1), stride=(1, 1))
	(norm): Identity()
	(act): GELU()
	(drop): Dropout(p=0.0, inplace=False)
	(fc2): Conv2d(1536, 384, kernel_size=(1, 1), stride=(1, 1))
	)
	(ls2): Identity()
	(drop_path2): Identity()
	)
	(9): TransformerBlock2d(
	(shortcut): Identity()
	(norm1): LayerNorm2d((384,), eps=1e-06, elementwise_affine=True)
	(attn): Attention2d(
	(qkv): Conv2d(384, 1152, kernel_size=(1, 1), stride=(1, 1))
	(rel_pos): RelPosBias()
	(attn_drop): Dropout(p=0.0, inplace=False)
	(proj): Conv2d(384, 384, kernel_size=(1, 1), stride=(1, 1))
	(proj_drop): Dropout(p=0.0, inplace=False)
	)
	(ls1): Identity()
	(drop_path1): Identity()
	(norm2): LayerNorm2d((384,), eps=1e-06, elementwise_affine=True)
	(mlp): ConvMlp(
	(fc1): Conv2d(384, 1536, kernel_size=(1, 1), stride=(1, 1))
	(norm): Identity()
	(act): GELU()
	(drop): Dropout(p=0.0, inplace=False)
	(fc2): Conv2d(1536, 384, kernel_size=(1, 1), stride=(1, 1))
	)
	(ls2): Identity()
	(drop_path2): Identity()
	)
	(10): TransformerBlock2d(
	(shortcut): Identity()
	(norm1): LayerNorm2d((384,), eps=1e-06, elementwise_affine=True)
	(attn): Attention2d(
	(qkv): Conv2d(384, 1152, kernel_size=(1, 1), stride=(1, 1))
	(rel_pos): RelPosBias()
	(attn_drop): Dropout(p=0.0, inplace=False)
	(proj): Conv2d(384, 384, kernel_size=(1, 1), stride=(1, 1))
	(proj_drop): Dropout(p=0.0, inplace=False)
	)
	(ls1): Identity()
	(drop_path1): Identity()
	(norm2): LayerNorm2d((384,), eps=1e-06, elementwise_affine=True)
	(mlp): ConvMlp(
	(fc1): Conv2d(384, 1536, kernel_size=(1, 1), stride=(1, 1))
	(norm): Identity()
	(act): GELU()
	(drop): Dropout(p=0.0, inplace=False)
	(fc2): Conv2d(1536, 384, kernel_size=(1, 1), stride=(1, 1))
	)
	(ls2): Identity()
	(drop_path2): Identity()
	)
	(11): TransformerBlock2d(
	(shortcut): Identity()
	(norm1): LayerNorm2d((384,), eps=1e-06, elementwise_affine=True)
	(attn): Attention2d(
	(qkv): Conv2d(384, 1152, kernel_size=(1, 1), stride=(1, 1))
	(rel_pos): RelPosBias()
	(attn_drop): Dropout(p=0.0, inplace=False)
	(proj): Conv2d(384, 384, kernel_size=(1, 1), stride=(1, 1))
	(proj_drop): Dropout(p=0.0, inplace=False)
	)
	(ls1): Identity()
	(drop_path1): Identity()
	(norm2): LayerNorm2d((384,), eps=1e-06, elementwise_affine=True)
	(mlp): ConvMlp(
	(fc1): Conv2d(384, 1536, kernel_size=(1, 1), stride=(1, 1))
	(norm): Identity()
	(act): GELU()
	(drop): Dropout(p=0.0, inplace=False)
	(fc2): Conv2d(1536, 384, kernel_size=(1, 1), stride=(1, 1))
	)
	(ls2): Identity()
	(drop_path2): Identity()
	)
	(12): TransformerBlock2d(
	(shortcut): Identity()
	(norm1): LayerNorm2d((384,), eps=1e-06, elementwise_affine=True)
	(attn): Attention2d(
	(qkv): Conv2d(384, 1152, kernel_size=(1, 1), stride=(1, 1))
	(rel_pos): RelPosBias()
	(attn_drop): Dropout(p=0.0, inplace=False)
	(proj): Conv2d(384, 384, kernel_size=(1, 1), stride=(1, 1))
	(proj_drop): Dropout(p=0.0, inplace=False)
	)
	(ls1): Identity()
	(drop_path1): Identity()
	(norm2): LayerNorm2d((384,), eps=1e-06, elementwise_affine=True)
	(mlp): ConvMlp(
	(fc1): Conv2d(384, 1536, kernel_size=(1, 1), stride=(1, 1))
	(norm): Identity()
	(act): GELU()
	(drop): Dropout(p=0.0, inplace=False)
	(fc2): Conv2d(1536, 384, kernel_size=(1, 1), stride=(1, 1))
	)
	(ls2): Identity()
	(drop_path2): Identity()
	)
	(13): TransformerBlock2d(
	(shortcut): Identity()
	(norm1): LayerNorm2d((384,), eps=1e-06, elementwise_affine=True)
	(attn): Attention2d(
	(qkv): Conv2d(384, 1152, kernel_size=(1, 1), stride=(1, 1))
	(rel_pos): RelPosBias()
	(attn_drop): Dropout(p=0.0, inplace=False)
	(proj): Conv2d(384, 384, kernel_size=(1, 1), stride=(1, 1))
	(proj_drop): Dropout(p=0.0, inplace=False)
	)
	(ls1): Identity()
	(drop_path1): Identity()
	(norm2): LayerNorm2d((384,), eps=1e-06, elementwise_affine=True)
	(mlp): ConvMlp(
	(fc1): Conv2d(384, 1536, kernel_size=(1, 1), stride=(1, 1))
	(norm): Identity()
	(act): GELU()
	(drop): Dropout(p=0.0, inplace=False)
	(fc2): Conv2d(1536, 384, kernel_size=(1, 1), stride=(1, 1))
	)
	(ls2): Identity()
	(drop_path2): Identity()
	)
	)
	)
	(3): MaxxVitStage(
	(blocks): Sequential(
	(0): TransformerBlock2d(
	(shortcut): Downsample2d(
	(pool): AvgPool2d(kernel_size=2, stride=2, padding=0)
	(expand): Conv2d(384, 768, kernel_size=(1, 1), stride=(1, 1))
	)
	(norm1): Sequential(
	(norm): LayerNorm2d((384,), eps=1e-06, elementwise_affine=True)
	(down): Downsample2d(
	(pool): AvgPool2d(kernel_size=2, stride=2, padding=0)
	(expand): Identity()
	)
	)
	(attn): Attention2d(
	(qkv): Conv2d(384, 2304, kernel_size=(1, 1), stride=(1, 1))
	(rel_pos): RelPosBias()
	(attn_drop): Dropout(p=0.0, inplace=False)
	(proj): Conv2d(768, 768, kernel_size=(1, 1), stride=(1, 1))
	(proj_drop): Dropout(p=0.0, inplace=False)
	)
	(ls1): Identity()
	(drop_path1): Identity()
	(norm2): LayerNorm2d((768,), eps=1e-06, elementwise_affine=True)
	(mlp): ConvMlp(
	(fc1): Conv2d(768, 3072, kernel_size=(1, 1), stride=(1, 1))
	(norm): Identity()
	(act): GELU()
	(drop): Dropout(p=0.0, inplace=False)
	(fc2): Conv2d(3072, 768, kernel_size=(1, 1), stride=(1, 1))
	)
	(ls2): Identity()
	(drop_path2): Identity()
	)
	(1): TransformerBlock2d(
	(shortcut): Identity()
	(norm1): LayerNorm2d((768,), eps=1e-06, elementwise_affine=True)
	(attn): Attention2d(
	(qkv): Conv2d(768, 2304, kernel_size=(1, 1), stride=(1, 1))
	(rel_pos): RelPosBias()
	(attn_drop): Dropout(p=0.0, inplace=False)
	(proj): Conv2d(768, 768, kernel_size=(1, 1), stride=(1, 1))
	(proj_drop): Dropout(p=0.0, inplace=False)
	)
	(ls1): Identity()
	(drop_path1): Identity()
	(norm2): LayerNorm2d((768,), eps=1e-06, elementwise_affine=True)
	(mlp): ConvMlp(
	(fc1): Conv2d(768, 3072, kernel_size=(1, 1), stride=(1, 1))
	(norm): Identity()
	(act): GELU()
	(drop): Dropout(p=0.0, inplace=False)
	(fc2): Conv2d(3072, 768, kernel_size=(1, 1), stride=(1, 1))
	)
	(ls2): Identity()
	(drop_path2): Identity()
	)
	)
	)
	)
	(norm): LayerNorm2d((768,), eps=1e-06, elementwise_affine=True)
	(head): ClassifierHead(
	(global_pool): SelectAdaptivePool2d (pool_type=avg, flatten=Flatten(start_dim=1, end_dim=-1))
	(fc): Linear(in_features=768, out_features=1000, bias=True)
	(flatten): Identity()
	)
	)