@ppwwyyxx
Created January 22, 2021 01:13
from detectron2.layers import ShapeSpec
from detectron2.modeling import FPN, GeneralizedRCNN, ResNet, StandardROIHeads
from detectron2.modeling.anchor_generator import DefaultAnchorGenerator
from detectron2.modeling.backbone.fpn import LastLevelMaxPool
from detectron2.modeling.backbone.resnet import BasicStem, BottleneckBlock
from detectron2.modeling.box_regression import Box2BoxTransform
from detectron2.modeling.matcher import Matcher
from detectron2.modeling.poolers import ROIPooler
from detectron2.modeling.proposal_generator.rpn import RPN, StandardRPNHead
from detectron2.modeling.roi_heads import FastRCNNOutputLayers, MaskRCNNConvUpsampleHead
from detectron2.modeling.roi_heads.box_head import FastRCNNConvFCHead
# "newconfig" was a prototype module at the time of this gist; in released
# detectron2 the equivalent import is: from detectron2.config import LazyCall as L
from newconfig import LazyCall as L

model = L(GeneralizedRCNN)(
    backbone=L(FPN)(
        bottom_up=L(ResNet)(
            stem=L(BasicStem)(in_channels=3, out_channels=64),
            # can create some specializations such as "ResNet50" to avoid writing this
            stages=[
                L(ResNet.make_stage)(
                    block_class=BottleneckBlock,
                    num_blocks=n,
                    stride_per_block=[s] + [1] * (n - 1),
                    in_channels=i,
                    bottleneck_channels=o // 4,
                    out_channels=o,
                    stride_in_1x1=True,
                )
                for (n, s, i, o) in zip(
                    [3, 4, 6, 3], [1, 2, 2, 2], [64, 256, 512, 1024], [256, 512, 1024, 2048]
                )
            ],
            out_features=["res2", "res3", "res4", "res5"],
            # freeze_at=2,
        ),
        in_features=["res2", "res3", "res4", "res5"],
        out_channels=256,
        top_block=L(LastLevelMaxPool)(),
    ),
    proposal_generator=L(RPN)(
        in_features=["p2", "p3", "p4", "p5", "p6"],
        head=L(StandardRPNHead)(in_channels=256, num_anchors=3),
        anchor_generator=L(DefaultAnchorGenerator)(
            sizes=[[32], [64], [128], [256], [512]],
            aspect_ratios=[0.5, 1.0, 2.0],
            strides=[4, 8, 16, 32, 64],
            offset=0.0,
        ),
        anchor_matcher=L(Matcher)(
            thresholds=[0.3, 0.7], labels=[0, -1, 1], allow_low_quality_matches=True
        ),
        box2box_transform=L(Box2BoxTransform)(weights=[1.0, 1.0, 1.0, 1.0]),
        batch_size_per_image=256,
        positive_fraction=0.5,
        pre_nms_topk=(2000, 1000),
        post_nms_topk=(1000, 1000),
        nms_thresh=0.7,
    ),
    roi_heads=L(StandardROIHeads)(
        num_classes=80,
        batch_size_per_image=512,
        positive_fraction=0.25,
        proposal_matcher=L(Matcher)(
            thresholds=[0.5], labels=[0, 1], allow_low_quality_matches=False
        ),
        box_in_features=["p2", "p3", "p4", "p5"],
        box_pooler=L(ROIPooler)(
            output_size=7,
            scales=(1.0 / 4, 1.0 / 8, 1.0 / 16, 1.0 / 32),
            sampling_ratio=0,
            pooler_type="ROIAlignV2",
        ),
        box_head=L(FastRCNNConvFCHead)(
            input_shape=ShapeSpec(channels=256, height=7, width=7),
            conv_dims=[],
            fc_dims=[1024, 1024],
        ),
        box_predictor=L(FastRCNNOutputLayers)(
            input_shape=ShapeSpec(channels=1024),
            test_score_thresh=0.05,
            box2box_transform=L(Box2BoxTransform)(weights=(10, 10, 5, 5)),
            # NOTE: interpolation supported by OmegaConf
            num_classes="${..num_classes}",
        ),
        mask_in_features=["p2", "p3", "p4", "p5"],
        mask_pooler=L(ROIPooler)(
            output_size=14,
            scales=(1.0 / 4, 1.0 / 8, 1.0 / 16, 1.0 / 32),
            sampling_ratio=0,
            pooler_type="ROIAlignV2",
        ),
        mask_head=L(MaskRCNNConvUpsampleHead)(
            input_shape=ShapeSpec(channels=256, width=14, height=14),
            num_classes="${..num_classes}",
            conv_dims=[256, 256, 256, 256, 256],
        ),
    ),
    pixel_mean=[103.530, 116.280, 123.675],
    pixel_std=[1.0, 1.0, 1.0],
    input_format="BGR",
)
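
# Usage sketch (not part of the original gist), assuming the LazyCall/instantiate
# pair that released detectron2 exposes under detectron2.config. Until it is
# instantiated, `model` is just a nested OmegaConf mapping, so fields can still be
# overridden; the "${..num_classes}" values are OmegaConf relative interpolations
# that resolve against the parent node, i.e. roi_heads.num_classes.
from detectron2.config import instantiate  # assumption: released detectron2 API

model.roi_heads.num_classes = 20  # box/mask heads follow automatically via "${..num_classes}"
mask_rcnn = instantiate(model)    # recursively builds the GeneralizedRCNN nn.Module
# Later detectron2 releases also added ResNet.make_default_stages, which plays the
# role of the "ResNet50" specialization hinted at in the stage-list comment above.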
from detectron2.layers import ShapeSpec
from detectron2.modeling import FPN, GeneralizedRCNN, ResNet, StandardROIHeads
from detectron2.modeling.anchor_generator import DefaultAnchorGenerator
from detectron2.modeling.backbone.fpn import LastLevelMaxPool
from detectron2.modeling.backbone.resnet import BasicStem, BottleneckBlock
from detectron2.modeling.box_regression import Box2BoxTransform
from detectron2.modeling.matcher import Matcher
from detectron2.modeling.poolers import ROIPooler
from detectron2.modeling.proposal_generator.rpn import RPN, StandardRPNHead
from detectron2.modeling.roi_heads import FastRCNNOutputLayers, MaskRCNNConvUpsampleHead
from detectron2.modeling.roi_heads.box_head import FastRCNNConvFCHead

model = dict(
    _target_=GeneralizedRCNN,
    backbone=dict(
        _target_=FPN,
        bottom_up=dict(
            _target_=ResNet,
            stem=dict(_target_=BasicStem, in_channels=3, out_channels=64),
            # can create some specializations such as "ResNet50" to avoid writing this
            stages=[
                dict(
                    _target_=ResNet.make_stage,
                    block_class=BottleneckBlock,
                    num_blocks=n,
                    stride_per_block=[s] + [1] * (n - 1),
                    in_channels=i,
                    bottleneck_channels=o // 4,
                    out_channels=o,
                    stride_in_1x1=True,
                )
                for (n, s, i, o) in zip(
                    [3, 4, 6, 3], [1, 2, 2, 2], [64, 256, 512, 1024], [256, 512, 1024, 2048]
                )
            ],
            out_features=["res2", "res3", "res4", "res5"],
            # freeze_at=2,
        ),
        in_features=["res2", "res3", "res4", "res5"],
        out_channels=256,
        top_block=dict(_target_=LastLevelMaxPool),
    ),
    proposal_generator=dict(
        _target_=RPN,
        in_features=["p2", "p3", "p4", "p5", "p6"],
        head=dict(_target_=StandardRPNHead, in_channels=256, num_anchors=3),
        anchor_generator=dict(
            _target_=DefaultAnchorGenerator,
            sizes=[[32], [64], [128], [256], [512]],
            aspect_ratios=[0.5, 1.0, 2.0],
            strides=[4, 8, 16, 32, 64],
            offset=0.0,
        ),
        anchor_matcher=dict(
            _target_=Matcher,
            thresholds=[0.3, 0.7],
            labels=[0, -1, 1],
            allow_low_quality_matches=True,
        ),
        box2box_transform=dict(_target_=Box2BoxTransform, weights=[1.0, 1.0, 1.0, 1.0]),
        batch_size_per_image=256,
        positive_fraction=0.5,
        pre_nms_topk=(2000, 1000),
        post_nms_topk=(1000, 1000),
        nms_thresh=0.7,
    ),
    roi_heads=dict(
        _target_=StandardROIHeads,
        num_classes=80,
        batch_size_per_image=512,
        positive_fraction=0.25,
        proposal_matcher=dict(
            _target_=Matcher, thresholds=[0.5], labels=[0, 1], allow_low_quality_matches=False
        ),
        box_in_features=["p2", "p3", "p4", "p5"],
        box_pooler=dict(
            _target_=ROIPooler,
            output_size=7,
            scales=(1.0 / 4, 1.0 / 8, 1.0 / 16, 1.0 / 32),
            sampling_ratio=0,
            pooler_type="ROIAlignV2",
        ),
        box_head=dict(
            _target_=FastRCNNConvFCHead,
            input_shape=ShapeSpec(channels=256, height=7, width=7),
            conv_dims=[],
            fc_dims=[1024, 1024],
        ),
        box_predictor=dict(
            _target_=FastRCNNOutputLayers,
            input_shape=ShapeSpec(channels=1024),
            test_score_thresh=0.05,
            box2box_transform=dict(_target_=Box2BoxTransform, weights=(10, 10, 5, 5)),
            # NOTE: interpolation supported by OmegaConf
            num_classes="${..num_classes}",
        ),
        mask_in_features=["p2", "p3", "p4", "p5"],
        mask_pooler=dict(
            _target_=ROIPooler,
            output_size=14,
            scales=(1.0 / 4, 1.0 / 8, 1.0 / 16, 1.0 / 32),
            sampling_ratio=0,
            pooler_type="ROIAlignV2",
        ),
        mask_head=dict(
            _target_=MaskRCNNConvUpsampleHead,
            input_shape=ShapeSpec(channels=256, width=14, height=14),
            num_classes="${..num_classes}",
            conv_dims=[256, 256, 256, 256, 256],
        ),
    ),
    pixel_mean=[103.530, 116.280, 123.675],
    pixel_std=[1.0, 1.0, 1.0],
    input_format="BGR",
)
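
# Equivalence sketch (not part of the original gist): this dict-based file spells
# out by hand what LazyCall builds, i.e. a nested mapping whose "_target_" entries
# hold the callables to construct. The "${..num_classes}" interpolations resolve
# only once the mapping lives inside an OmegaConf container, which LazyCall
# arranges via the allow_objects flag; a minimal way to do the same by hand,
# assuming omegaconf is installed:
from omegaconf import DictConfig

cfg = DictConfig(model, flags={"allow_objects": True})
print(cfg.roi_heads.box_predictor.num_classes)  # 80, resolved from roi_heads.num_classes
# From here the same recursive instantiation as in the first file applies
# (e.g. detectron2.config.instantiate in released detectron2).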