e96031413/retinanet_mbv3_large.py

## retinanet_mbv3_large.py
"""
RetinaNet model with the MobileNetV3 backbone from
Torchvision classification models.

Reference: https://github.com/pytorch/vision/blob/main/torchvision/models/detection/retinanet.py#L377-L405
"""

import torchvision
import torch
from torchvision.models.detection import RetinaNet
from torchvision.models.detection.anchor_utils import AnchorGenerator
from thop import profile

def create_model(num_classes=4, pretrained=True, coco_model=False):
    """
    Build a RetinaNet detector on top of the MobileNetV3-Large feature extractor.

    Args:
        num_classes: Number of classes, forwarded unchanged to ``RetinaNet``.
        pretrained: When True, the backbone is created with the
            ``IMAGENET1K_V2`` classification weights. When False, the
            backbone is created with ``weights=None`` (no pretrained weights).
        coco_model: Not used by this function; accepted only so that
            existing callers that pass it keep working.

    Returns:
        The constructed ``RetinaNet`` model.
    """
    # Honor the `pretrained` flag instead of always loading ImageNet weights.
    # IMAGENET1K_V2 reference numbers:
    # TOP-1 ACC: 75.274, TOP-5 ACC: 92.566, Params: 5.5M, GFLOPs: 0.22G
    weights = 'IMAGENET1K_V2' if pretrained else None
    backbone = torchvision.models.mobilenet_v3_large(weights=weights).features

    # RetinaNet reads `out_channels` from the backbone; the last convolutional
    # layer of the MobileNetV3-Large features produces 960 channels.
    backbone.out_channels = 960

    # One group of sizes and one group of aspect ratios (a single feature
    # level): 5 sizes x 3 aspect ratios = 15 anchors per spatial location.
    anchor_generator = AnchorGenerator(
        sizes=((32, 64, 128, 256, 512),),
        aspect_ratios=((0.5, 1.0, 2.0),),
    )

    # Final RetinaNet model.
    return RetinaNet(
        backbone=backbone,
        num_classes=num_classes,
        anchor_generator=anchor_generator,
    )

if __name__ == '__main__':
    # Smoke test: build the detector and report its MACs and parameter count.
    # Fall back to the CPU so the script also runs on machines without CUDA.
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    # Named `images` rather than `input` to avoid shadowing the builtin.
    images = torch.randn(1, 3, 640, 640).to(device)
    # Example target for the training-mode call shown below; boxes are given
    # as floats, which is what torchvision detection models expect.
    target = {
        "boxes": torch.tensor([[100.0, 150.0, 300.0, 400.0]]).to(device),
        "labels": torch.tensor([1]).to(device),
    }

    model = create_model(num_classes=3, pretrained=True, coco_model=False)
    model = model.to(device)

    # Sample outputs recorded on a CUDA device:
    #
    # model.train()
    # out = model(images, [target])
    # {'classification': tensor(1.0960, device='cuda:0', grad_fn=<DivBackward0>),
    #  'bbox_regression': tensor(0.8155, device='cuda:0', grad_fn=<DivBackward0>)}
    #
    # model.eval()
    # out = model(images)
    # [{'boxes': tensor([], device='cuda:0', size=(0, 4), grad_fn=<StackBackward0>),
    #   'scores': tensor([], device='cuda:0', grad_fn=<IndexBackward0>),
    #   'labels': tensor([], device='cuda:0', dtype=torch.int64)}]

    macs, params = profile(model, inputs=(images, ))
    print(f'MACs = {macs / 1000**3}G')
    print(f'Params = {params / 1000**2}M')
	"""
	RetinaNet model with the MobileNetV3 backbone from
	Torchvision classification models.

	Reference: https://github.com/pytorch/vision/blob/main/torchvision/models/detection/retinanet.py#L377-L405
	"""

	import torchvision
	import torch
	from torchvision.models.detection import RetinaNet
	from torchvision.models.detection.anchor_utils import AnchorGenerator
	from thop import profile

	def create_model(num_classes=4, pretrained=True, coco_model=False):
		"""
		Build a RetinaNet detector on top of the MobileNetV3-Large feature extractor.

		Args:
			num_classes: Number of classes, forwarded unchanged to ``RetinaNet``.
			pretrained: When True, the backbone is created with the
				``IMAGENET1K_V2`` classification weights. When False, the
				backbone is created with ``weights=None`` (no pretrained weights).
			coco_model: Not used by this function; accepted only so that
				existing callers that pass it keep working.

		Returns:
			The constructed ``RetinaNet`` model.
		"""
		# Honor the `pretrained` flag instead of always loading ImageNet weights.
		# IMAGENET1K_V2 reference numbers:
		# TOP-1 ACC: 75.274, TOP-5 ACC: 92.566, Params: 5.5M, GFLOPs: 0.22G
		weights = 'IMAGENET1K_V2' if pretrained else None
		backbone = torchvision.models.mobilenet_v3_large(weights=weights).features

		# RetinaNet reads `out_channels` from the backbone; the last convolutional
		# layer of the MobileNetV3-Large features produces 960 channels.
		backbone.out_channels = 960

		# One group of sizes and one group of aspect ratios (a single feature
		# level): 5 sizes x 3 aspect ratios = 15 anchors per spatial location.
		anchor_generator = AnchorGenerator(
			sizes=((32, 64, 128, 256, 512),),
			aspect_ratios=((0.5, 1.0, 2.0),),
		)

		# Final RetinaNet model.
		return RetinaNet(
			backbone=backbone,
			num_classes=num_classes,
			anchor_generator=anchor_generator,
		)

	if __name__ == '__main__':
		# Smoke test: build the detector and report its MACs and parameter count.
		# Fall back to the CPU so the script also runs on machines without CUDA.
		device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

		# Named `images` rather than `input` to avoid shadowing the builtin.
		images = torch.randn(1, 3, 640, 640).to(device)
		# Example target for the training-mode call shown below; boxes are given
		# as floats, which is what torchvision detection models expect.
		target = {
			"boxes": torch.tensor([[100.0, 150.0, 300.0, 400.0]]).to(device),
			"labels": torch.tensor([1]).to(device),
		}

		model = create_model(num_classes=3, pretrained=True, coco_model=False)
		model = model.to(device)

		# Sample outputs recorded on a CUDA device:
		#
		# model.train()
		# out = model(images, [target])
		# {'classification': tensor(1.0960, device='cuda:0', grad_fn=<DivBackward0>),
		#  'bbox_regression': tensor(0.8155, device='cuda:0', grad_fn=<DivBackward0>)}
		#
		# model.eval()
		# out = model(images)
		# [{'boxes': tensor([], device='cuda:0', size=(0, 4), grad_fn=<StackBackward0>),
		#   'scores': tensor([], device='cuda:0', grad_fn=<IndexBackward0>),
		#   'labels': tensor([], device='cuda:0', dtype=torch.int64)}]

		macs, params = profile(model, inputs=(images, ))
		print(f'MACs = {macs / 1000**3}G')
		print(f'Params = {params / 1000**2}M')