| Method | AP | AP50 | AP75 | APs | APm | APl |
|:----------------------:|:------:|:------:|:------:|:-----:|:------:|:------:|
| Baseline | 75.497 | 80.927 | 80.927 | 0.000 | 52.240 | 95.806 |
| InstanceColorJittering | 75.374 | 80.901 | 80.901 | 0.000 | 50.109 | 96.650 |
| CopyPasteAug | 78.176 | 83.978 | 83.978 | 0.000 | 56.604 | 96.981 |
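These are standard COCO-style AP metrics. For context, a minimal sketch of how such numbers are typically produced in detectron2; `cfg`, `model`, and the dataset name `my_dataset_val` are placeholders, not values from the gist:

```python
# Sketch, assuming a trained detectron2 model and a registered validation
# split; "my_dataset_val", cfg, and model are placeholders.
from detectron2.data import build_detection_test_loader
from detectron2.evaluation import COCOEvaluator, inference_on_dataset

evaluator = COCOEvaluator("my_dataset_val", output_dir="./eval_output")
val_loader = build_detection_test_loader(cfg, "my_dataset_val")
# Returns a dict of AP, AP50, AP75, APs, APm, APl per task (bbox/segm).
results = inference_on_dataset(model, val_loader, evaluator)
print(results)
```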
from PIL import ImageEnhance
import detectron2.data.transforms as T

# Boost color saturation (factor 10) on a subset of instances.
aug = InstanceColorJitterAugmentation(lambda img: ImageEnhance.Color(img).enhance(10))
augmentations = T.AugmentationList([aug])

# MultiModalAugInput carries per-instance annotations alongside the image.
aug_input = MultiModalAugInput(image, annos=d["annotations"])
transforms = augmentations(aug_input)  # transforms aug_input in place
augmented_image = aug_input.image
from typing import Callable

class InstanceColorJitterTransform(MultiModalTransform):
    def __init__(self, color_operation: Callable, instance_rate: float, min_count_to_apply: int) -> None:
        if not callable(color_operation):
            raise ValueError("color_operation parameter should be callable")
        super().__init__()
        self._set_attributes(locals())

    def apply_multi_modal(self, img, annos, *args):
        # Jitter at least `min_count_to_apply` instances, or `instance_rate`
        # of all instances in the image, whichever is larger.
        instance_count = len(annos)
        apply_count = max(self.min_count_to_apply, int(instance_count * self.instance_rate))
        ...  # instance selection and recoloring not shown in the snippet
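The `InstanceColorJitterAugmentation` used in the usage snippet above is not shown in the gist. A minimal sketch of what it could look like, assuming it simply wraps the transform; the constructor defaults here are guesses, not the gist's values:

```python
import detectron2.data.transforms as T

class InstanceColorJitterAugmentation(T.Augmentation):
    # Hypothetical wrapper: default rates are assumptions, not from the gist.
    def __init__(self, color_operation, instance_rate=0.5, min_count_to_apply=1):
        super().__init__()
        self._init(locals())

    def get_transform(self, image):
        # The transform is built independently of the image content;
        # AugmentationList routes it through MultiModalAugInput.transform below.
        return InstanceColorJitterTransform(
            self.color_operation, self.instance_rate, self.min_count_to_apply
        )
```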
class MultiModalAugInput(T.AugInput):
    def transform(self, tfm: Transform) -> None:
        # Route multi-modal transforms through apply_multi_modal so the image
        # and its instance annotations stay in sync; otherwise defer to AugInput.
        if isinstance(tfm, MultiModalTransform):
            self.image, self.annos = tfm.apply_multi_modal(self.image, self.annos)
        else:
            super().transform(tfm)
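The `MultiModalTransform` base class is also not included in the scraped snippet. A minimal sketch, assuming it only adds the abstract `apply_multi_modal` hook on top of detectron2's `Transform`:

```python
# Hypothetical base class, assumed from how it is used above.
from abc import abstractmethod
from detectron2.data.transforms import Transform

class MultiModalTransform(Transform):
    @abstractmethod
    def apply_multi_modal(self, img, annos, *args):
        # Transform the image and its instance annotations together.
        pass

    # Pure color operations leave pixels' positions and geometry untouched.
    def apply_image(self, img):
        return img

    def apply_coords(self, coords):
        return coords
```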
# From detectron2 (fvcore): the base Transform interface.
class Transform:
    @abstractmethod
    def apply_image(self, img: np.ndarray):
        ...

    @abstractmethod
    def apply_coords(self, coords: np.ndarray):
        ...

    # Segmentation masks fall back to the image transform by default.
    def apply_segmentation(self, segmentation: np.ndarray) -> np.ndarray:
        return self.apply_image(segmentation)
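As a concrete illustration (not from the gist), a trivial `Transform` that inverts pixel intensities and leaves geometry alone:

```python
import numpy as np
from detectron2.data.transforms import Transform

class InvertColorTransform(Transform):
    # Illustrative example: a color-only transform, assuming uint8 images.
    def apply_image(self, img: np.ndarray) -> np.ndarray:
        return 255 - img

    # Coordinates pass through unchanged.
    def apply_coords(self, coords: np.ndarray) -> np.ndarray:
        return coords
```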
# From detectron2: AugInput applies a Transform to every field it carries.
class AugInput:
    def transform(self, tfm: Transform) -> None:
        self.image = tfm.apply_image(self.image)
        if self.boxes is not None:
            self.boxes = tfm.apply_box(self.boxes)
        if self.sem_seg is not None:
            self.sem_seg = tfm.apply_segmentation(self.sem_seg)
# From detectron2: Augmentation builds a Transform from the input, applies it
# in place, and returns it so the same sampled transform can be reused.
class Augmentation:
    def __call__(self, aug_input) -> Transform:
        args = _get_aug_input_args(self, aug_input)
        tfm = self.get_transform(*args)
        aug_input.transform(tfm)
        return tfm
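That return value is what lets you re-apply the exact transforms that were sampled to data the AugInput does not carry. A small sketch using stock detectron2 augmentations; `image`, `mask`, and `points` are placeholders:

```python
import numpy as np
import detectron2.data.transforms as T

augs = T.AugmentationList([T.RandomFlip(prob=0.5), T.RandomBrightness(0.8, 1.2)])
aug_input = T.AugInput(image)   # image: HxWxC uint8 ndarray (placeholder)
tfms = augs(aug_input)          # mutates aug_input, returns a TransformList
flipped = aug_input.image
# Re-apply the very same (already sampled) transforms to extra data:
new_mask = tfms.apply_segmentation(mask)                   # mask: HxW ndarray
new_points = tfms.apply_coords(points.astype(np.float64))  # points: Nx2 ndarray
```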
# from: https://github.com/facebookresearch/detectron2/blob/998c4e15220a0db9f172b1e7ecf376a59b873f1d/detectron2/data/dataset_mapper.py#L163-L165
aug_input = T.AugInput(image, sem_seg=sem_seg_gt)
transforms = self.augmentations(aug_input)
image, sem_seg_gt = aug_input.image, aug_input.sem_seg
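In a custom mapper, the stock `T.AugInput` at this point can be swapped for `MultiModalAugInput` so instance annotations travel through the same pipeline. A hedged sketch; the surrounding mapper wiring is assumed, not shown in the gist:

```python
# Inside a custom DatasetMapper.__call__, mirroring the lines above;
# `dataset_dict` and self.augmentations come from the surrounding mapper.
aug_input = MultiModalAugInput(image, annos=dataset_dict["annotations"])
transforms = self.augmentations(aug_input)
image, annos = aug_input.image, aug_input.annos
```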
| @META_ARCH | @BACKBONE | @PROPOSAL_GENERATOR | @ROI_HEADS | @ROI_MASK_HEAD |
|:-----------------:|:------------------------------------:|:-------------------:|:----------------:|:------------------------:|
| GeneralizedRCNN | build_resnet_backbone | RPN | RROIHeads | MaskRCNNConvUpsampleHead |
| PanopticFPN | build_resnet_fpn_backbone | RRPN | Res5ROIHeads | |
| ProposalNetwork | build_retinanet_resnet_fpn_backbone | | CascadeROIHeads | |
| RetinaNet | | | StandardROIHeads | |
| SemanticSegmentor | | | | |
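Each column corresponds to a detectron2 registry; adding your own component means decorating it with the matching registry and naming it in the config. A minimal sketch for a custom ROI heads class (the class body is a placeholder):

```python
from detectron2.modeling import ROI_HEADS_REGISTRY, StandardROIHeads

@ROI_HEADS_REGISTRY.register()
class MyROIHeads(StandardROIHeads):
    # Placeholder subclass; override the pieces you need.
    pass

# Then select it from the config:
# cfg.MODEL.ROI_HEADS.NAME = "MyROIHeads"
```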
def do_train_visualization(visualizer, model, inputs):
    # Run a clean inference pass in eval mode, hand the results to the
    # visualizer, then restore whatever mode the model was in before.
    training_mode = model.training
    model.eval()
    outputs = visualizer.inference(model, inputs)
    visualizer.process(inputs, outputs)
    model.train(training_mode)

with EventStorage(start_iter) as storage:
    ...  # training-loop body elided in the gist
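A hedged sketch of how the hook could be wired into the loop; the iteration bookkeeping follows detectron2's tools/plain_train_net.py, while `visualization_period` and `visualizer` are assumptions, not from the gist:

```python
# Sketch, modeled on detectron2's tools/plain_train_net.py do_train();
# visualization_period and visualizer are assumed, not from the gist.
with EventStorage(start_iter) as storage:
    for data, iteration in zip(data_loader, range(start_iter, max_iter)):
        storage.iter = iteration
        loss_dict = model(data)
        losses = sum(loss_dict.values())
        optimizer.zero_grad()
        losses.backward()
        optimizer.step()
        scheduler.step()
        if iteration % visualization_period == 0:
            do_train_visualization(visualizer, model, data)
```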