| Method | AP | AP50 | AP75 | APs | APm | APl |
|:----------------------:|:------:|:------:|:------:|:-----:|:------:|:------:|
| Baseline | 75.497 | 80.927 | 80.927 | 0.000 | 52.240 | 95.806 |
| InstanceColorJittering | 75.374 | 80.901 | 80.901 | 0.000 | 50.109 | 96.650 |
| CopyPasteAug | 78.176 | 83.978 | 83.978 | 0.000 | 56.604 | 96.981 |
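These are standard COCO-style AP metrics. For context, a minimal sketch of how such numbers are typically produced in detectron2; `cfg`, `model`, and the dataset name `my_dataset_val` are placeholders, not values from the gist:

```python
# Sketch, assuming a trained detectron2 model and a registered validation
# split; "my_dataset_val", cfg, and model are placeholders.
from detectron2.data import build_detection_test_loader
from detectron2.evaluation import COCOEvaluator, inference_on_dataset

evaluator = COCOEvaluator("my_dataset_val", output_dir="./eval_output")
val_loader = build_detection_test_loader(cfg, "my_dataset_val")
# Returns a dict of AP, AP50, AP75, APs, APm, APl per task (bbox/segm).
results = inference_on_dataset(model, val_loader, evaluator)
print(results)
```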
from PIL import ImageEnhance
import detectron2.data.transforms as T

# Boost color saturation (factor 10) on a subset of instances.
aug = InstanceColorJitterAugmentation(lambda img: ImageEnhance.Color(img).enhance(10))
augmentations = T.AugmentationList([aug])

# MultiModalAugInput carries per-instance annotations alongside the image.
aug_input = MultiModalAugInput(image, annos=d["annotations"])
transforms = augmentations(aug_input)  # transforms aug_input in place
augmented_image = aug_input.image
from typing import Callable

class InstanceColorJitterTransform(MultiModalTransform):
    def __init__(self, color_operation: Callable, instance_rate: float, min_count_to_apply: int) -> None:
        if not callable(color_operation):
            raise ValueError("color_operation parameter should be callable")
        super().__init__()
        self._set_attributes(locals())

    def apply_multi_modal(self, img, annos, *args):
        # Jitter at least `min_count_to_apply` instances, or `instance_rate`
        # of all instances in the image, whichever is larger.
        instance_count = len(annos)
        apply_count = max(self.min_count_to_apply, int(instance_count * self.instance_rate))
        ...  # instance selection and recoloring not shown in the snippet
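The `InstanceColorJitterAugmentation` used in the usage snippet above is not shown in the gist. A minimal sketch of what it could look like, assuming it simply wraps the transform; the constructor defaults here are guesses, not the gist's values:

```python
import detectron2.data.transforms as T

class InstanceColorJitterAugmentation(T.Augmentation):
    # Hypothetical wrapper: default rates are assumptions, not from the gist.
    def __init__(self, color_operation, instance_rate=0.5, min_count_to_apply=1):
        super().__init__()
        self._init(locals())

    def get_transform(self, image):
        # The transform is built independently of the image content;
        # AugmentationList routes it through MultiModalAugInput.transform below.
        return InstanceColorJitterTransform(
            self.color_operation, self.instance_rate, self.min_count_to_apply
        )
```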
class MultiModalAugInput(T.AugInput):
    def transform(self, tfm: Transform) -> None:
        # Route multi-modal transforms through apply_multi_modal so the image
        # and its instance annotations stay in sync; otherwise defer to AugInput.
        if isinstance(tfm, MultiModalTransform):
            self.image, self.annos = tfm.apply_multi_modal(self.image, self.annos)
        else:
            super().transform(tfm)
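The `MultiModalTransform` base class is also not included in the scraped snippet. A minimal sketch, assuming it only adds the abstract `apply_multi_modal` hook on top of detectron2's `Transform`:

```python
# Hypothetical base class, assumed from how it is used above.
from abc import abstractmethod
from detectron2.data.transforms import Transform

class MultiModalTransform(Transform):
    @abstractmethod
    def apply_multi_modal(self, img, annos, *args):
        # Transform the image and its instance annotations together.
        pass

    # Pure color operations leave pixels' positions and geometry untouched.
    def apply_image(self, img):
        return img

    def apply_coords(self, coords):
        return coords
```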
# From detectron2 (fvcore): the base Transform interface.
class Transform:
    @abstractmethod
    def apply_image(self, img: np.ndarray):
        ...

    @abstractmethod
    def apply_coords(self, coords: np.ndarray):
        ...

    # Segmentation masks fall back to the image transform by default.
    def apply_segmentation(self, segmentation: np.ndarray) -> np.ndarray:
        return self.apply_image(segmentation)
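As a concrete illustration (not from the gist), a trivial `Transform` that inverts pixel intensities and leaves geometry alone:

```python
import numpy as np
from detectron2.data.transforms import Transform

class InvertColorTransform(Transform):
    # Illustrative example: a color-only transform, assuming uint8 images.
    def apply_image(self, img: np.ndarray) -> np.ndarray:
        return 255 - img

    # Coordinates pass through unchanged.
    def apply_coords(self, coords: np.ndarray) -> np.ndarray:
        return coords
```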
# From detectron2: AugInput applies a Transform to every field it carries.
class AugInput:
    def transform(self, tfm: Transform) -> None:
        self.image = tfm.apply_image(self.image)
        if self.boxes is not None:
            self.boxes = tfm.apply_box(self.boxes)
        if self.sem_seg is not None:
            self.sem_seg = tfm.apply_segmentation(self.sem_seg)
# From detectron2: Augmentation builds a Transform from the input, applies it
# in place, and returns it so the same sampled transform can be reused.
class Augmentation:
    def __call__(self, aug_input) -> Transform:
        args = _get_aug_input_args(self, aug_input)
        tfm = self.get_transform(*args)
        aug_input.transform(tfm)
        return tfm
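That return value is what lets you re-apply the exact transforms that were sampled to data the AugInput does not carry. A small sketch using stock detectron2 augmentations; `image`, `mask`, and `points` are placeholders:

```python
import numpy as np
import detectron2.data.transforms as T

augs = T.AugmentationList([T.RandomFlip(prob=0.5), T.RandomBrightness(0.8, 1.2)])
aug_input = T.AugInput(image)   # image: HxWxC uint8 ndarray (placeholder)
tfms = augs(aug_input)          # mutates aug_input, returns a TransformList
flipped = aug_input.image
# Re-apply the very same (already sampled) transforms to extra data:
new_mask = tfms.apply_segmentation(mask)                   # mask: HxW ndarray
new_points = tfms.apply_coords(points.astype(np.float64))  # points: Nx2 ndarray
```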
# from: https://github.com/facebookresearch/detectron2/blob/998c4e15220a0db9f172b1e7ecf376a59b873f1d/detectron2/data/dataset_mapper.py#L163-L165
aug_input = T.AugInput(image, sem_seg=sem_seg_gt)
transforms = self.augmentations(aug_input)
image, sem_seg_gt = aug_input.image, aug_input.sem_seg
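In a custom mapper, the stock `T.AugInput` at this point can be swapped for `MultiModalAugInput` so instance annotations travel through the same pipeline. A hedged sketch; the surrounding mapper wiring is assumed, not shown in the gist:

```python
# Inside a custom DatasetMapper.__call__, mirroring the lines above;
# `dataset_dict` and self.augmentations come from the surrounding mapper.
aug_input = MultiModalAugInput(image, annos=dataset_dict["annotations"])
transforms = self.augmentations(aug_input)
image, annos = aug_input.image, aug_input.annos
```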
| @META_ARCH | @BACKBONE | @PROPOSAL_GENERATOR | @ROI_HEADS | @ROI_MASK_HEAD |
|:-----------------:|:------------------------------------:|:-------------------:|:----------------:|:------------------------:|
| GeneralizedRCNN | build_resnet_backbone | RPN | RROIHeads | MaskRCNNConvUpsampleHead |
| PanopticFPN | build_resnet_fpn_backbone | RRPN | Res5ROIHeads | |
| ProposalNetwork | build_retinanet_resnet_fpn_backbone | | CascadeROIHeads | |
| RetinaNet | | | StandardROIHeads | |
| SemanticSegmentor | | | | |
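Each column corresponds to a detectron2 registry; adding your own component means decorating it with the matching registry and naming it in the config. A minimal sketch for a custom ROI heads class (the class body is a placeholder):

```python
from detectron2.modeling import ROI_HEADS_REGISTRY, StandardROIHeads

@ROI_HEADS_REGISTRY.register()
class MyROIHeads(StandardROIHeads):
    # Placeholder subclass; override the pieces you need.
    pass

# Then select it from the config:
# cfg.MODEL.ROI_HEADS.NAME = "MyROIHeads"
```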
def do_train_visualization(visualizer, model, inputs):
    # Run a clean inference pass in eval mode, hand the results to the
    # visualizer, then restore whatever mode the model was in before.
    training_mode = model.training
    model.eval()
    outputs = visualizer.inference(model, inputs)
    visualizer.process(inputs, outputs)
    model.train(training_mode)

with EventStorage(start_iter) as storage:
    ...  # training-loop body elided in the gist
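A hedged sketch of how the hook could be wired into the loop; the iteration bookkeeping follows detectron2's tools/plain_train_net.py, while `visualization_period` and `visualizer` are assumptions, not from the gist:

```python
# Sketch, modeled on detectron2's tools/plain_train_net.py do_train();
# visualization_period and visualizer are assumed, not from the gist.
with EventStorage(start_iter) as storage:
    for data, iteration in zip(data_loader, range(start_iter, max_iter)):
        storage.iter = iteration
        loss_dict = model(data)
        losses = sum(loss_dict.values())
        optimizer.zero_grad()
        losses.backward()
        optimizer.step()
        scheduler.step()
        if iteration % visualization_period == 0:
            do_train_visualization(visualizer, model, data)
```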