Skip to content

Instantly share code, notes, and snippets.

@mexicantexan
Last active February 8, 2023 14:19
Show Gist options
  • Save mexicantexan/0b67ddb6b74ab5a65b7d8f421ab5b39f to your computer and use it in GitHub Desktop.
Create COCO Annotations from existing dataset to work with MMDetection
# Adapted from https://www.immersivelimit.com/tutorials/create-coco-annotations-from-scratch/#create-custom-coco-dataset
# Aimed at solving problem for: https://mmdetection.readthedocs.io/en/v2.25.0/tutorials/customize_dataset.html
# haven't tested script with multi-category, only tested with binary classification
import json
import numpy as np
from skimage import measure # (pip install scikit-image)
from shapely.geometry import Polygon, MultiPolygon # (pip install Shapely)
from PIL import Image # (pip install Pillow)
import os
from pathlib import Path
from tqdm import tqdm
from typing import Dict, Union, List
def create_sub_masks(_mask_image: Image.Image) -> Dict[str, Image.Image]:
    """
    Split a color-coded mask image into one binary sub-mask per color.

    :param _mask_image: multi-band (RGB or RGBA) mask image where each
        distinct non-black color marks one object.
    :return: dict mapping the color's ``str`` repr (e.g. ``'(255, 255, 255)'``)
        to a mode-``'1'`` PIL image of the same size plus 1 pixel of padding
        on every side.
    """
    _width, _height = _mask_image.size
    # Sub-masks indexed by the RGB color string of the pixels they contain.
    _sub_masks: Dict[str, Image.Image] = {}
    for x in range(_width):
        for y in range(_height):
            # getpixel returns a band tuple for RGB/RGBA images; keep only
            # the first three channels (drops a possible alpha channel).
            pixel = _mask_image.getpixel((x, y))[:3]
            # Black pixels are background and get no sub-mask.
            if pixel != (0, 0, 0):
                pixel_str = str(pixel)
                sub_mask = _sub_masks.get(pixel_str)
                if sub_mask is None:
                    # Create a sub-mask (one bit per pixel). We add 1 pixel
                    # of padding in each direction because the contours
                    # module doesn't handle cases where pixels bleed to the
                    # edge of the image.
                    sub_mask = Image.new('1', (_width + 2, _height + 2))
                    _sub_masks[pixel_str] = sub_mask
                # Set the pixel value to 1 (default is 0), shifted by +1 to
                # account for the padding border.
                sub_mask.putpixel((x + 1, y + 1), 1)
    return _sub_masks
def create_sub_mask_annotation(_sub_mask: Image.Image, _image_id: int, _category_id: int, _annotation_id: int, _is_crowd: int) -> dict:
    """
    Build one COCO-style annotation dict for a binary sub-mask.

    :param _sub_mask: mode-'1' PIL image produced by ``create_sub_masks``
        (padded by 1 pixel on every side).
    :param _image_id: COCO image id this annotation belongs to.
    :param _category_id: COCO category id of the masked object.
    :param _annotation_id: unique COCO annotation id.
    :param _is_crowd: COCO ``iscrowd`` flag (0 or 1).
    :return: dict with ``segmentation``, ``iscrowd``, ``image_id``,
        ``category_id``, ``id``, ``bbox`` (x, y, width, height) and ``area``.
    """
    # Find contours (boundary lines) around each sub-mask.
    # Note: there could be multiple contours if the object
    # is partially occluded. (E.g. an elephant behind a tree)
    # Pass an explicit ndarray rather than relying on implicit conversion
    # of the PIL image.
    contours = measure.find_contours(np.asarray(_sub_mask), 0.5, positive_orientation='low')
    segmentations = []
    polygons = []
    for contour in contours:
        # Flip from (row, col) representation to (x, y) and subtract the
        # padding pixel added in create_sub_masks.
        for i in range(len(contour)):
            row, col = contour[i]
            contour[i] = (col - 1, row - 1)
        # Make a polygon and simplify it.
        poly = Polygon(contour)
        poly = poly.simplify(1.0, preserve_topology=False)
        # With preserve_topology=False, simplify may collapse a tiny contour
        # to an empty geometry or split it into a MultiPolygon; the original
        # code crashed on `.exterior` in those cases. Handle each valid part.
        parts = poly.geoms if isinstance(poly, MultiPolygon) else [poly]
        for part in parts:
            if part.is_empty:
                continue
            polygons.append(part)
            segmentation = np.array(part.exterior.coords).ravel().tolist()
            segmentations.append(segmentation)
    if polygons:
        # Combine the polygons to calculate the bounding box and area.
        multi_poly = MultiPolygon(polygons)
        x, y, max_x, max_y = multi_poly.bounds
        bbox = (x, y, max_x - x, max_y - y)
        area = multi_poly.area
    else:
        # Every contour was simplified away (e.g. a 1-pixel speck): emit a
        # degenerate annotation instead of crashing on empty bounds.
        bbox = (0.0, 0.0, 0.0, 0.0)
        area = 0.0
    return {
        'segmentation': segmentations,
        'iscrowd': _is_crowd,
        'image_id': _image_id,
        'category_id': _category_id,
        'id': _annotation_id,
        'bbox': bbox,
        'area': area
    }
def get_annot_img_list(target_dir: Union[str, Path], specified_ext: str = "png") -> List[Path]:
    """
    Recursively collect files with a given extension under a directory.

    To get all file types set ``specified_ext`` equal to ``"*"``.

    :param target_dir: directory to search (searched recursively).
    :param specified_ext: file extension, with or without a leading dot.
    :return: list of absolute paths of the matching files.
    """
    # Path(Path) is a no-op, so no isinstance branch is needed.
    working_dir = Path(target_dir)
    # Accept both ".png" and "png".
    ext = specified_ext[1:] if specified_ext.startswith(".") else specified_ext
    # rglob("*.x") is equivalent to glob("**/*.x").
    return [path.absolute() for path in working_dir.rglob(f"*.{ext}")]
if __name__ == "__main__":
    INPUT_DIR = ""  # where masks are stored
    OUTPUT_DIR = ""  # where to save output json file
    NUM_CATEGORIES = 1  # number of categories in your dataset
    mask_images = get_annot_img_list(INPUT_DIR)
    # Define which colors match which categories in the images.
    if NUM_CATEGORIES > 1:
        # Adjust this to your liking, this is just dummy data to give you a template.
        sample_ids = [1, 2, 3, 4]
        sample_ids_names = ['houseplant', 'book', 'bottle', 'lamp']
        houseplant_id, book_id, bottle_id, lamp_id = sample_ids
        # NOTE(review): this template maps colors per *image id* (keys 1 and 2).
        # Every image id produced by the loop below needs an entry here, or the
        # category lookup will raise KeyError past the second image.
        category_ids = {
            1: {
                '(0, 255, 0)': houseplant_id,
                '(0, 0, 255)': book_id,
            },
            2: {
                '(255, 255, 0)': bottle_id,
                '(255, 0, 128)': book_id,
                '(255, 100, 0)': lamp_id,
            }
        }
        # Keep COCO category ids consistent with the ids actually written into
        # the annotations (was: idx + 1, which silently diverges whenever
        # sample_ids are not consecutive and 1-based).
        category_list = [{'id': cat_id, 'name': cat_name}
                         for cat_id, cat_name in zip(sample_ids, sample_ids_names)]
    else:
        the_thing_i_want_to_mask = 1
        category_ids = {
            1: {'(255, 255, 255)': the_thing_i_want_to_mask}
        }
        category_list = [{'id': 1, 'name': 'the_thing_i_want_to_mask'}]
    is_crowd = 0
    # These ids will be automatically increased as we go.
    annotation_id = 1
    image_id = 1
    # Create the annotations.
    annotations = []
    image_descs = []
    for mask_image_path in tqdm(mask_images, total=len(mask_images), desc="Creating Mask Annotations"):
        mask_image = Image.open(mask_image_path)
        width, height = mask_image.size
        image_descs.append({
            'file_name': str(mask_image_path.name),
            'height': height,
            'width': width,
            'id': image_id
        })
        # create_sub_masks slices each pixel with [:3], which requires a
        # multi-band image. Convert anything else — grayscale "L", palettized
        # "P", bilevel "1" — to RGB (was: only "L" was handled, so "P"/"1"
        # masks crashed with a TypeError on an int pixel).
        if mask_image.mode not in ("RGB", "RGBA"):
            mask_image = mask_image.convert("RGB")
        sub_masks = create_sub_masks(mask_image)
        for color, sub_mask in sub_masks.items():
            if NUM_CATEGORIES > 1:
                category_id = category_ids[image_id][color]
            else:
                category_id = category_ids[1][color]
            annotation = create_sub_mask_annotation(sub_mask, image_id, category_id, annotation_id, is_crowd)
            annotations.append(annotation)
            annotation_id += 1
        image_id += 1
    # Save the json file in MMDetection-compatible COCO layout.
    output_mmdetect_json_compat = {
        'images': image_descs,
        'annotations': annotations,
        'categories': category_list
    }
    with open(os.path.join(OUTPUT_DIR, 'output.json'), 'w') as fout:
        # default= lets json fall back to __dict__ for any non-primitive
        # object that sneaks into the structure.
        json_string = json.dumps(output_mmdetect_json_compat, default=lambda o: o.__dict__, sort_keys=True, indent=4)
        fout.write(json_string)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment