dgtlmoon/scale-imgs-and-bboxes.py

## scale-imgs-and-bboxes.py
#!/usr/bin/python3
import argparse
from PIL import Image, ExifTags

# Given the training index file and a maximum size (used as width and height)
# Resize anything larger than these and rewrite their scaled bbox information
# ALSO - always rotates the image if there is EXIF data!

import glob
import sys
import math

# Runtime configuration. Both values are placeholders that the __main__ block
# overwrites from the command line before any image is processed.
max_size = False
strip_text_from_paths = False

# Numeric EXIF tag id whose name is 'Orientation'. Falls back to False when the
# tag table has no such entry, rather than leaking whichever tag id a search
# loop happened to visit last.
orientation = next(
    (tag_id for tag_id, tag_name in ExifTags.TAGS.items() if tag_name == 'Orientation'),
    False,
)


def _rescale_bbox_file(bbox_filename, scale_factor):
    """Divide every coordinate in a bbox file by scale_factor, in place.

    Each non-blank line must read ``class xmin ymin xmax ymax`` with integer
    coordinates. The file is only rewritten after every line has parsed, so a
    malformed line cannot leave a half-written file behind.
    """
    with open(bbox_filename, 'r') as bbox_fp:
        raw_lines = bbox_fp.readlines()

    scaled_lines = []
    for raw_line in raw_lines:
        fields = raw_line.split()
        if not fields:
            # Tolerate blank lines (e.g. a trailing newline at end of file).
            continue
        (class_n, xmin, ymin, xmax, ymax) = fields
        # max(1, ...) keeps a coordinate from collapsing to 0 when the
        # division truncates a small value.
        xmin = max(1, int(int(xmin) / scale_factor))
        ymin = max(1, int(int(ymin) / scale_factor))
        xmax = max(1, int(int(xmax) / scale_factor))
        ymax = max(1, int(int(ymax) / scale_factor))
        scaled_lines.append("%s %s %s %s %s" % (class_n, xmin, ymin, xmax, ymax))

    with open(bbox_filename, 'w') as bbox_fp:
        bbox_fp.write('\n'.join(scaled_lines))


def process_single_line_of_index(image_filename, bbox_filename):
    """Normalise one image/bbox pair from the training index, in place.

    The image is rotated when its EXIF orientation value is 3, 6 or 8. If its
    largest dimension exceeds the module-level ``max_size`` it is scaled down
    (aspect ratio preserved) so that dimension fits, and every coordinate in
    the bbox file is divided by the same factor. The image file is rewritten
    only when it was actually rotated or resized, so untouched images are not
    recompressed.

    Args:
        image_filename: Path of the image; overwritten when it changes.
        bbox_filename: Path of a text file holding one
            ``class xmin ymin xmax ymax`` line per box; rewritten only when
            the image is resized.

    Raises:
        ValueError: If a non-blank bbox line does not have exactly five
            fields or a coordinate is not an integer.
    """
    # NOTE(review): the bbox coordinates are only scaled, never rotated. This
    # presumably assumes the boxes were annotated on the already-upright
    # image - confirm against the annotation tool.

    # EXIF orientation value -> degrees handed to Image.rotate to undo it.
    rotation_for_exif = {3: 180, 6: 270, 8: 90}

    with Image.open(image_filename) as source_image:
        try:
            exif_orientation = dict(source_image._getexif().items())[orientation]
        except (AttributeError, KeyError, IndexError):
            # No EXIF block, no _getexif on this format, or no orientation tag.
            exif_orientation = None

        degrees = rotation_for_exif.get(exif_orientation)
        rotated = degrees is not None
        if rotated:
            pil_image = source_image.rotate(degrees, expand=True)
        else:
            pil_image = source_image

        largest_dimension = max(pil_image.width, pil_image.height)
        # A falsy max_size means "not configured": skip resizing instead of
        # dividing by zero.
        resized = bool(max_size) and largest_dimension > max_size
        if resized:
            scale_factor = largest_dimension / max_size
            old_width, old_height = pil_image.size
            # Clamp to 1 so an extreme aspect ratio cannot yield a 0-pixel side.
            new_width = max(1, int(old_width / scale_factor))
            new_height = max(1, int(old_height / scale_factor))

            _rescale_bbox_file(bbox_filename, scale_factor)

            # Image.ANTIALIAS was removed in Pillow 10; LANCZOS is the same filter.
            pil_image = pil_image.resize((new_width, new_height), Image.LANCZOS)

            print(image_filename, old_width, 'x', old_height, 'to', new_width, 'x', new_height, 'scale factor', scale_factor)

    # The source file handle is closed by now; a rotated/resized image is an
    # independent in-memory copy, so overwriting the original path is safe.
    if rotated or resized:
        pil_image.save(image_filename, quality=90)
        pil_image.close()


if __name__ == "__main__":
    # Command-line entry point: read the index file and process every
    # "image_path bbox_path" pair it lists.

    parser = argparse.ArgumentParser(description='Resize images and their bbox to a scale factor according to if they hit a limit size or not')
    parser.add_argument('--mainlist', help='Train image list file, should contain list of  /path/image.jpg /path/bbox.txt', required=True)

    parser.add_argument('--maxsize', type=int,
                        help='Maximum size permitted by the network, usually 512 or 300 (resnet vs SSD etc)', required=True
                        )

    parser.add_argument('--strip',
                        help='Strip this string from any path', required=False
                        )
    args = parser.parse_args()
    if args.maxsize <= 0:
        # A zero or negative limit would produce a nonsensical scale factor.
        parser.error('--maxsize must be a positive integer')
    max_size = args.maxsize

    strip_text_from_paths = args.strip
    # Only announce stripping when a string was actually supplied.
    if strip_text_from_paths:
        print("Stripping %s from any paths I see" % (strip_text_from_paths))

    with open(args.mainlist, 'r') as fp:
        for line in fp:
            fields = line.split()
            if not fields:
                # Skip blank lines instead of failing on the unpack below.
                continue
            (image_filename, bbox_filename) = fields
            if strip_text_from_paths:
                image_filename = image_filename.replace(strip_text_from_paths, '')
                bbox_filename = bbox_filename.replace(strip_text_from_paths, '')
            process_single_line_of_index(image_filename, bbox_filename)

#        width = int(xmax)-int(xmin)
#       height = int(ymax)-int(ymin)
	#!/usr/bin/python3
	import argparse
	from PIL import Image, ExifTags

	# Given the training index file and a maximum size (used as width and height)
	# Resize anything larger than these and rewrite their scaled bbox information
	# ALSO - always rotates the image if there is EXIF data!

	import glob
	import sys
	import math

	# Runtime configuration. Both values are placeholders that the __main__ block
	# overwrites from the command line before any image is processed.
	max_size = False
	strip_text_from_paths = False

	# Numeric EXIF tag id whose name is 'Orientation'. Falls back to False when the
	# tag table has no such entry, rather than leaking whichever tag id a search
	# loop happened to visit last.
	orientation = next(
	    (tag_id for tag_id, tag_name in ExifTags.TAGS.items() if tag_name == 'Orientation'),
	    False,
	)


	def _rescale_bbox_file(bbox_filename, scale_factor):
	    """Divide every coordinate in a bbox file by scale_factor, in place.

	    Each non-blank line must read ``class xmin ymin xmax ymax`` with integer
	    coordinates. The file is only rewritten after every line has parsed, so a
	    malformed line cannot leave a half-written file behind.
	    """
	    with open(bbox_filename, 'r') as bbox_fp:
	        raw_lines = bbox_fp.readlines()

	    scaled_lines = []
	    for raw_line in raw_lines:
	        fields = raw_line.split()
	        if not fields:
	            # Tolerate blank lines (e.g. a trailing newline at end of file).
	            continue
	        (class_n, xmin, ymin, xmax, ymax) = fields
	        # max(1, ...) keeps a coordinate from collapsing to 0 when the
	        # division truncates a small value.
	        xmin = max(1, int(int(xmin) / scale_factor))
	        ymin = max(1, int(int(ymin) / scale_factor))
	        xmax = max(1, int(int(xmax) / scale_factor))
	        ymax = max(1, int(int(ymax) / scale_factor))
	        scaled_lines.append("%s %s %s %s %s" % (class_n, xmin, ymin, xmax, ymax))

	    with open(bbox_filename, 'w') as bbox_fp:
	        bbox_fp.write('\n'.join(scaled_lines))


	def process_single_line_of_index(image_filename, bbox_filename):
	    """Normalise one image/bbox pair from the training index, in place.

	    The image is rotated when its EXIF orientation value is 3, 6 or 8. If its
	    largest dimension exceeds the module-level ``max_size`` it is scaled down
	    (aspect ratio preserved) so that dimension fits, and every coordinate in
	    the bbox file is divided by the same factor. The image file is rewritten
	    only when it was actually rotated or resized, so untouched images are not
	    recompressed.

	    Args:
	        image_filename: Path of the image; overwritten when it changes.
	        bbox_filename: Path of a text file holding one
	            ``class xmin ymin xmax ymax`` line per box; rewritten only when
	            the image is resized.

	    Raises:
	        ValueError: If a non-blank bbox line does not have exactly five
	            fields or a coordinate is not an integer.
	    """
	    # NOTE(review): the bbox coordinates are only scaled, never rotated. This
	    # presumably assumes the boxes were annotated on the already-upright
	    # image - confirm against the annotation tool.

	    # EXIF orientation value -> degrees handed to Image.rotate to undo it.
	    rotation_for_exif = {3: 180, 6: 270, 8: 90}

	    with Image.open(image_filename) as source_image:
	        try:
	            exif_orientation = dict(source_image._getexif().items())[orientation]
	        except (AttributeError, KeyError, IndexError):
	            # No EXIF block, no _getexif on this format, or no orientation tag.
	            exif_orientation = None

	        degrees = rotation_for_exif.get(exif_orientation)
	        rotated = degrees is not None
	        if rotated:
	            pil_image = source_image.rotate(degrees, expand=True)
	        else:
	            pil_image = source_image

	        largest_dimension = max(pil_image.width, pil_image.height)
	        # A falsy max_size means "not configured": skip resizing instead of
	        # dividing by zero.
	        resized = bool(max_size) and largest_dimension > max_size
	        if resized:
	            scale_factor = largest_dimension / max_size
	            old_width, old_height = pil_image.size
	            # Clamp to 1 so an extreme aspect ratio cannot yield a 0-pixel side.
	            new_width = max(1, int(old_width / scale_factor))
	            new_height = max(1, int(old_height / scale_factor))

	            _rescale_bbox_file(bbox_filename, scale_factor)

	            # Image.ANTIALIAS was removed in Pillow 10; LANCZOS is the same filter.
	            pil_image = pil_image.resize((new_width, new_height), Image.LANCZOS)

	            print(image_filename, old_width, 'x', old_height, 'to', new_width, 'x', new_height, 'scale factor', scale_factor)

	    # The source file handle is closed by now; a rotated/resized image is an
	    # independent in-memory copy, so overwriting the original path is safe.
	    if rotated or resized:
	        pil_image.save(image_filename, quality=90)
	        pil_image.close()







	if __name__ == "__main__":
	    # Command-line entry point: read the index file and process every
	    # "image_path bbox_path" pair it lists.

	    parser = argparse.ArgumentParser(description='Resize images and their bbox to a scale factor according to if they hit a limit size or not')
	    parser.add_argument('--mainlist', help='Train image list file, should contain list of /path/image.jpg /path/bbox.txt', required=True)

	    parser.add_argument('--maxsize', type=int,
	                        help='Maximum size permitted by the network, usually 512 or 300 (resnet vs SSD etc)', required=True
	                        )

	    parser.add_argument('--strip',
	                        help='Strip this string from any path', required=False
	                        )
	    args = parser.parse_args()
	    if args.maxsize <= 0:
	        # A zero or negative limit would produce a nonsensical scale factor.
	        parser.error('--maxsize must be a positive integer')
	    max_size = args.maxsize

	    strip_text_from_paths = args.strip
	    # Only announce stripping when a string was actually supplied.
	    if strip_text_from_paths:
	        print("Stripping %s from any paths I see" % (strip_text_from_paths))

	    with open(args.mainlist, 'r') as fp:
	        for line in fp:
	            fields = line.split()
	            if not fields:
	                # Skip blank lines instead of failing on the unpack below.
	                continue
	            (image_filename, bbox_filename) = fields
	            if strip_text_from_paths:
	                image_filename = image_filename.replace(strip_text_from_paths, '')
	                bbox_filename = bbox_filename.replace(strip_text_from_paths, '')
	            process_single_line_of_index(image_filename, bbox_filename)

	# width = int(xmax)-int(xmin)
	# height = int(ymax)-int(ymin)