Skip to content

Instantly share code, notes, and snippets.

@dgtlmoon
Last active August 20, 2020 19:07
Show Gist options
  • Save dgtlmoon/571c4af70ebff0a5374fd9f303ba91b1 to your computer and use it in GitHub Desktop.
Save dgtlmoon/571c4af70ebff0a5374fd9f303ba91b1 to your computer and use it in GitHub Desktop.
#!/usr/bin/python3
import argparse
from PIL import Image, ExifTags
# Given the training index file and a maximum size (applied to both width and height),
# resize any image whose larger dimension exceeds that size and rewrite its bbox file with the scaled coordinates.
# ALSO - always rotates the image according to its EXIF orientation data, if there is any!
import glob
import sys
import math
# Module-level settings; the command-line entry point overwrites both of them
# from --maxsize and --strip before any image is processed.
max_size = False
strip_text_from_paths = False
# Find the numeric id of the EXIF 'Orientation' tag. Falls back to False when
# the tag table has no such entry (a plain for/break loop would instead leave
# the last tag id visited in the variable).
orientation = next(
    (tag_id for tag_id, tag_name in ExifTags.TAGS.items() if tag_name == 'Orientation'),
    False,
)
def _rewrite_scaled_bboxes(bbox_filename, scale_factor):
    """Divide every coordinate in a bbox file by ``scale_factor``, in place.

    Each non-blank line must hold ``class xmin ymin xmax ymax`` with integer
    coordinates; a malformed line raises ``ValueError`` before anything is
    written back.
    """
    scaled_lines = []
    with open(bbox_filename, 'r') as bbox_fp:
        for line in bbox_fp:
            fields = line.split()
            if not fields:
                # Tolerate blank lines (e.g. a trailing empty line).
                continue
            class_n, xmin, ymin, xmax, ymax = fields
            # Clamp to 1 so a coordinate never collapses to 0 after scaling
            # (presumably the consumer expects positive coordinates - confirm).
            xmin, ymin, xmax, ymax = (
                max(1, int(int(value) / scale_factor))
                for value in (xmin, ymin, xmax, ymax)
            )
            scaled_lines.append("%s %s %s %s %s" % (class_n, xmin, ymin, xmax, ymax))
    # The whole file was read above, so it is safe to truncate and rewrite it.
    with open(bbox_filename, 'w') as bbox_fp:
        bbox_fp.write('\n'.join(scaled_lines))


def process_single_line_of_index(image_filename, bbox_filename):
    """Normalise one image/bbox pair from the training index, in place.

    The image is rotated according to its EXIF orientation tag (values 3, 6
    and 8). If its largest dimension exceeds the module-level ``max_size``,
    the image is scaled down by ``largest_dimension / max_size`` and every
    coordinate in the bbox file is divided by the same factor. The image file
    is only rewritten when it was actually rotated or resized, so untouched
    images are not recompressed.

    Args:
        image_filename: Path of the image to inspect and possibly overwrite.
        bbox_filename: Path of the text file holding one
            ``class xmin ymin xmax ymax`` entry per line.

    Raises:
        ValueError: If a non-blank bbox line does not have exactly five
            fields or contains a non-integer coordinate.
    """
    source_image = Image.open(image_filename)
    try:
        pil_image = source_image

        # ALWAYS try to fix the EXIF rotate data...
        try:
            exif = dict(pil_image._getexif().items())
            degrees = {3: 180, 6: 270, 8: 90}.get(exif[orientation])
        except (AttributeError, KeyError, IndexError):
            # Probably doesn't have any relevant EXIF data.
            degrees = None

        rotated = False
        if degrees:
            # NOTE(review): the bbox coordinates are not rotated with the
            # image; presumably they were annotated against the upright
            # view - confirm.
            pil_image = pil_image.rotate(degrees, expand=True)
            rotated = True

        original_width, original_height = pil_image.size
        largest_dimension = max(original_width, original_height)

        # If one of the dimensions of the image is too large, we have to act.
        # A nice way around this would be to offer some clipping ability if
        # it doesn't affect the bbox.
        # max_size stays False until configured; never divide by it then.
        needs_resize = bool(max_size) and largest_dimension > max_size
        if needs_resize:
            scale_factor = largest_dimension / max_size
            # Keep at least one pixel for extreme aspect ratios.
            new_width = max(1, int(original_width / scale_factor))
            new_height = max(1, int(original_height / scale_factor))

            # Update the BBOX
            _rewrite_scaled_bboxes(bbox_filename, scale_factor)

            # Update the IMAGE. LANCZOS is the filter formerly exposed as
            # ANTIALIAS, which newer Pillow releases no longer provide.
            pil_image = pil_image.resize((new_width, new_height), Image.LANCZOS)
            print(image_filename, original_width, 'x', original_height, 'to', new_width, 'x', new_height, 'scale factor', scale_factor)

        if needs_resize or rotated:
            pil_image.save(image_filename, quality=90)
    finally:
        source_image.close()
if __name__ == "__main__":
    # Command-line entry point: read the index file and process every
    # "image bbox" pair it lists, after setting the module-level options.
    parser = argparse.ArgumentParser(description='Resize images and their bbox to a scale factor according to if they hit a limit size or not')
    parser.add_argument('--mainlist', help='Train image list file, should contain list of /path/image.jpg /path/bbox.txt', required=True)
    parser.add_argument('--maxsize',
                        help='Maximum size permitted by the network, usually 512 or 300 (resnet vs SSD etc)', required=True,
                        type=int
                        )
    parser.add_argument('--strip',
                        help='Strip this string from any path', required=False
                        )
    args = parser.parse_args()

    # A zero or negative limit would make the scale factor meaningless.
    if args.maxsize <= 0:
        parser.error('--maxsize must be a positive integer')

    max_size = args.maxsize
    strip_text_from_paths = args.strip
    if strip_text_from_paths:
        print("Stripping %s from any paths I see" % (strip_text_from_paths))

    with open(args.mainlist, 'r') as fp:
        for line in fp:
            fields = line.split()
            if not fields:
                # Skip blank lines instead of failing on the unpack below.
                continue
            (image_filename, bbox_filename) = fields
            if strip_text_from_paths:
                image_filename = image_filename.replace(strip_text_from_paths, '')
                bbox_filename = bbox_filename.replace(strip_text_from_paths, '')
            process_single_line_of_index(image_filename, bbox_filename)
# width = int(xmax)-int(xmin)
# height = int(ymax)-int(ymin)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment