Skip to content

Instantly share code, notes, and snippets.

@precious
Last active May 16, 2019 15:47
Show Gist options
  • Save precious/cf978a70191d3f6fcfae18862ff64a08 to your computer and use it in GitHub Desktop.
Save precious/cf978a70191d3f6fcfae18862ff64a08 to your computer and use it in GitHub Desktop.
import numpy as np
from shapely.geometry import Polygon, Point
from PIL import Image
import os
import sys
import csv
import pickle
import json
import random
import string
# Command-line handling. Exactly three positional arguments are required; they
# are bound as module-level names because main() reads them as globals.
# Anything other than three arguments prints the usage text and exits with 1.
if len(sys.argv) != 4:
    print('''===
usage: python {0} <source_data_file> <image_base_dir> <output_dir>
---
example 1: python3 {0} "/Users/vsevolodkulaga/Downloads/36a76ac517d1ae845c1b63e74a4cbf1aa2222a4e_P13 (1).txt" \
/Users/vsevolodkulaga/projects/ct /Users/vsevolodkulaga/projects/ct/out
(so if your relative image path is "images/13/00021.png" and its absolute path is \
"/Users/vsevolodkulaga/projects/ct/images/13/00021.png", you should specify image_base_dir \
parameter "/Users/vsevolodkulaga/projects/ct")
==='''.format(sys.argv[0]), file=sys.stderr)
    sys.exit(1)
source_data_file, image_base_dir, output_dir = sys.argv[1:]
def make_image_array(image_path):
    """Load an image file and return its pixel data as a NumPy array.

    If the decoded array has more than two dimensions (for example separate
    colour channels), the image is converted to Pillow mode ``'L'`` first so
    that the result is a single-channel array.

    Args:
        image_path: path of the image file to open with Pillow.

    Returns:
        The NumPy array produced by ``np.asarray`` for the (possibly
        converted) image.
    """
    # Image.open keeps the underlying file open; the context manager makes
    # sure the handle is released once the pixels have been copied out.
    with Image.open(image_path) as image:
        image_array_2d = np.asarray(image)
        if image_array_2d.ndim > 2:  # need to convert to grayscale
            image_array_2d = np.asarray(image.convert('L'))
    return image_array_2d
def process_polygon(polygon_vertices, shape, target_array_2d=None):
    """Rasterize a polygon into a 0/1 mask array.

    A cell ``[i, j]`` is set to 1 when ``Point(i, j)`` lies strictly inside
    the polygon, i.e. the first vertex coordinate is matched against array
    axis 0 and the second against axis 1.
    NOTE(review): if the vertices are image (x=column, y=row) coordinates
    this is transposed relative to the usual [row, column] indexing -- confirm
    against the tool that produced the annotations.

    Args:
        polygon_vertices: coordinate pairs handed to ``shapely`` ``Polygon``.
        shape: shape of the mask to allocate when ``target_array_2d`` is None.
        target_array_2d: optional existing mask. It is updated in place so
            several polygons can be accumulated into one mask; cells that are
            already set are never cleared.

    Returns:
        The mask array (the same object as ``target_array_2d`` when given,
        otherwise a new ``int8`` array of the requested shape).
    """
    if target_array_2d is None:
        target_array_2d = np.zeros(shape, dtype=np.int8)

    polygon = Polygon(polygon_vertices)
    if polygon.is_empty:
        # Nothing to draw; an empty geometry has no usable bounds either.
        return target_array_2d
    minx, miny, maxx, maxy = polygon.bounds

    # Only integer coordinates strictly inside the bounding box can be inside
    # the polygon, so iterate over that window (clipped to the array) instead
    # of scanning the whole image.
    first_i = max(int(np.floor(minx)) + 1, 0)
    last_i = min(int(np.ceil(maxx)), target_array_2d.shape[0])
    first_j = max(int(np.floor(miny)) + 1, 0)
    last_j = min(int(np.ceil(maxy)), target_array_2d.shape[1])

    for i in range(first_i, last_i):
        for j in range(first_j, last_j):
            # Set-only update: writing 0 for "not contained" would wipe out
            # pixels of previously accumulated polygons whose bounding boxes
            # overlap this one.
            if polygon.contains(Point(i, j)):
                target_array_2d[i, j] = 1
    return target_array_2d
def main():
    """Convert an annotation export into one pickle file per image.

    Reads the module-level ``source_data_file`` (a ``;``-delimited file with
    the columns number, name, image_relative_path, json_data, timestamp),
    resolves images relative to ``image_base_dir`` and writes the results
    into ``output_dir``.

    For every image a dict is built and pickled:
      * ``'label'``  -- label of a record whose JSON ``type`` is ``'global'``;
      * ``'volume'`` -- pixel array of the image (loaded on the first polygon);
      * one entry per polygon label holding the accumulated mask.
    Records with any other ``type`` are reported on stderr and skipped.
    NOTE(review): a polygon labelled ``'label'`` or ``'volume'`` would collide
    with the reserved keys above -- confirm such labels cannot occur.

    Exits with status 1 when the image directory is missing, the output
    directory cannot be used, or a referenced image file does not exist.
    """
    header = ['number', 'name', 'image_relative_path', 'json_data', 'timestamp']
    if not os.path.isdir(image_base_dir):
        print('ERROR!', image_base_dir, 'is not a directory!', file=sys.stderr)
        sys.exit(1)

    # Prepare the output directory before the slow rasterization, so a bad
    # path is reported immediately instead of after all the work is done.
    try:
        os.makedirs(output_dir, exist_ok=True)
    except OSError as error:
        print('ERROR!', 'Cannot use output dir "{0}": {1}'.format(output_dir, error),
              file=sys.stderr)
        sys.exit(1)

    images_dicts = {}
    # newline='' lets the csv module do its own line-ending handling.
    with open(source_data_file, newline='') as input_file:
        reader = csv.DictReader(input_file, header, delimiter=';')
        for row in reader:
            data = json.loads(row['json_data'])
            # Flatten the relative path so it can be used as a file name.
            image_dict_key = row['image_relative_path'].replace('/', '_')
            current_image_dict = images_dicts.setdefault(image_dict_key, {})
            if data['type'] == 'global':
                current_image_dict['label'] = data['label']
            elif data['type'] == 'polygon':
                print('processing polygon with label "{0}" for {1}'.format(data['label'], row['image_relative_path']))
                image_full_path = os.path.join(image_base_dir, row['image_relative_path'])
                if not os.path.isfile(image_full_path):
                    print('ERROR!', 'No such file: "{0}". Is this a correct images dir: "{1}"?'.format(image_full_path,
                                                                                                     image_base_dir),
                          file=sys.stderr)
                    sys.exit(1)
                if 'volume' not in current_image_dict:
                    current_image_dict['volume'] = make_image_array(image_full_path)
                # Passing the existing mask (if any) accumulates all polygons
                # that share a label into a single array.
                current_image_dict[data['label']] = process_polygon(data['data'],
                                                                    current_image_dict['volume'].shape,
                                                                    current_image_dict.get(data['label']))
            else:
                print('skipping unknown record type:', data['type'], file=sys.stderr)

    # write everything to files
    # One random suffix per run keeps the outputs of different runs apart.
    random_suffix = ''.join(random.choices(string.ascii_letters + string.digits, k=7))
    for image_dict_key, current_image_dict in images_dicts.items():
        output_path = os.path.join(output_dir, '{0}__{1}.pkl'.format(image_dict_key, random_suffix))
        with open(output_path, 'wb') as output_file:
            pickle.dump(current_image_dict, output_file)
        print('=> generated file', output_path)
# Run the conversion only when the file is executed as a script.
if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment