Created
March 23, 2019 08:23
-
-
Save hubutui/9b294f8b287eabc260b9162297638009 to your computer and use it in GitHub Desktop.
Speed up labelme2voc via simple parallelism
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
from __future__ import print_function | |
from multiprocessing.dummy import Pool as ThreadPool | |
import argparse | |
import glob | |
import json | |
import os | |
import os.path as osp | |
import numpy as np | |
import PIL.Image | |
import labelme | |
def main():
    """Convert a directory of labelme JSON annotations into a VOC-like dataset.

    Command-line arguments (parsed from ``sys.argv``):
        labels_file: text file with one class name per line. The first line
            must be ``__ignore__`` (mapped to id -1) and the second
            ``_background_`` (mapped to id 0); later lines get ids 1, 2, ...
        in_dir: input dir with annotated ``*.json`` files.
        out_dir: output dataset directory. It must not exist yet.

    Side effects:
        * Creates ``out_dir`` with the sub-directories ``JPEGImages``,
          ``SegmentationClass``, ``SegmentationClassPNG`` and
          ``SegmentationClassVisualization``.
        * Writes ``class_names.txt`` (every class except ``__ignore__``).
        * For every ``*.json`` in ``in_dir`` writes the image to
          ``JPEGImages/<base>.jpg`` and the label PNG to
          ``SegmentationClassPNG/<base>.png``, using a pool of 16 threads.

    Raises:
        SystemExit: with status 1 when ``out_dir`` already exists.
        AssertionError: when the first two label names are not
            ``__ignore__`` / ``_background_``.
    """
    # Function-scope import keeps this block self-contained; ``sys.exit`` is
    # used instead of ``quit``, which only exists when the ``site`` module
    # has been loaded.
    import sys

    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('labels_file')
    parser.add_argument('in_dir', help='input dir with annotated files')
    parser.add_argument('out_dir', help='output dataset directory')
    args = parser.parse_args()

    # Refuse to touch an existing directory so earlier results are never
    # overwritten.
    if osp.exists(args.out_dir):
        print('Output directory already exists:', args.out_dir)
        sys.exit(1)

    os.makedirs(args.out_dir)
    # NOTE: 'SegmentationClass' and 'SegmentationClassVisualization' are
    # created but nothing in this function writes into them.
    for sub_dir in ('JPEGImages',
                    'SegmentationClass',
                    'SegmentationClassPNG',
                    'SegmentationClassVisualization'):
        os.makedirs(osp.join(args.out_dir, sub_dir))
    print('Creating dataset:', args.out_dir)

    class_names = []
    class_name_to_id = {}
    # Read inside ``with`` so the labels file is closed deterministically.
    with open(args.labels_file) as f:
        label_lines = f.readlines()
    for i, line in enumerate(label_lines):
        class_id = i - 1  # starts with -1
        class_name = line.strip()
        # '__ignore__' is registered in the mapping (id -1) but is left out
        # of ``class_names``.
        class_name_to_id[class_name] = class_id
        if class_id == -1:
            assert class_name == '__ignore__', \
                'first label must be __ignore__'
            continue
        elif class_id == 0:
            assert class_name == '_background_', \
                'second label must be _background_'
        class_names.append(class_name)
    class_names = tuple(class_names)
    print('class_names:', class_names)

    out_class_names_file = osp.join(args.out_dir, 'class_names.txt')
    with open(out_class_names_file, 'w') as f:
        # One write of the joined text; no trailing newline, as before.
        f.write('\n'.join(class_names))
    print('Saved class_names:', out_class_names_file)

    # NOTE(review): ``colormap`` is not referenced below; presumably a
    # leftover from a visualization step - confirm before removing the call.
    colormap = labelme.utils.label_colormap(255)

    def generate_voc_like_dataset(label_file):
        """Convert one labelme JSON file into its image and label PNG.

        Reads ``args.out_dir`` and ``class_name_to_id`` from the enclosing
        scope. ``label_file`` is the path of the ``*.json`` annotation.
        """
        print('Generating dataset from:', label_file)
        base = osp.splitext(osp.basename(label_file))[0]
        out_img_file = osp.join(
            args.out_dir, 'JPEGImages', base + '.jpg')
        out_png_file = osp.join(
            args.out_dir, 'SegmentationClassPNG', base + '.png')

        # Hold the JSON file open only while parsing it.
        with open(label_file) as f:
            data = json.load(f)

        # 'imagePath' is resolved relative to the annotation file's folder.
        img_file = osp.join(osp.dirname(label_file), data['imagePath'])
        img = np.asarray(PIL.Image.open(img_file))
        PIL.Image.fromarray(img).save(out_img_file)

        lbl = labelme.utils.shapes_to_label(
            img_shape=img.shape,
            shapes=data['shapes'],
            label_name_to_value=class_name_to_id,
        )
        labelme.utils.lblsave(out_png_file, lbl)

    items = glob.glob(osp.join(args.in_dir, '*.json'))
    pool = ThreadPool(16)
    try:
        pool.map(generate_voc_like_dataset, items)
    finally:
        # Always shut the workers down, even when a conversion fails.
        pool.close()
        pool.join()
# Run the conversion only when this file is executed as a script, so that
# importing the module has no side effects.
if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment