Skip to content

Instantly share code, notes, and snippets.

@hubutui
Created March 23, 2019 08:23
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save hubutui/9b294f8b287eabc260b9162297638009 to your computer and use it in GitHub Desktop.
Save hubutui/9b294f8b287eabc260b9162297638009 to your computer and use it in GitHub Desktop.
Speed up labelme2voc with simple thread-based parallelism
#!/usr/bin/env python
from __future__ import print_function
from multiprocessing.dummy import Pool as ThreadPool
import argparse
import glob
import json
import os
import os.path as osp
import numpy as np
import PIL.Image
import labelme
def _load_class_names(labels_file):
    """Read the labels file and build the class-name tables.

    The file lists one class name per line.  The first line must be
    ``__ignore__`` (id -1) and the second ``_background_`` (id 0); every
    following line receives the next consecutive id.

    Args:
        labels_file: path of the text file listing the class names.

    Returns:
        A ``(class_names, class_name_to_id)`` pair: a tuple of the class
        names without ``__ignore__``, and a dict mapping every name
        (``__ignore__`` included) to its integer id.

    Raises:
        ValueError: if one of the two reserved leading entries is wrong.
    """
    class_names = []
    class_name_to_id = {}
    # The context manager closes the handle even when validation fails.
    with open(labels_file) as f:
        for i, line in enumerate(f):
            class_id = i - 1  # starts with -1
            class_name = line.strip()
            class_name_to_id[class_name] = class_id
            # Explicit raises instead of ``assert`` so the checks survive -O.
            if class_id == -1:
                if class_name != '__ignore__':
                    raise ValueError(
                        "first label must be '__ignore__', got %r"
                        % class_name)
                continue
            if class_id == 0 and class_name != '_background_':
                raise ValueError(
                    "second label must be '_background_', got %r"
                    % class_name)
            class_names.append(class_name)
    return tuple(class_names), class_name_to_id


def main():
    """Convert a directory of labelme JSON annotations to a VOC-like dataset.

    Command-line arguments: ``labels_file`` (one class name per line),
    ``in_dir`` (directory holding the ``*.json`` annotations), ``out_dir``
    (dataset directory to create; must not exist yet) and the optional
    ``--workers`` (number of worker threads, default 16).

    For every annotation the source image is re-saved under ``JPEGImages``
    and the class-label image under ``SegmentationClassPNG``.  The class
    names are written to ``class_names.txt``.

    Raises:
        SystemExit: with status 1 when ``out_dir`` already exists.
        ValueError: when the labels file does not start with
            ``__ignore__`` followed by ``_background_``.
    """
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('labels_file')
    parser.add_argument('in_dir', help='input dir with annotated files')
    parser.add_argument('out_dir', help='output dataset directory')
    parser.add_argument('--workers', type=int, default=16,
                        help='number of worker threads')
    args = parser.parse_args()

    if osp.exists(args.out_dir):
        print('Output directory already exists:', args.out_dir)
        # ``quit`` is injected by the ``site`` module and may be missing;
        # raising SystemExit works everywhere.
        raise SystemExit(1)

    # Validate the labels before touching the filesystem, so a bad labels
    # file does not leave a half-created output directory behind.
    class_names, class_name_to_id = _load_class_names(args.labels_file)

    os.makedirs(args.out_dir)
    # NOTE(review): nothing below writes into 'SegmentationClass' or
    # 'SegmentationClassVisualization'; they are still created so the
    # output layout stays the same -- confirm whether the .npy / viz
    # outputs were dropped on purpose.
    for sub_dir in ('JPEGImages', 'SegmentationClass',
                    'SegmentationClassPNG',
                    'SegmentationClassVisualization'):
        os.makedirs(osp.join(args.out_dir, sub_dir))
    print('Creating dataset:', args.out_dir)

    print('class_names:', class_names)
    out_class_names_file = osp.join(args.out_dir, 'class_names.txt')
    with open(out_class_names_file, 'w') as f:
        f.write('\n'.join(class_names))
    print('Saved class_names:', out_class_names_file)

    def generate_voc_like_dataset(label_file):
        """Write the JPEG image and label PNG for one annotation file."""
        print('Generating dataset from:', label_file)
        base = osp.splitext(osp.basename(label_file))[0]
        out_img_file = osp.join(
            args.out_dir, 'JPEGImages', base + '.jpg')
        out_png_file = osp.join(
            args.out_dir, 'SegmentationClassPNG', base + '.png')

        # Keep the JSON file open only while it is being parsed.
        with open(label_file) as f:
            data = json.load(f)

        # 'imagePath' is resolved relative to the annotation file.
        img_file = osp.join(osp.dirname(label_file), data['imagePath'])
        img = np.asarray(PIL.Image.open(img_file))
        PIL.Image.fromarray(img).save(out_img_file)

        lbl = labelme.utils.shapes_to_label(
            img_shape=img.shape,
            shapes=data['shapes'],
            label_name_to_value=class_name_to_id,
        )
        labelme.utils.lblsave(out_png_file, lbl)

    items = glob.glob(osp.join(args.in_dir, '*.json'))
    pool = ThreadPool(args.workers)
    try:
        pool.map(generate_voc_like_dataset, items)
    finally:
        # Always release the worker threads, even if a conversion failed.
        pool.close()
        pool.join()
if __name__ == '__main__':
    # Run the conversion only when executed as a script, not on import.
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment