cooliscool/create_dataset.py

## create_dataset.py
# Builds a reduced VOC2012 training subset: keeps only the objects whose class
# is listed in classesINeed, writes the filtered annotation XML and the matching
# JPEG to new folders, and emits the label map and the image/annotation list file.

import os
import xml.etree.ElementTree as ET
from shutil import copyfile
from os.path import isfile, join

# Reference list of class names ('none' first, then the 20 object categories).
# It is not read anywhere below; pick the names for classesINeed from it.
PASCAL_CLASSES = [
    'none',
    'aeroplane',
    'bicycle',
    'bird',
    'boat',
    'bottle',
    'bus',
    'car',
    'cat',
    'chair',
    'cow',
    'diningtable',
    'dog',
    'horse',
    'motorbike',
    'person',
    'pottedplant',
    'sheep',
    'sofa',
    'train',
    'tvmonitor'
]

# Fill in the classes you want to retain
classesINeed = ['none', 'car']

# Define the relevant directories
xmlDirectory = '/home/ajmal/data/VOCdevkit/VOC2012/Annotations/'
modifiedXmlDir = '/home/ajmal/data/VOCdevkit/VOC2012/newAnnotations/'
JPEGdirectory = '/home/ajmal/data/VOCdevkit/VOC2012/JPEGImages/'
modifiedJPEGdir = '/home/ajmal/data/VOCdevkit/VOC2012/newJPEGImages/'
listFile = '/home/ajmal/data/VOCdevkit/VOC2012/trainval.txt'
labelMap = '/home/ajmal/data/VOCdevkit/VOC2012/labelmap_voc.prototxt'

# Take only this many images to train
maxImages = 101

# Create the output folders up front so the first write cannot fail on a
# missing directory.
for outDir in (modifiedXmlDir, modifiedJPEGdir):
    if not os.path.isdir(outDir):
        os.makedirs(outDir)

# For saving the class - file dictionary (one entry is appended per retained
# object, so a file with two cars is listed twice under 'car').
fileDict = dict((claz, []) for claz in classesINeed)

# Generate the labelmap file: one item per retained class, the label being the
# class position in classesINeed. 'with' closes the file even if a write fails.
with open(labelMap, 'w') as labelmap:
    for i, claz in enumerate(classesINeed):
        labelmap.write('item {\n  name: "' + claz + '"\n  label: ' + str(i) + '\n  display_name: "' + claz + '"\n}\n')

# Get all the xml files into a list. Non-XML entries are skipped so ET.parse
# never sees them, and the names are sorted because os.listdir order is
# arbitrary and would make the selected subset differ from run to run.
onlyfiles = sorted(
    f for f in os.listdir(xmlDirectory)
    if f.lower().endswith('.xml') and isfile(join(xmlDirectory, f))
)

# for limiting number of images
imgnum = 0

with open(listFile, 'w') as listfile:
    # Parse each XML file
    for filename in onlyfiles:
        tree = ET.parse(join(xmlDirectory, filename))
        root = tree.getroot()

        # Count the objects of interest and strip every other object from the tree.
        objNum = 0
        for obj in root.findall('object'):
            # findtext returns None for an object without a <name> child, which
            # is then removed like any other unwanted object instead of crashing.
            currentObj = obj.findtext('name')
            if currentObj in classesINeed:
                fileDict[currentObj].append(filename)
                objNum += 1
            else:
                root.remove(obj)

        if objNum == 0:
            continue  # drop the file, there are no objects of interest

        # Write the filtered annotation to the new folder. Passing the path lets
        # ElementTree open the file in binary mode itself, which works on both
        # Python 2 and Python 3.
        tree.write(join(modifiedXmlDir, filename))

        # copy the corresponding JPEG to modifiedJPEGdir
        jpegName = os.path.splitext(filename)[0] + '.jpg'
        copyfile(join(JPEGdirectory, jpegName), join(modifiedJPEGdir, jpegName))
        imgnum += 1

        # make entry in the list file required for LMDB
        listfile.write('VOC2012/newJPEGImages/' + jpegName + ' VOC2012/newAnnotations/' + filename + '\n')

        if imgnum == maxImages:
            break

# Number of retained 'car' objects; .get keeps the summary from raising
# KeyError when 'car' has been taken out of classesINeed.
print(len(fileDict.get('car', [])))

## create_dataset_VOCtest.py
# Builds a reduced VOC2007 test subset: keeps only the objects whose class is
# listed in classesINeed, writes the filtered annotation XML and the matching
# JPEG to new folders, and emits the label map and the image/annotation list file.

import os
import xml.etree.ElementTree as ET
from shutil import copyfile
from os.path import isfile, join

# Reference list of class names ('none' first, then the 20 object categories).
# It is not read anywhere below; pick the names for classesINeed from it.
PASCAL_CLASSES = [
    'none',
    'aeroplane',
    'bicycle',
    'bird',
    'boat',
    'bottle',
    'bus',
    'car',
    'cat',
    'chair',
    'cow',
    'diningtable',
    'dog',
    'horse',
    'motorbike',
    'person',
    'pottedplant',
    'sheep',
    'sofa',
    'train',
    'tvmonitor'
]

# Fill in the classes you want to retain
classesINeed = ['none', 'car']

# Define the relevant directories
xmlDirectory = '/home/ajmal/data/VOCdevkit/VOC2007/Annotations/'
modifiedXmlDir = '/home/ajmal/data/VOCdevkit/VOC2007/newAnnotations/'
JPEGdirectory = '/home/ajmal/data/VOCdevkit/VOC2007/JPEGImages/'
modifiedJPEGdir = '/home/ajmal/data/VOCdevkit/VOC2007/newJPEGImages/'
listFile = '/home/ajmal/data/VOCdevkit/VOC2007/test.txt'
labelMap = '/home/ajmal/data/VOCdevkit/VOC2007/labelmap_voc.prototxt'

# Take only this many images to test
maxImages = 21

# Create the output folders up front so the first write cannot fail on a
# missing directory.
for outDir in (modifiedXmlDir, modifiedJPEGdir):
    if not os.path.isdir(outDir):
        os.makedirs(outDir)

# For saving the class - file dictionary (one entry is appended per retained
# object, so a file with two cars is listed twice under 'car').
fileDict = dict((claz, []) for claz in classesINeed)

# Generate the labelmap file: one item per retained class, the label being the
# class position in classesINeed. 'with' closes the file even if a write fails.
with open(labelMap, 'w') as labelmap:
    for i, claz in enumerate(classesINeed):
        labelmap.write('item {\n  name: "' + claz + '"\n  label: ' + str(i) + '\n  display_name: "' + claz + '"\n}\n')

# Get all the xml files into a list. Non-XML entries are skipped so ET.parse
# never sees them, and the names are sorted because os.listdir order is
# arbitrary and would make the selected subset differ from run to run.
onlyfiles = sorted(
    f for f in os.listdir(xmlDirectory)
    if f.lower().endswith('.xml') and isfile(join(xmlDirectory, f))
)

# for limiting number of images
imgnum = 0

with open(listFile, 'w') as listfile:
    # Parse each XML file
    for filename in onlyfiles:
        tree = ET.parse(join(xmlDirectory, filename))
        root = tree.getroot()

        # Count the objects of interest and strip every other object from the tree.
        objNum = 0
        for obj in root.findall('object'):
            # findtext returns None for an object without a <name> child, which
            # is then removed like any other unwanted object instead of crashing.
            currentObj = obj.findtext('name')
            if currentObj in classesINeed:
                fileDict[currentObj].append(filename)
                objNum += 1
            else:
                root.remove(obj)

        if objNum == 0:
            continue  # drop the file, there are no objects of interest

        # Write the filtered annotation to the new folder. Passing the path lets
        # ElementTree open the file in binary mode itself, which works on both
        # Python 2 and Python 3.
        tree.write(join(modifiedXmlDir, filename))

        # copy the corresponding JPEG to modifiedJPEGdir
        jpegName = os.path.splitext(filename)[0] + '.jpg'
        copyfile(join(JPEGdirectory, jpegName), join(modifiedJPEGdir, jpegName))
        imgnum += 1

        # make entry in the list file required for LMDB
        listfile.write('VOC2007/newJPEGImages/' + jpegName + ' VOC2007/newAnnotations/' + filename + '\n')

        if imgnum == maxImages:
            break

# Number of retained 'car' objects; .get keeps the summary from raising
# KeyError when 'car' has been taken out of classesINeed.
print(len(fileDict.get('car', [])))

'''

Run the following in a terminal, from the Caffe root directory, to generate the image-size list file:


 ./build/tools/get_image_size /home/ajmal/data/VOCdevkit/ data/VOC0712/test.txt data/VOC0712/test_name_size.txt


'''
	# Builds a reduced VOC2012 training subset: keeps only the objects whose class
	# is listed in classesINeed, writes the filtered annotation XML and the matching
	# JPEG to new folders, and emits the label map and the image/annotation list file.
	# NOTE(review): this whole section carries one leading tab, as in the original
	# paste; dedent it by one level before running it as a stand-alone script.

	import os
	import xml.etree.ElementTree as ET
	from shutil import copyfile
	from os.path import isfile, join

	# Reference list of class names ('none' first, then the 20 object categories).
	# It is not read anywhere below; pick the names for classesINeed from it.
	PASCAL_CLASSES = [
		'none',
		'aeroplane',
		'bicycle',
		'bird',
		'boat',
		'bottle',
		'bus',
		'car',
		'cat',
		'chair',
		'cow',
		'diningtable',
		'dog',
		'horse',
		'motorbike',
		'person',
		'pottedplant',
		'sheep',
		'sofa',
		'train',
		'tvmonitor'
	]

	# Fill in the classes you want to retain
	classesINeed = ['none', 'car']

	# Define the relevant directories
	xmlDirectory = '/home/ajmal/data/VOCdevkit/VOC2012/Annotations/'
	modifiedXmlDir = '/home/ajmal/data/VOCdevkit/VOC2012/newAnnotations/'
	JPEGdirectory = '/home/ajmal/data/VOCdevkit/VOC2012/JPEGImages/'
	modifiedJPEGdir = '/home/ajmal/data/VOCdevkit/VOC2012/newJPEGImages/'
	listFile = '/home/ajmal/data/VOCdevkit/VOC2012/trainval.txt'
	labelMap = '/home/ajmal/data/VOCdevkit/VOC2012/labelmap_voc.prototxt'

	# Take only this many images to train
	maxImages = 101

	# Create the output folders up front so the first write cannot fail on a
	# missing directory.
	for outDir in (modifiedXmlDir, modifiedJPEGdir):
		if not os.path.isdir(outDir):
			os.makedirs(outDir)

	# For saving the class - file dictionary (one entry is appended per retained
	# object, so a file with two cars is listed twice under 'car').
	fileDict = dict((claz, []) for claz in classesINeed)

	# Generate the labelmap file: one item per retained class, the label being the
	# class position in classesINeed. 'with' closes the file even if a write fails.
	with open(labelMap, 'w') as labelmap:
		for i, claz in enumerate(classesINeed):
			labelmap.write('item {\n name: "' + claz + '"\n label: ' + str(i) + '\n display_name: "' + claz + '"\n}\n')

	# Get all the xml files into a list. Non-XML entries are skipped so ET.parse
	# never sees them, and the names are sorted because os.listdir order is
	# arbitrary and would make the selected subset differ from run to run.
	onlyfiles = sorted(
		f for f in os.listdir(xmlDirectory)
		if f.lower().endswith('.xml') and isfile(join(xmlDirectory, f))
	)

	# for limiting number of images
	imgnum = 0

	with open(listFile, 'w') as listfile:
		# Parse each XML file
		for filename in onlyfiles:
			tree = ET.parse(join(xmlDirectory, filename))
			root = tree.getroot()

			# Count the objects of interest and strip every other object from the tree.
			objNum = 0
			for obj in root.findall('object'):
				# findtext returns None for an object without a <name> child, which
				# is then removed like any other unwanted object instead of crashing.
				currentObj = obj.findtext('name')
				if currentObj in classesINeed:
					fileDict[currentObj].append(filename)
					objNum += 1
				else:
					root.remove(obj)

			if objNum == 0:
				continue  # drop the file, there are no objects of interest

			# Write the filtered annotation to the new folder. Passing the path lets
			# ElementTree open the file in binary mode itself, which works on both
			# Python 2 and Python 3.
			tree.write(join(modifiedXmlDir, filename))

			# copy the corresponding JPEG to modifiedJPEGdir
			jpegName = os.path.splitext(filename)[0] + '.jpg'
			copyfile(join(JPEGdirectory, jpegName), join(modifiedJPEGdir, jpegName))
			imgnum += 1

			# make entry in the list file required for LMDB
			listfile.write('VOC2012/newJPEGImages/' + jpegName + ' VOC2012/newAnnotations/' + filename + '\n')

			if imgnum == maxImages:
				break

	# Number of retained 'car' objects; .get keeps the summary from raising
	# KeyError when 'car' has been taken out of classesINeed.
	print(len(fileDict.get('car', [])))