Skip to content

Instantly share code, notes, and snippets.

@cooliscool
Last active January 21, 2023 10:18
Show Gist options
  • Save cooliscool/6739d9ad27014c25f7816c40fc7cce60 to your computer and use it in GitHub Desktop.
Save cooliscool/6739d9ad27014c25f7816c40fc7cce60 to your computer and use it in GitHub Desktop.
For selecting only a few classes from PASCAL VOC for training in TensorFlow. (Please read through the code thoroughly :) )
# Class names in index order: the 'none' placeholder at index 0, followed by
# the 20 object categories in alphabetical order.
PASCAL_CLASSES = [
    'none',
    'aeroplane', 'bicycle', 'bird', 'boat', 'bottle',
    'bus', 'car', 'cat', 'chair', 'cow',
    'diningtable', 'dog', 'horse', 'motorbike', 'person',
    'pottedplant', 'sheep', 'sofa', 'train', 'tvmonitor',
]
# Build a reduced VOC2012 training set that keeps only the classes listed in
# `classesINeed`:
#   * writes a label map with one `item { ... }` entry per retained class,
#   * strips every <object> of another class from each annotation XML,
#   * copies the surviving annotations and their JPEGs into the "new*" folders,
#   * writes one "<image> <annotation>" line per kept image to the list file.
import os
from shutil import copyfile
from os.path import isfile, join
import xml.etree.ElementTree as ET

# Fill in the classes you want to retain
classesINeed = ['none', 'car']

# Define the relevant directories
xmlDirectory = '/home/ajmal/data/VOCdevkit/VOC2012/Annotations/'
modifiedXmlDir = '/home/ajmal/data/VOCdevkit/VOC2012/newAnnotations/'
JPEGdirectory = '/home/ajmal/data/VOCdevkit/VOC2012/JPEGImages/'
modifiedJPEGdir = '/home/ajmal/data/VOCdevkit/VOC2012/newJPEGImages/'
listFile = '/home/ajmal/data/VOCdevkit/VOC2012/trainval.txt'
labelMap = '/home/ajmal/data/VOCdevkit/VOC2012/labelmap_voc.prototxt'

# Stop after this many images have been kept (change it to resize the subset)
maxImages = 101

# The output folders must exist before anything is written or copied into them
for outDir in (modifiedXmlDir, modifiedJPEGdir):
    if not os.path.isdir(outDir):
        os.makedirs(outDir)

# Get all the xml files into a list. Anything that is not an .xml file is
# skipped so it cannot break the parser, and the list is sorted because
# os.listdir() returns entries in arbitrary order, which would make the
# capped subset differ from run to run.
onlyfiles = sorted(
    f for f in os.listdir(xmlDirectory)
    if f.endswith('.xml') and isfile(join(xmlDirectory, f))
)

# For saving the class - file dictionary (one entry per retained object, so a
# file holding two objects of a class is listed twice under that class)
fileDict = {}

# Generate the labelmap file; labels are numbered by position in classesINeed
with open(labelMap, 'w') as labelmap:
    for label, claz in enumerate(classesINeed):
        fileDict[claz] = []
        labelmap.write(
            'item {\n name: "' + claz + '"\n label: ' + str(label)
            + '\n display_name: "' + claz + '"\n}\n'
        )

# Number of images kept so far
imgnum = 0

# Parse each XML file
with open(listFile, 'w') as listfile:
    for filename in onlyfiles:
        filelink = join(xmlDirectory, filename)
        tree = ET.parse(filelink)
        root = tree.getroot()

        # findall() returns a list, so removing elements from `root` while
        # looping over that list is safe.
        objs = root.findall('object')
        objNum = 0
        for obj in objs:
            currentObj = obj.find('name').text
            if currentObj in classesINeed:
                fileDict[currentObj].append(filename)
                objNum += 1
            else:
                root.remove(obj)

        if objNum == 0:
            continue  # drop the file, there are no objects of interest

        # Write the filtered annotation to the new folder. The path is handed
        # to ElementTree so it opens the file itself in the mode it needs; a
        # text-mode handle is rejected on Python 3.
        tree.write(join(modifiedXmlDir, filename))

        # Copy the corresponding JPEG to modifiedJPEGdir
        imageName = os.path.splitext(filename)[0] + '.jpg'
        copyfile(join(JPEGdirectory, imageName), join(modifiedJPEGdir, imageName))
        imgnum += 1

        # Make entry in the list file required for LMDB
        listfile.write('VOC2012/newJPEGImages/' + imageName
                       + ' VOC2012/newAnnotations/' + filename + '\n')

        # Take only `maxImages` images to train
        if imgnum == maxImages:
            break

# Number of retained 'car' objects; .get() keeps this from raising KeyError
# when 'car' has been removed from classesINeed.
print(len(fileDict.get('car', [])))
# Class names in index order: the 'none' placeholder at index 0, followed by
# the 20 object categories in alphabetical order.
PASCAL_CLASSES = [
    'none',
    'aeroplane', 'bicycle', 'bird', 'boat', 'bottle',
    'bus', 'car', 'cat', 'chair', 'cow',
    'diningtable', 'dog', 'horse', 'motorbike', 'person',
    'pottedplant', 'sheep', 'sofa', 'train', 'tvmonitor',
]
# Build a reduced VOC2007 test set that keeps only the classes listed in
# `classesINeed`:
#   * writes a label map with one `item { ... }` entry per retained class,
#   * strips every <object> of another class from each annotation XML,
#   * copies the surviving annotations and their JPEGs into the "new*" folders,
#   * writes one "<image> <annotation>" line per kept image to the list file.
import os
from shutil import copyfile
from os.path import isfile, join
import xml.etree.ElementTree as ET

# Fill in the classes you want to retain
classesINeed = ['none', 'car']

# Define the relevant directories
xmlDirectory = '/home/ajmal/data/VOCdevkit/VOC2007/Annotations/'
modifiedXmlDir = '/home/ajmal/data/VOCdevkit/VOC2007/newAnnotations/'
JPEGdirectory = '/home/ajmal/data/VOCdevkit/VOC2007/JPEGImages/'
modifiedJPEGdir = '/home/ajmal/data/VOCdevkit/VOC2007/newJPEGImages/'
listFile = '/home/ajmal/data/VOCdevkit/VOC2007/test.txt'
labelMap = '/home/ajmal/data/VOCdevkit/VOC2007/labelmap_voc.prototxt'

# Stop after this many images have been kept (change it to resize the subset)
maxImages = 21

# The output folders must exist before anything is written or copied into them
for outDir in (modifiedXmlDir, modifiedJPEGdir):
    if not os.path.isdir(outDir):
        os.makedirs(outDir)

# Get all the xml files into a list. Anything that is not an .xml file is
# skipped so it cannot break the parser, and the list is sorted because
# os.listdir() returns entries in arbitrary order, which would make the
# capped subset differ from run to run.
onlyfiles = sorted(
    f for f in os.listdir(xmlDirectory)
    if f.endswith('.xml') and isfile(join(xmlDirectory, f))
)

# For saving the class - file dictionary (one entry per retained object, so a
# file holding two objects of a class is listed twice under that class)
fileDict = {}

# Generate the labelmap file; labels are numbered by position in classesINeed
with open(labelMap, 'w') as labelmap:
    for label, claz in enumerate(classesINeed):
        fileDict[claz] = []
        labelmap.write(
            'item {\n name: "' + claz + '"\n label: ' + str(label)
            + '\n display_name: "' + claz + '"\n}\n'
        )

# Number of images kept so far
imgnum = 0

# Parse each XML file
with open(listFile, 'w') as listfile:
    for filename in onlyfiles:
        filelink = join(xmlDirectory, filename)
        tree = ET.parse(filelink)
        root = tree.getroot()

        # findall() returns a list, so removing elements from `root` while
        # looping over that list is safe.
        objs = root.findall('object')
        objNum = 0
        for obj in objs:
            currentObj = obj.find('name').text
            if currentObj in classesINeed:
                fileDict[currentObj].append(filename)
                objNum += 1
            else:
                root.remove(obj)

        if objNum == 0:
            continue  # drop the file, there are no objects of interest

        # Write the filtered annotation to the new folder. The path is handed
        # to ElementTree so it opens the file itself in the mode it needs; a
        # text-mode handle is rejected on Python 3.
        tree.write(join(modifiedXmlDir, filename))

        # Copy the corresponding JPEG to modifiedJPEGdir
        imageName = os.path.splitext(filename)[0] + '.jpg'
        copyfile(join(JPEGdirectory, imageName), join(modifiedJPEGdir, imageName))
        imgnum += 1

        # Make entry in the list file required for LMDB
        listfile.write('VOC2007/newJPEGImages/' + imageName
                       + ' VOC2007/newAnnotations/' + filename + '\n')

        # Take only `maxImages` images to test
        if imgnum == maxImages:
            break

# Number of retained 'car' objects; .get() keeps this from raising KeyError
# when 'car' has been removed from classesINeed.
print(len(fileDict.get('car', [])))
'''
Run in a terminal, from the Caffe root, to generate the file size list:
./build/tools/get_image_size /home/ajmal/data/VOCdevkit/ data/VOC0712/test.txt data/VOC0712/test_name_size.txt
'''
@BadarSaghir
Copy link

Change imag NUM value

@cooliscool
Copy link
Author

Reading this code I wrote several years back, I realise the whole thing is put together in a hacky way. 😅

I'm creating a file, 'trainval.txt', to keep a list of the training images I finally intend to use ( https://gist.github.com/cooliscool/6739d9ad27014c25f7816c40fc7cce60#file-create_dataset-py-L99 ). This is not the trainval_2007.txt file that is part of the dataset. Rather than using the 'trainval' file shipped with the dataset - which has the image-to-class mapping - I'm parsing this information from the XML annotations in 'VOC2012/Annotations/'. I'm not sure this is the best way to do it, because 'trainval_2012.txt' itself has this image-to-class mapping; using it would remove the need to parse multiple XML files.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment