Created
September 26, 2016 18:27
-
-
Save shubh-agrawal/2ec69019b056941882d62c3c102f243b to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Arguments required.
# Pre-requisites: 1. Format the dataset as explained by deboc
#                 2. Create the ImageSet text file using 'ls Annotations/ -m | sed s/\\s/\\n/g | sed s/.xml//g | sed s/,//g > ImageSets/train.txt'
# 1st argument = ImageSet file name        Example: train.txt
# 2nd argument = path to custom dataset    Example: ~/py-faster-rcnn/data/VOC_devkit/
# Usage instructions:
# $ python discard_shuffle_data.py [arg1] [arg2]
import numpy as np | |
import random, sys, os | |
import cv2 | |
import xml.etree.ElementTree as ET | |
# Image file extensions tried, in this order, when resolving an image index
# to a file on disk.
ext = [
    '.png',
    '.jpg',
    '.jpeg',
]

# Image indexes whose annotations fail the bounding-box check; filled in by
# the script body and used to filter the rewritten image-set file.
discard_list = list()
def get_image_path_from_index(index):
    """Resolve an image index to the path of its image file.

    Each extension in the module-level ``ext`` list is appended to *index*
    in turn and looked up under ``<data_path>/data/Images``; the first
    candidate that exists on disk is returned.

    Raises AssertionError if none of the candidates exists (the message
    names the last candidate that was tried).
    """
    images_dir = os.path.join(data_path, 'data', 'Images')
    candidates = [os.path.join(images_dir, index + suffix) for suffix in ext]
    # First existing candidate wins; fall back to the last one so the
    # failure message below reports a concrete path.
    image_path = next(
        (candidate for candidate in candidates if os.path.exists(candidate)),
        candidates[-1],
    )
    assert os.path.exists(image_path), \
        'Path does not exist: {}'.format(image_path)
    return image_path
def get_annotation_path_from_index(index):
    """Return the path of the XML annotation file for *index*.

    The file is expected at ``<data_path>/data/Annotations/<index>.xml``.

    Raises AssertionError if that file does not exist.
    """
    xml_name = index + '.xml'
    annotations_dir = os.path.join(data_path, 'data', 'Annotations')
    annotation_path = os.path.join(annotations_dir, xml_name)
    found = os.path.exists(annotation_path)
    assert found, 'Path does not exist: {}'.format(annotation_path)
    return annotation_path
def get_image_size(image_path):
    """Return the ``(height, width)`` of the image stored at *image_path*.

    The image is loaded with ``cv2.imread(image_path, 0)`` (flag 0 is
    ``cv2.IMREAD_GRAYSCALE``) and the first two entries of its ``shape``
    are returned.

    Previously the two values were computed but never returned, so the
    function always produced ``None``.

    Raises:
        IOError: if ``cv2.imread`` returns ``None``, which is how OpenCV
            signals that the file could not be read.
    """
    img = cv2.imread(image_path, 0)
    if img is None:
        # Without this guard the failure shows up as an opaque
        # AttributeError on ``None.shape``.
        raise IOError('Could not read image: {}'.format(image_path))
    height, width = img.shape[:2]
    return height, width
def get_bbox_size():
    """Placeholder for a bounding-box size lookup (not implemented).

    Intended to return the bbox size that would be compared against the
    image height and width; the original note says the XML was meant to be
    parsed here. The function currently takes no arguments, does nothing
    and returns None.
    """
    return None
# ---------------------------------------------------------------------------
# Script body.
#
# 1. Read the image-set file named by the two command-line arguments.
# 2. For every image index, parse its XML annotation and discard the index
#    if any bounding box fails the sanity check below.
# 3. Shuffle the surviving indexes and write them back to the same file.
# ---------------------------------------------------------------------------
if len(sys.argv) < 3:
    sys.exit("Usage: python discard_shuffle_data.py "
             "[imageset file name] [path to dataset]")

imageset_filename = sys.argv[1]
# Module-level on purpose: the get_*_path_from_index helpers read ``data_path``.
data_path = sys.argv[2]

imageset_file_path = os.path.join(data_path, 'data', 'ImageSets', imageset_filename)
if not os.path.exists(imageset_file_path):
    # Explicit raise rather than ``assert`` so the check survives ``python -O``.
    raise IOError('Path does not exist: {}'.format(imageset_file_path))

with open(imageset_file_path) as imageset_file:
    lines = imageset_file.readlines()

# One index per line. Strip the newline and drop blank lines so an empty
# line does not get looked up as an image called ''.
image_indexes = [line.strip() for line in lines if line.strip()]

for image_index in image_indexes:
    # Called only for its existence check: it raises if no image file
    # matches this index.
    get_image_path_from_index(image_index)
    annotation_path = get_annotation_path_from_index(image_index)

    tree = ET.parse(annotation_path)
    img_max_width = int(tree.find('.//width').text)
    img_max_height = int(tree.find('.//height').text)

    for obj in tree.findall('object'):
        bbox = obj.find('bndbox')
        # Each coordinate is shifted down by one (presumably converting
        # 1-based annotation coordinates to 0-based pixels -- confirm).
        x1 = int(bbox.find('xmin').text) - 1
        y1 = int(bbox.find('ymin').text) - 1
        x2 = int(bbox.find('xmax').text) - 1
        y2 = int(bbox.find('ymax').text) - 1

        # NOTE(review): this predicate is kept exactly as it was. It compares
        # the box *extent* with the image size, so a box of legal size that
        # overhangs the right/bottom edge is not caught, and xmin == 0
        # (x1 == -1) is accepted. Confirm whether that is intended.
        if (x1 < -1 or y1 < -1
                or (x2 - x1) > img_max_width
                or (y2 - y1) > img_max_height):
            discard_list.append(image_index)
            # Single pre-formatted string so the output is identical under
            # Python 2 and Python 3.
            print("Discarded image index :  {}".format(image_index))
            break  # one bad box is enough to discard the whole image

print("No. of discarded indexes :  {}".format(len(discard_list)))

random.shuffle(image_indexes)  # randomise the order of the image indexes

# Set lookup keeps the filtering below linear in the number of indexes.
discarded = set(discard_list)
with open(imageset_file_path, "w") as text_file:
    # Always append the newline ourselves: if the input's last line had no
    # trailing newline, writing the raw line after shuffling would glue two
    # indexes together.
    text_file.writelines(
        index + "\n" for index in image_indexes if index not in discarded
    )
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment