Created
April 18, 2017 07:37
-
-
Save IanBoyanZhang/06fd5e6ed1a004ede2c57432980592ad to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
""" | |
parse XML files containing tracklet info for kitti data base (raw data section) | |
(http://cvlibs.net/datasets/kitti/raw_data.php) | |
No guarantees that this code is correct, usage is at your own risk! | |
created by Christian Herdtweck, Max Planck Institute for Biological Cybernetics | |
(christian.herdtweck@tuebingen.mpg.de) | |
requires numpy! | |
example usage: | |
import parseTrackletXML as xmlParser | |
kittiDir = '/path/to/kitti/data' | |
drive = '2011_09_26_drive_0001' | |
xmlParser.example(kittiDir, drive) | |
or simply on command line: | |
python parseTrackletXML.py | |
""" | |
# Version History: | |
# 4/7/12 Christian Herdtweck: seems to work with a few random test xml tracklet files; | |
# converts file contents to ElementTree and then to list of Tracklet objects; | |
# Tracklet objects have str and iter functions | |
# 5/7/12 ch: added constants for state, occlusion, truncation and added consistency checks | |
# 30/1/14 ch: create example function from example code | |
from sys import argv as cmdLineArgs | |
from xml.etree.ElementTree import ElementTree | |
import numpy as np | |
import itertools | |
from warnings import warn | |
# ---- pose state: numeric codes and the lookup from the xml text ----
STATE_UNSET = 0
STATE_INTERP = 1
STATE_LABELED = 2
stateFromText = {
    '0': STATE_UNSET,
    '1': STATE_INTERP,
    '2': STATE_LABELED,
}

# ---- occlusion: numeric codes and the lookup from the xml text ----
OCC_UNSET = 255  # -1 as uint8
OCC_VISIBLE = 0
OCC_PARTLY = 1
OCC_FULLY = 2
occFromText = {
    '-1': OCC_UNSET,
    '0': OCC_VISIBLE,
    '1': OCC_PARTLY,
    '2': OCC_FULLY,
}

# ---- truncation: numeric codes and the lookup from the xml text ----
TRUNC_UNSET = 255  # -1 as uint8, but in xml files the value '99' is used!
TRUNC_IN_IMAGE = 0
TRUNC_TRUNCATED = 1
TRUNC_OUT_IMAGE = 2
TRUNC_BEHIND_IMAGE = 99
truncFromText = {
    '-1': TRUNC_UNSET,
    '0': TRUNC_IN_IMAGE,
    '1': TRUNC_TRUNCATED,
    '2': TRUNC_OUT_IMAGE,
    '99': TRUNC_BEHIND_IMAGE,
}
class Tracklet(object):
    r""" one annotated object track: a single object described over consecutive frames

    Instances are filled in by parseXML. The usual way to consume them is

    for trackletObj in parseXML(trackletFile):
        for translation, rotation, state, occlusion, truncation, amtOcclusion, amtBorders, absoluteFrameNumber in trackletObj:
            ... your code here ...
        #end: for all frames
    #end: for all tracklets

    absoluteFrameNumber runs over range(firstFrame, firstFrame+nFrames);
    amtOcclusion and amtBorders are None for every frame when amtOccs is None.

    The per-frame data can also be read directly from the attributes below:
    objectType, size, firstFrame, nFrames, trans, rots, states, occs, truncs,
    amtOccs and amtBorders.
    """

    # all fields start out unset on the class; __init__ only allocates size
    objectType = None   # text of the objectType tag
    size = None         # len-3 float array: (height, width, length)
    firstFrame = None   # absolute number of the first frame
    nFrames = None      # number of poses in the track
    trans = None        # n x 3 float array (x,y,z)
    rots = None         # n x 3 float array (x,y,z)
    states = None       # len-n array of states
    occs = None         # n x 2 array (occlusion, occlusion_kf)
    truncs = None       # len-n array of truncation
    amtOccs = None      # None or (n x 2) float array (amt_occlusion, amt_occlusion_kf)
    amtBorders = None   # None or (n x 3) float array (amt_border_l / _r / _kf)

    def __init__(self):
        r""" create a Tracklet without any info; size is allocated as three NaNs """
        self.size = np.full(3, np.nan, dtype=float)

    def __str__(self):
        r""" return a short human-readable description of the tracklet

        used implicitly by print(trackletObj) and by str(trackletObj)
        """
        template = '[Tracklet over {0} frames for {1}]'
        return template.format(self.nFrames, self.objectType)

    def __iter__(self):
        r""" return an iterator over per-frame tuples

        Each tuple is (translation, rotation, state, occlusion, truncation,
        amtOcclusion, amtBorders, absoluteFrameNumber). Used whenever code
        loops over a tracklet object or calls iter(trackletObj).
        """
        frameNumbers = range(self.firstFrame, self.firstFrame + self.nFrames)
        if self.amtOccs is not None:
            amtOccs = self.amtOccs
            amtBorders = self.amtBorders
        else:
            # no amt_* data: hand out None for both columns in every frame
            amtOccs = itertools.repeat(None)
            amtBorders = itertools.repeat(None)
        return zip(self.trans, self.rots, self.states, self.occs, self.truncs,
                   amtOccs, amtBorders, frameNumbers)
#end: class Tracklet
# tracklet tags holding the object size: tag -> index into Tracklet.size
_SIZE_INDEX = {'h': 0, 'w': 1, 'l': 2}

# pose tags holding a plain float:
# tag -> (Tracklet attribute, column, whether it is one of the optional amt_* fields)
_POSE_FLOAT_FIELDS = {
    'tx': ('trans', 0, False),
    'ty': ('trans', 1, False),
    'tz': ('trans', 2, False),
    'rx': ('rots', 0, False),
    'ry': ('rots', 1, False),
    'rz': ('rots', 2, False),
    'amt_occlusion': ('amtOccs', 0, True),
    'amt_occlusion_kf': ('amtOccs', 1, True),
    'amt_border_l': ('amtBorders', 0, True),
    'amt_border_r': ('amtBorders', 1, True),
    'amt_border_kf': ('amtBorders', 2, True),
}


def _parsePoseItem(poseElem, track, frameIdx):
    r""" store all fields of one pose item in row frameIdx of the track's arrays

    :returns: True if the pose contained at least one amt_* field
    """
    hasAmt = False
    for poseInfo in poseElem:
        tag = poseInfo.tag
        if tag in _POSE_FLOAT_FIELDS:
            attrName, column, isAmt = _POSE_FLOAT_FIELDS[tag]
            getattr(track, attrName)[frameIdx, column] = float(poseInfo.text)
            hasAmt = hasAmt or isAmt
        elif tag == 'state':
            track.states[frameIdx] = stateFromText[poseInfo.text]
        elif tag == 'occlusion':
            track.occs[frameIdx, 0] = occFromText[poseInfo.text]
        elif tag == 'occlusion_kf':
            track.occs[frameIdx, 1] = occFromText[poseInfo.text]
        elif tag == 'truncation':
            track.truncs[frameIdx] = truncFromText[poseInfo.text]
        else:
            raise ValueError('unexpected tag in poses item: {0}!'.format(tag))
    return hasAmt


def _parsePoses(posesElem, track, frameIdx):
    r""" read one poses element into track, continuing at frameIdx

    :returns: tuple (index of the next frame to fill, True if any amt_* field was seen)
    """
    hasAmt = False
    for pose in posesElem:
        if pose.tag == 'count':  # this should come before the others
            if track.nFrames is not None:
                raise ValueError('there are several pose lists for a single track!')
            elif frameIdx is not None:
                raise ValueError('?!')
            track.nFrames = int(pose.text)
            # allocate all per-frame arrays, marked as "not set" with nan
            # NOTE(review): nan * uint8 array presumably promotes to float, so
            # states / occs / truncs are not actually uint8 -- confirm before
            # relying on their dtype; expressions kept unchanged on purpose
            track.trans = np.nan * np.ones((track.nFrames, 3), dtype=float)
            track.rots = np.nan * np.ones((track.nFrames, 3), dtype=float)
            track.states = np.nan * np.ones(track.nFrames, dtype='uint8')
            track.occs = np.nan * np.ones((track.nFrames, 2), dtype='uint8')
            track.truncs = np.nan * np.ones(track.nFrames, dtype='uint8')
            track.amtOccs = np.nan * np.ones((track.nFrames, 2), dtype=float)
            track.amtBorders = np.nan * np.ones((track.nFrames, 3), dtype=float)
            frameIdx = 0
        elif pose.tag == 'item_version':
            pass
        elif pose.tag == 'item':
            # pose in one frame
            if frameIdx is None:
                raise ValueError('pose item came before number of poses!')
            if _parsePoseItem(pose, track, frameIdx):
                hasAmt = True
            frameIdx += 1
        else:
            raise ValueError('unexpected pose info: {0}!'.format(pose.tag))
    return frameIdx, hasAmt


def _parseTracklet(trackletElem, trackletIdx):
    r""" convert one tracklet item element to a Tracklet object

    trackletIdx is only used in warning messages.
    """
    track = Tracklet()
    isFinished = False
    hasAmt = False
    frameIdx = None
    for info in trackletElem:
        if isFinished:
            raise ValueError('more info on element after finished!')
        tag = info.tag
        if tag == 'objectType':
            track.objectType = info.text
        elif tag in _SIZE_INDEX:
            track.size[_SIZE_INDEX[tag]] = float(info.text)
        elif tag == 'first_frame':
            track.firstFrame = int(info.text)
        elif tag == 'poses':
            # this info is the possibly long list of poses
            frameIdx, posesHaveAmt = _parsePoses(info, track, frameIdx)
            hasAmt = hasAmt or posesHaveAmt
        elif tag == 'finished':
            isFinished = True
        else:
            raise ValueError('unexpected tag in tracklets: {0}!'.format(tag))
    #end: for all fields in current tracklet

    # some final consistency checks on new tracklet
    if not isFinished:
        warn('tracklet {0} was not finished!'.format(trackletIdx))
    if track.nFrames is None:
        # no pose count seen, so rots was never allocated: skip the pose checks
        warn('tracklet {0} contains no information!'.format(trackletIdx))
    else:
        if frameIdx != track.nFrames:
            warn('tracklet {0} is supposed to have {1} frames, but parser found {2}!'.format(
                trackletIdx, track.nFrames, frameIdx))
        if np.abs(track.rots[:, :2]).sum() > 1e-16:
            warn('track contains rotation other than yaw!')

    # if amtOccs / amtBorders are not set, set them to None
    if not hasAmt:
        track.amtOccs = None
        track.amtBorders = None
    return track


def parseXML(trackletFile):
    r""" parse tracklet xml file and convert results to list of Tracklet objects

    Inconsistencies that still allow parsing (unfinished tracklet, tracklet
    without poses, frame or tracklet counts that do not match what was found,
    rotation other than yaw) are reported through warnings.warn.

    :param trackletFile: name of a tracklet xml file
    :returns: list of Tracklet objects read from xml file
    :raises ValueError: if the file has no tracklets element or contains an
        unexpected tag
    """
    # convert tracklet XML data to a tree structure
    eTree = ElementTree()
    print('parsing tracklet file', trackletFile)
    with open(trackletFile) as f:
        eTree.parse(f)

    # now convert output to list of Tracklet objects
    trackletsElem = eTree.find('tracklets')
    if trackletsElem is None:
        # find() returns None when nothing matches; fail with a clear message
        raise ValueError('no tracklets element found in {0}!'.format(trackletFile))

    tracklets = []
    trackletIdx = 0
    nTracklets = None
    for trackletElem in trackletsElem:
        if trackletElem.tag == 'count':
            nTracklets = int(trackletElem.text)
            print('file contains', nTracklets, 'tracklets')
        elif trackletElem.tag == 'item_version':
            pass
        elif trackletElem.tag == 'item':
            # a tracklet
            tracklets.append(_parseTracklet(trackletElem, trackletIdx))
            trackletIdx += 1
        else:
            raise ValueError('unexpected tracklet info')
    #end: for tracklet list items
    print('loaded', trackletIdx, 'tracklets')

    # final consistency check
    if trackletIdx != nTracklets:
        warn('according to xml information the file has {0} tracklets, but parser found {1}!'.format(nTracklets, trackletIdx))
    return tracklets
#end: function parseXML
def example(kittiDir=None, drive=None):
    r""" demonstrate how to read a tracklet file and loop over its contents

    Reads <kittiDir>/<drive>/tracklet_labels.xml with parseXML, prints one line
    per tracklet and, for every frame in which the object is (at least partly)
    in the image, computes the 3D bounding box corners in velodyne coordinates
    and the yaw as seen from the camera. The computed values are not returned;
    the function only shows how to obtain them.

    :param kittiDir: kitti base dir; asked for interactively if None
    :param drive: name of the drive; asked for interactively if None, where an
        empty answer selects the default drive
    """
    from os.path import join, expanduser
    try:
        import readline  # only makes the interactive prompt behave more fancy
    except ImportError:
        pass  # not available on every platform; the prompts work without it
    # raw_input only exists in python 2; python 3 calls the same function input
    try:
        readInput = raw_input
    except NameError:
        readInput = input

    DEFAULT_DRIVE = '2011_09_26_drive_0001'
    twoPi = 2.*np.pi

    # get dir names
    if kittiDir is None:
        kittiDir = expanduser(readInput('please enter kitti base dir (e.g. ~/path/to/kitti): ').strip())
    if drive is None:
        drive = readInput('please enter drive name (default {0}): '.format(DEFAULT_DRIVE)).strip()
        if len(drive) == 0:
            drive = DEFAULT_DRIVE

    # read tracklets from file
    myTrackletFile = join(kittiDir, drive, 'tracklet_labels.xml')
    tracklets = parseXML(myTrackletFile)

    # loop over tracklets
    for iTracklet, tracklet in enumerate(tracklets):
        print('tracklet {0: 3d}: {1}'.format(iTracklet, tracklet))

        # this part is inspired by kitti object development kit matlab code: computeBox3D
        h, w, l = tracklet.size
        # 3 x 8 corner coordinates, in velodyne coordinates around zero point
        # and without orientation yet
        trackletBox = np.array([
            [-l/2, -l/2,  l/2, l/2, -l/2, -l/2,  l/2, l/2],
            [ w/2, -w/2, -w/2, w/2,  w/2, -w/2, -w/2, w/2],
            [ 0.0,  0.0,  0.0, 0.0,    h,    h,    h,   h]])

        # loop over all data in tracklet
        for translation, rotation, state, occlusion, truncation, amtOcclusion, amtBorders, absoluteFrameNumber \
                in tracklet:

            # determine if object is in the image; otherwise continue
            if truncation not in (TRUNC_IN_IMAGE, TRUNC_TRUNCATED):
                continue

            # re-create 3D bounding box in velodyne coordinate system
            yaw = rotation[2]   # other rotations are 0 in all xml files I checked
            assert np.abs(rotation[:2]).sum() == 0, 'object rotations other than yaw given!'
            rotMat = np.array([
                [np.cos(yaw), -np.sin(yaw), 0.0],
                [np.sin(yaw),  np.cos(yaw), 0.0],
                [        0.0,          0.0, 1.0]])
            cornerPosInVelo = np.dot(rotMat, trackletBox) + np.tile(translation, (8, 1)).T

            # calc yaw as seen from the camera (i.e. 0 degree = facing away from cam), as opposed to
            #   car-centered yaw (i.e. 0 degree = same orientation as car).
            #   makes quite a difference for objects in periphery!
            # Result is in [0, 2pi]
            x, y, z = translation
            yawVisual = ( yaw - np.arctan2(y, x) ) % twoPi
        #end: for all frames in track
    #end: for all tracks
#end: function example
# when somebody runs this file as a script: | |
# run example if no arg or only 'example' was given as arg | |
# otherwise run parseXML | |
if __name__ == "__main__":
    # everything after the script name (cmdLineArgs[0] is 'parseTrackletXML.py')
    scriptArgs = cmdLineArgs[1:]
    if not scriptArgs or scriptArgs == ['example']:
        # no argument at all, or just the word 'example': run the demo
        example()
    else:
        # anything else is handed to the parser unchanged
        parseXML(*scriptArgs)
# (created using vim - the world's best text editor) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment