Skip to content

Instantly share code, notes, and snippets.

@cmpute
Created July 11, 2017 20:09
Show Gist options
  • Save cmpute/ecc193bf9f8fa8b2b93cbd08a669f2ec to your computer and use it in GitHub Desktop.
Save cmpute/ecc193bf9f8fa8b2b93cbd08a669f2ec to your computer and use it in GitHub Desktop.
data path generator for reading Caltrans Travel Survey data
'''
Interface for caltrans data repo
'''
import os
import os.path as osp
# Default location of the data set on disk; used when Caltrans() is
# constructed without an explicit rootpath.
_default_data_root = r'E:\Data\Caltrans_2010-2012'
class _DirIndexer():
    '''
    Support indexing directories by []

    Wraps a directory path. ``indexer[name]`` returns a new ``_DirIndexer``
    for the sub-directory ``name`` (the path is only joined, the file system
    is not touched). The wrapped path can be retrieved with
    ``os.fspath(indexer)``, so the object can be handed directly to functions
    that accept path-like objects.
    '''
    def __init__(self, rootpath):
        # Path of the directory this indexer stands for.
        self._root = rootpath

    def __getitem__(self, index):
        '''
        Return an indexer for the sub-directory `index` of this directory
        '''
        return _DirIndexer(osp.join(self._root, index))

    def __fspath__(self):
        '''
        Return the wrapped directory path (os.PathLike protocol)
        '''
        # os.fspath also unwraps a path-like rootpath, so the protocol's
        # "must return str or bytes" requirement holds in that case too.
        return os.fspath(self._root)

    def __repr__(self):
        return '%s(%r)' % (type(self).__name__, self._root)
class _DataIndexer:
    '''
    Look up csv files of one folder by []

    ``indexer[name]`` gives the path ``<rootpath>/<name>.csv``. The path is
    only built, not checked for existence.
    '''

    def __init__(self, rootpath):
        # Folder that holds the csv files.
        self._root = rootpath

    def __getitem__(self, index):
        filename = index + '.csv'
        return osp.join(self._root, filename)
class _CycleDirIndexer(_DirIndexer):
    '''
    Access driving cycle data folder with singleton

    The folder is scanned for entries named "<sampno>_<vehno>". One instance
    is cached on the class; constructing the class again with the same path
    returns that instance without scanning the folder again.
    '''
    def __new__(cls, *args):
        path = args[0]
        # Replace the cached instance only when a different folder is asked for.
        if not hasattr(cls, '_instance') or cls._instance_path != path:
            cls._instance = super().__new__(cls)
            cls._instance_path = path
        return cls._instance

    def __init__(self, rootpath):
        super().__init__(rootpath)
        # __init__ also runs when __new__ hands back the cached instance;
        # a finished scan must not be repeated.
        if hasattr(self, '_datalist'):
            return
        found = dict()
        for name in os.listdir(rootpath):
            spname = name.split('_')
            try:
                sampno = int(spname[0])
                vehno = int(spname[1])
            except (IndexError, ValueError):
                # Not a "<sampno>_<vehno>" entry (stray file or folder): skip
                # it instead of aborting the whole scan.
                continue
            found.setdefault(sampno, []).append(vehno)
        # Publish only after a complete scan. If os.listdir raised above, the
        # cached instance has no _datalist and the next construction retries
        # instead of silently serving an empty or partial index.
        self._datalist = found

    @property
    def datalist(self):
        '''
        Get the available (sampno, vehno) list of the data

        Returns a dict mapping each sampno to the list of its vehno values.
        '''
        return self._datalist

    def __getitem__(self, index):
        '''
        Get the csv indexer of folder "<sampno>_<vehno>" for index (sampno, vehno)

        Raises ValueError when index is not a 2-tuple.
        '''
        if not (isinstance(index, tuple) and len(index) == 2):
            raise ValueError('wrong (sampno, vehno) format')
        return _CycleDataIndexer(osp.join(self._root, '%d_%d' % index))
class _CycleDataIndexer(_DataIndexer):
    '''
    Csv path lookup inside one driving cycle folder
    '''

    def __init__(self, rootpath):
        super().__init__(rootpath)

    @property
    def trips(self):
        '''
        Path of the trip summary table (trips.csv)
        '''
        return self['trips']

    @property
    def datelist(self):
        '''
        Names (text before the first '.') of the folder entries that start
        with a digit, returned as a list of strings
        '''
        return [
            entry.partition('.')[0]
            for entry in os.listdir(self._root)
            if str.isdigit(entry[0])
        ]
class _SurveyDirIndexer(_DirIndexer):
    '''
    Access driving sorted survey data folder with singleton

    The folder is scanned for entries named "<prefix>_<sampno>-<no>". One
    instance is cached per folder path, so constructing the class again with
    a path that was already scanned returns the existing instance.
    '''
    # Cache of instances keyed by (class, folder path). The class is used for
    # more than one folder (person and vehicle), and a single cached slot
    # would be evicted, and the folder rescanned, on every switch.
    _instances = {}

    def __new__(cls, *args):
        path = args[0]
        key = (cls, path)
        instance = cls._instances.get(key)
        if instance is None:
            instance = super().__new__(cls)
            cls._instances[key] = instance
        return instance

    def __init__(self, rootpath, field):
        super().__init__(rootpath)
        # __init__ also runs when __new__ hands back a cached instance;
        # a finished scan must not be repeated.
        if hasattr(self, '_datalist'):
            return
        found = dict()
        for name in os.listdir(rootpath):
            parts = name.split('_')
            if len(parts) < 2:
                # No '_' in the name: not a data folder.
                continue
            spname = parts[1]
            if not spname or not str.isdigit(spname[-1]):
                continue
            spname = spname.split('-')
            try:
                sampno = int(spname[0])
                fieldno = int(spname[1])
            except (IndexError, ValueError):
                # Not of the form "<sampno>-<no>": skip instead of aborting.
                continue
            found.setdefault(sampno, []).append(fieldno)
        self._field = field
        # Publish only after a complete scan. If os.listdir raised above, the
        # cached instance has no _datalist and the next construction retries
        # instead of silently serving an empty or partial index.
        self._datalist = found

    @property
    def datalist(self):
        '''
        Get the available (sampno, perno/vehno) list of the data

        Returns a dict mapping each sampno to the list of its perno/vehno
        values.
        '''
        return self._datalist

    def __getitem__(self, index):
        '''
        Get the csv indexer of folder "<field>_<sampno>-<no>" for index (sampno, no)

        Raises ValueError when index is not a 2-tuple.
        '''
        if not (isinstance(index, tuple) and len(index) == 2):
            raise ValueError('wrong (sampno, perno/vehno) format')
        return _SurveyDataIndexer(osp.join(self._root, self._field + '_%d-%d' % index))
class _SurveyDataIndexer(_DataIndexer):
    '''
    Csv path lookup inside one sorted survey folder
    '''

    def __init__(self, rootpath):
        super().__init__(rootpath)

    @property
    def tablelist(self):
        '''
        Get the list of tables existing in the folder (returning string)

        Each entry of the folder contributes the part of its name before
        the first '.'.
        '''
        return [entry.partition('.')[0] for entry in os.listdir(self._root)]
class Caltrans:
    '''
    Entry point for locating files of the Caltrans data set

    # Usage
    -------
    ```
    root = Caltrans()
    root.full_survey['survey_person'] # path of 'survey_person.csv' in the full survey folder

    root.drive_cycles_data.datalist # dictionary with items like (1038404, [1, 2])
    root.drive_cycles_data[1038404, 1] # cycle data in the '1038404_1' folder
    root.drive_cycles_data[1038404, 1].datelist # dates present in the '1038404_1' folder
    root.drive_cycles_data[1038404, 1].trips # path of 'trips.csv' in the '1038404_1' folder
    root.drive_cycles_data[1038404, 1]['2012-05-06'] # path of '2012-05-06.csv' in that folder

    root.survey_by_person.datalist # dictionary with items like (1041494, [2, 3, 4])
    root.survey_by_person[1041494, 2] # survey data in the 'person_1041494-2' folder
    root.survey_by_person[1041494, 2].tablelist # tables present in that folder
    root.survey_by_person[1041494, 2]['household'] # path of 'household.csv' in that folder
    # survey_by_vehicle works the same way
    ```
    '''

    def __init__(self, rootpath=_default_data_root):
        # Top-level folder of the data set; every property joins onto it.
        self._root = rootpath

    @property
    def full_survey(self):
        '''
        Csv indexer for folder "caltrans_full_survey"
        '''
        folder = osp.join(self._root, 'caltrans_full_survey')
        return _DataIndexer(folder)

    @property
    def drive_cycles(self):
        '''
        Directory indexer for folder "caltrans_processed_drive_cycles"
        '''
        folder = osp.join(self._root, 'caltrans_processed_drive_cycles')
        return _DirIndexer(folder)

    @property
    def drive_cycles_data(self):
        '''
        Cycle indexer for folder "caltrans_processed_drive_cycles/data"
        '''
        return _CycleDirIndexer(
            osp.join(self._root, 'caltrans_processed_drive_cycles', 'data'))

    @property
    def survey_by_person(self):
        '''
        Survey indexer for folder "caltrans_sorted_by_person"
        '''
        folder = osp.join(self._root, 'caltrans_sorted_by_person')
        return _SurveyDirIndexer(folder, 'person')

    @property
    def survey_by_vehicle(self):
        '''
        Survey indexer for folder "caltrans_sorted_by_vehicle"
        '''
        folder = osp.join(self._root, 'caltrans_sorted_by_vehicle')
        return _SurveyDirIndexer(folder, 'vehicle')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment