Created
July 11, 2017 20:09
-
-
Save cmpute/ecc193bf9f8fa8b2b93cbd08a669f2ec to your computer and use it in GitHub Desktop.
data path generator for reading Caltrans Travel Survey data
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
'''
Interface for the Caltrans data repo
'''
import os | |
import os.path as osp | |
# Root folder of the data repo; used by `Caltrans` when no `rootpath` is given
_default_data_root = 'E:\\Data\\Caltrans_2010-2012'
class _DirIndexer():
    '''
    Support indexing directories by []

    Indexing with a sub-directory name returns a new `_DirIndexer` rooted at
    that sub-directory, so lookups can be chained (`d['a']['b']`).
    The object implements the `os.PathLike` protocol, hence it can be handed
    directly to `open`, `os.listdir`, `os.path.join`, ... and `str()` gives
    the directory path.
    '''

    def __init__(self, rootpath):
        '''
        Remember `rootpath` as the directory this indexer points at
        '''
        self._root = rootpath

    def __getitem__(self, index):
        '''
        Get an indexer for the sub-directory named `index`
        '''
        return _DirIndexer(osp.join(self._root, index))

    def __fspath__(self):
        '''
        Return the directory path (os.PathLike protocol)
        '''
        # os.fspath also unwraps a root that is itself a path-like object
        return os.fspath(self._root)

    def __str__(self):
        return str(self._root)

    def __repr__(self):
        return '%s(%r)' % (type(self).__name__, self._root)
class _DataIndexer:
    '''
    Map a table name to the path of its csv file by []
    '''

    def __init__(self, rootpath):
        # Folder that holds the csv files
        self._root = rootpath

    def __getitem__(self, index):
        '''
        Get the path of '<index>.csv' inside the root folder
        '''
        filename = index + '.csv'
        return osp.join(self._root, filename)
class _CycleDirIndexer(_DirIndexer):
    '''
    Access driving cycle data folder with singleton

    The most recently created instance is cached on the class together with
    its path. Constructing the class again with the same path returns that
    instance, so the folder is not listed a second time.
    '''

    def __new__(cls, *args):
        path = args[0]
        # Reuse the cached instance only when it was built for the same path
        if not hasattr(cls, '_instance') or cls._instance_path != path:
            cls._instance = super().__new__(cls)
            cls._instance_path = path
        return cls._instance

    def __init__(self, rootpath):
        '''
        Scan `rootpath` for entries named '<sampno>_<vehno>...' and collect
        them into the datalist. Entries that do not follow this pattern are
        skipped.
        '''
        super().__init__(rootpath)
        # __init__ also runs when __new__ handed back the cached instance
        if hasattr(self, '_datalist'):
            return
        datalist = dict()
        for name in os.listdir(rootpath):
            spname = name.split('_')
            try:
                sampno = int(spname[0])
                vehno = int(spname[1])
            except (IndexError, ValueError):
                # Not a '<sampno>_<vehno>' entry, e.g. a stray system file
                continue
            datalist.setdefault(sampno, []).append(vehno)
        # Publish only a complete scan: if listing fails, the cached instance
        # stays without a datalist and the next construction scans again
        self._datalist = datalist

    @property
    def datalist(self):
        '''
        Get the available data as a dictionary {sampno: [vehno, ...]}
        '''
        return self._datalist

    def __getitem__(self, index):
        '''
        Get the data indexer of folder '<sampno>_<vehno>' for the tuple
        index (sampno, vehno); raise ValueError for any other index
        '''
        if isinstance(index, tuple) and len(index) == 2:
            return _CycleDataIndexer(osp.join(self._root, '%d_%d' % index))
        else:
            raise ValueError('wrong (sampno, vehno) format')
class _CycleDataIndexer(_DataIndexer):
    '''
    Index the .csv files of one driving cycle folder
    '''

    def __init__(self, rootpath):
        super().__init__(rootpath)

    @property
    def trips(self):
        '''
        Path of the trip summary data (trips.csv)
        '''
        return self['trips']

    @property
    def datelist(self):
        '''
        Dates found in the folder, as strings: every entry whose name starts
        with a digit, cut at its first '.'
        '''
        return [
            entry.partition('.')[0]
            for entry in os.listdir(self._root)
            if entry[0].isdigit()
        ]
class _SurveyDirIndexer(_DirIndexer):
    '''
    Access sorted survey data folder with singleton

    The most recently created instance is cached on the class together with
    its path. Constructing the class again with the same path returns that
    instance, so the folder is not listed a second time.
    '''

    def __new__(cls, *args):
        path = args[0]
        # Reuse the cached instance only when it was built for the same path
        if not hasattr(cls, '_instance') or cls._instance_path != path:
            cls._instance = super().__new__(cls)
            cls._instance_path = path
        return cls._instance

    def __init__(self, rootpath, field):
        '''
        Scan `rootpath` for entries named '<prefix>_<sampno>-<fieldno>' and
        collect them into the datalist. `field` is the folder name prefix
        used by [] to build '<field>_<sampno>-<fieldno>'. Entries that do not
        follow the pattern are skipped.
        '''
        super().__init__(rootpath)
        # __init__ also runs when __new__ handed back the cached instance
        if hasattr(self, '_datalist'):
            return
        self._field = field
        datalist = dict()
        for name in os.listdir(rootpath):
            parts = name.split('_')
            # Need a second '_'-separated part whose last character is a
            # digit; the [-1:] slice keeps an empty part from raising
            if len(parts) < 2 or not parts[1][-1:].isdigit():
                continue
            spname = parts[1].split('-')
            try:
                sampno = int(spname[0])
                fieldno = int(spname[1])
            except (IndexError, ValueError):
                # Not a '<sampno>-<fieldno>' pair
                continue
            datalist.setdefault(sampno, []).append(fieldno)
        # Publish only a complete scan: if listing fails, the cached instance
        # stays without a datalist and the next construction scans again
        self._datalist = datalist

    @property
    def datalist(self):
        '''
        Get the available data as a dictionary {sampno: [perno/vehno, ...]}
        '''
        return self._datalist

    def __getitem__(self, index):
        '''
        Get the data indexer of folder '<field>_<sampno>-<no>' for the tuple
        index (sampno, perno/vehno); raise ValueError for any other index
        '''
        if isinstance(index, tuple) and len(index) == 2:
            return _SurveyDataIndexer(osp.join(self._root, self._field + '_%d-%d' % index))
        else:
            raise ValueError('wrong (sampno, perno/vehno) format')
class _SurveyDataIndexer(_DataIndexer):
    '''
    Index the .csv tables of one sorted survey folder
    '''

    def __init__(self, rootpath):
        super().__init__(rootpath)

    @property
    def tablelist(self):
        '''
        Table names found in the folder, as strings: every entry name cut at
        its first '.'
        '''
        return [entry.partition('.')[0] for entry in os.listdir(self._root)]
class Caltrans():
    '''
    Entry point for locating files of the Caltrans data

    # Usage
    -------
    ```
    root = Caltrans()
    root.full_survey['survey_person'] # path of 'survey_person.csv' in full survey folder
    root.drive_cycles_data.datalist # dictionary with data like (1038404, [1, 2])
    root.drive_cycles_data[1038404, 1] # cycle data in '1038404_1' folder
    root.drive_cycles_data[1038404, 1].datelist # dates in '1038404_1' folder
    root.drive_cycles_data[1038404, 1].trips # path of 'trips.csv' of '1038404_1' folder
    root.drive_cycles_data[1038404, 1]['2012-05-06'] # path of '2012-05-06.csv' in that folder
    root.survey_by_person.datalist # dictionary with data like (1041494, [2, 3, 4])
    root.survey_by_person[1041494, 2] # survey data in 'person_1041494-2' folder
    root.survey_by_person[1041494, 2].tablelist # tables in the folder
    root.survey_by_person[1041494, 2]['household'] # path of 'household.csv' in the folder
    # same for survey_by_vehicle
    ```
    '''

    def __init__(self, rootpath=_default_data_root):
        self._root = rootpath

    def _subpath(self, *parts):
        # Join the given folder names onto the data root
        return osp.join(self._root, *parts)

    @property
    def full_survey(self):
        '''
        Csv indexer of folder "caltrans_full_survey"
        '''
        folder = self._subpath('caltrans_full_survey')
        return _DataIndexer(folder)

    @property
    def drive_cycles(self):
        '''
        Directory indexer of folder "caltrans_processed_drive_cycles"
        '''
        folder = self._subpath('caltrans_processed_drive_cycles')
        return _DirIndexer(folder)

    @property
    def drive_cycles_data(self):
        '''
        Cycle indexer of folder "caltrans_processed_drive_cycles/data"
        '''
        folder = self._subpath('caltrans_processed_drive_cycles', 'data')
        return _CycleDirIndexer(folder)

    @property
    def survey_by_person(self):
        '''
        Survey indexer of folder "caltrans_sorted_by_person"
        '''
        folder = self._subpath('caltrans_sorted_by_person')
        return _SurveyDirIndexer(folder, 'person')

    @property
    def survey_by_vehicle(self):
        '''
        Survey indexer of folder "caltrans_sorted_by_vehicle"
        '''
        folder = self._subpath('caltrans_sorted_by_vehicle')
        return _SurveyDirIndexer(folder, 'vehicle')
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment