cmpute/caltrans.py

## caltrans.py
'''
Interface for caltrans data repo
'''

import os
import os.path as osp

# Default root folder of the Caltrans data set; used as the default value of
# the `rootpath` argument of `Caltrans.__init__`. This is a Windows-style
# absolute path, so pass an explicit root on other machines.
_default_data_root = 'E:\\Data\\Caltrans_2010-2012'

class _DirIndexer():
    '''
    Support indexing directories by []

    `indexer[name]` returns a new `_DirIndexer` whose root is `name` joined
    onto the current root. The wrapped path can be obtained with
    `os.fspath(indexer)`, so an indexer can be handed directly to `open`,
    `os.listdir` and other functions that accept path-like objects.
    '''
    def __init__(self, rootpath):
        '''
        rootpath: path of the directory to wrap (stored as given; existence
        is not checked)
        '''
        self._root = rootpath

    def __getitem__(self, index):
        '''
        Return a `_DirIndexer` for the entry `index` below the current root
        '''
        return _DirIndexer(osp.join(self._root, index))

    def __fspath__(self):
        '''
        Return the wrapped path (os.PathLike protocol)
        '''
        # os.fspath normalises a path-like root (e.g. pathlib.Path) to the
        # str/bytes that the protocol requires.
        return os.fspath(self._root)

    def __repr__(self):
        return '%s(%r)' % (type(self).__name__, self._root)

class _DataIndexer():
    '''
    Map table names to .csv file paths with []
    '''
    def __init__(self, rootpath):
        # Folder that holds the .csv files; stored as given, not validated.
        self._root = rootpath

    def __getitem__(self, index):
        '''
        Return the path of "<index>.csv" inside the wrapped folder
        '''
        filename = index + '.csv'
        return osp.join(self._root, filename)

class _CycleDirIndexer(_DirIndexer):
    '''
    Access driving cycle data folder with singleton

    Only the most recently requested root path is cached: constructing the
    class again with the same path returns the same object without scanning
    the folder again, while a different path replaces the cached object.
    '''
    def __new__(cls, rootpath):
        # Reuse the cached instance only while the requested path matches.
        if not hasattr(cls, '_instance') or cls._instance_path != rootpath:
            cls._instance = super().__new__(cls)
            cls._instance_path = rootpath
        return cls._instance

    def __init__(self, rootpath):
        '''
        rootpath: folder whose entries are named "<sampno>_<vehno>"

        Propagates the error raised by os.listdir when the folder cannot be
        listed; in that case nothing is cached on the instance, so a later
        construction scans the folder again.
        '''
        super().__init__(rootpath)
        # __init__ also runs when __new__ hands back the cached instance, so
        # skip the scan if it has already succeeded.
        if hasattr(self, '_datalist'):
            return
        # Collect into a local and publish it only after the scan succeeded;
        # otherwise a failed listdir would leave an empty dict on the cached
        # singleton and every later access would silently see no data.
        found = dict()
        for name in os.listdir(rootpath):
            parts = name.split('_')
            try:
                sampno, vehno = int(parts[0]), int(parts[1])
            except (IndexError, ValueError):
                # Not a "<sampno>_<vehno>" entry (stray file, OS metadata...)
                continue
            found.setdefault(sampno, []).append(vehno)
        self._datalist = found

    @property
    def datalist(self):
        '''
        Get the available data as a dictionary {sampno: [vehno, ...]}
        '''
        return self._datalist

    def __getitem__(self, index):
        '''
        Return the _CycleDataIndexer of folder "<sampno>_<vehno>"

        index: (sampno, vehno) tuple of integers
        Raises ValueError when index is not a tuple of length 2.
        '''
        if not (isinstance(index, tuple) and len(index) == 2):
            raise ValueError('wrong (sampno, vehno) format')
        return _CycleDataIndexer(osp.join(self._root, '%d_%d' % index))

class _CycleDataIndexer(_DataIndexer):
    '''
    Resolve the .csv files stored in one driving cycle folder
    '''
    def __init__(self, rootpath):
        # Nothing extra to set up; the base class stores the folder path.
        super().__init__(rootpath)

    @property
    def trips(self):
        '''
        Path of the trip summary table (trips.csv) in this folder
        '''
        return self['trips']

    @property
    def datelist(self):
        '''
        List the dates present in the folder, as strings

        An entry counts as a date when its name starts with a digit; the
        returned string is the part of the name before the first '.'.
        '''
        dates = []
        for entry in os.listdir(self._root):
            if str.isdigit(entry[0]):
                dates.append(entry.partition('.')[0])
        return dates

class _SurveyDirIndexer(_DirIndexer):
    '''
    Access sorted survey data folder with singleton

    Only the most recently requested (root path, field) pair is cached:
    constructing the class again with the same pair returns the same object
    without scanning the folder again, while a different pair replaces the
    cached object.
    '''
    def __new__(cls, rootpath, field):
        # The field is part of the key: the same folder opened with another
        # prefix must not reuse an instance that carries the old prefix.
        key = (rootpath, field)
        if not hasattr(cls, '_instance') or cls._instance_key != key:
            cls._instance = super().__new__(cls)
            cls._instance_key = key
        return cls._instance

    def __init__(self, rootpath, field):
        '''
        rootpath: folder whose entries are named "<prefix>_<sampno>-<no>"
        field: prefix used by [] to build folder names (the Caltrans class
        passes 'person' or 'vehicle')

        Propagates the error raised by os.listdir when the folder cannot be
        listed; in that case nothing is cached on the instance, so a later
        construction scans the folder again.
        '''
        super().__init__(rootpath)
        self._field = field
        # __init__ also runs when __new__ hands back the cached instance, so
        # skip the scan if it has already succeeded.
        if hasattr(self, '_datalist'):
            return
        # Collect into a local and publish it only after the scan succeeded;
        # otherwise a failed listdir would leave an empty dict on the cached
        # singleton and every later access would silently see no data.
        found = dict()
        for name in os.listdir(rootpath):
            try:
                ident = name.split('_')[1]
                # Entries whose id part does not end with a digit are not
                # "<sampno>-<no>" folders.
                if not str.isdigit(ident[-1]):
                    continue
                parts = ident.split('-')
                sampno, fieldno = int(parts[0]), int(parts[1])
            except (IndexError, ValueError):
                # No '_' or '-' in the name, or non-numeric ids: skip the
                # entry instead of aborting the whole scan.
                continue
            found.setdefault(sampno, []).append(fieldno)
        self._datalist = found

    @property
    def datalist(self):
        '''
        Get the available data as a dictionary {sampno: [perno/vehno, ...]}
        '''
        return self._datalist

    def __getitem__(self, index):
        '''
        Return the _SurveyDataIndexer of folder "<field>_<sampno>-<no>"

        index: (sampno, perno/vehno) tuple of integers
        Raises ValueError when index is not a tuple of length 2.
        '''
        if not (isinstance(index, tuple) and len(index) == 2):
            raise ValueError('wrong (sampno, perno/vehno) format')
        return _SurveyDataIndexer(osp.join(self._root, self._field + '_%d-%d' % index))

class _SurveyDataIndexer(_DataIndexer):
    '''
    Resolve the .csv files stored in one survey folder
    '''
    def __init__(self, rootpath):
        # Nothing extra to set up; the base class stores the folder path.
        super().__init__(rootpath)

    @property
    def tablelist(self):
        '''
        List the tables present in the folder, as strings

        Each returned string is the part of an entry name before its
        first '.'.
        '''
        return [entry.partition('.')[0] for entry in os.listdir(self._root)]

class Caltrans():
    '''
    Entry point for locating files of the Caltrans data set

    Every property builds an indexer object for one sub-folder of the data
    root; the indexers return paths and do not read the data themselves.

    # Usage
    -------
    ```
    root = Caltrans()
    root.full_survey['survey_person'] # path of 'survey_person.csv' in the full survey folder
    root.drive_cycles_data.datalist # dictionary with entries like 1038404: [1, 2]
    root.drive_cycles_data[1038404, 1] # cycle data in '1038404_1' folder
    root.drive_cycles_data[1038404, 1].datelist # dates in '1038404_1' folder
    root.drive_cycles_data[1038404, 1].trips # path of 'trips.csv' in '1038404_1' folder
    root.drive_cycles_data[1038404, 1]['2012-05-06'] # path of the data of that date
    root.survey_by_person.datalist # dictionary with entries like 1041494: [2, 3, 4]
    root.survey_by_person[1041494, 2] # survey data in 'person_1041494-2' folder
    root.survey_by_person[1041494, 2].tablelist # tables in the folder
    root.survey_by_person[1041494, 2]['household'] # path of 'household.csv' in the folder
    # same for survey_by_vehicle
    ```
    '''
    def __init__(self, rootpath=_default_data_root):
        # Root folder of the data set; every property joins onto it.
        self._root = rootpath

    @property
    def full_survey(self):
        '''
        Indexer of the .csv files in folder "caltrans_full_survey"
        '''
        folder = osp.join(self._root, 'caltrans_full_survey')
        return _DataIndexer(folder)

    @property
    def drive_cycles(self):
        '''
        Indexer of folder "caltrans_processed_drive_cycles"
        '''
        folder = osp.join(self._root, 'caltrans_processed_drive_cycles')
        return _DirIndexer(folder)

    @property
    def drive_cycles_data(self):
        '''
        Indexer of folder "caltrans_processed_drive_cycles/data"
        '''
        return _CycleDirIndexer(
            osp.join(self._root, 'caltrans_processed_drive_cycles', 'data'))

    @property
    def survey_by_person(self):
        '''
        Indexer of folder "caltrans_sorted_by_person"
        '''
        folder = osp.join(self._root, 'caltrans_sorted_by_person')
        return _SurveyDirIndexer(folder, 'person')

    @property
    def survey_by_vehicle(self):
        '''
        Indexer of folder "caltrans_sorted_by_vehicle"
        '''
        folder = osp.join(self._root, 'caltrans_sorted_by_vehicle')
        return _SurveyDirIndexer(folder, 'vehicle')
	'''
	Interface for caltrans data repo
	'''

	import os
	import os.path as osp

	_default_data_root = 'E:\\Data\\Caltrans_2010-2012'

	class _DirIndexer():
	'''
	Support indexing directories by []
	'''
	def __init__(self, rootpath):
	self._root = rootpath

	def __getitem__(self, index):
	return _DirIndexer(osp.join(self._root, index))

	class _DataIndexer():
	'''
	Support indexing csv data by []
	'''
	def __init__(self, rootpath):
	self._root = rootpath

	def __getitem__(self, index):
	return osp.join(self._root, index + '.csv')

	class _CycleDirIndexer(_DirIndexer):
	'''
	Access driving cycle data folder with singleton
	'''
	def __new__(cls, *args):
	path = args[0]
	if not hasattr(cls, '_instance') or cls._instance_path != path:
	cls._instance = super().__new__(cls)
	cls._instance_path = path
	return cls._instance

	def __init__(self, rootpath):
	super().__init__(rootpath)
	if not hasattr(self, '_datalist'):
	self._datalist = dict()
	for name in os.listdir(rootpath):
	spname = name.split('_')
	sampno = int(spname[0])
	vehno = int(spname[1])
	if sampno in self._datalist:
	self._datalist[sampno] += [vehno]
	else:
	self._datalist[sampno] = [vehno]

	@property
	def datalist(self):
	'''
	Get the avaliable (sampno, vehno) list of the data
	'''
	return self._datalist

	def __getitem__(self, index):
	if isinstance(index, tuple) and len(index) == 2:
	return _CycleDataIndexer(osp.join(self._root, '%d_%d' % index))
	else:
	raise ValueError('wrong (sampno, vehno) format')

	class _CycleDataIndexer(_DataIndexer):
	'''
	Access driving cycle .csv files
	'''
	def __init__(self, rootpath):
	super().__init__(rootpath)

	@property
	def trips(self):
	'''
	Access trip summary data (trips.csv)
	'''
	return self['trips']

	@property
	def datelist(self):
	'''
	Get the list of dates existing in the folder (returning string)
	'''
	names = filter(lambda name: str.isdigit(name[0]), os.listdir(self._root))
	return [name.split('.')[0] for name in names]

	class _SurveyDirIndexer(_DirIndexer):
	'''
	Access driving sorted survey data folder with singleton
	'''
	def __new__(cls, *args):
	path = args[0]
	if not hasattr(cls, '_instance') or cls._instance_path != path:
	cls._instance = super().__new__(cls)
	cls._instance_path = path
	return cls._instance

	def __init__(self, rootpath, field):
	super().__init__(rootpath)
	if not hasattr(self, '_datalist'):
	self._datalist = dict()
	self._field = field
	for name in os.listdir(rootpath):
	spname = name.split('_')[1]
	if not str.isdigit(spname[-1]):
	continue
	spname = spname.split('-')
	sampno = int(spname[0])
	fieldno = int(spname[1])
	if sampno in self._datalist:
	self._datalist[sampno] += [fieldno]
	else:
	self._datalist[sampno] = [fieldno]

	@property
	def datalist(self):
	'''
	Get the avaliable (sampno, perno/vehno) list of the data
	'''
	return self._datalist

	def __getitem__(self, index):
	if isinstance(index, tuple) and len(index) == 2:
	return _SurveyDataIndexer(osp.join(self._root, self._field + '_%d-%d' % index))
	else:
	raise ValueError('wrong (sampno, perno/vehno) format')

	class _SurveyDataIndexer(_DataIndexer):
	'''
	Access survey .csv files
	'''
	def __init__(self, rootpath):
	super().__init__(rootpath)

	@property
	def tablelist(self):
	'''
	Get the list of dates existing in the folder (returning string)
	'''
	names = os.listdir(self._root)
	return [name.split('.')[0] for name in names]

	class Caltrans():
	'''
	Interface Class for Caltrans data

	# Usage
	-------
	```
	root = Caltrans()
	root.full_survey['survey_person'] # get the path of 'survey_person.csv' in full survey folder
	root.drive_cycles_data.datalist # dictionary with data like (1038404, [1, 2])
	root.drive_cycles_data[1038404, 1] # cycle data in '1038404_1' folder
	root.drive_cycles_data[1038404, 1].datelist # get the dates in '1038404_1' folder
	root.drive_cycles_data[1038404, 1].trips # get the path of 'trips.csv' of '1038404_1' folder
	root.drive_cycles_data[1038404, 1]['2012-05-06'] # get corresponding data path
	root.survey_by_person.datalist # dictionary with data like (1041494, [2, 3, 4])
	root.survey_by_person[1041494, 2] # cycle data in 'person_1041494-2' folder
	root.survey_by_person[1041494, 2].tablelist # get the tables in the folder
	root.survey_by_person[1041494, 2]['household'] # # get the path of 'household.csv' in folder
	# same for survey_by_vehicle
	```
	'''
	def __init__(self, rootpath=_default_data_root):
	self._root = rootpath

	@property
	def full_survey(self):
	'''
	Access folder "caltrans_full_survey"
	'''
	return _DataIndexer(osp.join(self._root, 'caltrans_full_survey'))

	@property
	def drive_cycles(self):
	'''
	Access folder "caltrans_processed_drive_cycles"
	'''
	return _DirIndexer(osp.join(self._root, 'caltrans_processed_drive_cycles'))

	@property
	def drive_cycles_data(self):
	'''
	Access folder "caltrans_processed_drive_cycles/data"
	'''
	path = osp.join(self._root, 'caltrans_processed_drive_cycles', 'data')
	return _CycleDirIndexer(path)

	@property
	def survey_by_person(self):
	'''
	Access folder "caltrans_sorted_by_person"
	'''
	return _SurveyDirIndexer(osp.join(self._root, 'caltrans_sorted_by_person'), 'person')

	@property
	def survey_by_vehicle(self):
	'''
	Access folder "caltrans_sorted_by_vehicle"
	'''
	return _SurveyDirIndexer(osp.join(self._root, 'caltrans_sorted_by_vehicle'), 'vehicle')