# lmeulen/corwdedness_import_ndov.py

## corwdedness_import_ndov.py
def get_crowdedness_operator(operatorcode, type, date):
    """Return the NDOV occupancy file of one operator and day as a DataFrame.

    The file is fetched from ``https://data.ndovloket.nl/bezetting/`` and
    stored in ``NDOVDIR``. When a file with the same name already exists
    there, it is read from disk and no request is made.

    Args:
        operatorcode: Operator code such as ``'ns'``. It is lower-cased for
            the URL directory and upper-cased for the file name.
        type: File extension without the leading dot. ``'csv.gz'`` is read as
            gzip; every other value is read as a zip archive. (The name
            shadows the builtin; it is kept so existing callers still work.)
        date: Day part of the file name, as a string.
            NOTE(review): the exact date format is not visible here --
            confirm against the caller that builds ``displaydate``.

    Returns:
        The parsed file as a ``pandas.DataFrame``.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.RequestException: On connection problems or a timeout.
    """
    url = "https://data.ndovloket.nl/bezetting/" + operatorcode.lower() + "/OC_" + \
          operatorcode.upper() + "_" + date + "." + type
    filename = url.split("/")[-1]
    target = os.path.join(NDOVDIR, filename)
    if not os.path.exists(target):
        # Download and validate BEFORE creating the file. Opening the file
        # first would leave an empty (or error-page) file behind on failure,
        # and every later call would mistake it for a cached download.
        response = requests.get(url, timeout=60)
        response.raise_for_status()
        with open(target, "wb") as f:
            f.write(response.content)
    return pd.read_csv(target, low_memory=False,
                       compression='gzip' if type == 'csv.gz' else 'zip')

# Load the NS occupancy export for the selected day.
druktedata = get_crowdedness_operator('ns', 'csv.gz', displaydate)

# Keep only the columns used further on and give them short labels.
_column_labels = {
    'DataOwnerCode': 'operator',
    'JourneyNumber': 'ritnumber',
    'OperatingDay': 'date',
    'UserStopCodeBegin': 'departure',
    'UserStopCodeEnd': 'arrival',
    'VehicleType': 'wagontype',
    'TotalNumberOfCoaches': 'coaches',
    'Occupancy': 'classification',
}
druktedata = druktedata[list(_column_labels)].rename(columns=_column_labels)

# Filter station abbreviations: keep rows whose departure code is shorter
# than 6 characters.
_short_departure = druktedata['departure'].str.len() < 6
druktedata = druktedata.loc[_short_departure]

# Rough seats-per-coach estimate per wagon type (based on Wikipedia).
# The default inner merge drops rows whose wagon type is not listed here.
_seats_per_coach = pd.DataFrame(
    [
        ('ICM', 60),
        ('VIRM', 100),
        ('SW7-25KV', 50),
        ('SW9-25KV', 50),
        ('SNG', 85),
        ('SLT', 60),
        ('FLIRT FFF', 55),
        ('SGMM', 45),
        ('DDZ', 50),
    ],
    columns=['wagontype', 'seats'],
)
druktedata = druktedata.merge(_seats_per_coach, on='wagontype')

# Total seats of the train = seats per coach * number of coaches.
druktedata['seats'] = druktedata['seats'].mul(druktedata['coaches'])
	def get_crowdedness_operator(operatorcode, type, date):
	    """Return the NDOV occupancy file of one operator and day as a DataFrame.

	    The file is fetched from ``https://data.ndovloket.nl/bezetting/`` and
	    stored in ``NDOVDIR``. When a file with the same name already exists
	    there, it is read from disk and no request is made.

	    Args:
	        operatorcode: Operator code such as ``'ns'``. It is lower-cased
	            for the URL directory and upper-cased for the file name.
	        type: File extension without the leading dot. ``'csv.gz'`` is
	            read as gzip; every other value is read as a zip archive.
	        date: Day part of the file name, as a string.
	            NOTE(review): the exact date format is not visible here --
	            confirm against the caller.

	    Returns:
	        The parsed file as a ``pandas.DataFrame``.

	    Raises:
	        requests.HTTPError: If the server answers with an error status.
	        requests.RequestException: On connection problems or a timeout.
	    """
	    url = "https://data.ndovloket.nl/bezetting/" + operatorcode.lower() + "/OC_" + \
	          operatorcode.upper() + "_" + date + "." + type
	    filename = url.split("/")[-1]
	    target = os.path.join(NDOVDIR, filename)
	    if not os.path.exists(target):
	        # Download and validate BEFORE creating the file, so a failed
	        # request cannot leave behind an empty file that later calls
	        # would treat as a cached download.
	        response = requests.get(url, timeout=60)
	        response.raise_for_status()
	        with open(target, "wb") as f:
	            f.write(response.content)
	    return pd.read_csv(target, low_memory=False,
	                       compression='gzip' if type == 'csv.gz' else 'zip')

	# Load the NS occupancy export for the selected day.
	druktedata = get_crowdedness_operator('ns', 'csv.gz', displaydate)

	# Select the columns used further on, then relabel them positionally.
	druktedata = druktedata.loc[:, [
	    'DataOwnerCode', 'JourneyNumber', 'OperatingDay', 'UserStopCodeBegin',
	    'UserStopCodeEnd', 'VehicleType', 'TotalNumberOfCoaches', 'Occupancy',
	]]
	druktedata.columns = [
	    'operator', 'ritnumber', 'date', 'departure',
	    'arrival', 'wagontype', 'coaches', 'classification',
	]

	# Filter station abbreviations: departure codes shorter than 6 characters.
	druktedata = druktedata.loc[druktedata['departure'].str.len() < 6]

	# Rough seats-per-coach estimate per wagon type (based on Wikipedia).
	# The default inner merge drops rows whose wagon type is not listed.
	_seats_by_wagontype = {
	    'ICM': 60,
	    'VIRM': 100,
	    'SW7-25KV': 50,
	    'SW9-25KV': 50,
	    'SNG': 85,
	    'SLT': 60,
	    'FLIRT FFF': 55,
	    'SGMM': 45,
	    'DDZ': 50,
	}
	druktedata = pd.merge(
	    druktedata,
	    pd.DataFrame(list(_seats_by_wagontype.items()), columns=['wagontype', 'seats']),
	)

	# Total seats of the train = seats per coach * number of coaches.
	druktedata = druktedata.assign(seats=druktedata['seats'] * druktedata['coaches'])