Leo van der Meulen lmeulen

## crowdedness_import_ndov.py
def get_crowdedness_operator(operatorcode, type, date):
    """Download (once) and load the NDOV occupancy file for one operator and day.

    The file is fetched from ``data.ndovloket.nl`` and cached under
    ``NDOVDIR``; when a file with the same name already exists there, it is
    read without contacting the server.

    Args:
        operatorcode: Operator code. It is lower-cased for the URL directory
            and upper-cased for the file name.
        type: Extension of the remote file. ``'csv.gz'`` is read as gzip; any
            other value is read as a zip archive.
        date: Date string inserted verbatim into the file name.

    Returns:
        The parsed file as a ``pandas.DataFrame``.

    Raises:
        requests.HTTPError: If the server answers with an error status.
    """
    url = "https://data.ndovloket.nl/bezetting/" + operatorcode.lower() + "/OC_" + \
          operatorcode.upper() + "_" + date + "." + type
    filename = url.split("/")[-1]
    target = os.path.join(NDOVDIR, filename)
    if not os.path.exists(target):
        # Fetch and validate BEFORE creating the file: otherwise a failed
        # request leaves an empty or error-page file behind that every later
        # call would mistake for a valid cached download.
        r = requests.get(url)
        r.raise_for_status()
        os.makedirs(NDOVDIR, exist_ok=True)
        with open(target, "wb") as f:
            f.write(r.content)
    return pd.read_csv(target, low_memory=False,
                       compression='gzip' if type == 'csv.gz' else 'zip')

## crowdedness_download_gtfs.py
# Download the national GTFS archive once; later runs reuse the local copy.
if not os.path.exists(os.path.join(GTFSDIR, 'gtfs-nl.zip')):
    url = 'http://gtfs.ovapi.nl/nl/gtfs-nl.zip'
    os.makedirs(GTFSDIR, exist_ok=True)
    # Stream into a temporary name and rename only after the transfer
    # finished, so an interrupted download is never mistaken for a complete
    # archive by the existence check above.
    _gtfs_partial = os.path.join(GTFSDIR, 'gtfs-nl.zip.part')
    with requests.get(url, stream=True) as r:
        r.raise_for_status()
        with open(_gtfs_partial, 'wb') as f:
            # 1 MiB chunks keep memory use bounded; the previous 1 GiB chunk
            # size allowed up to that much to be buffered per chunk.
            for chunk in r.iter_content(chunk_size=1024 * 1024):
                f.write(chunk)
    os.replace(_gtfs_partial, os.path.join(GTFSDIR, 'gtfs-nl.zip'))

## crowdedness_parse_gtfs.py
def read_csv(csvfile):
    """Load one member of the cached GTFS archive into a DataFrame.

    Args:
        csvfile: Name of the member inside ``gtfs-nl.zip`` (for example
            ``'agency.txt'``).

    Returns:
        The member parsed by ``pandas.read_csv``.
    """
    archive_path = os.path.join(GTFSDIR, 'gtfs-nl.zip')
    with zipfile.ZipFile(archive_path) as archive, archive.open(csvfile) as member:
        frame = pd.read_csv(member)
    return frame

# Agency names whose rows are kept from agency.txt.
AGENCYNAMES = ['NS']
agencies = read_csv('agency.txt')
# Filter once on agency name, keeping only the id and name columns.
agencies = agencies.loc[agencies['agency_name'].isin(AGENCYNAMES),
                        ['agency_id', 'agency_name']]
# Ids of the selected agencies as a plain array.
agency_ids = agencies['agency_id'].values

## crowdedness_interpol_func.py
def interpolate_lat_lon(tripshape, dist):
    """Find the shape points bracketing ``dist`` and the fraction between them.

    Args:
        tripshape: DataFrame with ``shape_dist_traveled``, ``shape_pt_lat``
            and ``shape_pt_lon`` columns.
        dist: Distance along the shape to locate.

    Raises:
        IndexError: If no row lies at or before ``dist``, or none at or after.

    Note:
        This excerpt ends after the fraction ``prc`` is computed; nothing is
        returned by the code shown here.
    """
    travelled = tripshape['shape_dist_traveled']
    # Last row at or before ``dist`` and first row at or after it.
    ps = tripshape[travelled.le(dist)].index[-1]
    ns = tripshape[travelled.ge(dist)].index[0]

    # Fetch each bracketing row once instead of re-filtering per column.
    before = tripshape[tripshape.index == ps].iloc[0]
    after = tripshape[tripshape.index == ns].iloc[0]

    lat1, lon1 = before['shape_pt_lat'], before['shape_pt_lon']
    lat2, lon2 = after['shape_pt_lat'], after['shape_pt_lon']
    dst1 = before['shape_dist_traveled']
    dst2 = after['shape_dist_traveled']

    # Both rows share a distance when ``dist`` hits a shape point exactly;
    # the fraction is then fixed at 1 to avoid dividing by zero.
    if dst2 != dst1:
        prc = (dist - dst1) / (dst2 - dst1)
    else:
        prc = 1

## crowdedness_interpolate_trip.py
def get_trip_data(number, date):
  """Gather the trip, shape and stop-time rows for one trip on one date.

  Only the opening part of the function is visible in this excerpt: it
  selects the data and initialises running state; the processing that uses
  that state is not shown here.

  Args:
    number: Value matched against ``trips.trip_short_name``.
    date: Value matched against ``trips.date``.
  """
  # First trip matching both short name and date; IndexError if none matches.
  trip = trips[(trips.trip_short_name==number) & (trips.date==date)].iloc[0]
  tripid = trip['trip_id']
  shapeid = trip['shape_id']
  # Shape rows belonging to this trip's shape_id.
  tripshape = shapes[shapes.shape_id == shapeid]
  # Stop times of this trip, ordered by stop_sequence.
  tripstops = stoptimes[stoptimes.trip_id==tripid].sort_values('stop_sequence')

  # Running state; presumably -1 marks "no previous stop yet" -- TODO confirm
  # against the rest of the function, which is not part of this excerpt.
  lastdep = -1
  lastdist= -1
  seq = 0

## crowdedness_loop_trips.py
# Build one frame holding the data of every trip that runs on ``displaydate``.
#
# * Arguments are passed positionally: get_trip_data is declared as
#   (number, date), so the former ritnumber=/ritdate= keywords raised TypeError.
# * The per-trip results are collected and concatenated once. DataFrame.append
#   was removed in pandas 2.0 and re-copied the growing frame on every pass.
# NOTE(review): assumes get_trip_data returns a DataFrame per trip -- confirm.
trip_frames = [
    get_trip_data(r, displaydate)
    for r in trips[trips.date == displaydate].trip_short_name.unique()
]
# pd.concat rejects an empty list, so fall back to an empty frame as before.
timedata = pd.concat(trip_frames) if trip_frames else pd.DataFrame()
# Add stop code and name; the join uses the columns both frames share.
timedata = timedata.merge(stops[['stop_id', 'stop_code', 'stop_name']])

## crowdedness_combine_trip_crowds.py
# Join each trip row to the occupancy record with the same ritnumber whose
# 'departure' equals the row's stop_code.
timedata = pd.merge(
    timedata,
    druktedata[['ritnumber', 'departure', 'classification', 'seats', 'operator']],
    left_on=['ritnumber', 'stop_code'],
    right_on=['ritnumber', 'departure'],
)

# Convert the time column with min2str.
timedata['time'] = timedata['time'].apply(min2str)
# Passenger estimate: (classification - 1) * seats * 0.33, cast to int.
timedata['passengers'] = (
    (timedata['classification'] - 1) * timedata['seats'] * 0.33
).astype(int)
# Whole seconds elapsed since 1970-01-01.
timedata['timestamp'] = (
    pd.to_datetime(timedata['time']) - pd.Timestamp("1970-01-01")
) // pd.Timedelta('1s')

## crowdedness_create_features.py
# Collect one entry per (ritnumber, sequence) group of rows in timedata.
features=[]
for rn in timedata.ritnumber.unique():
    # All rows of this ritnumber, without rows that contain a missing value.
    total_trip = timedata[timedata.ritnumber == rn].dropna()
    for sq in total_trip.sequence.unique():
        # Rows of this ritnumber that share the same sequence value.
        trip = total_trip[total_trip.sequence == sq]
        tripfirst = trip.iloc[0]
        triplast = trip.iloc[-1]
        # Label of the form "<ritnumber> : <first stop_code> - <last stop_code>".
        name = str(tripfirst["ritnumber"]) + " : " + tripfirst["stop_code"] + \
              " - " + triplast["stop_code"]
        ls = LineString(trip[['lon', 'lat', 'elevation', 'timestamp']]. \

## crowdedness_create_network.py
# Keep one trip per route_id, discard rows with missing values, and leave out
# trips whose long name is 'Stopbus i.p.v. trein'.
network = trips.drop_duplicates('route_id').dropna()
network = network[network.trip_long_name != 'Stopbus i.p.v. trein']

# One LineString feature per remaining shape, labelled with its shape id.
# NOTE(review): points are used in the row order of ``shapes`` -- confirm that
# this order follows the shape's point sequence.
features = [
    Feature(
        geometry=LineString(
            shapes.loc[shapes.shape_id == si, ['shape_pt_lon', 'shape_pt_lat']]
            .to_numpy()
            .tolist()
        ),
        properties=dict(label=str(si)),
    )
    for si in network.shape_id.unique()
]

## crowdedness_import.py
import os, sys, requests, zipfile
import pandas as pd
from geojson import LineString, Feature, FeatureCollection, dump

# Directory holding the downloaded GTFS archive (gtfs-nl.zip).
GTFSDIR = 'gtfs-nl'
# Directory in which downloaded NDOV occupancy files are cached.
NDOVDIR = 'ndov'

# Date string used to select the trips to process and display.
displaydate = '20210309'

def min2str(minutes):
	def get_crowdedness_operator(operatorcode, type, date):
	url = "https://data.ndovloket.nl/bezetting/" + operatorcode.lower() + "/OC_" + \
	operatorcode.upper() + "_" + date + "." + type
	filename = url.split("/")[-1]
	if not os.path.exists(os.path.join(NDOVDIR,filename)):
	with open(os.path.join(NDOVDIR, filename), "wb") as f:
	r = requests.get(url)
	f.write(r.content)
	return pd.read_csv(os.path.join(NDOVDIR, filename), low_memory=False,
	compression='gzip' if type == 'csv.gz' else 'zip')
	if not os.path.exists(os.path.join(GTFSDIR,'gtfs-nl.zip')):
	url = 'http://gtfs.ovapi.nl/nl/gtfs-nl.zip'
	with requests.get(url, stream=True) as r:
	r.raise_for_status()
	with open(os.path.join(GTFSDIR,'gtfs-nl.zip'), 'wb') as f:
	for chunk in r.iter_content(chunk_size=1073741824):
	f.write(chunk)
	def read_csv(csvfile):
	with zipfile.ZipFile(os.path.join(GTFSDIR,'gtfs-nl.zip')) as z:
	with z.open(csvfile) as f:
	return pd.read_csv(f)

	AGENCYNAMES = ['NS']
	agencies = read_csv('agency.txt')
	agency_ids = agencies[agencies.agency_name.isin(AGENCYNAMES)]['agency_id'].values
	agencies = agencies[agencies.agency_name.isin(AGENCYNAMES)][['agency_id', 'agency_name']]
	def interpolate_lat_lon(tripshape, dist):
	ps = tripshape[tripshape['shape_dist_traveled'].le(dist)].index[-1]
	ns = tripshape[tripshape['shape_dist_traveled'].ge(dist)].index[0]
	lat1 = tripshape[tripshape.index == ps].iloc[0]['shape_pt_lat']
	lon1 = tripshape[tripshape.index == ps].iloc[0]['shape_pt_lon']
	lat2 = tripshape[tripshape.index == ns].iloc[0]['shape_pt_lat']
	lon2 = tripshape[tripshape.index == ns].iloc[0]['shape_pt_lon']
	dst1 = tripshape[tripshape.index == ps].iloc[0]['shape_dist_traveled']
	dst2 = tripshape[tripshape.index == ns].iloc[0]['shape_dist_traveled']
	prc = ((dist - dst1) / (dst2 - dst1)) if dst2 != dst1 else 1
	def get_trip_data(number, date):
	trip = trips[(trips.trip_short_name==number) & (trips.date==date)].iloc[0]
	tripid = trip['trip_id']
	shapeid = trip['shape_id']
	tripshape = shapes[shapes.shape_id == shapeid]
	tripstops = stoptimes[stoptimes.trip_id==tripid].sort_values('stop_sequence')

	lastdep = -1
	lastdist= -1
	seq = 0
	timedata = pd.DataFrame()
	for r in trips[trips.date == displaydate].trip_short_name.unique():
	timedata = timedata.append(get_trip_data(ritnumber=r, ritdate = displaydate))
	timedata = timedata.merge(stops[['stop_id', 'stop_code', 'stop_name']])
	timedata = timedata.merge(druktedata[['ritnumber', 'departure',
	'classification', 'seats', 'operator']],
	left_on=['ritnumber', 'stop_code'],
	right_on=['ritnumber', 'departure'])

	timedata.time = timedata.time.apply(lambda x: min2str(x))
	timedata['passengers'] = ((timedata.classification - 1) *
	timedata.seats * 0.33).astype(int)
	timedata['timestamp'] = (pd.to_datetime(timedata['time']) -
	pd.Timestamp("1970-01-01")) // pd.Timedelta('1s')
	features=[]
	for rn in timedata.ritnumber.unique():
	total_trip = timedata[timedata.ritnumber == rn].dropna()
	for sq in total_trip.sequence.unique():
	trip = total_trip[total_trip.sequence == sq]
	tripfirst = trip.iloc[0]
	triplast = trip.iloc[-1]
	name = str(tripfirst["ritnumber"]) + " : " + tripfirst["stop_code"] + \
	" - " + triplast["stop_code"]
	ls = LineString(trip[['lon', 'lat', 'elevation', 'timestamp']]. \
	network = trips.drop_duplicates('route_id').dropna()
	network = network[~(network.trip_long_name == 'Stopbus i.p.v. trein')]

	features=[]
	for si in network.shape_id.unique():
	section = shapes[shapes.shape_id == si]
	ls =LineString(section[['shape_pt_lon','shape_pt_lat']].to_numpy().tolist())
	features.append(
	Feature(geometry=ls, properties=dict(label=str(si)))
	)
	import os, sys, requests, zipfile
	import pandas as pd
	from geojson import LineString, Feature, FeatureCollection, dump

	GTFSDIR = 'gtfs-nl'
	NDOVDIR = 'ndov'

	displaydate = '20210309'

	def min2str(minutes):