-
-
Save hjgoode3/06da27e571eba6a77947c239c62eaf8c to your computer and use it in GitHub Desktop.
Script finds docks that have a bike surplus or a bike deficit and proposes steps for Citi Bike employees to rebalance the system
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Load the dataframe of projected bike availabilities.
#full = pd.read_csv('')

# --- User inputs: the specific date and hour to analyse ---
month = 12
num_day = 2
hour = 23
# --- End user inputs ---

# Keep only the rows of `full` that match the selected month, day and hour.
query = full.loc[
    (full['month'] == month)
    & (full['num_day'] == num_day)
    & (full['hour'] == hour)
]
#Create function to calculate distance, most accurate measure for NYC grid like street layout | |
def manhattan_distance(start_lat, start_lon, end_lat, end_lon):
    """Return the grid-style (Manhattan) distance in miles between two points.

    The path is split into two legs that meet at the corner point
    ``(start_lat, end_lon)``: one leg from the start point to the corner
    (latitude held constant) and one from the end point to the corner
    (longitude held constant). Each leg is measured with
    ``distance(...).miles`` and the two lengths are summed.

    NOTE(review): ``distance`` is not defined in this section; presumably it
    is ``geopy.distance.distance`` -- confirm against the file's imports.
    """
    corner = (start_lat, end_lon)
    east_west_leg = distance((start_lat, start_lon), corner).miles
    north_south_leg = distance((end_lat, end_lon), corner).miles
    return east_west_leg + north_south_leg
# --- User input variables ---
low_bike_threshold = 1/3    # docks at or below this availability proportion count as low
high_bike_threshold = 2/3   # docks at or above this availability proportion count as high
max_distance = 2            # only pair docks closer than this (units of manhattan_distance)
max_bikes_rebalanced = 500  # cap on total bikes moved across all proposed steps
min_cargo_size = 3          # drop proposed moves smaller than this many bikes
# --- End user input variables ---

# Split the selected hour into low- and high-availability docks. The explicit
# .copy() makes each frame independent of `query`, so adding the 'deficit' /
# 'surplus' columns below does not trigger pandas' SettingWithCopyWarning.
data_low = query[query['avail_bikes_proportion'] <= low_bike_threshold].copy()
data_high = query[query['avail_bikes_proportion'] >= high_bike_threshold].copy()

# Number of bikes each low dock is short of the low threshold.
data_low['deficit'] = (
    (low_bike_threshold - data_low['avail_bikes_proportion']) * data_low['tot_docks']
).round().astype('int')
# Number of bikes each high dock holds above the high threshold.
data_high['surplus'] = (
    (data_high['avail_bikes_proportion'] - high_bike_threshold) * data_high['tot_docks']
).round().astype('int')

# Handle the largest deficits / surpluses first.
data_low = data_low.sort_values(by='deficit', ascending=False)
data_high = data_high.sort_values(by='surplus', ascending=False)

rebalancing_dict = {}  # (low dock_id, high dock_id) -> bikes to move from high to low
low_copy = data_low.copy()    # working copy; 'deficit' is decremented as bikes are assigned
high_copy = data_high.copy()  # working copy; 'surplus' is decremented as bikes are assigned
bikes_rebalanced = 0  # running total of bikes assigned before any filtering

# Greedy matching: for each low dock, take bikes from nearby high docks in
# order of surplus until the deficit is covered or no candidates remain.
for low in low_copy.index:
    if low_copy.loc[low, 'deficit'] == 0:  # nothing to fill at this dock
        continue

    # These values do not change inside the inner loop, so look them up once.
    low_lat = low_copy.loc[low, 'latitude']
    low_lon = low_copy.loc[low, 'longitude']
    low_dock_id = low_copy.loc[low, 'dock_id']

    for high in high_copy.index:
        if high_copy.loc[high, 'surplus'] == 0:  # this dock has nothing left to give
            continue

        dock_distance = manhattan_distance(
            low_lat, low_lon,
            high_copy.loc[high, 'latitude'], high_copy.loc[high, 'longitude'],
        )
        if dock_distance >= max_distance:  # too far apart to pair
            continue

        stations_key = (low_dock_id, high_copy.loc[high, 'dock_id'])
        # Move as many bikes as both the deficit and the surplus allow.
        change = min(low_copy.loc[low, 'deficit'], high_copy.loc[high, 'surplus'])
        low_copy.loc[low, 'deficit'] -= change
        high_copy.loc[high, 'surplus'] -= change
        bikes_rebalanced += change
        rebalancing_dict[stations_key] = rebalancing_dict.get(stations_key, 0) + change

        if low_copy.loc[low, 'deficit'] == 0:  # deficit covered; go to next low dock
            break

# Largest proposed moves first.
sorted_rebalancing = dict(
    sorted(rebalancing_dict.items(), key=lambda item: item[1], reverse=True)
)
# Discard moves that are too small to be worth a trip.
filtered_rebalancing = {
    key: value for key, value in sorted_rebalancing.items() if value >= min_cargo_size
}

# Enforce the cap on total bikes moved. A move is accepted only if the running
# total stays within max_bikes_rebalanced; previously the check happened before
# adding the move, so the final total could overshoot the cap. Moves are in
# descending size order, so a later, smaller move may still fit after a larger
# one was skipped.
filtered_bikes_rebalanced = 0
final_rebalancing_dict = {}
for k, v in filtered_rebalancing.items():
    if filtered_bikes_rebalanced + v > max_bikes_rebalanced:
        continue
    final_rebalancing_dict[k] = v
    filtered_bikes_rebalanced += v

# One row per proposed action: (low dock_id, high dock_id) and the bike count.
rebalancing_df = pd.DataFrame(
    list(final_rebalancing_dict.items()), columns=['dock_ids', 'num_bikes']
)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment