Skip to content

Instantly share code, notes, and snippets.

@hjgoode3
Last active December 28, 2021 17:38
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save hjgoode3/06da27e571eba6a77947c239c62eaf8c to your computer and use it in GitHub Desktop.
Save hjgoode3/06da27e571eba6a77947c239c62eaf8c to your computer and use it in GitHub Desktop.
Script finds docks that have a bike surplus or a bike deficit and proposes steps for Citi Bike employees to rebalance the system
# Load the dataframe of projected bike availabilities (fill in the CSV path
# and uncomment before running).
#full = pd.read_csv('')

# --- User inputs: the specific date and hour to analyze ---
month = 12
num_day = 2
hour = 23
# --- End user inputs ---

# Keep only the rows of `full` that match the selected month, day and hour.
query = full.loc[
    (full['month'] == month)
    & (full['num_day'] == num_day)
    & (full['hour'] == hour)
]
#Create a function to calculate the Manhattan (taxicab) distance, the most accurate measure for NYC's grid-like street layout
def manhattan_distance(start_lat, start_lon, end_lat, end_lon):
    """Return the taxicab distance in miles between two lat/lon points.

    The path is split into two legs that meet at the corner point
    ``(start_lat, end_lon)``: one leg from the start point to the corner and
    one leg from the end point to the corner. Each leg is measured with
    ``distance(...).miles`` and the two lengths are summed.

    NOTE(review): ``distance`` is not defined in this snippet; it is
    presumably ``geopy.distance.distance`` -- confirm the import.
    """
    corner = (start_lat, end_lon)
    # Leg along the starting latitude (change in longitude only).
    first_leg = distance((start_lat, start_lon), corner).miles
    # Leg along the ending longitude (change in latitude only).
    second_leg = distance((end_lat, end_lon), corner).miles
    return first_leg + second_leg
# --- User input variables ---
# Docks whose available-bike proportion is at or below this are "low".
low_bike_threshold = 1/3
# Docks whose available-bike proportion is at or above this are "high".
high_bike_threshold = 2/3
# Two docks are only paired when manhattan_distance() between them is
# strictly less than this value (that function returns miles).
max_distance = 2
# Cap on the total number of bikes in the final rebalancing plan.
max_bikes_rebalanced = 500
# Proposed moves carrying fewer bikes than this are dropped from the plan.
min_cargo_size = 3
# --- End user input variables ---

# Split the selected hour into low- and high-availability docks.
# .copy() makes each result an independent frame, so adding the
# 'deficit' / 'surplus' columns below does not write into a slice of `query`
# (which is what triggers pandas' SettingWithCopyWarning).
data_low = query[query['avail_bikes_proportion'] <= low_bike_threshold].copy()
data_high = query[query['avail_bikes_proportion'] >= high_bike_threshold].copy()

# Deficit: (low threshold - proportion) * tot_docks, rounded to a whole number.
# NOTE(review): assumes 'avail_bikes_proportion' is bikes / tot_docks, so this
# is the bike count needed to reach the low threshold -- confirm.
data_low['deficit'] = round(
    (low_bike_threshold - data_low['avail_bikes_proportion']) * data_low['tot_docks']
).astype('int')
# Surplus: (proportion - high threshold) * tot_docks, rounded the same way.
data_high['surplus'] = round(
    (data_high['avail_bikes_proportion'] - high_bike_threshold) * data_high['tot_docks']
).astype('int')

# Largest deficits / surpluses first, so the greedy pairing that follows
# handles the most unbalanced docks first.
data_low = data_low.sort_values(by='deficit', ascending=False)
data_high = data_high.sort_values(by='surplus', ascending=False)

rebalancing_dict = {}  # (low dock_id, high dock_id) -> number of bikes to move
# Working copies: the pairing loop decrements 'deficit' / 'surplus' on these,
# leaving data_low / data_high untouched.
low_copy = data_low.copy()
high_copy = data_high.copy()
bikes_rebalanced = 0  # running total of bikes assigned to moves
# Greedy pairing: for every dock with a deficit, walk the surplus docks in
# order and pull bikes from each one that is close enough, until the deficit
# is covered or the surplus docks are exhausted.
for low in low_copy.index:
    if low_copy.loc[low, 'deficit'] == 0:  # nothing needed at this dock
        continue
    # These values do not change while scanning surplus docks, so read them
    # once per low dock instead of once per (low, high) pair.
    low_lat = low_copy.loc[low, 'latitude']
    low_lon = low_copy.loc[low, 'longitude']
    low_dock_id = low_copy.loc[low, 'dock_id']
    for high in high_copy.index:
        if high_copy.loc[high, 'surplus'] == 0:  # this dock has nothing left to give
            continue
        # Only move bikes between docks that satisfy the distance constraint.
        if manhattan_distance(low_lat, low_lon,
                              high_copy.loc[high, 'latitude'],
                              high_copy.loc[high, 'longitude']) < max_distance:
            stations_key = (low_dock_id, high_copy.loc[high, 'dock_id'])  # (receiving dock, donating dock)
            # Move as many bikes as the smaller of the two needs allows.
            change = min(low_copy.loc[low, 'deficit'], high_copy.loc[high, 'surplus'])
            low_copy.loc[low, 'deficit'] -= change
            high_copy.loc[high, 'surplus'] -= change
            bikes_rebalanced += change  # update count of total bikes moved
            # Accumulate into the log of proposed bike movements.
            rebalancing_dict[stations_key] = rebalancing_dict.get(stations_key, 0) + change
            if low_copy.loc[low, 'deficit'] == 0:  # deficit covered; go to next low dock
                break
# Rank the proposed moves from the largest number of bikes to the smallest.
sorted_rebalancing = dict(sorted(rebalancing_dict.items(), key=lambda x: x[1], reverse=True))
# Drop moves that carry fewer bikes than the minimum cargo size.
filtered_rebalancing = {key: value for key, value in sorted_rebalancing.items() if value >= min_cargo_size}

# Enforce the cap on total bikes moved (staffing capacity). The check is made
# on the total *after* adding the move, so the plan never exceeds
# max_bikes_rebalanced; a move that does not fit is skipped and smaller moves
# further down the ranking may still be accepted.
filtered_bikes_rebalanced = 0
final_rebalancing_dict = {}
for k, v in filtered_rebalancing.items():
    if filtered_bikes_rebalanced + v > max_bikes_rebalanced:
        continue
    final_rebalancing_dict[k] = v
    filtered_bikes_rebalanced += v

# One row per accepted move: 'dock_ids' holds the (low dock_id, high dock_id)
# key from rebalancing_dict and 'num_bikes' the number of bikes to move.
# The items view is materialised to a list so the constructor receives a
# plain sequence of (key, value) pairs.
rebalancing_df = pd.DataFrame(list(final_rebalancing_dict.items()), columns=['dock_ids', 'num_bikes'])
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment