# hjgoode3/rebalance.py Secret

## rebalance.py
#read in dataframe of projected bike availabilities
#full = pd.read_csv('')

# --- User inputs: the month / day / hour slot to analyze ---
month = 12
num_day = 2
hour = 23
# --- End user inputs ---

# Keep only the rows of `full` whose month, num_day and hour columns all match
# the selected slot (i.e. filter by the specific date and time to analyze).
# NOTE(review): `full` is not assigned in the visible code (the read_csv above
# is commented out) - it must be loaded before this line runs.
query = full[
    full['month'].eq(month)
    & full['num_day'].eq(num_day)
    & full['hour'].eq(hour)
]

#Create function to calculate distance; Manhattan distance is the most accurate measure for NYC's grid-like street layout
def manhattan_distance(start_lat, start_lon, end_lat, end_lon):
    """Return the grid ("Manhattan") distance in miles between two coordinates.

    The trip is split into two legs that meet at the corner point
    ``(start_lat, end_lon)``: one leg from the start point to the corner and
    one leg from the end point to the corner. Each leg is measured with
    ``distance(...)`` and read through its ``.miles`` attribute; the result is
    the sum of both legs.

    NOTE(review): ``distance`` is not defined in the visible part of this file;
    presumably it is ``geopy.distance.distance`` - confirm the import.
    """
    corner = (start_lat, end_lon)
    start_to_corner = distance((start_lat, start_lon), corner).miles
    end_to_corner = distance((end_lat, end_lon), corner).miles
    return start_to_corner + end_to_corner

# --- User-tunable rebalancing parameters ---
# Docks whose avail_bikes_proportion is at or below this value count as "low".
low_bike_threshold = 1/3
# Docks whose avail_bikes_proportion is at or above this value count as "high".
high_bike_threshold = 2/3
# Two docks are only paired when manhattan_distance(...) between them is below this value.
max_distance = 2
# Limit on the total number of bikes moved in the final plan.
max_bikes_rebalanced = 500
# Moves smaller than this many bikes are dropped from the final plan.
min_cargo_size = 3
# --- End user-tunable parameters ---

# Take explicit copies of the filtered rows: new columns are added below, and
# assigning a column onto a filtered slice of `query` makes pandas emit a
# SettingWithCopyWarning (and leaves it unclear which frame is being modified).
data_low = query[query['avail_bikes_proportion'] <= low_bike_threshold].copy()  # docks with low bike availability
data_high = query[query['avail_bikes_proportion'] >= high_bike_threshold].copy()  # docks with high bike availability

# deficit = (low threshold - proportion) * tot_docks, rounded to a whole number of bikes
data_low['deficit'] = ((low_bike_threshold - data_low['avail_bikes_proportion']) * data_low['tot_docks']).round().astype('int')
# surplus = (proportion - high threshold) * tot_docks, rounded to a whole number of bikes
data_high['surplus'] = ((data_high['avail_bikes_proportion'] - high_bike_threshold) * data_high['tot_docks']).round().astype('int')

# Largest deficit / largest surplus first, so the matching loop visits them in that order.
data_low = data_low.sort_values(by='deficit', ascending=False)
data_high = data_high.sort_values(by='surplus', ascending=False)

# Greedy matching of low docks to high docks.
# For each low dock (in data_low order) walk the high docks (in data_high
# order) and take bikes from every high dock that is closer than max_distance
# until the low dock's deficit reaches zero or the high docks are exhausted.
#
# rebalancing_dict maps (low dock_id, high dock_id) -> number of bikes moved
# for that pair.
rebalancing_dict = {}

# Work on copies so the deficit / surplus columns of data_low / data_high stay untouched.
low_copy = data_low.copy()
high_copy = data_high.copy()

# NOTE(review): the scalar `.loc[label, column]` lookups below assume the index
# labels are unique - confirm for the frame `query` is built from.
bikes_rebalanced = 0  # running total of bikes moved across all pairs
for low in low_copy.index:
    if low_copy.loc[low, 'deficit'] == 0:  # nothing needed at this dock
        continue
    # The low dock's id and position do not change while scanning the high
    # docks, so look them up once instead of once per candidate pair.
    low_id = low_copy.loc[low, 'dock_id']
    low_lat = low_copy.loc[low, 'latitude']
    low_lon = low_copy.loc[low, 'longitude']
    for high in high_copy.index:
        surplus = high_copy.loc[high, 'surplus']
        if surplus == 0:  # this dock has nothing left to give
            continue
        # Only move bikes between docks that satisfy the distance constraint.
        if manhattan_distance(low_lat, low_lon,
                              high_copy.loc[high, 'latitude'],
                              high_copy.loc[high, 'longitude']) < max_distance:
            stations_key = (low_id, high_copy.loc[high, 'dock_id'])
            # Move as many bikes as the smaller of (remaining deficit, remaining surplus).
            change = min(low_copy.loc[low, 'deficit'], surplus)
            low_copy.loc[low, 'deficit'] -= change
            high_copy.loc[high, 'surplus'] -= change
            bikes_rebalanced += change
            # Accumulate into the log of proposed movements.
            rebalancing_dict[stations_key] = rebalancing_dict.get(stations_key, 0) + change
            if low_copy.loc[low, 'deficit'] == 0:  # deficit covered; go to the next low dock
                break

# Order the proposed moves by number of bikes, largest first.
sorted_rebalancing = dict(sorted(rebalancing_dict.items(), key=lambda item: item[1], reverse=True))

# Drop moves that carry fewer than min_cargo_size bikes.
filtered_rebalancing = {key: value for key, value in sorted_rebalancing.items() if value >= min_cargo_size}

# Enforce the cap on the total number of bikes moved.
# The check is made on the total *after* adding the move, so the plan never
# exceeds max_bikes_rebalanced (checking the running total before adding let
# the last accepted move overshoot the cap). A move that does not fit is
# skipped; later, smaller moves may still fit in the remaining capacity.
filtered_bikes_rebalanced = 0
final_rebalancing_dict = {}
for k, v in filtered_rebalancing.items():
    if filtered_bikes_rebalanced + v > max_bikes_rebalanced:
        continue
    final_rebalancing_dict[k] = v
    filtered_bikes_rebalanced += v

# One row per accepted move: 'dock_ids' holds the (low dock_id, high dock_id)
# key, 'num_bikes' the number of bikes for that pair.
rebalancing_df = pd.DataFrame(list(final_rebalancing_dict.items()), columns=['dock_ids', 'num_bikes'])
	#read in dataframe of projected bike availabilities
	#full = pd.read_csv('')

	# --- User inputs: the month / day / hour slot to analyze ---
	month = 12
	num_day = 2
	hour = 23
	# --- End user inputs ---

	# Keep only the rows of `full` whose month, num_day and hour columns all match
	# the selected slot (i.e. filter by the specific date and time to analyze).
	# NOTE(review): `full` is not assigned in the visible code (the read_csv above
	# is commented out) - it must be loaded before this line runs.
	query = full[
		full['month'].eq(month)
		& full['num_day'].eq(num_day)
		& full['hour'].eq(hour)
	]

	#Create function to calculate distance; Manhattan distance is the most accurate measure for NYC's grid-like street layout
	def manhattan_distance(start_lat, start_lon, end_lat, end_lon):
		"""Return the grid ("Manhattan") distance in miles between two coordinates.

		The trip is split into two legs that meet at the corner point
		``(start_lat, end_lon)``: one leg from the start point to the corner and
		one leg from the end point to the corner. Each leg is measured with
		``distance(...)`` and read through its ``.miles`` attribute; the result is
		the sum of both legs.

		NOTE(review): ``distance`` is not defined in the visible part of this file;
		presumably it is ``geopy.distance.distance`` - confirm the import.
		"""
		corner = (start_lat, end_lon)
		start_to_corner = distance((start_lat, start_lon), corner).miles
		end_to_corner = distance((end_lat, end_lon), corner).miles
		return start_to_corner + end_to_corner

	# --- User-tunable rebalancing parameters ---
	# Docks whose avail_bikes_proportion is at or below this value count as "low".
	low_bike_threshold = 1/3
	# Docks whose avail_bikes_proportion is at or above this value count as "high".
	high_bike_threshold = 2/3
	# Two docks are only paired when manhattan_distance(...) between them is below this value.
	max_distance = 2
	# Limit on the total number of bikes moved in the final plan.
	max_bikes_rebalanced = 500
	# Moves smaller than this many bikes are dropped from the final plan.
	min_cargo_size = 3
	# --- End user-tunable parameters ---

	# Take explicit copies of the filtered rows: new columns are added below, and
	# assigning a column onto a filtered slice of `query` makes pandas emit a
	# SettingWithCopyWarning (and leaves it unclear which frame is being modified).
	data_low = query[query['avail_bikes_proportion'] <= low_bike_threshold].copy()  # docks with low bike availability
	data_high = query[query['avail_bikes_proportion'] >= high_bike_threshold].copy()  # docks with high bike availability

	# deficit = (low threshold - proportion) * tot_docks, rounded to a whole number of bikes
	data_low['deficit'] = ((low_bike_threshold - data_low['avail_bikes_proportion']) * data_low['tot_docks']).round().astype('int')
	# surplus = (proportion - high threshold) * tot_docks, rounded to a whole number of bikes
	data_high['surplus'] = ((data_high['avail_bikes_proportion'] - high_bike_threshold) * data_high['tot_docks']).round().astype('int')

	# Largest deficit / largest surplus first, so the matching loop visits them in that order.
	data_low = data_low.sort_values(by='deficit', ascending=False)
	data_high = data_high.sort_values(by='surplus', ascending=False)

	# Greedy matching of low docks to high docks.
	# For each low dock (in data_low order) walk the high docks (in data_high
	# order) and take bikes from every high dock that is closer than max_distance
	# until the low dock's deficit reaches zero or the high docks are exhausted.
	#
	# rebalancing_dict maps (low dock_id, high dock_id) -> number of bikes moved
	# for that pair.
	rebalancing_dict = {}

	# Work on copies so the deficit / surplus columns of data_low / data_high stay untouched.
	low_copy = data_low.copy()
	high_copy = data_high.copy()

	# NOTE(review): the scalar `.loc[label, column]` lookups below assume the index
	# labels are unique - confirm for the frame `query` is built from.
	bikes_rebalanced = 0  # running total of bikes moved across all pairs
	for low in low_copy.index:
		if low_copy.loc[low, 'deficit'] == 0:  # nothing needed at this dock
			continue
		# The low dock's id and position do not change while scanning the high
		# docks, so look them up once instead of once per candidate pair.
		low_id = low_copy.loc[low, 'dock_id']
		low_lat = low_copy.loc[low, 'latitude']
		low_lon = low_copy.loc[low, 'longitude']
		for high in high_copy.index:
			surplus = high_copy.loc[high, 'surplus']
			if surplus == 0:  # this dock has nothing left to give
				continue
			# Only move bikes between docks that satisfy the distance constraint.
			if manhattan_distance(low_lat, low_lon,
					high_copy.loc[high, 'latitude'],
					high_copy.loc[high, 'longitude']) < max_distance:
				stations_key = (low_id, high_copy.loc[high, 'dock_id'])
				# Move as many bikes as the smaller of (remaining deficit, remaining surplus).
				change = min(low_copy.loc[low, 'deficit'], surplus)
				low_copy.loc[low, 'deficit'] -= change
				high_copy.loc[high, 'surplus'] -= change
				bikes_rebalanced += change
				# Accumulate into the log of proposed movements.
				rebalancing_dict[stations_key] = rebalancing_dict.get(stations_key, 0) + change
				if low_copy.loc[low, 'deficit'] == 0:  # deficit covered; go to the next low dock
					break

	# Order the proposed moves by number of bikes, largest first.
	sorted_rebalancing = dict(sorted(rebalancing_dict.items(), key=lambda item: item[1], reverse=True))

	# Drop moves that carry fewer than min_cargo_size bikes.
	filtered_rebalancing = {key: value for key, value in sorted_rebalancing.items() if value >= min_cargo_size}

	# Enforce the cap on the total number of bikes moved.
	# The check is made on the total *after* adding the move, so the plan never
	# exceeds max_bikes_rebalanced (checking the running total before adding let
	# the last accepted move overshoot the cap). A move that does not fit is
	# skipped; later, smaller moves may still fit in the remaining capacity.
	filtered_bikes_rebalanced = 0
	final_rebalancing_dict = {}
	for k, v in filtered_rebalancing.items():
		if filtered_bikes_rebalanced + v > max_bikes_rebalanced:
			continue
		final_rebalancing_dict[k] = v
		filtered_bikes_rebalanced += v

	# One row per accepted move: 'dock_ids' holds the (low dock_id, high dock_id)
	# key, 'num_bikes' the number of bikes for that pair.
	rebalancing_df = pd.DataFrame(list(final_rebalancing_dict.items()), columns=['dock_ids', 'num_bikes'])