Skip to content

Instantly share code, notes, and snippets.

@MrN00b0t
Created May 11, 2020 13:54
Show Gist options
  • Save MrN00b0t/99f4478b3287489b186b24a676c67d3b to your computer and use it in GitHub Desktop.
Save MrN00b0t/99f4478b3287489b186b24a676c67d3b to your computer and use it in GitHub Desktop.
Codecademy: Roller Coaster
import codecademylib3_seaborn
import pandas as pd
import matplotlib.pyplot as plt
# load rankings data here:
wood_coaster = pd.read_csv('Golden_Ticket_Award_Winners_Wood.csv')
steel_coaster = pd.read_csv('Golden_Ticket_Award_Winners_Steel.csv')
#Examine datasets
print(wood_coaster.head(5))
print(steel_coaster.head(5))
#Enumerated columns: Rank, Name, Park, Location, Supplier, Year Built, Points, Year of Rank
#Determine number of entries in each dataset:
print(len(wood_coaster))
print(len(steel_coaster))
#Each dataset contains 180 entries
#Determine unique suppliers - start by performing UNION on both datasets creating a new index
coaster = pd.concat([wood_coaster, steel_coaster]).reset_index()
coaster.rename(columns={'index': 'type_index'}, inplace= True)
#Display number of unique entries in the Supplier column (=46)
print(len(coaster.Supplier.unique()))
#Do some years include more rankings than others? (10 each from 2013-2015, 50 each from 2016-2018)
by_year = coaster.groupby('Year of Rank').Rank.count().reset_index()
print(by_year)
#Adding some utility functions to search for specific parameters
def find_city(frame, city_state):
search = lambda x: city_state in x
return frame.loc[frame['Location'].apply(search)]
def find_park(frame, park):
search = lambda x: park in x
return frame.loc[frame['Park'].apply(search)]
def find_ride(frame, ride):
search = lambda x: ride in x
return frame.loc[frame['Name'].apply(search)]
#Test functions
print(find_city(coaster, 'Ohio'))
print(find_park(coaster, 'Kings Island'))
print(find_ride(coaster, 'Boulder'))
# write function to plot rankings over time for 1 roller coaster here:
#Added second argument 'park' for case where two parks have coaster with same name
def ranking_plot(ride, park):
ride_name = find_ride(coaster, ride)
ride_data = find_park(ride_name, park)
plt.plot(ride_data['Year of Rank'], ride_data['Rank'], marker='o')
plt.title('Ride Ranking Over Time for ' + ride)
plt.xlabel('Year')
plt.ylabel('Ranking')
ax = plt.subplot()
ax.invert_yaxis()
plt.show()
#Test ranking_plot
ranking_plot('El Toro', 'Six Flags')
plt.clf()
# write function to plot rankings over time for 2 roller coasters here:
def comparative_ranking(ride1, park1, ride2, park2):
ride1_name = find_ride(coaster, ride1)
ride1_data = find_park(ride1_name, park1)
ride2_name = find_ride(coaster, ride2)
ride2_data = find_park(ride2_name, park2)
plt.plot(ride1_data['Year of Rank'], ride1_data['Rank'], marker='o')
plt.plot(ride2_data['Year of Rank'], ride2_data['Rank'], marker='o')
plt.title('Ride Ranking Over Time')
plt.xlabel('Year')
plt.ylabel('Ranking')
ax = plt.subplot()
ax.invert_yaxis()
plt.legend([ride1 + '@' + park1, ride2 + '@' + park2])
plt.show()
#Test comparative_ranking
comparative_ranking('El Toro', 'Six Flags', 'Boulder Dash', 'Compounce')
plt.clf()
# write function to plot top n rankings over time here:
def top_ranked (n, frame):
top_rides = frame[frame['Rank'] <= n]
legend = []
plt.figure(figsize=(10,10))
plt.title('Top 5 Ride Rankings Over Time', y = 0)
plt.xlabel('Year')
plt.ylabel('Ranking')
for name in set(top_rides['Name']):
rankings = top_rides[top_rides['Name'] == name]
ax = plt.subplot()
ax.plot(rankings['Year of Rank'], rankings['Rank'], marker = 'o')
ax.set_yticks(range(1, 2*n-2))
ax.set_yticklabels(range(1, n+1))
ax.invert_yaxis()
ax.xaxis.tick_top()
ax.xaxis.set_label_position('top')
legend.append(name)
plt.legend(legend, loc=4)
plt.show()
#Test function
top_ranked(5, wood_coaster)
plt.clf()
# load roller coaster data here:
captain_coaster = pd.read_csv('roller_coasters.csv')
#Inspect data
print(captain_coaster.head())
#Enumerated Columns: name, material_type, seating_type, speed, height, length, num_inversions, manufacturer, park, status
# write function to plot histogram of column values here:
def histogram(frame, column):
plt.figure(figsize=[10,10])
clean_frame = frame.dropna()
plt.hist(clean_frame[column], bins=30, normed = True)
plt.title('Histogram of Ride ' + column + ' Distribution')
plt.ylabel('Frequency')
plt.xlabel(column)
plt.show()
#Test function histogram()
histogram(captain_coaster, 'speed')
plt.clf()
# write function to plot inversions by coaster at a park here:
def inversions(frame, park):
park_data = frame[frame['park'] == park]
park_data = park_data.dropna()
park_data = park_data.sort_values('num_inversions', ascending=False)
plt.figure(figsize=[10,10])
ax = plt.subplot()
plt.bar(range(len(park_data['name'])), park_data['num_inversions'])
plt.title('Number of Inversions by ride at ' + str(park_data['park'].unique()).strip('[]\''))
plt.ylabel('No. Of Inversions')
ax.set_xticks(range(len(park_data['name'])))
ax.set_xticklabels(park_data['name'].tolist(), rotation='vertical')
plt.show()
#Test function inversions()
inversions(captain_coaster, 'Six Flags Great Adventure')
plt.clf()
# write function to plot pie chart of operating status here:
def pie_chart(frame):
operating = frame[frame['status'] == 'status.operating']
closed = frame[frame['status'] == 'status.closed.definitely']
pie_plot = [len(operating), len(closed)]
plt.pie(pie_plot, autopct='%0.1f%%', labels= ['Operating', 'Closed'])
plt.axis('equal')
plt.title('Proportion of Operating vs Closed Roller Coasters')
plt.show()
#Test function pie_chart()
pie_chart(captain_coaster)
plt.clf()
# write function to create scatter plot of any two numeric columns here:
def scatter_plot(frame, col1, col2):
clean_frame = frame.dropna()
plt.scatter(clean_frame[col1], clean_frame[col2])
plt.title('Distribution of Number of Inversions vs Speed of Coaster')
plt.xlabel('No. Of Inversions')
plt.ylabel('Speed')
plt.show()
#Test function scatter_plot()
scatter_plot(captain_coaster, 'num_inversions', 'speed')
plt.clf()
#Most popular seating type
def display_seating(frame):
clean_frame = frame.dropna()
seat_type = clean_frame['seating_type'].unique()
pie_plot = []
for i in seat_type:
value = clean_frame[clean_frame['seating_type'] == i]
pie_plot.append(len(value))
plt.figure(figsize=[10,10])
plt.pie(pie_plot, autopct='%0.1f%%')
plt.axis('equal')
plt.title('Distribution of Seat Types')
plt.legend(seat_type)
plt.show()
#Test function display_seating:
display_seating(captain_coaster)
#Effect of seating on mean value of another parameter
def seat_means(frame, parameter):
clean_frame = frame.dropna()
seat_type = clean_frame['seating_type'].unique()
bar_plot = []
for i in seat_type:
by_seat = clean_frame[clean_frame['seating_type'] == i]
value = by_seat[parameter].median()
bar_plot.append(value)
plt.figure(figsize=[10,10])
ax = plt.subplot()
plt.bar(range(len(seat_type)), bar_plot)
ax.set_xticks(range(len(seat_type)))
ax.set_xticklabels(seat_type, rotation='vertical')
plt.ylabel('Median '+ parameter)
plt.title('Median speed of coasters against seating type')
plt.show()
#Test function seat_means()
seat_means(captain_coaster, 'speed')
plt.clf()
#Display manufacturer data compared to whole for numeric parameter
def manufacturer (frame, manufacturer, parameter):
clean_frame = frame.dropna()
search = lambda x: manufacturer in x
manu_data = clean_frame.loc[clean_frame['manufacturer'].apply(search)].reset_index()
plt.figure(figsize=[10,10])
plt.hist(manu_data[parameter], bins=20, normed = True, label = manufacturer + ' Rides')
plt.hist(clean_frame[parameter], bins=20, histtype='step', linewidth =2, normed = True, label = 'All Rides')
plt.title('Histogram of Ride ' + parameter + ' Distribution for '+ manufacturer + ' Rides')
plt.ylabel('Frequency')
plt.xlabel(parameter)
plt.legend()
plt.show()
#test function manufacturer()
manufacturer(captain_coaster, 'Vekoma', 'num_inversions')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment