Last active
June 26, 2017 10:45
-
-
Save saliksyed/f23df53e891116def34a96e1a7068130 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import csv

import matplotlib.pyplot as plt
import numpy as np
# Chart the countries whose airports have the highest average altitude.
#
# The script reads a comma-separated airport file, groups the altitudes by
# country, averages each group, and draws the top entries as a horizontal
# bar chart.  Written for Python 3.

# Default input file and the columns read from each row.  The indices are the
# ones the data layout is assumed to use (column 3 = country, column 8 =
# altitude) -- NOTE(review): confirm against the actual airports.dat in use.
AIRPORTS_PATH = "airports.dat"
COUNTRY_COLUMN = 3
ALTITUDE_COLUMN = 8
TOP_N = 25


def parse_airports(lines):
    """Parse comma-separated airport rows into country/altitude records.

    The rows are read with the ``csv`` module rather than ``str.split(",")``
    so that a quoted field containing a comma (for example an airport name)
    does not shift the columns, and so that the surrounding quotes are
    removed from the country name.

    Args:
        lines: An iterable of text lines, e.g. an open text file.

    Returns:
        A list of dicts with the keys ``"airport_country"`` (str) and
        ``"altitude"`` (float), one per non-blank row, in input order.

    Raises:
        IndexError: If a non-blank row has too few columns.
        ValueError: If the altitude column is not a number.
    """
    records = []
    for row in csv.reader(lines):
        # Blank lines (such as a trailing newline at the end of the file)
        # carry no data; skip them instead of failing on a missing column.
        if not any(field.strip() for field in row):
            continue
        records.append({
            "airport_country": row[COUNTRY_COLUMN],
            "altitude": float(row[ALTITUDE_COLUMN]),
        })
    return records


def average_altitude_by_country(records):
    """Return a dict mapping each country to the mean altitude of its airports.

    Args:
        records: An iterable of dicts as produced by ``parse_airports``.

    Returns:
        A dict ``{country_name: average_altitude}`` with plain ``float``
        values.  Countries appear in the order they were first seen.
    """
    altitudes_by_country = {}
    for record in records:
        altitudes_by_country.setdefault(
            record["airport_country"], []
        ).append(record["altitude"])
    return {
        country: float(np.average(altitudes))
        for country, altitudes in altitudes_by_country.items()
    }


def top_countries(averages, limit=TOP_N):
    """Return the ``limit`` countries with the highest average altitude.

    ``averages.items()`` yields ``(country, average)`` pairs -- the same
    shape ``zip(['a', 'b'], [1, 2])`` would give: ``[('a', 1), ('b', 2)]``.
    The pairs are sorted with a key function that picks the second element
    (the average), ``reverse=True`` puts the highest value first, and the
    slice keeps only the first ``limit`` pairs.

    Args:
        averages: A dict ``{country_name: average_altitude}``.
        limit: Maximum number of pairs to return.

    Returns:
        A list of ``(country_name, average_altitude)`` tuples, highest
        average first.  Empty if ``averages`` is empty.
    """
    ranked = sorted(averages.items(), key=lambda pair: pair[1], reverse=True)
    return ranked[:limit]


def plot_top_countries(ranking):
    """Draw a horizontal bar chart of ``(country, average_altitude)`` pairs.

    Args:
        ranking: A list of ``(country_name, average_altitude)`` tuples in the
            order they should appear from top to bottom.
    """
    # fig = the figure containing the visualization
    # ax  = the axes that are attached to the visualization
    fig, ax = plt.subplots()

    # Break the pairs back up into two parallel lists.
    countries = [country for country, _ in ranking]
    average_altitudes = [average for _, average in ranking]

    # "arange" returns evenly spaced integer positions, one per bar.
    y_pos = np.arange(len(ranking))

    # One bar per country; the length of the bar is the average altitude.
    ax.barh(y_pos, average_altitudes, align='center', color='green')

    # Put a tick at every bar and label it with the country name.
    ax.set_yticks(y_pos)
    ax.set_yticklabels(countries)
    ax.invert_yaxis()  # labels read top-to-bottom

    ax.set_xlabel('Average altitude')
    ax.set_title('Countries with the highest average altitude of airports')
    plt.show()


def main(path=AIRPORTS_PATH):
    """Read the airport file at ``path`` and show the top-countries chart.

    Args:
        path: Location of the comma-separated airport data file.
    """
    # "with" closes the file even if parsing fails.  errors="ignore" drops
    # bytes that are not valid UTF-8 instead of raising; newline="" is what
    # the csv module asks for when it is handed a file object.
    with open(path, encoding="utf-8", errors="ignore", newline="") as handle:
        records = parse_airports(handle)

    averages = average_altitude_by_country(records)
    plot_top_countries(top_countries(averages))


if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment