Skip to content

Instantly share code, notes, and snippets.

@saliksyed
Last active June 26, 2017 10:45
Show Gist options
  • Save saliksyed/f23df53e891116def34a96e1a7068130 to your computer and use it in GitHub Desktop.
Save saliksyed/f23df53e891116def34a96e1a7068130 to your computer and use it in GitHub Desktop.
import matplotlib.pyplot as plt
import numpy as np
import matplotlib.pyplot as plt
# Read the country and altitude of every airport listed in airports.dat.
# The file is opened in binary mode and each line is decoded explicitly so
# the same code runs on Python 2 and Python 3; bytes that are not valid
# UTF-8 are dropped ('ignore').
# NOTE(review): fields are split on every comma, so a quoted field that
# itself contains a comma would shift the column indices -- confirm the data
# never does this, or switch to the csv module.
final_data = []
with open("airports.dat", "rb") as airports_file:  # closed automatically
    for raw_row in airports_file:
        if not raw_row.strip():
            continue  # tolerate blank lines (e.g. a trailing newline)
        row_items = raw_row.decode("utf-8", "ignore").split(",")
        final_data.append({
            "airport_country": row_items[3],  # 4th field: country name
            "altitude": float(row_items[8]),  # 9th field: altitude
        })
# Group the altitudes by country: counts[country] is the list of altitudes
# of every airport record seen for that country.
counts = {}
for record in final_data:
    counts.setdefault(record["airport_country"], []).append(record["altitude"])

# Collapse each country's list of altitudes into its mean.
averages = {
    name: np.average(altitudes) for name, altitudes in counts.items()
}
# fig is the figure containing the visualization;
# ax is the axes attached to that figure, which we draw on.
fig, ax = plt.subplots()

#### HERE is the magic, step by step:
# averages.items() yields (country, average altitude) pairs, for example
# [('a', 1), ('b', 2), ('c', 3)].
# sorted() orders those pairs; the key function tells it which value to sort
# by -- here the average altitude, i.e. the second element of each pair --
# and reverse=True puts the highest average first. Slicing with [:25] then
# keeps only the top 25 countries.
# items() is used rather than iterkeys()/itervalues() because those methods
# exist only on Python 2 dictionaries.
top_countries = sorted(
    averages.items(), key=lambda pair: pair[1], reverse=True
)[:25]

# Split the (country, average) pairs back into two parallel lists.
countries = [pair[0] for pair in top_countries]
average_altitudes = [pair[1] for pair in top_countries]

# "arange" returns evenly spaced integers 0..N-1, one position per bar.
y_pos = np.arange(len(average_altitudes))

# Add horizontal bars at those positions; the length of each bar is the
# country's average airport altitude.
ax.barh(y_pos, average_altitudes, align='center', color='green')

# Put one tick per bar and label it with the country name.
ax.set_yticks(y_pos)
ax.set_yticklabels(countries)
ax.invert_yaxis()  # labels read top-to-bottom, highest average first

# Label the x axis, title the chart, and display it.
ax.set_xlabel('Average altitude')
ax.set_title('Countries with the highest average altitude of airports')
plt.show()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment