# saliksyed/average_altitudes.py

## average_altitudes.py
import csv

import matplotlib.pyplot as plt
import matplotlib.pyplot as plt
import numpy as np


# Plot the countries whose airports have the highest average altitude.
#
# The input is a comma-separated "airports.dat" file in the working directory
# (presumably the OpenFlights airport export -- confirm against the data
# source). Only two columns are used: the country name and the altitude.
# This script targets Python 3.

AIRPORTS_FILE = "airports.dat"
COUNTRY_COLUMN = 3   # zero-based index of the country name in each row
ALTITUDE_COLUMN = 8  # zero-based index of the airport altitude in each row
TOP_N = 25           # number of countries shown in the chart


def parse_airport_rows(lines):
    """Extract ``(country, altitude)`` pairs from CSV-formatted airport rows.

    Args:
        lines: Iterable of text lines, one airport per line (an open text
            file works).

    Returns:
        A list of ``(country_name, altitude)`` tuples, with the altitude
        converted to ``float``. Rows too short to contain an altitude column
        (for example blank lines) are skipped.

    Raises:
        ValueError: If the altitude column of a row is not numeric.
    """
    records = []
    # csv.reader honours quoting, so a comma inside a quoted field does not
    # shift the columns and the country name comes back without its quotes.
    for row in csv.reader(lines):
        if len(row) <= ALTITUDE_COLUMN:
            # Blank or truncated line: there is no altitude to read.
            continue
        records.append((row[COUNTRY_COLUMN], float(row[ALTITUDE_COLUMN])))
    return records


def average_altitude_by_country(records):
    """Return a dict mapping each country to the mean altitude of its airports.

    Args:
        records: Iterable of ``(country_name, altitude)`` pairs.

    Returns:
        ``{country_name: average_altitude}`` with plain ``float`` values.
    """
    # Group the altitudes per country first, then average each group.
    altitudes_by_country = {}
    for country, altitude in records:
        altitudes_by_country.setdefault(country, []).append(altitude)
    return {
        country: float(np.average(altitudes))
        for country, altitudes in altitudes_by_country.items()
    }


def top_countries(averages, limit=TOP_N):
    """Return the ``limit`` countries with the highest average altitude.

    Args:
        averages: Dict of ``{country_name: average_altitude}``.
        limit: Maximum number of entries to return.

    Returns:
        A list of ``(country_name, average_altitude)`` tuples sorted from the
        highest average to the lowest.
    """
    # Each item is (country, average); sort on the average, largest first.
    ranked = sorted(averages.items(), key=lambda item: item[1], reverse=True)
    return ranked[:limit]


def plot_average_altitudes(ranked):
    """Draw a horizontal bar chart of ``(country, average_altitude)`` pairs.

    Args:
        ranked: Sequence of ``(country_name, average_altitude)`` tuples in the
            order they should appear from top to bottom.
    """
    countries = [country for country, _ in ranked]
    average_altitudes = [altitude for _, altitude in ranked]

    # The figure object is not needed; only the axes are drawn on.
    _fig, ax = plt.subplots()

    # One evenly spaced bar position per country.
    y_pos = np.arange(len(ranked))

    # The length of each bar is the country's average airport altitude.
    ax.barh(y_pos, average_altitudes, align='center', color='green')

    # Label each bar with its country name.
    ax.set_yticks(y_pos)
    ax.set_yticklabels(countries)

    ax.invert_yaxis()  # labels read top-to-bottom

    ax.set_xlabel('Average altitude')
    ax.set_title('Countries with the highest average altitude of airports')

    plt.show()


def main():
    """Read the airport file, rank countries by average altitude and plot them."""
    # errors="ignore" drops undecodable bytes, as the original decode call did;
    # newline="" is what the csv module expects from a file object.
    with open(AIRPORTS_FILE, "r", encoding="utf-8", errors="ignore",
              newline="") as handle:
        records = parse_airport_rows(handle)

    averages = average_altitude_by_country(records)
    plot_average_altitudes(top_countries(averages))


if __name__ == "__main__":
    main()