Population projections
# -*- coding: utf-8 -*-
"""
Created on Mon Aug 17 10:53:51 2015
Population projections with low-discrepancy sequence based colour scheme.
http://www.storytellingwithdata.com/blog/2015/8/3/visualization-challenge-world-population-forecast
@author: naught101
"""
import pandas as pd | |
import seaborn as sns | |
import pylab as plt | |
import sobol_seq as ss | |
import numpy as np | |
import matplotlib as mpl | |
def rank_argsort(array):
    """Return the zero-based rank of every element of *array*.

    The rank is obtained by applying ``argsort`` twice: the first call
    gives the ordering of the elements, the second gives the position of
    each element within that ordering (0 for the smallest value).

    *array* must provide an ``argsort`` method whose result also provides
    ``argsort`` (e.g. a NumPy array or a pandas Series).  The order of
    tied values is whatever ``argsort`` produces.
    """
    ordering = array.argsort()
    return ordering.argsort()
# Load country data.
# `DataFrame.from_csv` has been removed from pandas; `read_csv` with
# `index_col=0, parse_dates=True` is its documented equivalent, so the first
# CSV column still becomes the index exactly as before.
cntry_data_wide = pd.read_csv(
    './UN population forecasts from Economist charts - Country.csv',
    index_col=0, parse_dates=True)
# Reshape wide -> long: every column that is not one of the identifier
# columns below is unpivoted into a ('year', 'population') pair
# (presumably one source column per projection year - confirm against the CSV).
cntry_data = pd.melt(
    cntry_data_wide,
    ['Variant', 'Region', 'Country', 'Notes', 'Country code'],
    var_name='year', value_name='population')
# Calculate means over time - used for sorting later.
# Plain column selection replaces the `.ix` indexer, which has been removed
# from pandas.
cntry_means = cntry_data[['Country', 'population']].groupby('Country').mean()
cntry_means = cntry_means.join(
    cntry_data_wide[['Country', 'Region']].set_index('Country'))
# Rank countries within their region; rank 0 is the largest mean population.
# group_keys=False keeps the result indexed by Country alone: pandas >= 2.0
# would otherwise prepend the Region key to the index and the assignment
# would no longer align with cntry_means.
cntry_means['ranks'] = (
    cntry_means.groupby('Region', group_keys=False)['population']
    .apply(lambda x: len(x) - 1 - rank_argsort(x)))
rgn_means = cntry_data[['Region', 'population']].groupby('Region').mean()
# Rank regions the same way (0 = largest).  The offset is derived from the
# number of regions instead of a hard-coded 5, which was only correct for
# exactly six regions.
rgn_means['ranks'] = (
    len(rgn_means) - 1 - rank_argsort(rgn_means['population']))
# Generate Sobol-sorted colours.
# Evenly spaced hues in [0, 1); note this produces one more hue than there
# are regions, of which only the first len(rgn_means) are indexed below.
hues = np.linspace(0, 1, rgn_means.shape[0] + 1, endpoint=False)
# Base HSV colour per hue slot, with saturation and value fixed at 0.8.
region_hsv = np.array([[hue, 0.8, 0.8] for hue in hues])
# One 3-component Sobol point for every within-region rank that occurs.
colour_perturbations = ss.i4_sobol_generate(
    3, int(cntry_means['ranks'].max()) + 1)
# Centre the points on zero and scale each HSV channel, giving offsets of at
# most +/-0.05 in hue and +/-0.1 in saturation and value.  Broadcasting
# applies the per-channel scale to every row.
colour_perturbations = (colour_perturbations - 0.5) * [0.1, 0.2, 0.2]
# Region colour: the base colour in the slot given by the region's rank.
rgn_means['colour'] = [
    mpl.colors.rgb2hex(rgb)
    for rgb in mpl.colors.hsv_to_rgb(
        region_hsv[rgn_means['ranks'].values, :])]
# Country colour: its region's base colour plus the perturbation selected by
# the country's rank within that region.  `.loc` replaces the removed `.ix`
# indexer for the label-based lookup of each country's region rank.
cntry_region_ranks = rgn_means.loc[cntry_means['Region'].values, 'ranks']
# np.mod wraps every channel into [0, 1), e.g. a hue pushed below zero.
cntry_hsv = np.mod(
    region_hsv[cntry_region_ranks.values, :] +
    colour_perturbations[cntry_means['ranks'].values, :], 1)
cntry_means['colour'] = [
    mpl.colors.rgb2hex(rgb) for rgb in mpl.colors.hsv_to_rgb(cntry_hsv)]
# Attach each country's rank and colour to its rows in the long table.
# Looking the values up by country name (cntry_means is indexed by Country)
# avoids relying on a merge result happening to line up, row for row, with
# cntry_data's index.
cntry_data['ranks'] = cntry_data['Country'].map(cntry_means['ranks'])
cntry_data['colours'] = cntry_data['Country'].map(cntry_means['colour'])
# `DataFrame.sort` has been removed from pandas; `sort_values` replaces it.
cntry_data = cntry_data.sort_values(['Region', 'year', 'ranks'])
# Plot data
# Stacking order: by region, then by rank within the region.  The sorted
# frame is computed once and reused for both the column order and the
# colours so the two cannot drift apart.  (`sort_values` replaces the
# removed `DataFrame.sort`.)
ordered_means = cntry_means.sort_values(['Region', 'ranks'])
countries = list(ordered_means.index)
x = np.arange(2015, 2101)
# One row per year, one column per country.
y = cntry_data[['Country', 'year', 'population']].pivot(
    index='year', columns='Country', values='population')
# `.loc` replaces the removed `.ix` indexer.  The axis label below says
# billions, which holds if the source figures are in thousands - TODO confirm.
y = y.loc[:, countries] / 1e6
colours = ordered_means['colour']
fbk = {'lw': 0.0, 'edgecolor': None}
# Plot!
plt.stackplot(x, y.T, colors=colours, **fbk)
plt.xlim((2015, 2100))
plt.ylabel('Population - billions', fontsize=14)
plt.xlabel('Year', fontsize=14)
plt.title('UN population projections 2015-2100', fontsize=16, fontweight='bold')
# Legend: one patch per region, listed in descending order of region name.
# `DataFrame.sort()` without columns sorted by index, so `sort_index` is the
# like-for-like replacement.
patches = [
    mpl.patches.Patch(color=row['colour'], label=region)
    for region, row in rgn_means.sort_index(ascending=False).iterrows()]
plt.legend(handles=patches, loc='upper left')
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment