Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
Population projections
# -*- coding: utf-8 -*-
"""
Created on Mon Aug 17 10:53:51 2015
Population projections with low-discrepancy sequence based colour scheme.
http://www.storytellingwithdata.com/blog/2015/8/3/visualization-challenge-world-population-forecast
@author: naught101
"""
import pandas as pd
import seaborn as sns
import pylab as plt
import sobol_seq as ss
import numpy as np
import matplotlib as mpl
def rank_argsort(array):
    """Return the zero-based rank of every element of ``array``.

    Calling ``argsort`` on the result of ``argsort`` turns the sorting
    permutation into ranks: the smallest element gets 0, the next
    smallest 1, and so on.
    """
    order = array.argsort()
    return order.argsort()
# Load country data
# ``DataFrame.from_csv`` was removed in pandas 1.0; ``read_csv`` with
# ``index_col=0, parse_dates=True`` reproduces its defaults.
cntry_data_wide = pd.read_csv(
    './UN population forecasts from Economist charts - Country.csv',
    index_col=0, parse_dates=True)
# Reshape to long format: the listed identifier columns are kept, and every
# remaining column becomes one ('year', 'population') row.
cntry_data = pd.melt(
    cntry_data_wide,
    id_vars=['Variant', 'Region', 'Country', 'Notes', 'Country code'],
    var_name='year', value_name='population')
# Calculate means over time - for sorting later
# (plain column selection replaces ``.ix``, which was removed in pandas 1.0)
cntry_means = cntry_data[['Country', 'population']].groupby('Country').mean()
cntry_means = cntry_means.join(
    cntry_data_wide[['Country', 'Region']].set_index('Country'))
# Rank countries within their region: 0 is the largest mean population.
# group_keys=False keeps the result indexed by country only (newer pandas
# would otherwise prepend the region key), so it aligns with cntry_means.
cntry_means['ranks'] = (
    cntry_means.groupby('Region', group_keys=False)['population']
    .apply(lambda x: len(x) - 1 - rank_argsort(x)))
rgn_means = cntry_data[['Region', 'population']].groupby('Region').mean()
# Rank regions the same way (0 = largest).  The offset is derived from the
# number of regions rather than hard-coded, matching the per-country formula.
rgn_means['ranks'] = len(rgn_means) - 1 - rank_argsort(rgn_means['population'])
# generate Sobol-sorted colours
# Evenly spaced base hues (one more than the number of regions), with fixed
# saturation and value of 0.8.
hues = np.linspace(0, 1, rgn_means.shape[0] + 1, endpoint=False)
region_hsv = np.array([[hue, 0.8, 0.8] for hue in hues])
# One 3-component Sobol point per possible within-region rank, centred on
# zero and scaled to at most +/-0.05 in hue and +/-0.1 in saturation/value.
colour_perturbations = np.asarray(
    ss.i4_sobol_generate(3, int(cntry_means['ranks'].max() + 1)))
colour_perturbations = (colour_perturbations - 0.5) * np.array([0.1, 0.2, 0.2])
# Region colour: the base HSV triple selected by the region's rank.
# Plain integer arrays are used for the numpy fancy indexing.
region_ranks = rgn_means['ranks'].values
rgn_means['colour'] = [
    mpl.colors.rgb2hex(rgb)
    for rgb in mpl.colors.hsv_to_rgb(region_hsv[region_ranks, :])]
# Country colour: its region's base HSV plus the perturbation selected by
# the country's within-region rank, wrapped back into [0, 1).
# ``.loc`` replaces ``.ix``, which was removed in pandas 1.0.
country_region_ranks = rgn_means.loc[
    cntry_means['Region'].values, 'ranks'].values
country_hsv = np.mod(
    region_hsv[country_region_ranks, :]
    + colour_perturbations[cntry_means['ranks'].values, :],
    1)
cntry_means['colour'] = [
    mpl.colors.rgb2hex(rgb) for rgb in mpl.colors.hsv_to_rgb(country_hsv)]
# Attach each country's rank and colour to the long-format rows by looking
# the country up in cntry_means.  The columns are assigned one at a time,
# which works on every pandas version.
country_attrs = cntry_data[['Country']].merge(
    cntry_means.reset_index(), on='Country')
cntry_data['ranks'] = country_attrs['ranks']
cntry_data['colours'] = country_attrs['colour']
# ``DataFrame.sort`` was removed in pandas 0.20; ``sort_values`` replaces it.
cntry_data = cntry_data.sort_values(['Region', 'year', 'ranks'])
# Plot data
# Order countries by region, then by within-region rank (rank 0 first).
# ``sort_values`` replaces ``DataFrame.sort`` (removed in pandas 0.20).
sorted_means = cntry_means.sort_values(['Region', 'ranks'])
countries = list(sorted_means.index)
x = np.arange(2015, 2101)
# One row per year, one column per country (``.loc`` replaces ``.ix``).
y = cntry_data.pivot(index='year', columns='Country', values='population')
# Scale by 1e6; the y-axis label reports billions, so the source values are
# presumably in thousands - TODO confirm against the CSV.
y = y.loc[:, countries] / 1e6
# Colours in the same country order as the columns of y.
colours = sorted_means['colour']
# Extra keyword arguments forwarded to stackplot: no outlines on the areas.
fbk = {'lw': 0.0, 'edgecolor': None}
# Plot!
# Stacked areas, one per country; stackplot expects one row per series.
plt.stackplot(x, y.T, colors=colours, **fbk)
plt.xlim((2015, 2100))
plt.ylabel('Population - billions', fontsize=14)
plt.xlabel('Year', fontsize=14)
plt.title('UN population projections 2015-2100', fontsize=16, fontweight='bold')
# One legend entry per region, in reverse alphabetical order of region name.
# ``sort_index`` replaces the argument-less ``DataFrame.sort`` (removed in
# pandas 0.20), which sorted by index label.
patches = [
    mpl.patches.Patch(color=row['colour'], label=region)
    for region, row in rgn_means.sort_index(ascending=False).iterrows()]
plt.legend(handles=patches, loc='upper left')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.