Created
October 6, 2016 05:48
-
-
Save ivanku/da93b609046a83df1a04d2099680ddb0 to your computer and use it in GitHub Desktop.
Are People in Colder Countries Taller? Code for downloading data and creating a visualization of average human male height as a function of country average annual temperature.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
using Requests
using DataFrames
using ExcelReaders
using Plotly
using Colors

# Locations of the four source datasets (heights, temperatures,
# region/income classification, population).
heightsUrl = "http://www.ncdrisc.org/downloads/height/NCD_RisC_eLife_2016_height_age18_countries.csv";
tempsUrl = "http://databank.worldbank.org/data/download/catalog/cckp_historical_data_0.xls";
regionIncomeUrl = "http://databank.worldbank.org/data/download/site-content/CLASS.xls";
populationUrl = "http://api.worldbank.org/v2/en/indicator/SP.POP.TOTL?downloadformat=excel";

# Heights: fetched over HTTP with a 30 second timeout and parsed as a table
# straight from the in-memory response body.
heights = get(heightsUrl; timeout = 30.0)
# Stop immediately unless the server answered with status 200.
heights.status == 200 || error("Error downloading heights data");
heightsData = readtable(IOBuffer(heights.data));

# Temperatures: downloaded to a local file, then the whole
# "Country_temperatureCRU" sheet is read into a DataFrame.
tempsFile = download(tempsUrl);
tempsData = readxlsheet(DataFrame, tempsFile, "Country_temperatureCRU");

# Region and income: a fixed cell range is read without a header row, so the
# columns get generated names.
regionIncomeFile = download(regionIncomeUrl);
regionIncomeData = readxl(
    DataFrame,
    regionIncomeFile,
    "List of economies!C7:G224",
    header = false);

# Population: a fixed cell range of the "Data" sheet.
populationFile = download(populationUrl);
populationData = readxl(DataFrame, populationFile, "Data!A4:BH268");
# Reduce every dataset to the columns needed for the chart and give them a
# common :country key so they can be joined.

# Heights: keep only the rows for men in the latest available birth year.
latestBirthYear = maximum(heightsData[:Year_of_birth]);
isLatestMaleCohort = (heightsData[:Sex] .== "Men") & (heightsData[:Year_of_birth] .== latestBirthYear);
heightsData = heightsData[isLatestMaleCohort, [:ISO, :Mean_height_cm_]];
rename!(heightsData, Dict(:ISO => :country, :Mean_height_cm_ => :meanHeight));

# Temperatures: country code and annual temperature.
tempsData = tempsData[:, [:ISO_3DIGIT, :Annual_temp]];
rename!(tempsData, Dict(:ISO_3DIGIT => :country, :Annual_temp => :temperature));

# Region/income: the sheet was read without a header, so columns are x1..x5.
regionIncomeData = regionIncomeData[:, [:x1, :x2, :x4, :x5]];
rename!(
    regionIncomeData,
    Dict(:x1 => :countryName, :x2 => :country, :x4 => :region, :x5 => :wealth));

# Population: country code and the 2015 column (names are not valid
# identifiers, hence the explicit symbol construction).
countryCodeColumn = symbol("Country Code");
year2015Column = symbol("2015");
populationData = populationData[:, [countryCodeColumn, year2015Column]];
rename!(populationData, Dict(year2015Column => :population, countryCodeColumn => :country));

# Join everything on :country to get one plottable data frame; rows with a
# missing population are dropped right after the first join.
df = join(populationData, heightsData, on = :country);
hasPopulation = isna(df[:population]) .== false;
df = df[hasPopulation, :];
df = join(join(df, tempsData, on = :country), regionIncomeData, on = :country);

# fix Argentina - should be upper-middle income
isArgentina = df[:country] .== "ARG";
df[isArgentina, :wealth] = "Upper middle income";
# plot chart
# Chart layout: legend shown, hover picks the closest point, and both axes are
# titled with their zero lines hidden.
layout = Layout(
    showlegend = true,
    hovermode = "closest",
    xaxis = Dict(:title => "Average Annual Temperature, C", :zeroline => false),
    yaxis = Dict(:title => "Average Male Height, cm", :zeroline => false));

# Marker sizes: each population is scaled relative to the largest one, so the
# largest country gets a size of 10000. The maximum is computed once up front
# instead of once per row inside the closure.
maxPopulation = maximum(df[:population]);
df[:sizes] = map(x -> 10000 * x / maxPopulation, df[:population]);

# Unweighted means over the joined rows; used to position the two
# "World Average" reference lines.
worldAvgTemp = mean(df[:temperature]);
worldAvgHeight = mean(df[:meanHeight]);
# Build one thin grey "World Average" guide line through the given end points.
# Only the first point carries the label; each call returns a fresh trace.
worldAverageLine(xs, ys) = scatter(
    x = xs,
    y = ys,
    mode = "lines+text",
    showlegend = false,
    text = ["World Average", ""],
    textposition = "bottom",
    line = Dict(:color => "rgba(60, 60, 60, 0.5)", :width => 1));

# Text-only trace labelling the four corners of the plotted area.
quadrantLabels = scatter(
    x = [-10, -10, 30, 30],
    y = [150, 190, 150, 190],
    mode = "text",
    showlegend = false,
    text = ["COLDER & SHORTER", "COLDER & TALLER", "HOTTER & SHORTER", "HOTTER & TALLER"]);

# Background traces, in drawing order: horizontal line at the average height,
# vertical line at the average temperature, then the corner labels.
data = [
    worldAverageLine([-10, 30], [worldAvgHeight, worldAvgHeight]),
    worldAverageLine([worldAvgTemp, worldAvgTemp], [150, 190]),
    quadrantLabels
];
# Marker colour for each income group. Channels are given as 8-bit values and
# normalised by 255 (the largest 8-bit value) so they map back to exactly the
# intended colours; dividing by 256 shifts some channels by one step.
wealthColors = Dict(
    "High income" => RGB(60/255, 186/255, 84/255),
    "Lower middle income" => RGB(244/255, 194/255, 13/255),
    "Low income" => RGB(219/255, 50/255, 54/255),
    "Upper middle income" => RGB(72/255, 133/255, 237/255)
);

# One marker trace per income group, so each group gets its own legend entry
# and colour. Marker area follows the precomputed :sizes column and hovering
# shows the country name only.
for wealth in unique(df[:wealth])
    wealthSegment = df[df[:wealth] .== wealth, :];
    push!(data,
        scatter(
            x = wealthSegment[:temperature],
            y = wealthSegment[:meanHeight],
            mode = "markers",
            name = wealth,
            marker = Dict(
                :color => wealthColors[wealth],
                :size => wealthSegment[:sizes],
                :sizemode => "area"),
            text = wealthSegment[:countryName],
            hoverinfo = "text"
        )
    );
end
# Label a hand-picked set of countries: each one gets a text-only trace placed
# at its own point, with the country name drawn to the right of the marker.
for isoCode in ["RUS", "CAN", "FIN", "USA", "CHN", "IND", "AFG", "AUS", "KOR", "NLD"]
    matched = df[df[:country] .== isoCode, :];
    label = scatter(
        x = matched[:temperature],
        y = matched[:meanHeight],
        mode = "text",
        text = matched[:countryName],
        showlegend = false,
        textposition = "right"
    );
    push!(data, label);
end

# Render all collected traces with the layout defined earlier.
my_plot = plot(data, layout)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment