# Are People in Colder Countries Taller?
# Code for downloading the data and creating a visualization of average human male
# height as a function of each country's average annual temperature.
using Requests | |
using DataFrames | |
using ExcelReaders | |
using Plotly | |
using Colors | |
# Locations of the four input datasets.
heightsUrl = "http://www.ncdrisc.org/downloads/height/NCD_RisC_eLife_2016_height_age18_countries.csv";
tempsUrl = "http://databank.worldbank.org/data/download/catalog/cckp_historical_data_0.xls";
regionIncomeUrl = "http://databank.worldbank.org/data/download/site-content/CLASS.xls";
populationUrl = "http://api.worldbank.org/v2/en/indicator/SP.POP.TOTL?downloadformat=excel";

# Population height dataset: fetched over HTTP with a 30 second timeout and parsed
# straight from the response body. Anything other than a 200 response aborts the script.
heights = get(heightsUrl; timeout = 30.0)
heights.status == 200 || error("Error downloading heights data")
heightsData = readtable(IOBuffer(heights.data));

# Temperatures dataset: the whole "Country_temperatureCRU" sheet of the downloaded workbook.
tempsFile = download(tempsUrl);
tempsData = readxlsheet(DataFrame, tempsFile, "Country_temperatureCRU");

# Region and income dataset: a fixed cell range without a header row, so the columns
# receive generated names.
regionIncomeFile = download(regionIncomeUrl);
regionIncomeData = readxl(DataFrame, regionIncomeFile, "List of economies!C7:G224", header=false);

# Population dataset: a fixed cell range of the "Data" sheet.
populationFile = download(populationUrl);
populationData = readxl(DataFrame, populationFile, "Data!A4:BH268");
# Reduce every dataset to the columns needed for plotting and give them shared names;
# :country is the common key used for joining.

# Heights: men only, restricted to the latest birth year present in the dataset.
latestBirthYear = maximum(heightsData[:Year_of_birth]);
menRows = heightsData[:Sex] .== "Men";
latestRows = heightsData[:Year_of_birth] .== latestBirthYear;
heightsData = heightsData[menRows & latestRows, [:ISO, :Mean_height_cm_]];
rename!(heightsData, Dict(:Mean_height_cm_ => :meanHeight, :ISO => :country));

# Temperatures: country code and annual temperature.
tempsData = tempsData[:, [:ISO_3DIGIT, :Annual_temp]];
rename!(tempsData, Dict(:Annual_temp => :temperature, :ISO_3DIGIT => :country));

# Region/income: the sheet was read without a header, hence the generated x1..x5 names.
regionIncomeData = regionIncomeData[:, [:x1, :x2, :x4, :x5]];
rename!(regionIncomeData, Dict(:x5 => :wealth, :x4 => :region, :x2 => :country, :x1 => :countryName));

# Population: column names contain spaces/digits, so the symbols are built from strings.
countryCodeColumn = symbol("Country Code");
populationYearColumn = symbol("2015");
populationData = populationData[:, [countryCodeColumn, populationYearColumn]];
rename!(populationData, Dict(countryCodeColumn => :country, populationYearColumn => :population));
# Build the plottable data frame by joining all datasets on :country.
df = join(populationData, heightsData, on = :country);

# Drop rows whose population value is NA before joining the remaining datasets.
hasPopulation = isna(df[:population]) .== false;
df = df[hasPopulation, :];
df = join(join(df, tempsData, on = :country), regionIncomeData, on = :country);

# Manual correction: Argentina should be classified as upper-middle income.
isArgentina = df[:country] .== "ARG";
df[isArgentina, :wealth] = "Upper middle income";
# plot chart
# Builds a bubble chart from `df`: x = temperature, y = meanHeight, bubble area
# proportional to population, one trace (and colour) per income group, plus reference
# lines at the averages, corner captions and labels for a few selected countries.
layout = Layout(showlegend = true,
                hovermode = "closest",
                xaxis = Dict(:title => "Average Annual Temperature, C", :zeroline => false),
                yaxis = Dict(:title => "Average Male Height, cm", :zeroline => false));

# Bubble sizes are scaled so that the most populous country gets 10000.
# The maximum is computed once instead of being rescanned for every row.
maxPopulation = maximum(df[:population]);
df[:sizes] = map(x -> 10000 * x / maxPopulation, df[:population]);

# Unweighted means over the countries present in df; used for the reference lines.
worldAvgTemp = mean(df[:temperature]);
worldAvgHeight = mean(df[:meanHeight]);

# Shared style of the two "World Average" reference lines.
referenceLine = Dict(:color => "rgba(60, 60, 60, 0.5)", :width => 1);

data = [
    # horizontal line at the average height
    scatter(
        x = [-10, 30],
        y = [worldAvgHeight, worldAvgHeight],
        mode = "lines+text",
        showlegend = false,
        text = ["World Average", ""],
        textposition = "bottom",
        line = referenceLine),
    # vertical line at the average temperature
    scatter(
        x = [worldAvgTemp, worldAvgTemp],
        y = [150, 190],
        mode = "lines+text",
        showlegend = false,
        text = ["World Average", ""],
        textposition = "bottom",
        line = referenceLine),
    # captions in the four corners of the plotted area
    scatter(
        x = [-10, -10, 30, 30],
        y = [150, 190, 150, 190],
        mode = "text",
        showlegend = false,
        text = ["COLDER & SHORTER", "COLDER & TALLER", "HOTTER & SHORTER", "HOTTER & TALLER"])
];

# One colour per income group. Channels are given as 8-bit values and therefore
# normalised by 255 (the largest 8-bit value), not 256.
wealthColors = Dict(
    "High income"         => RGB(60/255, 186/255, 84/255),
    "Lower middle income" => RGB(244/255, 194/255, 13/255),
    "Low income"          => RGB(219/255, 50/255, 54/255),
    "Upper middle income" => RGB(72/255, 133/255, 237/255)
);

# One marker trace per income group so that every group gets its own legend entry.
for wealth in unique(df[:wealth])
    wealthSegment = df[df[:wealth] .== wealth, :];
    push!(data,
        scatter(
            x = wealthSegment[:temperature],
            y = wealthSegment[:meanHeight],
            mode = "markers",
            name = wealth,
            marker = Dict(:color => wealthColors[wealth],
                          :size => wealthSegment[:sizes],
                          :sizemode => "area"),
            text = wealthSegment[:countryName],
            hoverinfo = "text"
        )
    );
end

# annotate some countries: a text-only trace placed to the right of each bubble
for country in ["RUS", "CAN", "FIN", "USA", "CHN", "IND", "AFG", "AUS", "KOR", "NLD"]
    d = df[df[:country] .== country, :];
    push!(data,
        scatter(
            x = d[:temperature],
            y = d[:meanHeight],
            mode = "text",
            text = d[:countryName],
            showlegend = false,
            textposition = "right"
        )
    );
end

my_plot = plot(data, layout)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment