Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
Are People in Colder Countries Taller? Code for downloading data and creating a visualization of average human male height as a function of country average annual temperature.
using Requests
using DataFrames
using ExcelReaders
using Plotly
using Colors
# read population height dataset
# The CSV is fetched over HTTP (30 second timeout) and parsed straight from the
# response body, without going through a temporary file.
heightsUrl = "http://www.ncdrisc.org/downloads/height/NCD_RisC_eLife_2016_height_age18_countries.csv";
heights = get(heightsUrl; timeout = 30.0)
# guard: anything other than HTTP 200 means the body is not the table we asked for
heights.status == 200 || error("Error downloading heights data");
heightsData = readtable(IOBuffer(heights.data));
# read temperatures dataset
# download() stores the workbook in a local file and returns its path; the sheet
# named "Country_temperatureCRU" is then loaded as a DataFrame. Its ISO_3DIGIT and
# Annual_temp columns are the ones selected further down in this script.
tempsUrl = "http://databank.worldbank.org/data/download/catalog/cckp_historical_data_0.xls";
tempsFile = download(tempsUrl);
tempsData = readxlsheet(DataFrame, tempsFile, "Country_temperatureCRU");
# read region and income dataset
# Only the cell range C7:G224 of the "List of economies" sheet is read, and with
# header=false, so the columns carry generated names instead of header text
# (the script later refers to them as :x1, :x2, :x4 and :x5).
regionIncomeUrl = "http://databank.worldbank.org/data/download/site-content/CLASS.xls";
regionIncomeFile = download(regionIncomeUrl);
regionIncomeData = readxl(DataFrame, regionIncomeFile, "List of economies!C7:G224", header=false);
# read population dataset
# Reads the cell range A4:BH268 of the "Data" sheet; the "Country Code" and "2015"
# columns are picked out of it further down.
# NOTE(review): the hard-coded ranges assume the current layout of the published
# workbooks - confirm them if the source files are ever updated.
populationUrl = "http://api.worldbank.org/v2/en/indicator/SP.POP.TOTL?downloadformat=excel";
populationFile = download(populationUrl);
populationData = readxl(DataFrame, populationFile, "Data!A4:BH268");
# prepare data for joining: trim every table to the columns the chart needs and
# give the country-code column the same name (:country) in all of them

# heights: rows for men from the latest birth year present in the table
latestBirthYear = maximum(heightsData[:Year_of_birth]);
menOfLatestCohort = (heightsData[:Sex] .== "Men") & (heightsData[:Year_of_birth] .== latestBirthYear);
heightsData = heightsData[menOfLatestCohort, [:ISO, :Mean_height_cm_]];
rename!(heightsData, Dict(:ISO => :country, :Mean_height_cm_ => :meanHeight));

# temperatures: country code plus annual temperature
tempsColumnNames = Dict(:ISO_3DIGIT => :country, :Annual_temp => :temperature);
tempsData = tempsData[:, [:ISO_3DIGIT, :Annual_temp]];
rename!(tempsData, tempsColumnNames);

# region / income: the sheet was read without a header row, hence the :x1.. names
regionIncomeColumnNames = Dict(:x1 => :countryName, :x2 => :country, :x4 => :region, :x5 => :wealth);
regionIncomeData = regionIncomeData[:, [:x1, :x2, :x4, :x5]];
rename!(regionIncomeData, regionIncomeColumnNames);

# population: these header cells are not valid identifiers, so the symbols are built from strings
countryCodeColumn = symbol("Country Code");
year2015Column = symbol("2015");
populationData = populationData[:, [countryCodeColumn, year2015Column]];
rename!(populationData, Dict(year2015Column => :population, countryCodeColumn => :country));
# join data to get plottable data frame; every table is matched on :country
df = join(populationData, heightsData, on = :country);
# drop the rows whose population value is NA before bringing in the other tables
populationKnown = isna(df[:population]) .== false;
df = df[populationKnown, :];
df = join(join(df, tempsData, on = :country), regionIncomeData, on = :country);
# fix Argentina - should be upper-middle income
isArgentina = df[:country] .== "ARG";
df[isArgentina, :wealth] = "Upper middle income";
# plot chart
# the axis settings are built first so that the Layout call itself stays short
temperatureAxis = Dict(:title => "Average Annual Temperature, C", :zeroline => false);
heightAxis = Dict(:title => "Average Male Height, cm", :zeroline => false);
layout = Layout(showlegend = true, hovermode = "closest", xaxis = temperatureAxis, yaxis = heightAxis);
# marker sizes: each country's population relative to the largest population in df,
# scaled so that the largest country gets 10000
# (the maximum is computed once here rather than once per row inside the closure)
maxPopulation = maximum(df[:population]);
df[:sizes] = map(x -> 10000 * x / maxPopulation, df[:population]);
# plain (unweighted) means over the countries left in df; they position the
# "World Average" guide lines of the chart
worldAvgTemp = mean(df[:temperature]);
worldAvgHeight = mean(df[:meanHeight]);
# Static reference traces; the per-country markers are pushed onto `data` afterwards.

# horizontal guide line at the mean height, drawn across temperatures -10..30
avgHeightGuide = scatter(
    x = [-10, 30],
    y = [worldAvgHeight, worldAvgHeight],
    mode = "lines+text",
    showlegend = false,
    text = ["World Average", ""],
    textposition = "bottom",
    line = Dict(:color => "rgba(60, 60, 60, 0.5)", :width => 1));

# vertical guide line at the mean temperature, drawn across heights 150..190
avgTempGuide = scatter(
    x = [worldAvgTemp, worldAvgTemp],
    y = [150, 190],
    mode = "lines+text",
    showlegend = false,
    text = ["World Average", ""],
    textposition = "bottom",
    line = Dict(:color => "rgba(60, 60, 60, 0.5)", :width => 1));

# one caption in each corner of the -10..30 by 150..190 rectangle
quadrantCaptions = scatter(
    x = [-10, -10, 30, 30],
    y = [150, 190, 150, 190],
    mode = "text",
    showlegend = false,
    text = ["COLDER & SHORTER", "COLDER & TALLER", "HOTTER & SHORTER", "HOTTER & TALLER"]);

data = [avgHeightGuide, avgTempGuide, quadrantCaptions];
# Marker colour for each income group found in the :wealth column.
# The triples are 8-bit RGB values. An 8-bit channel runs 0..255, so it is divided
# by 255 (not 256) to reach the 0..1 scale that RGB() takes; dividing by 256 made
# every channel slightly too dark and several of them round to the wrong 8-bit value.
wealthColors = Dict(
    "High income"         => RGB(60/255, 186/255, 84/255),
    "Lower middle income" => RGB(244/255, 194/255, 13/255),
    "Low income"          => RGB(219/255, 50/255, 54/255),
    "Upper middle income" => RGB(72/255, 133/255, 237/255)
);
# one marker trace per income group, named after the group and coloured from wealthColors
for incomeGroup in unique(df[:wealth])
    groupRows = df[df[:wealth] .== incomeGroup, :];
    # :sizes holds population-scaled values, interpreted by the plot as marker area
    bubbleStyle = Dict(:color => wealthColors[incomeGroup], :size => groupRows[:sizes], :sizemode => "area");
    groupTrace = scatter(
        x = groupRows[:temperature],
        y = groupRows[:meanHeight],
        mode = "markers",
        name = incomeGroup,
        marker = bubbleStyle,
        text = groupRows[:countryName],
        hoverinfo = "text"
    );
    push!(data, groupTrace);
end
# annotate some countries: a text-only trace puts the country name to the right of its marker
labelledCountries = ["RUS", "CAN", "FIN", "USA", "CHN", "IND", "AFG", "AUS", "KOR", "NLD"];
for isoCode in labelledCountries
    countryRows = df[df[:country] .== isoCode, :];
    labelTrace = scatter(
        x = countryRows[:temperature],
        y = countryRows[:meanHeight],
        mode = "text",
        text = countryRows[:countryName],
        showlegend = false,
        textposition = "right"
    );
    push!(data, labelTrace);
end
# build the figure from all collected traces and the layout defined earlier
my_plot = plot(data, layout)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.
You signed in with another tab or window. Reload to refresh your session. You signed out in another tab or window. Reload to refresh your session.