Create a gist now

Instantly share code, notes, and snippets.

What would you like to do?
# Are People in Colder Countries Taller? Downloads the data and creates a visualization of average male height as a function of each country's average annual temperature.
using Requests
using DataFrames
using ExcelReaders
using Plotly
using Colors
# Population heights: fetch the NCD-RisC CSV over HTTP (request made with
# `timeout = 30.0`) and parse the response body into a data frame.
heightsUrl = "http://www.ncdrisc.org/downloads/height/NCD_RisC_eLife_2016_height_age18_countries.csv";
heights = get(heightsUrl; timeout = 30.0)
# Guard: any status other than 200 aborts the script before parsing.
heights.status == 200 || error("Error downloading heights data");
heightsData = readtable(IOBuffer(heights.data));
# read temperatures dataset
# The workbook at tempsUrl is downloaded and the value returned by `download` is
# handed to the Excel reader as the file to open; the whole
# "Country_temperatureCRU" sheet is loaded as a DataFrame.
tempsUrl = "http://databank.worldbank.org/data/download/catalog/cckp_historical_data_0.xls";
tempsFile = download(tempsUrl);
tempsData = readxlsheet(DataFrame, tempsFile, "Country_temperatureCRU");
# read region and income dataset
# Only the cell range C7:G224 of the "List of economies" sheet is read.
# header=false: the range is read without a header row; the preparation step later
# in this script refers to the resulting columns as :x1, :x2, :x4 and :x5.
# NOTE(review): the range is hard-coded — confirm it still matches the workbook
# layout if the source file is ever refreshed.
regionIncomeUrl = "http://databank.worldbank.org/data/download/site-content/CLASS.xls";
regionIncomeFile = download(regionIncomeUrl);
regionIncomeData = readxl(DataFrame, regionIncomeFile, "List of economies!C7:G224", header=false);
# read population dataset
# Reads range A4:BH268 of the "Data" sheet. The preparation step later in this
# script selects the columns named "Country Code" and "2015", so the first row of
# the range is presumably the header row — TODO confirm against the workbook.
populationUrl = "http://api.worldbank.org/v2/en/indicator/SP.POP.TOTL?downloadformat=excel";
populationFile = download(populationUrl);
populationData = readxl(DataFrame, populationFile, "Data!A4:BH268");
# prepare data for joining: cut every source down to the columns the chart needs
# and give the country-code column the same name (:country) in all of them

# heights: keep only the rows for men in the most recent birth-year cohort present
latestBirthYear = maximum(heightsData[:Year_of_birth]);
isLatestMaleCohort = (heightsData[:Sex] .== "Men") &
                     (heightsData[:Year_of_birth] .== latestBirthYear);
heightsData = heightsData[isLatestMaleCohort, [:ISO, :Mean_height_cm_]];
rename!(heightsData, Dict(
    :ISO => :country,
    :Mean_height_cm_ => :meanHeight));

# temperatures: country code + annual temperature
tempsData = tempsData[:, [:ISO_3DIGIT, :Annual_temp]];
rename!(tempsData, Dict(
    :ISO_3DIGIT => :country,
    :Annual_temp => :temperature));

# region / income: columns were read without a header, hence the generated names
regionIncomeData = regionIncomeData[:, [:x1, :x2, :x4, :x5]];
rename!(regionIncomeData, Dict(
    :x1 => :countryName,
    :x2 => :country,
    :x4 => :region,
    :x5 => :wealth));

# population: these column names are not valid identifiers, so the symbols are
# built from strings
countryCodeColumn = symbol("Country Code");
population2015Column = symbol("2015");
populationData = populationData[:, [countryCodeColumn, population2015Column]];
rename!(populationData, Dict(
    population2015Column => :population,
    countryCodeColumn => :country));
# join data to get plottable data frame
# Start from population x heights (matched on :country) and drop rows whose
# population is NA before bringing in the remaining sources.
df = join(populationData, heightsData, on = :country);
df = df[!isna(df[:population]), :];
# Add temperature, then country name / region / income group, again on :country.
df = join(join(df, tempsData, on = :country), regionIncomeData, on = :country);
# fix Argentina - should be upper-middle income
df[df[:country] .== "ARG", :wealth] = "Upper middle income";
# plot chart
# Axis settings: a title for each axis and no zero line.
temperatureAxis = Dict(:title => "Average Annual Temperature, C", :zeroline => false);
heightAxis = Dict(:title => "Average Male Height, cm", :zeroline => false);
# Figure layout: legend shown, hover mode "closest".
layout = Layout(showlegend = true,
                hovermode = "closest",
                xaxis = temperatureAxis,
                yaxis = heightAxis);
# Bubble sizes: each population scaled relative to the largest one, so the most
# populous country in df gets 10000. The maximum is computed once up front; taking
# it inside the closure would rescan the whole column for every row.
maxPopulation = maximum(df[:population]);
df[:sizes] = map(x -> 10000 * x / maxPopulation, df[:population]);
# Reference values for the "World Average" guide lines: plain (unweighted) means
# over the rows of df.
worldAvgTemp = mean(df[:temperature]);
worldAvgHeight = mean(df[:meanHeight]);
# Guide traces; they come first in the trace list, ahead of the country bubbles
# that are pushed onto `data` afterwards.

# Horizontal line at the mean height, spanning x = -10 .. 30.
avgHeightLine = scatter(
    x = [-10, 30],
    y = fill(worldAvgHeight, 2),
    mode = "lines+text",
    showlegend = false,
    text = ["World Average", ""],
    textposition = "bottom",
    line = Dict(:color => "rgba(60, 60, 60, 0.5)", :width => 1));

# Vertical line at the mean temperature, spanning y = 150 .. 190.
avgTempLine = scatter(
    x = fill(worldAvgTemp, 2),
    y = [150, 190],
    mode = "lines+text",
    showlegend = false,
    text = ["World Average", ""],
    textposition = "bottom",
    line = Dict(:color => "rgba(60, 60, 60, 0.5)", :width => 1));

# Text-only labels placed at the four corners of the plotted range.
quadrantLabels = scatter(
    x = [-10, -10, 30, 30],
    y = [150, 190, 150, 190],
    mode = "text",
    showlegend = false,
    text = ["COLDER & SHORTER", "COLDER & TALLER", "HOTTER & SHORTER", "HOTTER & TALLER"]);

data = [avgHeightLine, avgTempLine, quadrantLabels];
# Marker colour for each income group (the values found in df[:wealth]).
# The channels are written as 8-bit values (0-255) and RGB expects components in
# 0..1, so they are normalised by 255, the largest 8-bit value. Dividing by 256
# instead leaves every channel slightly short of the intended colour.
wealthColors = Dict(
    "High income"         => RGB(60/255, 186/255, 84/255),
    "Lower middle income" => RGB(244/255, 194/255, 13/255),
    "Low income"          => RGB(219/255, 50/255, 54/255),
    "Upper middle income" => RGB(72/255, 133/255, 237/255)
);
# One marker trace per income group, so each group has its own name and colour.
for incomeGroup in unique(df[:wealth])
    groupRows = df[df[:wealth] .== incomeGroup, :];
    # Marker size comes from the precomputed :sizes column, interpreted as area.
    bubbleStyle = Dict(:color => wealthColors[incomeGroup],
                       :size => groupRows[:sizes],
                       :sizemode => "area");
    groupTrace = scatter(
        x = groupRows[:temperature],
        y = groupRows[:meanHeight],
        mode = "markers",
        name = incomeGroup,
        marker = bubbleStyle,
        text = groupRows[:countryName],   # hover shows the country name only
        hoverinfo = "text");
    push!(data, groupTrace);
end
# annotate some countries
# Each listed country code gets its own text-only trace showing the country name
# to the right of its data point; these traces are kept out of the legend.
labelledCountries = ["RUS", "CAN", "FIN", "USA", "CHN", "IND", "AFG", "AUS", "KOR", "NLD"];
for isoCode in labelledCountries
    countryRows = df[df[:country] .== isoCode, :];
    nameLabel = scatter(
        x = countryRows[:temperature],
        y = countryRows[:meanHeight],
        mode = "text",
        text = countryRows[:countryName],
        showlegend = false,
        textposition = "right");
    push!(data, nameLabel);
end
# Assemble the figure from all collected traces and the layout.
my_plot = plot(data, layout)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment