ikashnitsky/data-acquisition-one-two-three.R

## data-acquisition-one-two-three.R
################################################################################
#
# ikashnitsky.github.io 2017-12-27
# Data acquisition in R - Parts 1/4, 2/4, 3/4
# For Russian translation at https://habrahabr.ru/post/345664/
# Ilya Kashnitsky, ilya.kashnitsky@gmail.com
#
################################################################################

# load required packages
library(tidyverse)      # data manipulation and viz


# built-in ---------------------------------------------------------------------
# Swiss Fertility and Socioeconomic Indicators (1888) Data. Let's check the difference in fertility based of rurality and domination of Catholic population.

swiss %>%
        ggplot(aes(x = Agriculture, y = Fertility,
                   color = Catholic > 50))+
        geom_point()+
        stat_ellipse()+
        theme_minimal(base_family = "mono")

ggsave("swiss.png", width = 8, height = 5)


# gapminder --------------------------------------------------------------------

library(gapminder)

gapminder %>%
        ggplot(aes(x = year, y = lifeExp,
                   color = continent))+
        geom_jitter(size = 1, alpha = .2, width = .75)+
        stat_summary(geom = "path", fun.y = mean, size = 1)+
        theme_minimal(base_family = "mono")

ggsave("gapminder.png", width = 8, height = 5)


# URL --------------------------------------------------------------------------

library(tidyverse)

galton <- read_csv("https://raw.githubusercontent.com/vincentarelbundock/Rdatasets/master/csv/HistData/Galton.csv")

galton %>%
        ggplot(aes(x = father, y = height))+
        geom_point(alpha = .2)+
        stat_smooth(method = "lm")+
        theme_minimal(base_family = "mono")

ggsave("galton.png", width = 8, height = 5)


# UNZIP ------------------------------------------------------------------------

# create a directory for the unzipped data
ifelse(!dir.exists("unzipped"), dir.create("unzipped"), "Directory already exists")

# specify the URL of the archive
url_zip <- "http://www.nyc.gov/html/nypd/downloads/zip/analysis_and_planning/citywide_historical_crime_data_archive.zip"

# download, unzip and read data
f <- tempfile()
download.file(url_zip, destfile = f)
unzip(f, exdir = "unzipped/.")

# if we want to keep the .zip file
path_unzip <- "unzipped/data_archive.zip"
ifelse(!file.exists(path_unzip),
       download.file(url_zip, path_unzip, mode="wb"),
       'file alredy exists')
unzip(path_unzip, exdir = "unzipped/.")

library(readxl)

murder <- read_xls("unzipped/Web Data 2010-2011/Seven Major Felony Offenses 2000 - 2011.xls",
                   sheet = 1, range = "A5:M13") %>%
        filter(OFFENSE %>% substr(1, 6) == "MURDER") %>%
        gather("year", "value", 2:13) %>%
        mutate(year = year %>% as.numeric())

# plot
murder %>%
        ggplot(aes(year, value))+
        geom_point()+
        stat_smooth(method = "lm")+
        theme_minimal(base_family = "mono")+
        labs(title = "Murders in New York")

ggsave("new-york.png", width = 8, height = 5)


# Figshare ---------------------------------------------------------------------

library(rfigshare)

# find the dataset
# fs_search("ice hockey players") # not working

url <- fs_download(article_id = "3394735")

hockey <- read_csv(url)

hockey %>%
        ggplot(aes(x = year, y = height))+
        geom_jitter(size = 2, color = "#35978f", alpha = .1, width = .25)+
        stat_smooth(method = "lm", size = 1)+
        ylab("height, cm")+
        xlab("year of competition")+
        scale_x_continuous(breaks = seq(2005, 2015, 5), labels = seq(2005, 2015, 5))+
        theme_minimal(base_family = "mono")

ggsave("ice-hockey.png", width = 8, height = 5)


# Eurostat ---------------------------------------------------------------------

library(tidyverse)
library(eurostat)
library(lubridate)
library(viridis)

search_eurostat("life expectancy")

# download the selected dataset
e0 <- get_eurostat("demo_mlexpec")

e0 %>%
        filter(! sex == "T",
               age == "Y65",
               geo %in% c("DE", "FR", "IT", "RU", "ES", "UK")) %>%
        ggplot(aes(x = time %>% year(), y = values, color = sex))+
        geom_path()+
        facet_wrap(~ geo, ncol = 3)+
        labs(y = "Life expectancy at age 65", x = NULL)+
        theme_minimal(base_family = "mono")

ggsave("eurostat.png", width = 8, height = 5)


# World Bank -------------------------------------------------------------------
library(tidyverse)
library(wbstats)

# search for a dataset of interest
wbsearch("fertility") %>% View

# fetch the selected dataset
df_wb <- wb(indicator = "SH.MMR.RISK.ZS", startdate = 2000, enddate = 2015)

# have look at the data for one year
df_wb %>% filter(date == 2015) %>% View

df_wb %>%
        filter(iso2c %in% c("V4", "V1", "1W")) %>%
        ggplot(aes(x = date %>% as.numeric(), y = value, color = country))+
        geom_path(size = 1)+
        scale_color_brewer(NULL, palette = "Dark2")+
        labs(x = NULL, y = NULL, title = "Lifetime risk of maternal death (%)")+
        theme_minimal(base_family = "mono")+
        theme(panel.grid.minor = element_blank(),
              legend.position = c(.8, .9))

ggsave("worldbank.png", width = 8, height = 5)


# OECD -------------------------------------------------------------------------
library(tidyverse)
library(viridis)
library(OECD)

search_dataset("unemployment") %>% View

df_oecd <- get_dataset("AVD_DUR")
names(df_oecd) <- names(df_oecd) %>% tolower()

df_oecd %>%
        filter(country %in% c("EU16", "EU28", "USA"), sex == "MEN", ! age == "1524") %>%
        ggplot(aes(obstime, age, fill = obsvalue))+
        geom_tile()+
        scale_fill_viridis("Months", option = "B")+
        scale_x_discrete(breaks = seq(1970, 2015, 5) %>% paste)+
        facet_wrap(~ country, ncol = 1)+
        labs(x = NULL, y = "Age groups",
             title = "Average duration of unemployment in months, males")+
        theme_minimal(base_family = "mono")

ggsave("oecd.png", width = 8, height = 5)


# WID --------------------------------------------------------------------------
library(tidyverse)

#install.packages("devtools")
devtools::install_github("WIDworld/wid-r-tool")
library(wid)

?wid_series_type
?wid_concepts

df_wid <- download_wid(
        indicators = "shweal", # Shares of personal wealth
        areas = c("FR", "GB"), # In France an Italy
        perc = c("p90p100", "p99p100") # Top 1% and top 10%
)

df_wid %>%
        ggplot(aes(x = year, y = value, color = country)) +
        geom_path()+
        labs(title = "Top 1% and top 10% personal wealth shares in France and Great Britain",
             y = "top share")+
        facet_wrap(~ percentile)+
        theme_minimal(base_family = "mono")

ggsave("wid.png", width = 8, height = 5)


# HMD --------------------------------------------------------------------------

# load required packages
library(HMDHFDplus)
library(tidyverse)
library(purrr)

# help function to list the available countries
country <- getHMDcountries()

# remove optional populations
opt_pop <- c("FRACNP", "DEUTE", "DEUTW", "GBRCENW", "GBR_NP")
country <- country[!country %in% opt_pop]

# temporary function to download HMD data for a simgle county (dot = input)
tempf_get_hmd <- . %>% readHMDweb("Exposures_1x1", ik_user_hmd, ik_pass_hmd)

# download the data iteratively for all countries using purrr::map()
exposures <- country %>% map(tempf_get_hmd)

# data transformation to apply to each county dataframe
tempf_trans_data <- . %>%
        select(Year, Age, Female, Male) %>%
        filter(Year %in% 2012) %>%
        select(-Year) %>%
        transmute(age = Age, ratio = Male / Female * 100)

# perform transformation
df_hmd <- exposures %>%
        map(tempf_trans_data) %>%
        bind_rows(.id = "country")

# summarize all ages older than 90 (too jerky)
df_hmd_90 <- df_hmd %>%
        filter(age %in% 90:110) %>%
        group_by(country) %>%
        summarise(ratio = ratio %>% mean(na.rm = T)) %>%
        ungroup() %>%
        transmute(country, age = 90, ratio)

# insert summarized 90+
df_hmd_fin <- bind_rows(df_hmd %>% filter(!age %in% 90:110), df_hmd_90)

# finaly - plot
df_hmd_fin %>%
        ggplot(aes(age, ratio, color = country, group = country))+
        geom_hline(yintercept = 100, color = "grey50", size = 1)+
        geom_line(size = 1)+
        scale_y_continuous(limits = c(0, 120),
                           expand = c(0, 0),
                           breaks = seq(0, 120, 20))+
        scale_x_continuous(limits = c(0, 90),
                           expand = c(0, 0),
                           breaks = seq(0, 80, 20))+
        facet_wrap(~country, ncol = 6)+
        theme_minimal(base_family = "mono", base_size = 15)+
        theme(legend.position = "none",
              panel.border = element_rect(size = .5, fill = NA,
                                          color = "grey50"))+
        labs(x = "Age",
             y = "Sex ratio, males per 100 females",
             title = "Sex ratio in all countries from Human Mortality Database",
             subtitle = "HMD 2012, via HMDHFDplus by @timriffe1",
             caption = "ikashnitsky.github.io")

ggsave("hmd.png", width = 10, height = 12)


# wpp 2015 ---------------------------------------------------------------------
library(wpp2015)
library(tidyverse)
library(ggridges)
library(viridis)

# get the UN country names
data(UNlocations)

countries <- UNlocations %>% pull(name) %>% paste

# data on male life expectancy at birth
data(e0M)

e0M %>%
        filter(country %in% countries) %>%
        select(-last.observed) %>%
        gather(period, value, 3:15) %>%
        ggplot(aes(x = value, y = period %>% fct_rev()))+
        geom_density_ridges(aes(fill = period))+
        scale_fill_viridis(discrete = T, option = "B", direction = -1,
                           begin = .1, end = .9)+
        labs(x = "Male life expectancy at birth",
             y = "Period",
             title = "Global convergence in male life expectancy at birth since 1950",
             subtitle = "UNPD World Population Prospects 2015 Revision, via wpp2015",
             caption = "ikashnitsky.github.io")+
        theme_minimal(base_family =  "mono", base_size = 14)+
        theme(legend.position = "none")

ggsave("wpp2015.png", width = 10, height = 7)


# ESS (Jorge) -------------------------------------------------------------
library(ess)
library(tidyverse)

# help gunction to see the available countries
show_countries()

# check the available rounds for a selected country
show_country_rounds("Netherlands")

# get the full dataset of the last (8) round
df_ess <- ess_rounds(8, your_email =  ik_email)

# select a variable and calculate mean value
df_ess_select <- df_ess %>%
        bind_rows() %>%
        select(idno, cntry, trstplc) %>%
        group_by(cntry) %>%
        mutate(avg = trstplc %>% mean(na.rm = T)) %>%
        ungroup() %>%
        mutate(cntry = cntry %>% as_factor() %>% fct_reorder(avg))

df_ess_select %>%
        ggplot(aes(trstplc, fill = avg))+
        geom_histogram()+
        scale_x_continuous(limits = c(0, 11), breaks = seq(2, 10, 2))+
        scale_fill_gradient("Average\ntrust\nscore",
                            low = "black", high = "aquamarine")+
        facet_wrap(~cntry, ncol = 6)+
        theme_minimal(base_family = "mono")+
        labs(x = "Trust score [0 -- 10]",
             y = "# of respondents",
             title = "Trust in police",
             subtitle = "ESS wave 8 2017, via ess by @cimentadaj",
             caption = "ikashnitsky.github.io")

ggsave("ess.png", width = 8, height = 6)


# American Census data ----------------------------------------------------

library(tidycensus)
library(tidyverse)
library(viridis)
library(janitor)
library(sf)
# to use geom_sf we need the latest development version of ggplot2
devtools::install_github("tidyverse/ggplot2", "develop")
library(ggplot2)

# you need a personal API key, available free at
# https://api.census.gov/data/key_signup.html
# normally, this key is to be stored in .Renviron

# see state and county codes and names
fips_codes %>% View

# the available variables
load_variables(year = 2015, dataset = "acs5") %>% View

# data on median age of population in Chicago
df_acs <- get_acs(
        geography = "tract",
        county = "Cook County",
        state = "IL",
        variables = "B01002_001E",
        year = 2015,
        key = ik_api_acs,
        geometry = TRUE
) %>% clean_names()

# map the data
df_acs %>%
        ggplot()+
        geom_sf(aes(fill = estimate %>%
                            cut(breaks = seq(20, 60, 10))),
                color = NA)+
        scale_fill_viridis_d("Median age", begin = .4)+
        coord_sf(datum = NA)+
        theme_void(base_family =  "mono")+
        theme(legend.position = c(.15, .15))+
        labs(title = "Median age of population in Chicago\nby census tracts\n",
             subtitle = "ACS 2015, via tidycensus by @kyle_e_walker",
             caption = "ikashnitsky.github.io",
             x = NULL, y = NULL)

ggsave("tidycensus.png", width = 6, height = 6)
	################################################################################
	#
	# ikashnitsky.github.io 2017-12-27
	# Data acquisition in R - Parts 1/4, 2/4, 3/4
	# For Russian translation at https://habrahabr.ru/post/345664/
	# Ilya Kashnitsky, ilya.kashnitsky@gmail.com
	#
	################################################################################

	# load required packages
	library(tidyverse) # data manipulation and viz


	# built-in ---------------------------------------------------------------------
	# Swiss Fertility and Socioeconomic Indicators (1888) Data. Let's check the difference in fertility based of rurality and domination of Catholic population.

	swiss %>%
	ggplot(aes(x = Agriculture, y = Fertility,
	color = Catholic > 50))+
	geom_point()+
	stat_ellipse()+
	theme_minimal(base_family = "mono")

	ggsave("swiss.png", width = 8, height = 5)


	# gapminder --------------------------------------------------------------------

	library(gapminder)

	gapminder %>%
	ggplot(aes(x = year, y = lifeExp,
	color = continent))+
	geom_jitter(size = 1, alpha = .2, width = .75)+
	stat_summary(geom = "path", fun.y = mean, size = 1)+
	theme_minimal(base_family = "mono")

	ggsave("gapminder.png", width = 8, height = 5)


	# URL --------------------------------------------------------------------------

	library(tidyverse)

	galton <- read_csv("https://raw.githubusercontent.com/vincentarelbundock/Rdatasets/master/csv/HistData/Galton.csv")

	galton %>%
	ggplot(aes(x = father, y = height))+
	geom_point(alpha = .2)+
	stat_smooth(method = "lm")+
	theme_minimal(base_family = "mono")

	ggsave("galton.png", width = 8, height = 5)


	# UNZIP ------------------------------------------------------------------------

	# create a directory for the unzipped data
	ifelse(!dir.exists("unzipped"), dir.create("unzipped"), "Directory already exists")

	# specify the URL of the archive
	url_zip <- "http://www.nyc.gov/html/nypd/downloads/zip/analysis_and_planning/citywide_historical_crime_data_archive.zip"

	# download, unzip and read data
	f <- tempfile()
	download.file(url_zip, destfile = f)
	unzip(f, exdir = "unzipped/.")

	# if we want to keep the .zip file
	path_unzip <- "unzipped/data_archive.zip"
	ifelse(!file.exists(path_unzip),
	download.file(url_zip, path_unzip, mode="wb"),
	'file alredy exists')
	unzip(path_unzip, exdir = "unzipped/.")

	library(readxl)

	murder <- read_xls("unzipped/Web Data 2010-2011/Seven Major Felony Offenses 2000 - 2011.xls",
	sheet = 1, range = "A5:M13") %>%
	filter(OFFENSE %>% substr(1, 6) == "MURDER") %>%
	gather("year", "value", 2:13) %>%
	mutate(year = year %>% as.numeric())

	# plot
	murder %>%
	ggplot(aes(year, value))+
	geom_point()+
	stat_smooth(method = "lm")+
	theme_minimal(base_family = "mono")+
	labs(title = "Murders in New York")

	ggsave("new-york.png", width = 8, height = 5)


	# Figshare ---------------------------------------------------------------------

	library(rfigshare)

	# find the dataset
	# fs_search("ice hockey players") # not working

	url <- fs_download(article_id = "3394735")

	hockey <- read_csv(url)

	hockey %>%
	ggplot(aes(x = year, y = height))+
	geom_jitter(size = 2, color = "#35978f", alpha = .1, width = .25)+
	stat_smooth(method = "lm", size = 1)+
	ylab("height, cm")+
	xlab("year of competition")+
	scale_x_continuous(breaks = seq(2005, 2015, 5), labels = seq(2005, 2015, 5))+
	theme_minimal(base_family = "mono")

	ggsave("ice-hockey.png", width = 8, height = 5)


	# Eurostat ---------------------------------------------------------------------

	library(tidyverse)
	library(eurostat)
	library(lubridate)
	library(viridis)

	search_eurostat("life expectancy")

	# download the selected dataset
	e0 <- get_eurostat("demo_mlexpec")

	e0 %>%
	filter(! sex == "T",
	age == "Y65",
	geo %in% c("DE", "FR", "IT", "RU", "ES", "UK")) %>%
	ggplot(aes(x = time %>% year(), y = values, color = sex))+
	geom_path()+
	facet_wrap(~ geo, ncol = 3)+
	labs(y = "Life expectancy at age 65", x = NULL)+
	theme_minimal(base_family = "mono")

	ggsave("eurostat.png", width = 8, height = 5)


	# World Bank -------------------------------------------------------------------
	library(tidyverse)
	library(wbstats)

	# search for a dataset of interest
	wbsearch("fertility") %>% View

	# fetch the selected dataset
	df_wb <- wb(indicator = "SH.MMR.RISK.ZS", startdate = 2000, enddate = 2015)

	# have look at the data for one year
	df_wb %>% filter(date == 2015) %>% View

	df_wb %>%
	filter(iso2c %in% c("V4", "V1", "1W")) %>%
	ggplot(aes(x = date %>% as.numeric(), y = value, color = country))+
	geom_path(size = 1)+
	scale_color_brewer(NULL, palette = "Dark2")+
	labs(x = NULL, y = NULL, title = "Lifetime risk of maternal death (%)")+
	theme_minimal(base_family = "mono")+
	theme(panel.grid.minor = element_blank(),
	legend.position = c(.8, .9))

	ggsave("worldbank.png", width = 8, height = 5)


	# OECD -------------------------------------------------------------------------
	library(tidyverse)
	library(viridis)
	library(OECD)

	search_dataset("unemployment") %>% View

	df_oecd <- get_dataset("AVD_DUR")
	names(df_oecd) <- names(df_oecd) %>% tolower()

	df_oecd %>%
	filter(country %in% c("EU16", "EU28", "USA"), sex == "MEN", ! age == "1524") %>%
	ggplot(aes(obstime, age, fill = obsvalue))+
	geom_tile()+
	scale_fill_viridis("Months", option = "B")+
	scale_x_discrete(breaks = seq(1970, 2015, 5) %>% paste)+
	facet_wrap(~ country, ncol = 1)+
	labs(x = NULL, y = "Age groups",
	title = "Average duration of unemployment in months, males")+
	theme_minimal(base_family = "mono")

	ggsave("oecd.png", width = 8, height = 5)


	# WID --------------------------------------------------------------------------
	library(tidyverse)

	#install.packages("devtools")
	devtools::install_github("WIDworld/wid-r-tool")
	library(wid)

	?wid_series_type
	?wid_concepts

	df_wid <- download_wid(
	indicators = "shweal", # Shares of personal wealth
	areas = c("FR", "GB"), # In France an Italy
	perc = c("p90p100", "p99p100") # Top 1% and top 10%
	)

	df_wid %>%
	ggplot(aes(x = year, y = value, color = country)) +
	geom_path()+
	labs(title = "Top 1% and top 10% personal wealth shares in France and Great Britain",
	y = "top share")+
	facet_wrap(~ percentile)+
	theme_minimal(base_family = "mono")

	ggsave("wid.png", width = 8, height = 5)


	# HMD --------------------------------------------------------------------------

	# load required packages
	library(HMDHFDplus)
	library(tidyverse)
	library(purrr)

	# help function to list the available countries
	country <- getHMDcountries()

	# remove optional populations
	opt_pop <- c("FRACNP", "DEUTE", "DEUTW", "GBRCENW", "GBR_NP")
	country <- country[!country %in% opt_pop]

	# temporary function to download HMD data for a simgle county (dot = input)
	tempf_get_hmd <- . %>% readHMDweb("Exposures_1x1", ik_user_hmd, ik_pass_hmd)

	# download the data iteratively for all countries using purrr::map()
	exposures <- country %>% map(tempf_get_hmd)

	# data transformation to apply to each county dataframe
	tempf_trans_data <- . %>%
	select(Year, Age, Female, Male) %>%
	filter(Year %in% 2012) %>%
	select(-Year) %>%
	transmute(age = Age, ratio = Male / Female * 100)

	# perform transformation
	df_hmd <- exposures %>%
	map(tempf_trans_data) %>%
	bind_rows(.id = "country")

	# summarize all ages older than 90 (too jerky)
	df_hmd_90 <- df_hmd %>%
	filter(age %in% 90:110) %>%
	group_by(country) %>%
	summarise(ratio = ratio %>% mean(na.rm = T)) %>%
	ungroup() %>%
	transmute(country, age = 90, ratio)

	# insert summarized 90+
	df_hmd_fin <- bind_rows(df_hmd %>% filter(!age %in% 90:110), df_hmd_90)

	# finaly - plot
	df_hmd_fin %>%
	ggplot(aes(age, ratio, color = country, group = country))+
	geom_hline(yintercept = 100, color = "grey50", size = 1)+
	geom_line(size = 1)+
	scale_y_continuous(limits = c(0, 120),
	expand = c(0, 0),
	breaks = seq(0, 120, 20))+
	scale_x_continuous(limits = c(0, 90),
	expand = c(0, 0),
	breaks = seq(0, 80, 20))+
	facet_wrap(~country, ncol = 6)+
	theme_minimal(base_family = "mono", base_size = 15)+
	theme(legend.position = "none",
	panel.border = element_rect(size = .5, fill = NA,
	color = "grey50"))+
	labs(x = "Age",
	y = "Sex ratio, males per 100 females",
	title = "Sex ratio in all countries from Human Mortality Database",
	subtitle = "HMD 2012, via HMDHFDplus by @timriffe1",
	caption = "ikashnitsky.github.io")

	ggsave("hmd.png", width = 10, height = 12)


	# wpp 2015 ---------------------------------------------------------------------
	library(wpp2015)
	library(tidyverse)
	library(ggridges)
	library(viridis)

	# get the UN country names
	data(UNlocations)

	countries <- UNlocations %>% pull(name) %>% paste

	# data on male life expectancy at birth
	data(e0M)

	e0M %>%
	filter(country %in% countries) %>%
	select(-last.observed) %>%
	gather(period, value, 3:15) %>%
	ggplot(aes(x = value, y = period %>% fct_rev()))+
	geom_density_ridges(aes(fill = period))+
	scale_fill_viridis(discrete = T, option = "B", direction = -1,
	begin = .1, end = .9)+
	labs(x = "Male life expectancy at birth",
	y = "Period",
	title = "Global convergence in male life expectancy at birth since 1950",
	subtitle = "UNPD World Population Prospects 2015 Revision, via wpp2015",
	caption = "ikashnitsky.github.io")+
	theme_minimal(base_family = "mono", base_size = 14)+
	theme(legend.position = "none")

	ggsave("wpp2015.png", width = 10, height = 7)


	# ESS (Jorge) -------------------------------------------------------------
	library(ess)
	library(tidyverse)

	# help gunction to see the available countries
	show_countries()

	# check the available rounds for a selected country
	show_country_rounds("Netherlands")

	# get the full dataset of the last (8) round
	df_ess <- ess_rounds(8, your_email = ik_email)

	# select a variable and calculate mean value
	df_ess_select <- df_ess %>%
	bind_rows() %>%
	select(idno, cntry, trstplc) %>%
	group_by(cntry) %>%
	mutate(avg = trstplc %>% mean(na.rm = T)) %>%
	ungroup() %>%
	mutate(cntry = cntry %>% as_factor() %>% fct_reorder(avg))

	df_ess_select %>%
	ggplot(aes(trstplc, fill = avg))+
	geom_histogram()+
	scale_x_continuous(limits = c(0, 11), breaks = seq(2, 10, 2))+
	scale_fill_gradient("Average\ntrust\nscore",
	low = "black", high = "aquamarine")+
	facet_wrap(~cntry, ncol = 6)+
	theme_minimal(base_family = "mono")+
	labs(x = "Trust score [0 -- 10]",
	y = "# of respondents",
	title = "Trust in police",
	subtitle = "ESS wave 8 2017, via ess by @cimentadaj",
	caption = "ikashnitsky.github.io")

	ggsave("ess.png", width = 8, height = 6)


	# American Census data ----------------------------------------------------

	library(tidycensus)
	library(tidyverse)
	library(viridis)
	library(janitor)
	library(sf)
	# to use geom_sf we need the latest development version of ggplot2
	devtools::install_github("tidyverse/ggplot2", "develop")
	library(ggplot2)

	# you need a personal API key, available free at
	# https://api.census.gov/data/key_signup.html
	# normally, this key is to be stored in .Renviron

	# see state and county codes and names
	fips_codes %>% View

	# the available variables
	load_variables(year = 2015, dataset = "acs5") %>% View

	# data on median age of population in Chicago
	df_acs <- get_acs(
	geography = "tract",
	county = "Cook County",
	state = "IL",
	variables = "B01002_001E",
	year = 2015,
	key = ik_api_acs,
	geometry = TRUE
	) %>% clean_names()

	# map the data
	df_acs %>%
	ggplot()+
	geom_sf(aes(fill = estimate %>%
	cut(breaks = seq(20, 60, 10))),
	color = NA)+
	scale_fill_viridis_d("Median age", begin = .4)+
	coord_sf(datum = NA)+
	theme_void(base_family = "mono")+
	theme(legend.position = c(.15, .15))+
	labs(title = "Median age of population in Chicago\nby census tracts\n",
	subtitle = "ACS 2015, via tidycensus by @kyle_e_walker",
	caption = "ikashnitsky.github.io",
	x = NULL, y = NULL)

	ggsave("tidycensus.png", width = 6, height = 6)