Skip to content

Instantly share code, notes, and snippets.

@emordonez
Last active May 11, 2021 21:31
Show Gist options
  • Save emordonez/de0a8019c830285f327bbd39a152cfa7 to your computer and use it in GitHub Desktop.
Save emordonez/de0a8019c830285f327bbd39a152cfa7 to your computer and use it in GitHub Desktop.
Example breakdown of PSA data on regions of the Philippines by GDP.
if (!require("pacman")) install.packages("pacman")
pacman::p_load(
"dplyr", "ggplot2", "readr", "reticulate",
"sf", "showtext", "stringr"
)
# Utility function to save ggplots as PNG files.
#
# Arguments:
#   filename  Output path WITHOUT extension; ".png" is appended.
#   plot      Plot object passed on to ggsave().
#   width     Image width in inches (default 7).
#   height    Image height in inches (default 7).
#   dpi       Resolution in dots per inch. Defaults to 96, the value that
#             used to be hard-coded, so existing calls are unaffected.
#
# Returns whatever ggsave() returns.
save_image <- function(filename, plot, width = 7, height = 7, dpi = 96) {
  ggsave(
    sprintf("%s.png", filename),
    plot,
    width = width,
    height = height,
    units = "in",
    dpi = dpi
  )
}
# Shared ggplot layers (caption, base theme, text and margin overrides).
# Each plot below appends this list with `+ plot_settings`.
plot_settings <- list(
  labs(
    caption = "Data: Philippine Statistics Authority | github.com/emordonez"
  ),
  theme_minimal(),
  theme(
    # Legends are off by default; a plot that wants one re-enables it
    legend.position = "none",
    plot.margin = margin(t = 10, r = 10, b = 10, l = 10, unit = "pt"),
    text = element_text(family = "Open Sans"),
    plot.title = element_text(face = "bold"),
    plot.subtitle = element_text(face = "plain"),
    plot.caption = element_text(face = "italic"),
    # Small gap between each axis title and its axis
    axis.title.x = element_text(
      margin = margin(t = 7, r = 0, b = 0, l = 0, unit = "pt")
    ),
    axis.title.y = element_text(
      margin = margin(t = 0, r = 7, b = 0, l = 0, unit = "pt")
    )
  )
)

# Register the "Open Sans" family from Google Fonts and switch on showtext
# rendering so the family referenced in the theme is available.
font_add_google(name = "Open Sans", family = "Open Sans")
showtext_auto()
# Read data: format_data() comes from the sourced Python script
source_python("regional_data.py")
df <- format_data("./regional-gdp.csv", "./regional-population.csv")

# National aggregates per year (sum over regions) and national GDP per capita
ph <- df %>%
  group_by(year) %>%
  dplyr::summarize(
    national_gdp = sum(gdp_constant_php),
    national_pop = sum(population)
  ) %>%
  mutate(national_gdp_pc = national_gdp / national_pop)

# Attach the national figures to every regional row, derive regional shares,
# and shorten the region names to display labels
df <- df %>%
  merge(ph, by = "year") %>%
  mutate(
    pop_share = population / national_pop,
    gdp_pc_ratio = gdp_per_capita / national_gdp_pc,
    region = case_when(
      # Numbered regions carry their name in parentheses: take the
      # parenthesised part, drop the brackets, and title-case it
      startsWith(region, "Region") ~ region %>%
        str_extract("\\([^()]+\\)") %>%
        str_sub(2, -2) %>%
        str_to_title(),
      region == "National Capital Region (NCR)" ~ "Metro Manila",
      region == "Cordillera Administrative Region (CAR)" ~ "Cordillera Region",
      region == "MIMAROPA Region" ~ "Mimaropa",
      region == "Bangsamoro Autonomous Region in Muslim Mindanao (BARMM)" ~ "Bangsamoro"
    )
  )
#'
#' PLOT 1: Philippine regions ranked by real GDP per capita
#'
region_rankings <- df %>%
  # Snapshot years only: 2000, 2010, and 2020
  filter(year %in% c(2000, 2010, 2020)) %>%
  select(year, region, gdp_per_capita) %>%
  # Within each year, rank 1 is the highest GDP per capita
  group_by(year) %>%
  mutate(ranking = dense_rank(desc(gdp_per_capita))) %>%
  # Regrouping by region replaces the by-year grouping. A smaller rank
  # number in 2020 than in 2000 means the region moved up the table.
  group_by(region) %>%
  mutate(trend = case_when(
    ranking[year == 2000] > ranking[year == 2020] ~ "increase",
    ranking[year == 2000] < ranking[year == 2020] ~ "decrease",
    TRUE ~ "same"
  )) %>%
  ungroup()

# 2000 rows only; used to label the y axis of the ranking plot
rankings_2000 <- filter(region_rankings, year == 2000)
# Line chart of each region's rank in 2000, 2010, and 2020, colored by its
# net movement between 2000 and 2020.
viz_region_rankings <- region_rankings %>%
  ggplot(aes(x = year, y = ranking, group = region, color = trend)) +
  geom_line() +
  geom_point() +
  scale_x_continuous(breaks = c(2000, 2010, 2020)) +
  # Reversed so rank 1 sits at the top. The primary axis shows region names
  # at their 2000 rank; the duplicated axis shows the rank numbers.
  scale_y_reverse(
    labels = rankings_2000$region,
    breaks = rankings_2000$ranking,
    sec.axis = dup_axis(labels = rankings_2000$ranking)
  ) +
  # Colors are named by trend value. An unnamed vector is assigned by
  # position to whichever trend levels are present, so the colors would
  # silently shift if one category (e.g. "decrease") had no regions.
  scale_color_manual(
    values = c(decrease = "red", increase = "blue", same = "black")
  ) +
  labs(
    title = "Philippine regions ranked by real GDP per capita",
    subtitle = paste(
      "The fastest growing regions over the past 20 years have been",
      "\nNorthern Mindanao, Mimaropa, Western Visayas, and Caraga."
    ),
    x = NULL,
    y = NULL
  ) +
  plot_settings
save_image("1-region-rankings", viz_region_rankings)
#'
#' PLOT 2: Regional GDP per capita relative to the national average
#'
# Using a 2011 PhilGIS shapefile
# 2011 region boundaries are coterminous with post-2018 region boundaries,
# but some regions are named differently
download.file(
  "https://drive.google.com/uc?export=download&id=0B-JUh-O7Rg6YZmRwVEF5aWFsWVU",
  destfile = "Regions.7z",
  # The archive is binary. download.file() defaults to text mode ("w"),
  # which corrupts binary downloads on Windows; ".7z" is not among the
  # extensions it switches to binary automatically.
  mode = "wb"
)
# Extract into ./Regions; requires the `7z` command-line tool on the PATH
system("7z e -oRegions Regions.7z")
# Read the "Regions" layer from the extracted directory
regions_shp <- read_sf("./Regions/", "Regions")
# Rewrite the shapefile's region names into the display labels used in `df`
# so the two tables can be joined on region name
regions_shp <- regions_shp %>%
  mutate(
    REGION = case_when(
      # Numbered regions end in " (Region ___)": strip that suffix and
      # title-case what remains
      str_detect(REGION, fixed("(Region")) ~
        str_to_title(str_remove_all(REGION, " \\([^)]*\\)")),
      REGION == "Metropolitan Manila" ~ "Metro Manila",
      REGION == "Cordillera Administrative Region (CAR)" ~ "Cordillera Region",
      REGION == "Autonomous Region of Muslim Mindanao (ARMM)" ~ "Bangsamoro"
    )
  )
# 2020 regional ratios joined to the region geometries by display name
gdp_pc_ratios_2020 <- merge(
  x = df %>%
    filter(year == 2020) %>%
    select(region, gdp_pc_ratio, national_gdp_pc),
  y = regions_shp,
  by.x = "region",
  by.y = "REGION"
)

# Bin the ratio into percent-of-national-average bands for the fill scale
gdp_pc_ratios_2020[["ratio_bins"]] <- cut(
  gdp_pc_ratios_2020[["gdp_pc_ratio"]],
  breaks = c(0, 0.5, 0.6, 0.7, 0.8, 0.9, 1, Inf),
  labels = c("< 50", "50-60", "60-70", "70-80", "80-90", "90-100", "> 100")
)

# Last row of `ph`, taken as the 2020 national figure
ph_gdp_pc_2020 <- tail(ph$national_gdp_pc, n = 1)

# Ratios quoted in the map's text callouts
manila_ratio_2020 <- gdp_pc_ratios_2020 %>%
  filter(region == "Metro Manila") %>%
  pull(gdp_pc_ratio) %>%
  tail(n = 1)
northern_mindanao_ratio_2020 <- gdp_pc_ratios_2020 %>%
  filter(region == "Northern Mindanao") %>%
  pull(gdp_pc_ratio) %>%
  tail(n = 1)
# Choropleth of 2020 regional GDP per capita as a percent of the national
# figure, with text callouts for Metro Manila and Northern Mindanao.
viz_gdp_pc_ratios_2020 <- ggplot(
  data = gdp_pc_ratios_2020,
  aes(geometry = geometry)
) +
  # Filled regions first, then a second layer that only draws the borders
  geom_sf(aes(fill = ratio_bins), color = "transparent", show.legend = "polygon") +
  geom_sf(fill = "transparent", color = "black", size = 0.25) +
  # Callout positions are hard-coded plot coordinates
  # (presumably longitude/latitude -- confirm against the shapefile CRS)
  annotate(
    "text",
    x = 118.5,
    y = 14.5,
    label = sprintf(
      "Metro Manila: %0.0f%%",
      manila_ratio_2020 * 100
    ),
    size = 4
  ) +
  annotate(
    "text",
    x = 121,
    y = 8.75,
    label = sprintf(
      "Northern Mindanao: %0.0f%%",
      northern_mindanao_ratio_2020 * 100
    ),
    size = 4
  ) +
  scale_fill_manual(
    # Colors are keyed by bin label, so every bin keeps its own color no
    # matter which bins actually occur in the data. The previous unnamed
    # palette omitted the "70-80" color by hand because that bin was empty,
    # and would have mis-colored the map as soon as the set of populated
    # bins changed.
    values = c(
      "< 50" = "#003f5c",
      "50-60" = "#374c80",
      "60-70" = "#7a5195",
      "70-80" = "#bc5090",
      "80-90" = "#ef5675",
      "90-100" = "#ff764a",
      "> 100" = "#ffa600"
    ),
    name = "Percent of national average",
    guide = guide_legend(
      keyheight = unit(8, units = "mm"),
      keywidth = unit(8, units = "mm"),
      title.position = "top",
      label.position = "right",
      nrow = 8
    )
  ) +
  labs(
    title = "Regional GDP per capita relative to the national average",
    subtitle = paste(
      "The most productive regions per capita are those surrounding",
      "Manila and \nDavao. National GDP per capita in 2020 was",
      sprintf("%0.2f", ph_gdp_pc_2020),
      "thousand pesos at \nconstant 2018 prices."
    ),
    x = NULL,
    y = NULL
  ) +
  plot_settings +
  # Override the shared settings: show the legend inside the panel at the
  # top left, and hide the axis tick labels
  theme(
    legend.position = c(0, 0.975),
    legend.justification = c(0, 1),
    axis.text.x = element_blank(),
    axis.text.y = element_blank()
  )
save_image("2-regional-gdp-ratios-2020", viz_gdp_pc_ratios_2020, height = 10)
#'
#' PLOT 3: Real GDP per capita vs. share of national population
#'
ph_pop_shares_2020 <- df %>%
  filter(year == 2020) %>%
  select(region, gdp_per_capita, pop_share, national_gdp, national_pop) %>%
  # Order regions from lowest to highest GDP per capita
  arrange(gdp_per_capita) %>%
  mutate(
    # Share as a percentage
    pop_share = 100 * pop_share,
    # Running total of population share in that order
    cum_share = cumsum(pop_share),
    # Midpoint of each region's slice of the cumulative axis: the plot uses
    # it as the bar position and `pop_share` as the bar width
    loc = cum_share - pop_share / 2
  )

# IMF's 2018 average exchange rate for PHP to USD
xtr <- 52.661

# 2020 national aggregates (last row of `ph`)
ph_gdp_2020 <- ph$national_gdp %>% tail(n = 1)
ph_pop_2020 <- ph$national_pop %>% tail(n = 1)
ph_gdp_pc_2020 <- ph$national_gdp_pc %>% tail(n = 1)
# Variable-width bar chart: one bar per region, bar length is GDP per capita
# and bar thickness is the region's share of population. A reference line
# marks national GDP per capita. coord_flip() turns the bars horizontal.
viz_ph_pop_shares_2020 <- ph_pop_shares_2020 %>%
  ggplot(aes(x = loc, y = gdp_per_capita, width = pop_share)) +
  geom_col(
    aes(fill = region),
    alpha = 0.75,
    position = "identity"
  ) +
  geom_hline(yintercept = ph_gdp_pc_2020) +
  scale_x_continuous(
    name = "Cumulative share of population (%)",
    breaks = seq(0, 100, 10),
    labels = as.character(seq(0, 100, 10)),
    # Secondary axis on the same scale, labelled with the region name at the
    # midpoint of each bar
    sec.axis = sec_axis(
      ~ . * 1,
      breaks = ph_pop_shares_2020$loc,
      labels = ph_pop_shares_2020$region
    ),
    expand = c(0.01, 0.25)
  ) +
  scale_y_continuous(expand = c(0.01, 0.25)) +
  # Label for the reference line, placed at hard-coded coordinates
  annotate(
    "text",
    x = 50,
    y = 275,
    label = sprintf("National GDP per capita, %0.0f", ph_gdp_pc_2020),
    size = 4
  ) +
  labs(
    title = "Real GDP per capita vs. share of national population",
    subtitle = paste(
      "The population of the Philippines in 2020 was",
      sprintf("%0.2f million,", ph_pop_2020 / 1e6),
      "and total GDP at \nconstant 2018 prices was",
      sprintf("%0.2f trillion PHP", ph_gdp_2020 / 1e9),
      sprintf(
        "(%0.0f billion USD, 1 USD = %0.2f PHP).",
        ph_gdp_2020 / 1e6 / xtr,
        xtr
      )
    ),
    x = NULL,
    y = "Real GDP per capita, thousand PHP (2018)"
  ) +
  plot_settings +
  # Left-align the caption for this plot
  theme(plot.caption = element_text(hjust = 0)) +
  coord_flip()
save_image("3-rgdp-vs-population", viz_ph_pop_shares_2020)
Region At Constant 2018 Prices 2000 At Constant 2018 Prices 2001 At Constant 2018 Prices 2002 At Constant 2018 Prices 2003 At Constant 2018 Prices 2004 At Constant 2018 Prices 2005 At Constant 2018 Prices 2006 At Constant 2018 Prices 2007 At Constant 2018 Prices 2008 At Constant 2018 Prices 2009 At Constant 2018 Prices 2010 At Constant 2018 Prices 2011 At Constant 2018 Prices 2012 At Constant 2018 Prices 2013 At Constant 2018 Prices 2014 At Constant 2018 Prices 2015 At Constant 2018 Prices 2016 At Constant 2018 Prices 2017 At Constant 2018 Prices 2018 At Constant 2018 Prices 2019 At Constant 2018 Prices 2020
..National Capital Region (NCR) 2416391870 2483504980 2507171644 2624052475 2841837836 2988546218 3157102605 3372129301 3524902698 3522558104 3745124533 3833040760 4072189800 4339858704 4576914911 4865073608 5216091453 5507681038 5814440130 6224134457 5596389427
..Cordillera Administrative Region (CAR) 143658283 149394641 156294714 164434850 173236667 176315299 182923099 196165501 203317240 207767713 220343710 221420475 221950621 236871362 248982342 259526063 266691916 291775205 308267122 321722276 289898072
..Region I (Ilocos Region) 240124758 245289940 252727880 264116263 277856799 290757292 307818091 326055249 335759337 335356395 364366356 378621382 398616600 432692177 461217885 486062080 525389666 554680762 587597251 630362667 581894468
..Region II (Cagayan Valley) 158218411 163979540 166495687 170783478 186059551 182540224 200542676 213644370 218872182 223636839 232748963 247587301 264876624 288512422 312814849 325728274 341540501 368250759 385061271 411513567 370865964
..Region III (Central Luzon) 707245644 749289527 791683392 826169061 852296755 885593565 927301530 981902705 1032623864 1039474095 1143777786 1233431610 1336963348 1417025690 1526621058 1619134911 1747565400 1929193858 2062393875 2183779631 1880093241
..Region IV-A (CALABARZON) 1050505077 1068135114 1120500727 1179350176 1244396888 1310160075 1369674210 1446188225 1497994676 1486472826 1638482343 1676719329 1809708699 1942476533 2054936917 2196313576 2346818115 2527658901 2706994745 2831599919 2535284422
..MIMAROPA Region 124948855 129576632 148590811 163145111 169206374 184832219 191998208 210149963 220607038 230687037 232547423 241561347 254595082 265363628 292033215 305952533 321170118 341505717 370744808 386783632 357386005
..Region V (Bicol Region) 176358450 181995859 189219301 199696474 210676947 222207767 229731835 246621525 259226365 277463581 293875751 304077556 334658891 368570786 390488251 431896619 462821105 488370132 522014835 564941774 517464559
..Region VI (Western Visayas) 323742052 331474894 340556420 360566385 388102456 407685036 430039527 458869067 480892689 507844955 531457374 564705465 605376449 631660677 665091892 715495909 758042789 820773095 860107768 913909365 825445426
..Region VII (Central Visayas) 382350583 398961448 406689373 424323970 453194307 485139298 507783275 549142789 577396656 594859094 666353769 713605729 779760030 841425750 903755199 958557189 1029641257 1102761503 1180945761 1254113393 1129843546
..Region VIII (Eastern Visayas) 197055277 201720341 207527497 219236477 231739076 245209038 261794426 270150926 283219881 294361211 311990058 320330763 312295545 331113701 331605886 352680877 402993768 415257479 444384029 469292504 433414325
..Region IX (Zamboanga Peninsula) 153478479 156632083 161152567 168261350 175819789 188931320 196123519 211043562 217350412 232017626 239770872 240130505 267034112 281049615 301747243 329285978 346005979 356798449 379428020 396878798 376048233
..Region X (Northern Mindanao) 254909002 272386379 299873812 319682061 346219338 364973391 389672303 418466127 441550494 456214263 484523975 513466854 551141548 583766826 633651974 670356960 723308958 766435831 821122015 867432424 822553826
..Region XI (Davao Region) 286212024 293475895 309524303 331450707 352703211 367442175 384620255 408318046 424207244 445109409 474508281 493082130 528097604 560762183 611602549 660489974 721550629 785051181 841429225 900885668 832093644
..Region XII (SOCCSKSARGEN) 175244994 176820019 194583624 206949168 218756257 223108764 237237885 252449432 264630696 269570793 278784645 292232263 315391176 343680815 364657867 377171442 394076962 424972061 454304549 469982193 450001744
..Region XIII (Caraga) 96921630 101609542 107557223 112632685 119223129 124555442 132729349 141280994 145216552 148064425 163468004 174932167 193102956 211269822 230076396 245363390 260522888 276299208 290561794 306260878 284138649
..Bangsamoro Autonomous Region in Muslim Mindanao (BARMM) 98017851 94136902 105745055 110826733 119752524 126327483 133711481 140661469 143109506 148174653 161737153 166414758 170707107 178542938 189848311 191818068 198444393 218512906 235393060 249157465 244418555
Region 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020
..National Capital Region (NCR) 9961971 10153254 10344788 10536574 10729137 10921427 11113967 11306759 11500331 11693627 11887173 12080971 12275553 12469854 12664407 12859211 13066832 13264805 13453701 13633497 13804656
..Cordillera Administrative Region (CAR) 1369249 1397362 1424800 1451561 1477718 1503126 1527858 1551914 1575358 1598061 1620087 1641438 1662169 1682167 1701488 1720134 1739380 1757717 1775210 1791881 1807738
..Region I (Ilocos Region) 4209083 4265007 4320673 4376079 4431377 4486265 4540893 4595262 4649520 4703370 4756961 4810293 4863510 4916323 4968877 5021171 5076184 5128542 5178410 5225800 5270807
..Region II (Cagayan Valley) 2819641 2860861 2902169 2943564 2985160 3026730 3068387 3110132 3152079 3193998 3236006 3278100 3320398 3362667 3405024 3447468 3493662 3537703 3579715 3619689 3657741
..Region III (Central Luzon) 8233671 8420004 8607944 8797491 8989170 9181933 9376303 9572279 9770406 9969599 10170399 10372806 10577379 10783003 10990233 11199069 11437442 11667642 11890314 12105494 12313718
..Region IV-A (CALABARZON) 9367205 9687547 10009909 10334289 10661585 10990009 11320451 11652912 11988312 12324816 12663339 13003881 13347384 13691969 14038573 14387196 14741686 15085285 15418944 15742673 16057299
..MIMAROPA Region 2305919 2352183 2398060 2443548 2488772 2533484 2577808 2621744 2665411 2708570 2751341 2793725 2835835 2877441 2918660 2959491 3006430 3051342 3094357 3135503 3174859
..Region V (Bicol Region) 4698058 4772451 4846614 4920546 4994449 5067918 5141157 5214165 5287141 5359687 5432002 5504085 5576135 5647756 5719147 5790307 5865520 5937321 6005949 6071398 6133836
..Region VI (Western Visayas) 6224949 6317904 6409990 6501206 6591799 6681274 6769880 6857616 6944719 7030714 7115840 7200096 7283710 7366225 7447870 7528646 7610389 7688734 7763898 7835883 7904899
..Region VII (Central Visayas) 5723559 5830498 5937986 6046025 6154912 6264053 6373743 6483984 6595079 6706422 6818314 6930757 7044060 7157605 7271699 7386344 7511565 7631003 7745017 7853606 7957046
..Region VIII (Eastern Visayas) 3618043 3658304 3700580 3744872 3791309 3839638 3889981 3942341 3996867 4053263 4111674 4172100 4234716 4299179 4365658 4434152 4495990 4557614 4619183 4680701 4742337
..Region IX (Zamboanga Peninsula) 2840160 2906575 2970981 3033380 3093932 3152309 3208677 3263037 3315530 3365869 3414199 3460521 3504953 3547254 3587546 3625830 3660328 3693191 3724550 3754387 3782761
..Region X (Northern Mindanao) 3517631 3599472 3680709 3761342 3841588 3921010 3999827 4078040 4155859 4232861 4309258 4385050 4460443 4535025 4609002 4682374 4755673 4825658 4892512 4956259 5017051
..Region XI (Davao Region) 3688138 3766504 3845076 3923852 4003049 4082235 4161626 4241222 4321241 4401248 4481458 4561874 4642716 4723542 4804572 4885808 4967320 5048419 5129342 5210081 5290869
..Region XII (SOCCSKSARGEN) 2980515 3054507 3128080 3201234 3274168 3346482 3418378 3489855 3561107 3631744 3701963 3771762 3841332 3910293 3978834 4046956 4120287 4190004 4256317 4319226 4378871
..Region XIII (Caraga) 2100540 2134809 2168877 2202744 2236502 2269967 2303231 2336294 2369246 2401907 2434366 2466625 2498771 2530627 2562283 2593738 2627491 2660236 2692072 2723012 2753109
..Bangsamoro Autonomous Region in Muslim Mindanao (BARMM) 3064720 3096339 3134866 3180302 3232800 3292071 3358251 3431340 3511566 3598490 3692323 3793064 3901019 4015596 4137081 4265476 4354017 4444014 4535689 4629060 4724381
import pandas as pd
pd.options.mode.chained_assignment = None
def format_data(gdp_csv, population_csv):
    """Build a long-format regional panel from two wide CSV files.

    Both files are expected to have a ``Region`` column followed by one
    column per year. GDP year headers end in the four-digit year;
    population year headers are the year itself.

    Parameters
    ----------
    gdp_csv : str
        Path to the wide regional GDP CSV.
    population_csv : str
        Path to the wide regional population CSV.

    Returns
    -------
    pandas.DataFrame
        One row per (region, year), sorted by region and then year, with
        columns ``region``, ``year``, ``gdp_constant_php``, ``population``,
        ``gdp_per_capita``, ``gdp_growth``, ``pop_growth`` and
        ``gdp_pc_growth``. Growth columns hold the fractional change from
        the region's previous year and are 0 for its first year.
    """
    value_names = {'gdp': 'gdp_constant_php', 'pop': 'population'}

    def read_data(filename, indicator):
        """Read one wide CSV and melt it to (region, year, value) rows."""
        if indicator not in value_names:
            # Fail loudly instead of returning None, which would only
            # surface later as an obscure AttributeError.
            raise ValueError('unknown indicator: %r' % (indicator,))
        wide = pd.read_csv(filename)
        long = pd.melt(
            wide,
            id_vars='Region',
            value_vars=wide.columns[1:],
            var_name='year',
            value_name=value_names[indicator]
        ).rename(columns={'Region': 'region'})
        if indicator == 'gdp':
            # GDP headers carry a text prefix; keep only the trailing year
            long['year'] = long['year'].str[-4:]
        long['year'] = pd.to_numeric(long['year'], downcast='integer')
        return long

    gdp = read_data(gdp_csv, 'gdp')
    population = read_data(population_csv, 'pop')
    df = gdp.merge(population, on=['region', 'year'])
    # Region names are prefixed with two characters ("..") in the raw files
    df['region'] = df['region'].str[2:]
    df['gdp_per_capita'] = df['gdp_constant_php'] / df['population']

    cols = ['gdp_constant_php', 'population', 'gdp_per_capita']
    growth_cols = ['gdp_growth', 'pop_growth', 'gdp_pc_growth']

    # Sort chronologically within each region so growth is always measured
    # against the previous year and the row order is the same on every run
    # (iterating over a set of region names is not).
    df = df.sort_values(['region', 'year']).reset_index(drop=True)
    # Grouped pct_change never crosses region boundaries; the NaN produced
    # for each region's first year is replaced by 0.
    growth = df.groupby('region', sort=False)[cols].pct_change().fillna(0)
    growth.columns = growth_cols
    return pd.concat([df, growth], axis=1)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment