Last active
May 11, 2021 21:31
-
-
Save emordonez/de0a8019c830285f327bbd39a152cfa7 to your computer and use it in GitHub Desktop.
Example breakdown of PSA data on regions of the Philippines by GDP.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
if (!require("pacman")) install.packages("pacman") | |
pacman::p_load( | |
"dplyr", "ggplot2", "readr", "reticulate", | |
"sf", "showtext", "stringr" | |
) | |
# Write a plot to "<filename>.png" via ggsave().
#
# filename: output path without the extension; ".png" is appended.
# plot:     the plot object handed to ggsave().
# width, height: image size in inches (7 x 7 unless overridden).
# The image is always written at 96 dpi. Returns whatever ggsave() returns.
save_image <- function(filename, plot, width = 7, height = 7) {
  png_path <- sprintf("%s.png", filename)
  ggsave(
    filename = png_path,
    plot = plot,
    width = width,
    height = height,
    units = "in",
    dpi = 96
  )
}
# Shared plot layers: a common caption, the minimal base theme, and text and
# margin overrides. Kept as a list so it can be added to any plot with `+`.
plot_settings <- list(
  labs(
    caption = "Data: Philippine Statistics Authority | github.com/emordonez"
  ),
  theme_minimal(),
  # Overrides are applied after theme_minimal() so they take precedence
  theme(
    text = element_text(family = "Open Sans"),
    plot.title = element_text(face = "bold"),
    plot.subtitle = element_text(face = "plain"),
    plot.caption = element_text(face = "italic"),
    axis.title.x = element_text(margin = margin(t = 7, unit = "pt")),
    axis.title.y = element_text(margin = margin(r = 7, unit = "pt")),
    plot.margin = margin(t = 10, r = 10, b = 10, l = 10, unit = "pt"),
    legend.position = "none"
  )
)
# Register the "Open Sans" family used by the theme and turn on showtext
# rendering so the font is used when plots are drawn
font_add_google(name = "Open Sans", family = "Open Sans")
showtext_auto(enable = TRUE)
# Load the Python helper and build the regional data frame from the two CSVs
source_python("regional_data.py")
df <- format_data("./regional-gdp.csv", "./regional-population.csv")
# National totals for each year, plus national GDP per capita
ph <- df %>%
  group_by(year) %>%
  dplyr::summarise(
    national_gdp = sum(gdp_constant_php),
    national_pop = sum(population)
  ) %>%
  mutate(national_gdp_pc = national_gdp / national_pop)
# Attach the national figures to every regional row, derive each region's
# share of the national aggregates, and shorten the region names
df <- merge(df, ph, by = "year") %>%
  mutate(
    pop_share = population / national_pop,
    gdp_pc_ratio = gdp_per_capita / national_gdp_pc,
    region = case_when(
      region == "National Capital Region (NCR)" ~ "Metro Manila",
      region == "Cordillera Administrative Region (CAR)" ~ "Cordillera Region",
      region == "MIMAROPA Region" ~ "Mimaropa",
      region == "Bangsamoro Autonomous Region in Muslim Mindanao (BARMM)" ~ "Bangsamoro",
      # Numbered regions carry their name in parentheses: extract the
      # parenthesised part, drop the two parentheses, and title-case it
      startsWith(region, "Region") ~ region %>%
        str_extract("\\([^()]+\\)") %>%
        substring(2, nchar(.) - 1) %>%
        str_to_title()
    )
  )
#'
#' PLOT 1: Philippine regions ranked by real GDP per capita
#'
# Rank the regions by GDP per capita within each snapshot year, then label
# every region with its net movement in the rankings between 2000 and 2020.
region_rankings <- df %>%
  # Rankings in 2000, 2010, and 2020
  filter(year %in% c(2000, 2010, 2020)) %>%
  select(year, region, gdp_per_capita) %>%
  group_by(year) %>%
  # Rank 1 is the highest GDP per capita of that year
  mutate(ranking = dense_rank(desc(gdp_per_capita))) %>%
  ungroup() %>%
  # Net movement up or down the rankings from 2000 to 2020
  group_by(region) %>%
  mutate(trend = case_when(
    ranking[year == 2020] < ranking[year == 2000] ~ "increase",
    ranking[year == 2020] > ranking[year == 2000] ~ "decrease",
    TRUE ~ "same"
  )) %>%
  ungroup()
# The 2000 rankings supply the y-axis breaks and their region labels
rankings_2000 <- region_rankings %>% filter(year == 2000)
viz_region_rankings <- region_rankings %>%
  ggplot(aes(x = year, y = ranking, group = region, color = trend)) +
  geom_line() +
  geom_point() +
  scale_x_continuous(breaks = c(2000, 2010, 2020)) +
  # Reversed so that rank 1 is drawn at the top; the left axis shows region
  # names and the duplicated right axis shows the rank numbers
  scale_y_reverse(
    labels = rankings_2000$region,
    breaks = rankings_2000$ranking,
    sec.axis = dup_axis(labels = rankings_2000$ranking)
  ) +
  # Colours are named so each trend keeps its colour even when one of the
  # three categories does not occur in the data (unnamed values are matched
  # by position and would silently shift)
  scale_color_manual(
    values = c(decrease = "red", increase = "blue", same = "black")
  ) +
  labs(
    title = "Philippine regions ranked by real GDP per capita",
    subtitle = paste(
      "The fastest growing regions over the past 20 years have been",
      "\nNorthern Mindanao, Mimaropa, Western Visayas, and Caraga."
    ),
    x = NULL,
    y = NULL
  ) +
  plot_settings
save_image("1-region-rankings", viz_region_rankings)
#'
#' PLOT 2: Regional GDP per capita relative to the national average
#'
# Using a 2011 PhilGIS shapefile
# 2011 region boundaries are coterminous with post-2018 region boundaries,
# but some regions are named differently
download.file(
  "https://drive.google.com/uc?export=download&id=0B-JUh-O7Rg6YZmRwVEF5aWFsWVU",
  destfile = "Regions.7z",
  # The archive is binary; on Windows a text-mode transfer would corrupt it
  mode = "wb"
)
# Extract the archive into ./Regions. `-y` answers "yes" to overwrite prompts
# so that re-running the script does not block waiting for input.
extract_status <- system("7z e -y -oRegions Regions.7z")
# system() returns the command's exit status; stop here rather than let
# read_sf() fail later on a missing or partial extraction
if (extract_status != 0) {
  stop(
    "Extracting Regions.7z with 7z failed (exit status ", extract_status, ")",
    call. = FALSE
  )
}
regions_shp <- read_sf("./Regions/", "Regions")
# Rename the shapefile's regions to the short names used for the GDP data.
# Names that match none of the rules below become NA.
regions_shp <- mutate(
  regions_shp,
  REGION = case_when(
    REGION == "Metropolitan Manila" ~ "Metro Manila",
    REGION == "Cordillera Administrative Region (CAR)" ~ "Cordillera Region",
    REGION == "Autonomous Region of Muslim Mindanao (ARMM)" ~ "Bangsamoro",
    # Numbered regions end in " (Region ___)": strip that suffix and
    # title-case what is left
    grepl("(Region", REGION, fixed = TRUE) ~
      str_to_title(gsub(" \\([^)]*\\)", "", REGION, perl = TRUE))
  )
)
# 2020 ratios of regional to national GDP per capita, joined to the region
# geometries on the region name
gdp_pc_ratios_2020 <- merge(
  x = select(filter(df, year == 2020), region, gdp_pc_ratio, national_gdp_pc),
  y = regions_shp,
  by.x = "region",
  by.y = "REGION"
)
# Bin the ratios for the map fill; labels read as percent of the national
# average
gdp_pc_ratios_2020[["ratio_bins"]] <- cut(
  gdp_pc_ratios_2020[["gdp_pc_ratio"]],
  breaks = c(0, 0.5, 0.6, 0.7, 0.8, 0.9, 1, Inf),
  labels = c("< 50", "50-60", "60-70", "70-80", "80-90", "90-100", "> 100")
)
# National GDP per capita from the last row of `ph`
# NOTE(review): assumes the last row of `ph` is the year 2020 - confirm
ph_gdp_pc_2020 <- ph[["national_gdp_pc"]][nrow(ph)]
# Ratios quoted in the map annotations (last matching row for each region)
manila_ratio_2020 <- with(
  gdp_pc_ratios_2020,
  tail(gdp_pc_ratio[which(region == "Metro Manila")], n = 1)
)
northern_mindanao_ratio_2020 <- with(
  gdp_pc_ratios_2020,
  tail(gdp_pc_ratio[which(region == "Northern Mindanao")], n = 1)
)
# Choropleth of regional GDP per capita as a share of the national average
viz_gdp_pc_ratios_2020 <- ggplot(data = gdp_pc_ratios_2020, aes(geometry = geometry)) +
  # First layer fills each region by its bin; second layer draws the outlines
  geom_sf(aes(fill = ratio_bins), color = "transparent", show.legend = "polygon") +
  geom_sf(fill = "transparent", color = "black", size = 0.25) +
  annotate(
    "text",
    x = 118.5,
    y = 14.5,
    label = sprintf(
      "Metro Manila: %0.0f%%",
      manila_ratio_2020 * 100
    ),
    size = 4
  ) +
  annotate(
    "text",
    x = 121,
    y = 8.75,
    label = sprintf(
      "Northern Mindanao: %0.0f%%",
      northern_mindanao_ratio_2020 * 100
    ),
    size = 4
  ) +
  scale_fill_manual(
    # One colour per bin, matched by bin label. Naming the values keeps every
    # bin on its intended colour whether or not it occurs in the data; the
    # previous unnamed six-colour palette only worked while the "70-80" bin
    # stayed empty.
    values = c(
      "< 50" = "#003f5c", "50-60" = "#374c80", "60-70" = "#7a5195",
      "70-80" = "#bc5090", "80-90" = "#ef5675", "90-100" = "#ff764a",
      "> 100" = "#ffa600"
    ),
    name = "Percent of national average",
    guide = guide_legend(
      keyheight = unit(8, units = "mm"),
      keywidth = unit(8, units = "mm"),
      title.position = "top",
      label.position = "right",
      nrow = 8
    )
  ) +
  labs(
    title = "Regional GDP per capita relative to the national average",
    subtitle = paste(
      "The most productive regions per capita are those surrounding",
      "Manila and \nDavao. National GDP per capita in 2020 was",
      sprintf("%0.2f", ph_gdp_pc_2020),
      "thousand pesos at \nconstant 2018 prices."
    ),
    x = NULL,
    y = NULL
  ) +
  plot_settings +
  # Added after plot_settings so the legend is shown here (plot_settings
  # hides it) and the coordinate labels are dropped
  theme(
    legend.position = c(0, 0.975),
    legend.justification = c(0, 1),
    axis.text.x = element_blank(),
    axis.text.y = element_blank()
  )
save_image("2-regional-gdp-ratios-2020", viz_gdp_pc_ratios_2020, height = 10)
#'
#' PLOT 3: Real GDP per capita vs. share of national population
#'
# 2020 regions ordered from lowest to highest GDP per capita. Each region's
# bar is as wide as its population share (in percent) and is centred at
# `loc`, the midpoint of its slice of the cumulative share.
ph_pop_shares_2020 <- df %>%
  filter(year == 2020) %>%
  select(region, gdp_per_capita, pop_share, national_gdp, national_pop) %>%
  arrange(gdp_per_capita) %>%
  mutate(pop_share = pop_share * 100) %>%
  mutate(cum_share = cumsum(pop_share)) %>%
  mutate(loc = cum_share - pop_share / 2)
# IMF's 2018 average exchange rate for PHP to USD
xtr <- 52.661
# 2020 national aggregates, read from the last row of `ph`
# NOTE(review): assumes the last row of `ph` is the year 2020 - confirm
ph_gdp_2020 <- ph[["national_gdp"]][nrow(ph)]
ph_pop_2020 <- ph[["national_pop"]][nrow(ph)]
ph_gdp_pc_2020 <- ph[["national_gdp_pc"]][nrow(ph)]
# Variable-width bar chart: bar length is GDP per capita, bar width is the
# region's share of the national population
viz_ph_pop_shares_2020 <- ph_pop_shares_2020 %>%
  ggplot(aes(x = loc, y = gdp_per_capita, width = pop_share)) +
  geom_bar(
    aes(fill = region), alpha = 0.75,
    stat = "identity", position = "identity"
  ) +
  # Reference line at the national GDP per capita
  geom_hline(yintercept = ph_gdp_pc_2020) +
  scale_x_continuous(
    name = "Cumulative share of population (%)",
    breaks = seq(0, 100, 10),
    labels = as.character(seq(0, 100, 10)),
    # Secondary axis on the same scale, used only to print each region's
    # name at the centre of its bar (stray trailing comma removed from
    # this call)
    sec.axis = sec_axis(
      ~ . * 1,
      breaks = ph_pop_shares_2020$loc,
      labels = ph_pop_shares_2020$region
    ),
    expand = c(0.01, 0.25)
  ) +
  scale_y_continuous(
    expand = c(0.01, 0.25)
  ) +
  annotate(
    "text",
    x = 50,
    y = 275,
    label = sprintf("National GDP per capita, %0.0f", ph_gdp_pc_2020),
    size = 4
  ) +
  labs(
    title = "Real GDP per capita vs. share of national population",
    subtitle = paste(
      "The population of the Philippines in 2020 was",
      sprintf("%0.2f million,", ph_pop_2020 / 1e6),
      "and total GDP at \nconstant 2018 prices was",
      sprintf("%0.2f trillion PHP", ph_gdp_2020 / 1e9),
      sprintf("(%0.0f billion USD, 1 USD = %0.2f PHP).",
        ph_gdp_2020 / 1e6 / xtr, xtr
      )
    ),
    x = NULL,
    y = "Real GDP per capita, thousand PHP (2018)"
  ) +
  plot_settings +
  theme(
    plot.caption = element_text(hjust = 0)
  ) +
  # Flip so the bars run horizontally and the region names sit on a
  # vertical axis
  coord_flip()
save_image("3-rgdp-vs-population", viz_ph_pop_shares_2020)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Region | At Constant 2018 Prices 2000 | At Constant 2018 Prices 2001 | At Constant 2018 Prices 2002 | At Constant 2018 Prices 2003 | At Constant 2018 Prices 2004 | At Constant 2018 Prices 2005 | At Constant 2018 Prices 2006 | At Constant 2018 Prices 2007 | At Constant 2018 Prices 2008 | At Constant 2018 Prices 2009 | At Constant 2018 Prices 2010 | At Constant 2018 Prices 2011 | At Constant 2018 Prices 2012 | At Constant 2018 Prices 2013 | At Constant 2018 Prices 2014 | At Constant 2018 Prices 2015 | At Constant 2018 Prices 2016 | At Constant 2018 Prices 2017 | At Constant 2018 Prices 2018 | At Constant 2018 Prices 2019 | At Constant 2018 Prices 2020 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
..National Capital Region (NCR) | 2416391870 | 2483504980 | 2507171644 | 2624052475 | 2841837836 | 2988546218 | 3157102605 | 3372129301 | 3524902698 | 3522558104 | 3745124533 | 3833040760 | 4072189800 | 4339858704 | 4576914911 | 4865073608 | 5216091453 | 5507681038 | 5814440130 | 6224134457 | 5596389427 | |
..Cordillera Administrative Region (CAR) | 143658283 | 149394641 | 156294714 | 164434850 | 173236667 | 176315299 | 182923099 | 196165501 | 203317240 | 207767713 | 220343710 | 221420475 | 221950621 | 236871362 | 248982342 | 259526063 | 266691916 | 291775205 | 308267122 | 321722276 | 289898072 | |
..Region I (Ilocos Region) | 240124758 | 245289940 | 252727880 | 264116263 | 277856799 | 290757292 | 307818091 | 326055249 | 335759337 | 335356395 | 364366356 | 378621382 | 398616600 | 432692177 | 461217885 | 486062080 | 525389666 | 554680762 | 587597251 | 630362667 | 581894468 | |
..Region II (Cagayan Valley) | 158218411 | 163979540 | 166495687 | 170783478 | 186059551 | 182540224 | 200542676 | 213644370 | 218872182 | 223636839 | 232748963 | 247587301 | 264876624 | 288512422 | 312814849 | 325728274 | 341540501 | 368250759 | 385061271 | 411513567 | 370865964 | |
..Region III (Central Luzon) | 707245644 | 749289527 | 791683392 | 826169061 | 852296755 | 885593565 | 927301530 | 981902705 | 1032623864 | 1039474095 | 1143777786 | 1233431610 | 1336963348 | 1417025690 | 1526621058 | 1619134911 | 1747565400 | 1929193858 | 2062393875 | 2183779631 | 1880093241 | |
..Region IV-A (CALABARZON) | 1050505077 | 1068135114 | 1120500727 | 1179350176 | 1244396888 | 1310160075 | 1369674210 | 1446188225 | 1497994676 | 1486472826 | 1638482343 | 1676719329 | 1809708699 | 1942476533 | 2054936917 | 2196313576 | 2346818115 | 2527658901 | 2706994745 | 2831599919 | 2535284422 | |
..MIMAROPA Region | 124948855 | 129576632 | 148590811 | 163145111 | 169206374 | 184832219 | 191998208 | 210149963 | 220607038 | 230687037 | 232547423 | 241561347 | 254595082 | 265363628 | 292033215 | 305952533 | 321170118 | 341505717 | 370744808 | 386783632 | 357386005 | |
..Region V (Bicol Region) | 176358450 | 181995859 | 189219301 | 199696474 | 210676947 | 222207767 | 229731835 | 246621525 | 259226365 | 277463581 | 293875751 | 304077556 | 334658891 | 368570786 | 390488251 | 431896619 | 462821105 | 488370132 | 522014835 | 564941774 | 517464559 | |
..Region VI (Western Visayas) | 323742052 | 331474894 | 340556420 | 360566385 | 388102456 | 407685036 | 430039527 | 458869067 | 480892689 | 507844955 | 531457374 | 564705465 | 605376449 | 631660677 | 665091892 | 715495909 | 758042789 | 820773095 | 860107768 | 913909365 | 825445426 | |
..Region VII (Central Visayas) | 382350583 | 398961448 | 406689373 | 424323970 | 453194307 | 485139298 | 507783275 | 549142789 | 577396656 | 594859094 | 666353769 | 713605729 | 779760030 | 841425750 | 903755199 | 958557189 | 1029641257 | 1102761503 | 1180945761 | 1254113393 | 1129843546 | |
..Region VIII (Eastern Visayas) | 197055277 | 201720341 | 207527497 | 219236477 | 231739076 | 245209038 | 261794426 | 270150926 | 283219881 | 294361211 | 311990058 | 320330763 | 312295545 | 331113701 | 331605886 | 352680877 | 402993768 | 415257479 | 444384029 | 469292504 | 433414325 | |
..Region IX (Zamboanga Peninsula) | 153478479 | 156632083 | 161152567 | 168261350 | 175819789 | 188931320 | 196123519 | 211043562 | 217350412 | 232017626 | 239770872 | 240130505 | 267034112 | 281049615 | 301747243 | 329285978 | 346005979 | 356798449 | 379428020 | 396878798 | 376048233 | |
..Region X (Northern Mindanao) | 254909002 | 272386379 | 299873812 | 319682061 | 346219338 | 364973391 | 389672303 | 418466127 | 441550494 | 456214263 | 484523975 | 513466854 | 551141548 | 583766826 | 633651974 | 670356960 | 723308958 | 766435831 | 821122015 | 867432424 | 822553826 | |
..Region XI (Davao Region) | 286212024 | 293475895 | 309524303 | 331450707 | 352703211 | 367442175 | 384620255 | 408318046 | 424207244 | 445109409 | 474508281 | 493082130 | 528097604 | 560762183 | 611602549 | 660489974 | 721550629 | 785051181 | 841429225 | 900885668 | 832093644 | |
..Region XII (SOCCSKSARGEN) | 175244994 | 176820019 | 194583624 | 206949168 | 218756257 | 223108764 | 237237885 | 252449432 | 264630696 | 269570793 | 278784645 | 292232263 | 315391176 | 343680815 | 364657867 | 377171442 | 394076962 | 424972061 | 454304549 | 469982193 | 450001744 | |
..Region XIII (Caraga) | 96921630 | 101609542 | 107557223 | 112632685 | 119223129 | 124555442 | 132729349 | 141280994 | 145216552 | 148064425 | 163468004 | 174932167 | 193102956 | 211269822 | 230076396 | 245363390 | 260522888 | 276299208 | 290561794 | 306260878 | 284138649 | |
..Bangsamoro Autonomous Region in Muslim Mindanao (BARMM) | 98017851 | 94136902 | 105745055 | 110826733 | 119752524 | 126327483 | 133711481 | 140661469 | 143109506 | 148174653 | 161737153 | 166414758 | 170707107 | 178542938 | 189848311 | 191818068 | 198444393 | 218512906 | 235393060 | 249157465 | 244418555 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Region | 2000 | 2001 | 2002 | 2003 | 2004 | 2005 | 2006 | 2007 | 2008 | 2009 | 2010 | 2011 | 2012 | 2013 | 2014 | 2015 | 2016 | 2017 | 2018 | 2019 | 2020 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
..National Capital Region (NCR) | 9961971 | 10153254 | 10344788 | 10536574 | 10729137 | 10921427 | 11113967 | 11306759 | 11500331 | 11693627 | 11887173 | 12080971 | 12275553 | 12469854 | 12664407 | 12859211 | 13066832 | 13264805 | 13453701 | 13633497 | 13804656 | |
..Cordillera Administrative Region (CAR) | 1369249 | 1397362 | 1424800 | 1451561 | 1477718 | 1503126 | 1527858 | 1551914 | 1575358 | 1598061 | 1620087 | 1641438 | 1662169 | 1682167 | 1701488 | 1720134 | 1739380 | 1757717 | 1775210 | 1791881 | 1807738 | |
..Region I (Ilocos Region) | 4209083 | 4265007 | 4320673 | 4376079 | 4431377 | 4486265 | 4540893 | 4595262 | 4649520 | 4703370 | 4756961 | 4810293 | 4863510 | 4916323 | 4968877 | 5021171 | 5076184 | 5128542 | 5178410 | 5225800 | 5270807 | |
..Region II (Cagayan Valley) | 2819641 | 2860861 | 2902169 | 2943564 | 2985160 | 3026730 | 3068387 | 3110132 | 3152079 | 3193998 | 3236006 | 3278100 | 3320398 | 3362667 | 3405024 | 3447468 | 3493662 | 3537703 | 3579715 | 3619689 | 3657741 | |
..Region III (Central Luzon) | 8233671 | 8420004 | 8607944 | 8797491 | 8989170 | 9181933 | 9376303 | 9572279 | 9770406 | 9969599 | 10170399 | 10372806 | 10577379 | 10783003 | 10990233 | 11199069 | 11437442 | 11667642 | 11890314 | 12105494 | 12313718 | |
..Region IV-A (CALABARZON) | 9367205 | 9687547 | 10009909 | 10334289 | 10661585 | 10990009 | 11320451 | 11652912 | 11988312 | 12324816 | 12663339 | 13003881 | 13347384 | 13691969 | 14038573 | 14387196 | 14741686 | 15085285 | 15418944 | 15742673 | 16057299 | |
..MIMAROPA Region | 2305919 | 2352183 | 2398060 | 2443548 | 2488772 | 2533484 | 2577808 | 2621744 | 2665411 | 2708570 | 2751341 | 2793725 | 2835835 | 2877441 | 2918660 | 2959491 | 3006430 | 3051342 | 3094357 | 3135503 | 3174859 | |
..Region V (Bicol Region) | 4698058 | 4772451 | 4846614 | 4920546 | 4994449 | 5067918 | 5141157 | 5214165 | 5287141 | 5359687 | 5432002 | 5504085 | 5576135 | 5647756 | 5719147 | 5790307 | 5865520 | 5937321 | 6005949 | 6071398 | 6133836 | |
..Region VI (Western Visayas) | 6224949 | 6317904 | 6409990 | 6501206 | 6591799 | 6681274 | 6769880 | 6857616 | 6944719 | 7030714 | 7115840 | 7200096 | 7283710 | 7366225 | 7447870 | 7528646 | 7610389 | 7688734 | 7763898 | 7835883 | 7904899 | |
..Region VII (Central Visayas) | 5723559 | 5830498 | 5937986 | 6046025 | 6154912 | 6264053 | 6373743 | 6483984 | 6595079 | 6706422 | 6818314 | 6930757 | 7044060 | 7157605 | 7271699 | 7386344 | 7511565 | 7631003 | 7745017 | 7853606 | 7957046 | |
..Region VIII (Eastern Visayas) | 3618043 | 3658304 | 3700580 | 3744872 | 3791309 | 3839638 | 3889981 | 3942341 | 3996867 | 4053263 | 4111674 | 4172100 | 4234716 | 4299179 | 4365658 | 4434152 | 4495990 | 4557614 | 4619183 | 4680701 | 4742337 | |
..Region IX (Zamboanga Peninsula) | 2840160 | 2906575 | 2970981 | 3033380 | 3093932 | 3152309 | 3208677 | 3263037 | 3315530 | 3365869 | 3414199 | 3460521 | 3504953 | 3547254 | 3587546 | 3625830 | 3660328 | 3693191 | 3724550 | 3754387 | 3782761 | |
..Region X (Northern Mindanao) | 3517631 | 3599472 | 3680709 | 3761342 | 3841588 | 3921010 | 3999827 | 4078040 | 4155859 | 4232861 | 4309258 | 4385050 | 4460443 | 4535025 | 4609002 | 4682374 | 4755673 | 4825658 | 4892512 | 4956259 | 5017051 | |
..Region XI (Davao Region) | 3688138 | 3766504 | 3845076 | 3923852 | 4003049 | 4082235 | 4161626 | 4241222 | 4321241 | 4401248 | 4481458 | 4561874 | 4642716 | 4723542 | 4804572 | 4885808 | 4967320 | 5048419 | 5129342 | 5210081 | 5290869 | |
..Region XII (SOCCSKSARGEN) | 2980515 | 3054507 | 3128080 | 3201234 | 3274168 | 3346482 | 3418378 | 3489855 | 3561107 | 3631744 | 3701963 | 3771762 | 3841332 | 3910293 | 3978834 | 4046956 | 4120287 | 4190004 | 4256317 | 4319226 | 4378871 | |
..Region XIII (Caraga) | 2100540 | 2134809 | 2168877 | 2202744 | 2236502 | 2269967 | 2303231 | 2336294 | 2369246 | 2401907 | 2434366 | 2466625 | 2498771 | 2530627 | 2562283 | 2593738 | 2627491 | 2660236 | 2692072 | 2723012 | 2753109 | |
..Bangsamoro Autonomous Region in Muslim Mindanao (BARMM) | 3064720 | 3096339 | 3134866 | 3180302 | 3232800 | 3292071 | 3358251 | 3431340 | 3511566 | 3598490 | 3692323 | 3793064 | 3901019 | 4015596 | 4137081 | 4265476 | 4354017 | 4444014 | 4535689 | 4629060 | 4724381 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pandas as pd | |
pd.options.mode.chained_assignment = None | |
def format_data(gdp_csv, population_csv):
    """Combine regional GDP and population tables into one long-format frame.

    Both inputs are wide CSVs whose first column is ``Region`` and whose
    remaining columns each hold one year of values. GDP column headers may
    carry a text prefix; only their last four characters are read as the
    year. Population column headers are parsed as the year directly.

    Parameters
    ----------
    gdp_csv : str or file-like
        GDP table, passed to ``pandas.read_csv``.
    population_csv : str or file-like
        Population table, passed to ``pandas.read_csv``.

    Returns
    -------
    pandas.DataFrame
        One row per region and year, sorted by region and then year, with
        columns ``region``, ``year``, ``gdp_constant_php``, ``population``,
        ``gdp_per_capita``, ``gdp_growth``, ``pop_growth`` and
        ``gdp_pc_growth``. Growth columns hold the year-on-year fractional
        change within a region and are 0 for each region's first year.
    """
    value_names = {'gdp': 'gdp_constant_php', 'pop': 'population'}

    def read_data(filename, indicator):
        """Read one wide CSV and melt it into (region, year, value) rows."""
        if indicator not in value_names:
            # Fail loudly instead of returning None and breaking the merge
            raise ValueError(f'unknown indicator: {indicator!r}')
        wide = pd.read_csv(filename)
        tidy = pd.melt(
            wide,
            id_vars='Region',
            value_vars=wide.columns[1:],
            var_name='year',
            value_name=value_names[indicator]
        )
        tidy = tidy.rename(columns={'Region': 'region'})
        if indicator == 'gdp':
            # Keep only the trailing four-character year of each GDP header
            tidy['year'] = tidy.year.str[-4:]
        tidy['year'] = pd.to_numeric(tidy.year, downcast='integer')
        return tidy

    gdp = read_data(gdp_csv, 'gdp')
    population = read_data(population_csv, 'pop')
    df = gdp.merge(population, on=['region', 'year'])
    # Drop the two-character prefix in front of every region name
    df['region'] = df.region.str[2:]
    df['gdp_per_capita'] = df.gdp_constant_php / df.population

    # Sort explicitly so the output order is the same on every run and the
    # growth rates are always computed in chronological order
    df = df.sort_values(['region', 'year']).reset_index(drop=True)

    cols = ['gdp_constant_php', 'population', 'gdp_per_capita']
    growth_cols = ['gdp_growth', 'pop_growth', 'gdp_pc_growth']
    # Per-region year-on-year change; the first year of a region has no
    # predecessor and is reported as 0
    growth = df.groupby('region', sort=False)[cols].pct_change().fillna(0)
    growth.columns = growth_cols
    return pd.concat([df, growth], axis=1)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment