Skip to content

Instantly share code, notes, and snippets.

@timriffe
Created April 7, 2022 18:04
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save timriffe/9bea7eb01158f04dc8ae445edd5e4af0 to your computer and use it in GitHub Desktop.
Save timriffe/9bea7eb01158f04dc8ae445edd5e4af0 to your computer and use it in GitHub Desktop.
A script to test splitting age-grouped census data using a long prior birth series
library(tidyverse)
library(HMDHFDplus)
library(ungroup)
getwd()
# B <- readRDS("Data/SWE_final.Rdata")
# Bt <- B %>%
# group_by(Year) %>%
# summarize(Births = sum(Births))
#
# SWEpop <- readHMDweb("SWE","Population5")
# SWEpop1 <- readHMDweb("SWE","Population")
#
# P1850 <-
# SWEpop %>%
# filter(Year == 1850) %>%
# pull(Total1)
#
# P18501 <-
# SWEpop1 %>%
# filter(Year == 1850) %>%
# pull(Total1)
# # last two age groups are 0
# P1850 <- P1850[P1850 >0]
#
# offsets <-
# Bt %>%
# filter(between(Year, 1749,1849)) %>%
# pull(Births) %>%
# rev()
# this is the 101 years from 1849 backward. Because 1849 births are age 0 on Jan 1
# 1850. We reverse so that cohorts align w ages.
# dput(offsets)
offsets <-c(114360.338149985, 104416.997456112, 100899.903011274, 101568.113977982,
105506.369020935, 106562.755925652, 101036.974515779, 102858.353638234,
97484.6688393768, 99919.4827809871, 92928.2263232973, 92121.7571193661,
96040.2377038759, 98531.9574217228, 99798.5273986025, 101752.213092052,
101635.125204392, 91101.3036005965, 89488.1992384203, 95684.9875271359,
100670.627637078, 96480.352342805, 89265.9556466956, 98282.2152210177,
101553.77599966, 94632.0936362788, 99355.2654649755, 95385.6735852167,
93065.2411373773, 85724.6251632838, 85107.6573534897, 86535.1045889448,
84636.1829615902, 88401.700247799, 85918.6924830507, 76361.6315370878,
72764.4236537387, 81773.3970849707, 85539.6501223113, 79562.617445194,
64872.7604431904, 74316.2884551781, 76562.3697572838, 75256.1706260714,
77168.5066749947, 77076.8054120232, 74783.4599866075, 75647.317174569,
69971.4571889974, 66919.4386018076, 74744.0619721645, 76813.7386033038,
80475.9798853536, 79004.4706976403, 72771.3992325007, 76344.5851403123,
76352.6277791657, 80927.199006093, 71793.9675928921, 66332.2317908893,
69979.4475639117, 73675.5125634678, 67988.9973205336, 70385.1153333111,
66368.2575029669, 67044.8966577169, 64854.528608772, 68374.3556187991,
70952.0070124208, 75473.317120037, 75777.7799663606, 70756.5432763295,
66636.6836267458, 66356.7829924891, 68669.5909881095, 68389.8824372759,
50910.1588980166, 58559.1455842469, 65644.4740573077, 67171.731993771,
66766.2280569444, 67225.4673631395, 70033.7865619517, 66579.7203192416,
65871.9355073894, 67746.6997841969, 67735.6058010992, 67682.6060483445,
66949.6702686209, 68383.5440028044, 63677.2082403174, 62975.8001953278,
61424.6718349747, 67848.70837249, 70007.7279908484, 68716.5662491235,
65955.3860178138, 64933.6406568207, 69266.4273548409, 64510.4861756017,
59482.946553662)
# dput(P1850)
P1850 <- c(96039, 337014.5, 364964.52, 335581.5, 337215.01, 311368, 296341,
248592.49, 210727.5, 179383, 168523.99, 154044, 131508.51, 104184,
72807.99, 50246.5, 26032.5, 11931.47, 3821.49, 469.84, 64.64,
3.55)
# dput(P18501)
P18501 <- c(96039, 88781.02, 83310.48, 81801.81, 83121.19, 80561.03, 73231.98,
71872.39, 68590.86, 70708.26, 67795.42, 64997.28, 65251.66, 66901.99,
70635.15, 70971.65, 70492.82, 66870.37, 63526.88, 65353.29, 65891.74,
63809.83, 58033.03, 60482.17, 63151.23, 60692.74, 61257.2, 59969,
59516.01, 54906.05, 50565.36, 50527.21, 49295.94, 50004.94, 48199.04,
44622.6, 40079.88, 42223.08, 42875.55, 40926.39, 35583.8, 34886.82,
37663.29, 36122.65, 35126.44, 33687.78, 33859.39, 35636.9, 33555.29,
31784.63, 32247.91, 31924.95, 31590.94, 30307.47, 27972.73, 27287.02,
27159.86, 27164.78, 25923.07, 23973.78, 23149.94, 22636.24, 20982.85,
19492.67, 17922.3, 16274.54, 15112, 14292.56, 13759.99, 13368.9,
12433.68, 11296.55, 10049.36, 8825.76, 7641.15, 6815.61, 5943.9,
4772.12, 4487, 4013.87, 3264.13, 2787.32, 2407.77, 1924.18, 1548.07,
1250.68, 959.67, 720.55, 539.58, 351.01, 177.14, 120.8, 82, 55.66,
34.24, 22.89, 15.02, 12.86, 9.41, 4.46, 1.55, 0, 0.51, 1.02,
0.47, 0, 0, 0, 0, 0, 0)
P18501[101] <- sum(P18501[101:111])
P18501 <- P18501[1:101]
# input age vector
x <- c(0, 1,seq(5, 100, by = 5))
age_int <- c(1,4,rep(5,18))
# split abridged ages using prior birth series as offsets
mod <- pclm(x=x, y = P1850, offset = offsets, nlast=1 )
plot(offsets)
P1850hat <- mod$fitted * offsets
# Well, seems to work, but it is sound?
plot(x,P1850 / age_int, type = 's')
lines(0:100, P1850hat, type = "s",col="red")
# stack next to the HMD single-age estimate, quite different
# Their approach takes single age structure from a future
# census and propagates it backward through the cohorts.
plot(0:100,P18501)
lines(0:100,P1850hat,type = "s")
# Seems consequential?
plot(P1850hat / P18501[1:101], log = 'y')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment