library(dplyr, warn.conflicts = FALSE)
library(DBI)
pg <- dbConnect(RPostgres::Postgres())
ccmxpf_lnkhist <- tbl(pg, sql("SELECT * FROM crsp.ccmxpf_lnkhist"))
ccmxpf_lnkused <- tbl(pg, sql("SELECT * FROM crsp.ccmxpf_lnkused"))
ccmxpf_linktable <- tbl(pg, sql("SELECT * FROM crsp.ccmxpf_linktable"))
apermnos <-
ccmxpf_lnkhist %>%
filter(!is.na(lpermno)) %>%
select(gvkey, lpermno) %>%
rename(apermno = lpermno) %>%
distinct()
ccmxpf_lnkused_calc<-
ccmxpf_lnkhist %>%
inner_join(apermnos, by = "gvkey") %>%
mutate(usedflag = case_when(lpermno==apermno &
linktype %in% c("LC", "LS", "LU") ~ 1,
TRUE ~ -1)) %>%
rename(ugvkey = gvkey,
ulinkprim = linkprim,
upermno = lpermno,
upermco = lpermco,
uiid = liid, ulinktype = linktype,
ulinkdt = linkdt,
ulinkenddt = linkenddt)
# The field ulinkid seems to be of no value and impossible to reproduce
all_equal(ccmxpf_lnkused %>%
select(-ulinkid) %>%
collect(),
ccmxpf_lnkused_calc %>%
collect())
#> [1] TRUE
ccmxpf_linktable_calc <-
ccmxpf_lnkhist %>%
filter(!is.na(lpermno)) %>%
select(gvkey) %>%
distinct() %>%
inner_join(ccmxpf_lnkhist, by = "gvkey") %>%
mutate(lpermno = as.integer(lpermno),
lpermco = as.integer(lpermco)) %>%
mutate(usedflag = case_when(!is.na(lpermno) &
linktype %in% c("LC", "LS", "LU") ~ 1L,
TRUE ~ -1L))
all_equal(ccmxpf_linktable_calc %>%
collect(),
ccmxpf_linktable %>%
mutate(usedflag = as.integer(usedflag),
lpermco = as.integer(lpermco),
lpermno = as.integer(lpermno)) %>%
collect())
#> [1] TRUE
ccmxpf_lnkhist %>%
filter(linktype %in% c("LC", "LS", "LU"),
linkprim %in% c("P", "C")) %>%
count()
#> # Source: lazy query [?? x 1]
#> # Database: postgres [iangow@wrds-pgdata.wharton.upenn.edu:9737/wrds]
#> n
#> <int64>
#> 1 33798
ccmxpf_linktable %>%
filter(linktype %in% c("LC", "LS", "LU"),
linkprim %in% c("P", "C")) %>%
count()
#> # Source: lazy query [?? x 1]
#> # Database: postgres [iangow@wrds-pgdata.wharton.upenn.edu:9737/wrds]
#> n
#> <int64>
#> 1 33798
ccmxpf_lnkused %>%
filter(usedflag == 1,
ulinkprim %in% c("P", "C")) %>%
count()
#> # Source: lazy query [?? x 1]
#> # Database: postgres [iangow@wrds-pgdata.wharton.upenn.edu:9737/wrds]
#> n
#> <int64>
#> 1 33798
Created on 2021-02-12 by the reprex package (v0.3.0)