Skip to content

Instantly share code, notes, and snippets.

@iangow
Created January 13, 2024 16:39
Show Gist options
  • Save iangow/448027ad232cdba6031ebebbe6d45aeb to your computer and use it in GitHub Desktop.
Save iangow/448027ad232cdba6031ebebbe6d45aeb to your computer and use it in GitHub Desktop.
R version of SAS code from Dan Taylor
Sys.setenv(PGHOST= "wrds-pgdata.wharton.upenn.edu",
           PGPORT= 9737L,
           PGDATABASE = "wrds",
           PGUSER = "iangow")

library(dplyr, warn.conflicts = FALSE)
library(DBI)

db <- dbConnect(RPostgres::Postgres())

feed09 <- tbl(db, Id(schema = "audit", 
                     table = "feed09_nonreliance_restatements"))

restatements <-
  feed09 |>
  mutate(res_notif_key = as.integer(res_notif_key),
         cik = as.integer(company_fkey),
         acctg = as.logical(as.integer(res_accounting)),
         fraud = as.logical(as.integer(res_fraud)),
         other = as.logical(as.integer(res_other)),
         error = as.logical(as.integer(res_cler_err)),
         sec = as.logical(as.integer(res_sec_invest))) |>
  rename(start = res_begin_date,
         end = res_end_date,
         acctg_code = res_acc_res_fkey_list) |>
  filter(!is.na(start), !is.na(end), !is.na(cik)) |>
  mutate(neteffect = case_when(res_adverse == 1 ~ -1L,
                               res_improves == 1 ~ 1L,
                               .default = 0L),
         restate = TRUE,
         restatehlm = sec | fraud) |>
  select(cik, res_notif_key, acctg, fraud, other, error, neteffect,
         acctg_code, start, end, sec, restate, restatehlm, file_date) |>
  collect()
 
restatements
#> # A tibble: 21,073 × 14
#>      cik res_notif_key acctg fraud other error neteffect acctg_code start     
#>    <int>         <int> <lgl> <lgl> <lgl> <lgl>     <int> <chr>      <date>    
#>  1  1750         50022 TRUE  FALSE FALSE FALSE         1 |18|       2015-03-01
#>  2  1853         14562 TRUE  FALSE FALSE FALSE        -1 |4|        2006-01-01
#>  3  1853         17140 TRUE  FALSE FALSE FALSE        -1 |17|       2005-01-01
#>  4  1923         42582 TRUE  FALSE FALSE FALSE        -1 |36|       2008-11-01
#>  5  3202         28281 TRUE  FALSE FALSE FALSE        -1 |6|        2007-07-01
#>  6  1923         34814 TRUE  FALSE FALSE FALSE        -1 |18|       2008-05-01
#>  7  1961         28813 TRUE  FALSE FALSE FALSE        -1 |12|       2007-01-01
#>  8  1961         47438 TRUE  FALSE FALSE FALSE        -1 |4|17|     2013-01-01
#>  9  2034         57712 TRUE  FALSE TRUE  FALSE        -1 |6|14|     2014-07-01
#> 10  2034         59905 TRUE  FALSE TRUE  FALSE        -1 |18|       2017-07-01
#> # ℹ 21,063 more rows
#> # ℹ 5 more variables: end <date>, sec <lgl>, restate <lgl>, restatehlm <lgl>,
#> #   file_date <date>

restatements |> count()
#> # A tibble: 1 × 1
#>       n
#>   <int>
#> 1 21073
restatements |> count(neteffect)
#> # A tibble: 3 × 2
#>   neteffect     n
#>       <int> <int>
#> 1        -1 18093
#> 2         0    12
#> 3         1  2968
restatements |> count(restatehlm)
#> # A tibble: 2 × 2
#>   restatehlm     n
#>   <lgl>      <int>
#> 1 FALSE      19616
#> 2 TRUE        1457

Created on 2024-01-13 with reprex v2.0.2

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment