Skip to content

Instantly share code, notes, and snippets.

@patrick-weiss
Last active February 14, 2024 22:12
Show Gist options
  • Star 2 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save patrick-weiss/3a05b3ab281563b2e94858451c2eb3a4 to your computer and use it in GitHub Desktop.
Save patrick-weiss/3a05b3ab281563b2e94858451c2eb3a4 to your computer and use it in GitHub Desktop.
R function to clean enhanced TRACE from "Tidy Finance with R"
clean_enhanced_trace <- function(cusips,
connection,
start_date = as.Date("2002-01-01"),
end_date = today()) {
# Packages (required)
library(dplyr)
library(lubridate)
library(dbplyr)
library(RPostgres)
# Function checks ---------------------------------------------------------
# Input parameters
## Cusips
if(length(cusips) == 0 | any(is.na(cusips))) stop("Check cusips.")
## Dates
if(!is.Date(start_date) | !is.Date(end_date)) stop("Dates needed")
if(start_date < as.Date("2002-01-01")) stop("TRACE starts later.")
if(end_date > today()) stop("TRACE does not predict the future.")
if(start_date >= end_date) stop("Date conflict.")
## Connection
if(!dbIsValid(connection)) stop("Connection issue.")
# Enhanced Trace ----------------------------------------------------------
# Main file
trace_all <- tbl(connection,
in_schema("trace", "trace_enhanced")) |>
filter(cusip_id %in% cusips) |>
filter(trd_exctn_dt >= start_date & trd_exctn_dt <= end_date) |>
select(cusip_id, msg_seq_nb, orig_msg_seq_nb,
entrd_vol_qt, rptd_pr, yld_pt, rpt_side_cd, cntra_mp_id,
trd_exctn_dt, trd_exctn_tm, trd_rpt_dt, trd_rpt_tm,
pr_trd_dt, trc_st, asof_cd, wis_fl,
days_to_sttl_ct, stlmnt_dt, spcl_trd_fl) |>
collect()
# Enhanced Trace: Post 06-02-2012 -----------------------------------------
# Trades (trc_st = T) and correction (trc_st = R)
trace_post_TR <- trace_all |>
filter((trc_st == "T" | trc_st == "R"),
trd_rpt_dt >= as.Date("2012-02-06"))
# Cancelations (trc_st = X) and correction cancelations (trc_st = C)
trace_post_XC <- trace_all |>
filter((trc_st == "X" | trc_st == "C"),
trd_rpt_dt >= as.Date("2012-02-06"))
# Cleaning corrected and cancelled trades
trace_post_TR <- trace_post_TR |>
anti_join(trace_post_XC,
by = c("cusip_id", "msg_seq_nb", "entrd_vol_qt",
"rptd_pr", "rpt_side_cd", "cntra_mp_id",
"trd_exctn_dt", "trd_exctn_tm"))
# Reversals (trc_st = Y)
trace_post_Y <- trace_all |>
filter(trc_st == "Y",
trd_rpt_dt >= as.Date("2012-02-06"))
# Clean reversals
## match the orig_msg_seq_nb of the Y-message to
## the msg_seq_nb of the main message
trace_post <- trace_post_TR |>
anti_join(trace_post_Y,
by = c("cusip_id", "msg_seq_nb" = "orig_msg_seq_nb",
"entrd_vol_qt", "rptd_pr", "rpt_side_cd",
"cntra_mp_id", "trd_exctn_dt", "trd_exctn_tm"))
# Enhanced TRACE: Pre 06-02-2012 ------------------------------------------
# Cancelations (trc_st = C)
trace_pre_C <- trace_all |>
filter(trc_st == "C",
trd_rpt_dt < as.Date("2012-02-06"))
# Trades w/o cancelations
## match the orig_msg_seq_nb of the C-message
## to the msg_seq_nb of the main message
trace_pre_T <- trace_all |>
filter(trc_st == "T",
trd_rpt_dt < as.Date("2012-02-06")) |>
anti_join(trace_pre_C,
by = c("cusip_id", "msg_seq_nb" = "orig_msg_seq_nb",
"entrd_vol_qt", "rptd_pr", "rpt_side_cd",
"cntra_mp_id", "trd_exctn_dt", "trd_exctn_tm"))
# Corrections (trc_st = W) - W can also correct a previous W
trace_pre_W <- trace_all |>
filter(trc_st == "W",
trd_rpt_dt < as.Date("2012-02-06"))
# Implement corrections in a loop
## Correction control
correction_control <- nrow(trace_pre_W)
correction_control_last <- nrow(trace_pre_W)
## Correction loop
while(correction_control > 0) {
# Corrections that correct some msg
trace_pre_W_correcting <- trace_pre_W |>
semi_join(trace_pre_T,
by = c("cusip_id", "trd_exctn_dt",
"orig_msg_seq_nb" = "msg_seq_nb"))
# Corrections that do not correct some msg
trace_pre_W <- trace_pre_W |>
anti_join(trace_pre_T,
by = c("cusip_id", "trd_exctn_dt",
"orig_msg_seq_nb" = "msg_seq_nb"))
# Delete msgs that are corrected and add correction msgs
trace_pre_T <- trace_pre_T |>
anti_join(trace_pre_W_correcting,
by = c("cusip_id", "trd_exctn_dt",
"msg_seq_nb" = "orig_msg_seq_nb")) |>
union_all(trace_pre_W_correcting)
# Escape if no corrections remain or they cannot be matched
correction_control <- nrow(trace_pre_W)
if(correction_control == correction_control_last) {
correction_control <- 0
}
correction_control_last <- nrow(trace_pre_W)
}
# Clean reversals
## Record reversals
trace_pre_R <- trace_pre_T |>
filter(asof_cd == 'R') |>
group_by(cusip_id, trd_exctn_dt, entrd_vol_qt,
rptd_pr, rpt_side_cd, cntra_mp_id) |>
arrange(trd_exctn_tm, trd_rpt_dt, trd_rpt_tm) |>
mutate(seq = row_number()) |>
ungroup()
## Remove reversals and the reversed trade
trace_pre <- trace_pre_T |>
filter(is.na(asof_cd) | !(asof_cd %in% c('R', 'X', 'D'))) |>
group_by(cusip_id, trd_exctn_dt, entrd_vol_qt,
rptd_pr, rpt_side_cd, cntra_mp_id) |>
arrange(trd_exctn_tm, trd_rpt_dt, trd_rpt_tm) |>
mutate(seq = row_number()) |>
ungroup() |>
anti_join(trace_pre_R,
by = c("cusip_id", "trd_exctn_dt", "entrd_vol_qt",
"rptd_pr", "rpt_side_cd", "cntra_mp_id", "seq")) |>
select(-seq)
# Agency trades -----------------------------------------------------------
# Combine pre and post trades
trace_clean <- trace_post |>
union_all(trace_pre)
# Keep angency sells and unmatched agency buys
## Agency sells
trace_agency_sells <- trace_clean |>
filter(cntra_mp_id == "D",
rpt_side_cd == "S")
# Agency buys that are unmatched
trace_agency_buys_filtered <- trace_clean |>
filter(cntra_mp_id == "D",
rpt_side_cd == "B") |>
anti_join(trace_agency_sells,
by = c("cusip_id", "trd_exctn_dt",
"entrd_vol_qt", "rptd_pr"))
# Agency clean
trace_clean <- trace_clean |>
filter(cntra_mp_id == "C") |>
union_all(trace_agency_sells) |>
union_all(trace_agency_buys_filtered)
# Additional Filters ------------------------------------------------------
trace_add_filters <- trace_clean |>
mutate(days_to_sttl_ct2 = stlmnt_dt - trd_exctn_dt) |>
filter(is.na(days_to_sttl_ct) | as.numeric(days_to_sttl_ct) <= 7,
is.na(days_to_sttl_ct2) | as.numeric(days_to_sttl_ct2) <= 7,
wis_fl == "N",
is.na(spcl_trd_fl) | spcl_trd_fl == "",
is.na(asof_cd) | asof_cd == "")
# Output ------------------------------------------------------------------
# Only keep necessary columns
trace_final <- trace_add_filters |>
arrange(cusip_id, trd_exctn_dt, trd_exctn_tm) |>
select(cusip_id, trd_exctn_dt, trd_exctn_tm,
rptd_pr, entrd_vol_qt, yld_pt, rpt_side_cd, cntra_mp_id) |>
mutate(trd_exctn_tm = format(as_datetime(trd_exctn_tm, tz = "America/New_York"), "%H:%M:%S"))
# Return
return(trace_final)
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment