Transaction Fraud Detection Performance Evaluation
# Utility function to use within AzureML for calculating useful account-level performance metrics | |
# for transaction fraud detection models. Each metric is calculated as a function of the score-cutoff | |
# used. These include: | |
# | |
# Account Detection Rate (ADR): what fraction of the fraud accounts received a score above the | |
# threshold at some point after the fraud started | |
# Value Detection Rate (VDR): From the point in time that the fist transaction scored above | |
# the threshold, what fraction of the losses could have been prevented. | |
# | |
# This code specifically calculates the effect of delayed action: it will calculate VDR for various scenarios, | |
# each assuming a specified delay in taking action after the transaction took place. | |
# | |
# Expected data example: | |
# accountID,Timestamp,Amount,Label,Score | |
# Acct01,8/11/2016 12:44:46 PM,0,0,0.009517137 | |
# Acct01,8/11/2016 2:56:51 PM,10,1,0.0080635994 | |
# Acct01,8/11/2016 4:02:01 PM,12.5,1,0.00806359 | |
# Acct02,8/11/2016 4:05:11 PM,2090,0,0.00806359 | |
# Acct02,9/18/2016 8:47:18 AM,100,0,0.009517137 | |
# ... | |
# | |
# Typical use in AzureML Execute R module: | |
# | |
# source('src/fraudEval/fraudEval.R') | |
# trx <- maml.mapInputPort(1) | |
# results <- evaluatePerf(trx) | |
# maml.mapOutputPort("results") | |
# | |
# | |
# Jacob Spoelstra | |
library(tidyr) | |
library(dplyr) | |
library(ggplot2) | |
library(lubridate) | |
evaluatePerf <- function(trx, sample.nonfraud = 0.1, scorebins = 500){ | |
# Parameters | |
# sample.nonfraud = 0.1 # Sample rate for non-fraud accounts. Assuming that Frauds are not sampled | |
# scorebins = 500 # Granularity of perfromance tables | |
delay.range = c(0, 1, 5, 10, 15) # Delays (in minutes) between event and action to simulate. Fixed for now | |
trx <- trx %>% | |
rename(ts = Timestamp) | |
# By account, enforce monotonically rising scores | |
# Select only fraud transactions | |
message('Extracting fraud transactions') | |
# All trx on fraud accounts | |
frd_a <- trx %>% | |
group_by(accountID) %>% | |
filter(max(Label) > 0.5) %>% | |
arrange(accountID, ts) %>% | |
mutate(isfirst = rank(ts)==1) | |
# Only fraud trx, enforce monitonically rising score | |
frd_f <- frd_a %>% | |
group_by(accountID) %>% | |
filter(Label > 0.5) %>% | |
arrange(accountID, ts) %>% | |
mutate(hiScr = floor(cummax(Score) * scorebins)) | |
# For non-fraud trx, set amount to 0 to not influence VDR calc. | |
frd_n <- frd_a %>% | |
group_by(accountID) %>% | |
filter(Label < 0.5) %>% | |
mutate(hiScr = floor(Score * scorebins), | |
Amount = 0) | |
# combine and sort | |
frd <- frd_f %>% | |
rbind(frd_n) %>% | |
arrange(accountID, ts) | |
# Need this to cover scores that don't occur in data | |
scrref <- data_frame(dscore = (scorebins-1):0) | |
message("Calculating performance based on different delays") | |
results1 <- scrref | |
for(delay.minutes in delay.range){ | |
message("Working on delay of ", delay.minutes, " minutes") | |
frd$dscore <- 0 | |
delay <- make_difftime(minute = delay.minutes) | |
# by account, Update scores based on delay | |
for(r in 1:dim(frd)[1]){ | |
if(frd$isfirst[r]){ | |
last.ts <- frd$ts[r] | |
last.score <- frd$hiScr[r] | |
next | |
} | |
# find most recent entry older than delay | |
score.effective <- last.score[last.ts <= frd$ts[r] - delay] | |
if(length(score.effective)){ | |
frd$dscore[r] <- tail(score.effective, 1) | |
} | |
last.ts <- append(last.ts, frd$ts[r]) | |
last.score <- append(last.score, frd$hiScr[r]) | |
} | |
# Calculate $-weighted recall | |
perf <- frd %>% | |
group_by(dscore) %>% | |
summarise(v = sum(Amount)) %>% | |
right_join(scrref, by = 'dscore') %>% | |
mutate(v = ifelse(is.na(v), 0, v)) %>% | |
arrange(desc(dscore)) %>% | |
mutate(vdr = cumsum(v)/sum(v)) | |
# Save in results frame | |
cname.vdr <- paste0("vdr", delay.minutes) | |
results1[cname.vdr] <- perf$vdr | |
} | |
# calculate ADR | |
perf1 <- frd %>% | |
group_by(accountID) %>% | |
summarize(dscore = floor(max(Score)*scorebins)) %>% | |
group_by(dscore) %>% | |
summarise(fa = n()) %>% | |
right_join(scrref, by = 'dscore') %>% | |
mutate(fa = ifelse(is.na(fa), 0, fa)) %>% | |
arrange(desc(dscore)) %>% | |
mutate(nf = cumsum(fa)) | |
# Calculate approximate nr false-positives by day | |
# Filter out all non-fraud accounts | |
message("Extracting non-fraud transactions") | |
nfrd <- trx %>% | |
group_by(accountID) %>% | |
filter(max(Label) < 0.5) %>% | |
arrange(accountID, ts) %>% | |
mutate(day = yday(ts)) %>% | |
group_by(accountID, day) %>% | |
summarize(dscore = floor(max(Score)*scorebins)) | |
message("Calculating AFPR") | |
perf2 <- nfrd %>% | |
group_by(dscore) %>% | |
summarise(nfa = n()/sample.nonfraud) %>% | |
right_join(perf1, by = 'dscore') %>% | |
mutate(nfa = ifelse(is.na(nfa), 0, nfa)) %>% | |
arrange(desc(dscore)) %>% | |
mutate(nnf = cumsum(nfa), | |
AFPR = nnf/nf, | |
ADR = nf/sum(fa), | |
volume = nnf/sum(nfa)) %>% | |
select(-fa, -nfa) | |
message("Calculating Cases per day") | |
nr.of.days = as.integer(max(trx$ts) - min(trx$ts)) | |
results <- results1 %>% | |
left_join(perf2, by = "dscore") %>% | |
mutate(dscore = dscore/scorebins, | |
volperday= (nf+nnf)/nr.of.days) %>% | |
rename(threshold = dscore) | |
message("Done! See 'results'") | |
# plots | |
plt.theme <- theme(axis.text=element_text(size=14), | |
axis.title=element_text(size=14,face="bold"), | |
plot.title=element_text(size=16, face="bold"), | |
legend.title=element_text(size=14, face="bold"), | |
legend.text=element_text(size=12)) | |
afpr.max = 20 | |
score.min <- (results %>% | |
filter(AFPR<=afpr.max) %>% | |
arrange(threshold))[['threshold']][1] | |
volperday.max <- (results %>% | |
filter(AFPR<=afpr.max) %>% | |
arrange(threshold))[['volperday']][1] | |
# ADR-AFPR | |
print(ggplot(results) + | |
geom_line(aes(AFPR, ADR), size=2, colour='darkblue') + | |
plt.theme + | |
ggtitle("Account-Level Fraud Detection") + | |
xlab("Account False-Positive Ratio") + | |
ylab("Account Detection Rate") + | |
xlim(0,afpr.max)) | |
# VDR-AFPR | |
print(ggplot(results %>% | |
gather(vdr0, vdr1, vdr5, vdr10, vdr15, key='delay', value='VDR')) + | |
geom_line(aes(x=AFPR, y=VDR, color=delay), size=2) + | |
plt.theme + | |
ggtitle("Potential Loss Prevention") + | |
xlab("Account False-Positive Ratio") + | |
ylab("Value Detection Rate") + | |
xlim(0,afpr.max) + | |
scale_colour_discrete(breaks=c('vdr0', 'vdr1', 'vdr5', 'vdr10', 'vdr15'), | |
labels=paste(c('0', '1', '5', '10', '15'), "min")) | |
) | |
# Cases per day | |
print(ggplot(results) + | |
geom_line(aes(threshold, volperday), size=2, colour='darkblue') + | |
plt.theme + | |
xlab("Score Threshold") + | |
ylab("Case Volume Per Day") + | |
ggtitle("Score Distribution") + | |
xlim(1, score.min) + | |
ylim(0, volperday.max * 1.2) | |
) | |
# Score-AFPR | |
print(ggplot(results) + | |
geom_line(aes(threshold, AFPR), size=2, colour='darkblue') + | |
plt.theme + | |
xlab("Score Threshold") + | |
ylab("Account False-Positive Ratio") + | |
ggtitle("Score Calibration") + | |
xlim(1, score.min) + | |
ylim(0, afpr.max * 1.2) | |
) | |
# Score-VDR | |
print(ggplot(results %>% | |
gather(vdr0, vdr1, vdr5, vdr10, vdr15, key='delay', value='VDR')) + | |
geom_line(aes(x=threshold, y=VDR, color=delay), size=2) + | |
plt.theme + | |
ggtitle("Potential Loss Prevention") + | |
xlab("Score Threshold") + | |
ylab("Value Detection Rate") + | |
xlim(1, score.min) + | |
scale_colour_discrete(breaks=c('vdr0', 'vdr1', 'vdr5', 'vdr10', 'vdr15'), | |
labels=paste(c('0', '1', '5', '10', '15'), "min")) | |
) | |
return(results) | |
} | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
This comment has been minimized.
Error in seq.default(from = best$lmin, to = best$lmax, by = best$lstep) :
'from' must be of length 1
In addition: Warning messages:
1: Removed 498 rows containing missing values (geom_path).
2: Removed 2490 rows containing missing values (geom_path).
Can you please share data that goes into this code?