Skip to content

Instantly share code, notes, and snippets.

@jspoelstra jspoelstra/fraudperf.R
Last active May 21, 2019

Embed
What would you like to do?
Transaction Fraud Detection Performance Evaluation
# Utility function to use within AzureML for calculating useful account-level performance metrics
# for transaction fraud detection models. Each metric is calculated as a function of the score-cutoff
# used. These include:
#
# Account Detection Rate (ADR): what fraction of the fraud accounts received a score above the
# threshold at some point after the fraud started
# Value Detection Rate (VDR): From the point in time that the fist transaction scored above
# the threshold, what fraction of the losses could have been prevented.
#
# This code specifically calculates the effect of delayed action: it will calculate VDR for various scenarios,
# each assuming a specified delay in taking action after the transaction took place.
#
# Expected data example:
# accountID,Timestamp,Amount,Label,Score
# Acct01,8/11/2016 12:44:46 PM,0,0,0.009517137
# Acct01,8/11/2016 2:56:51 PM,10,1,0.0080635994
# Acct01,8/11/2016 4:02:01 PM,12.5,1,0.00806359
# Acct02,8/11/2016 4:05:11 PM,2090,0,0.00806359
# Acct02,9/18/2016 8:47:18 AM,100,0,0.009517137
# ...
#
# Typical use in AzureML Execute R module:
#
# source('src/fraudEval/fraudEval.R')
# trx <- maml.mapInputPort(1)
# results <- evaluatePerf(trx)
# maml.mapOutputPort("results")
#
#
# Jacob Spoelstra
library(tidyr)
library(dplyr)
library(ggplot2)
library(lubridate)
evaluatePerf <- function(trx, sample.nonfraud = 0.1, scorebins = 500){
# Parameters
# sample.nonfraud = 0.1 # Sample rate for non-fraud accounts. Assuming that Frauds are not sampled
# scorebins = 500 # Granularity of perfromance tables
delay.range = c(0, 1, 5, 10, 15) # Delays (in minutes) between event and action to simulate. Fixed for now
trx <- trx %>%
rename(ts = Timestamp)
# By account, enforce monotonically rising scores
# Select only fraud transactions
message('Extracting fraud transactions')
# All trx on fraud accounts
frd_a <- trx %>%
group_by(accountID) %>%
filter(max(Label) > 0.5) %>%
arrange(accountID, ts) %>%
mutate(isfirst = rank(ts)==1)
# Only fraud trx, enforce monitonically rising score
frd_f <- frd_a %>%
group_by(accountID) %>%
filter(Label > 0.5) %>%
arrange(accountID, ts) %>%
mutate(hiScr = floor(cummax(Score) * scorebins))
# For non-fraud trx, set amount to 0 to not influence VDR calc.
frd_n <- frd_a %>%
group_by(accountID) %>%
filter(Label < 0.5) %>%
mutate(hiScr = floor(Score * scorebins),
Amount = 0)
# combine and sort
frd <- frd_f %>%
rbind(frd_n) %>%
arrange(accountID, ts)
# Need this to cover scores that don't occur in data
scrref <- data_frame(dscore = (scorebins-1):0)
message("Calculating performance based on different delays")
results1 <- scrref
for(delay.minutes in delay.range){
message("Working on delay of ", delay.minutes, " minutes")
frd$dscore <- 0
delay <- make_difftime(minute = delay.minutes)
# by account, Update scores based on delay
for(r in 1:dim(frd)[1]){
if(frd$isfirst[r]){
last.ts <- frd$ts[r]
last.score <- frd$hiScr[r]
next
}
# find most recent entry older than delay
score.effective <- last.score[last.ts <= frd$ts[r] - delay]
if(length(score.effective)){
frd$dscore[r] <- tail(score.effective, 1)
}
last.ts <- append(last.ts, frd$ts[r])
last.score <- append(last.score, frd$hiScr[r])
}
# Calculate $-weighted recall
perf <- frd %>%
group_by(dscore) %>%
summarise(v = sum(Amount)) %>%
right_join(scrref, by = 'dscore') %>%
mutate(v = ifelse(is.na(v), 0, v)) %>%
arrange(desc(dscore)) %>%
mutate(vdr = cumsum(v)/sum(v))
# Save in results frame
cname.vdr <- paste0("vdr", delay.minutes)
results1[cname.vdr] <- perf$vdr
}
# calculate ADR
perf1 <- frd %>%
group_by(accountID) %>%
summarize(dscore = floor(max(Score)*scorebins)) %>%
group_by(dscore) %>%
summarise(fa = n()) %>%
right_join(scrref, by = 'dscore') %>%
mutate(fa = ifelse(is.na(fa), 0, fa)) %>%
arrange(desc(dscore)) %>%
mutate(nf = cumsum(fa))
# Calculate approximate nr false-positives by day
# Filter out all non-fraud accounts
message("Extracting non-fraud transactions")
nfrd <- trx %>%
group_by(accountID) %>%
filter(max(Label) < 0.5) %>%
arrange(accountID, ts) %>%
mutate(day = yday(ts)) %>%
group_by(accountID, day) %>%
summarize(dscore = floor(max(Score)*scorebins))
message("Calculating AFPR")
perf2 <- nfrd %>%
group_by(dscore) %>%
summarise(nfa = n()/sample.nonfraud) %>%
right_join(perf1, by = 'dscore') %>%
mutate(nfa = ifelse(is.na(nfa), 0, nfa)) %>%
arrange(desc(dscore)) %>%
mutate(nnf = cumsum(nfa),
AFPR = nnf/nf,
ADR = nf/sum(fa),
volume = nnf/sum(nfa)) %>%
select(-fa, -nfa)
message("Calculating Cases per day")
nr.of.days = as.integer(max(trx$ts) - min(trx$ts))
results <- results1 %>%
left_join(perf2, by = "dscore") %>%
mutate(dscore = dscore/scorebins,
volperday= (nf+nnf)/nr.of.days) %>%
rename(threshold = dscore)
message("Done! See 'results'")
# plots
plt.theme <- theme(axis.text=element_text(size=14),
axis.title=element_text(size=14,face="bold"),
plot.title=element_text(size=16, face="bold"),
legend.title=element_text(size=14, face="bold"),
legend.text=element_text(size=12))
afpr.max = 20
score.min <- (results %>%
filter(AFPR<=afpr.max) %>%
arrange(threshold))[['threshold']][1]
volperday.max <- (results %>%
filter(AFPR<=afpr.max) %>%
arrange(threshold))[['volperday']][1]
# ADR-AFPR
print(ggplot(results) +
geom_line(aes(AFPR, ADR), size=2, colour='darkblue') +
plt.theme +
ggtitle("Account-Level Fraud Detection") +
xlab("Account False-Positive Ratio") +
ylab("Account Detection Rate") +
xlim(0,afpr.max))
# VDR-AFPR
print(ggplot(results %>%
gather(vdr0, vdr1, vdr5, vdr10, vdr15, key='delay', value='VDR')) +
geom_line(aes(x=AFPR, y=VDR, color=delay), size=2) +
plt.theme +
ggtitle("Potential Loss Prevention") +
xlab("Account False-Positive Ratio") +
ylab("Value Detection Rate") +
xlim(0,afpr.max) +
scale_colour_discrete(breaks=c('vdr0', 'vdr1', 'vdr5', 'vdr10', 'vdr15'),
labels=paste(c('0', '1', '5', '10', '15'), "min"))
)
# Cases per day
print(ggplot(results) +
geom_line(aes(threshold, volperday), size=2, colour='darkblue') +
plt.theme +
xlab("Score Threshold") +
ylab("Case Volume Per Day") +
ggtitle("Score Distribution") +
xlim(1, score.min) +
ylim(0, volperday.max * 1.2)
)
# Score-AFPR
print(ggplot(results) +
geom_line(aes(threshold, AFPR), size=2, colour='darkblue') +
plt.theme +
xlab("Score Threshold") +
ylab("Account False-Positive Ratio") +
ggtitle("Score Calibration") +
xlim(1, score.min) +
ylim(0, afpr.max * 1.2)
)
# Score-VDR
print(ggplot(results %>%
gather(vdr0, vdr1, vdr5, vdr10, vdr15, key='delay', value='VDR')) +
geom_line(aes(x=threshold, y=VDR, color=delay), size=2) +
plt.theme +
ggtitle("Potential Loss Prevention") +
xlab("Score Threshold") +
ylab("Value Detection Rate") +
xlim(1, score.min) +
scale_colour_discrete(breaks=c('vdr0', 'vdr1', 'vdr5', 'vdr10', 'vdr15'),
labels=paste(c('0', '1', '5', '10', '15'), "min"))
)
return(results)
}
@Eezzeldin

This comment has been minimized.

Copy link

commented May 21, 2019

Error in seq.default(from = best$lmin, to = best$lmax, by = best$lstep) :
'from' must be of length 1
In addition: Warning messages:
1: Removed 498 rows containing missing values (geom_path).
2: Removed 2490 rows containing missing values (geom_path).

Can you please share data that goes into this code?

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.