Skip to content

Instantly share code, notes, and snippets.

@jspoelstra

jspoelstra/fraudperf.R

Last active May 21, 2019
Embed
What would you like to do?
Transaction Fraud Detection Performance Evaluation
# Utility function to use within AzureML for calculating useful account-level performance metrics
# for transaction fraud detection models. Each metric is calculated as a function of the score-cutoff
# used. These include:
#
# Account Detection Rate (ADR): what fraction of the fraud accounts received a score above the
# threshold at some point after the fraud started
# Value Detection Rate (VDR): From the point in time that the fist transaction scored above
# the threshold, what fraction of the losses could have been prevented.
#
# This code specifically calculates the effect of delayed action: it will calculate VDR for various scenarios,
# each assuming a specified delay in taking action after the transaction took place.
#
# Expected data example:
# accountID,Timestamp,Amount,Label,Score
# Acct01,8/11/2016 12:44:46 PM,0,0,0.009517137
# Acct01,8/11/2016 2:56:51 PM,10,1,0.0080635994
# Acct01,8/11/2016 4:02:01 PM,12.5,1,0.00806359
# Acct02,8/11/2016 4:05:11 PM,2090,0,0.00806359
# Acct02,9/18/2016 8:47:18 AM,100,0,0.009517137
# ...
#
# Typical use in AzureML Execute R module:
#
# source('src/fraudEval/fraudEval.R')
# trx <- maml.mapInputPort(1)
# results <- evaluatePerf(trx)
# maml.mapOutputPort("results")
#
#
# Jacob Spoelstra
library(tidyr)
library(dplyr)
library(ggplot2)
library(lubridate)
evaluatePerf <- function(trx, sample.nonfraud = 0.1, scorebins = 500){
# Parameters
# sample.nonfraud = 0.1 # Sample rate for non-fraud accounts. Assuming that Frauds are not sampled
# scorebins = 500 # Granularity of perfromance tables
delay.range = c(0, 1, 5, 10, 15) # Delays (in minutes) between event and action to simulate. Fixed for now
trx <- trx %>%
rename(ts = Timestamp)
# By account, enforce monotonically rising scores
# Select only fraud transactions
message('Extracting fraud transactions')
# All trx on fraud accounts
frd_a <- trx %>%
group_by(accountID) %>%
filter(max(Label) > 0.5) %>%
arrange(accountID, ts) %>%
mutate(isfirst = rank(ts)==1)
# Only fraud trx, enforce monitonically rising score
frd_f <- frd_a %>%
group_by(accountID) %>%
filter(Label > 0.5) %>%
arrange(accountID, ts) %>%
mutate(hiScr = floor(cummax(Score) * scorebins))
# For non-fraud trx, set amount to 0 to not influence VDR calc.
frd_n <- frd_a %>%
group_by(accountID) %>%
filter(Label < 0.5) %>%
mutate(hiScr = floor(Score * scorebins),
Amount = 0)
# combine and sort
frd <- frd_f %>%
rbind(frd_n) %>%
arrange(accountID, ts)
# Need this to cover scores that don't occur in data
scrref <- data_frame(dscore = (scorebins-1):0)
message("Calculating performance based on different delays")
results1 <- scrref
for(delay.minutes in delay.range){
message("Working on delay of ", delay.minutes, " minutes")
frd$dscore <- 0
delay <- make_difftime(minute = delay.minutes)
# by account, Update scores based on delay
for(r in 1:dim(frd)[1]){
if(frd$isfirst[r]){
last.ts <- frd$ts[r]
last.score <- frd$hiScr[r]
next
}
# find most recent entry older than delay
score.effective <- last.score[last.ts <= frd$ts[r] - delay]
if(length(score.effective)){
frd$dscore[r] <- tail(score.effective, 1)
}
last.ts <- append(last.ts, frd$ts[r])
last.score <- append(last.score, frd$hiScr[r])
}
# Calculate $-weighted recall
perf <- frd %>%
group_by(dscore) %>%
summarise(v = sum(Amount)) %>%
right_join(scrref, by = 'dscore') %>%
mutate(v = ifelse(is.na(v), 0, v)) %>%
arrange(desc(dscore)) %>%
mutate(vdr = cumsum(v)/sum(v))
# Save in results frame
cname.vdr <- paste0("vdr", delay.minutes)
results1[cname.vdr] <- perf$vdr
}
# calculate ADR
perf1 <- frd %>%
group_by(accountID) %>%
summarize(dscore = floor(max(Score)*scorebins)) %>%
group_by(dscore) %>%
summarise(fa = n()) %>%
right_join(scrref, by = 'dscore') %>%
mutate(fa = ifelse(is.na(fa), 0, fa)) %>%
arrange(desc(dscore)) %>%
mutate(nf = cumsum(fa))
# Calculate approximate nr false-positives by day
# Filter out all non-fraud accounts
message("Extracting non-fraud transactions")
nfrd <- trx %>%
group_by(accountID) %>%
filter(max(Label) < 0.5) %>%
arrange(accountID, ts) %>%
mutate(day = yday(ts)) %>%
group_by(accountID, day) %>%
summarize(dscore = floor(max(Score)*scorebins))
message("Calculating AFPR")
perf2 <- nfrd %>%
group_by(dscore) %>%
summarise(nfa = n()/sample.nonfraud) %>%
right_join(perf1, by = 'dscore') %>%
mutate(nfa = ifelse(is.na(nfa), 0, nfa)) %>%
arrange(desc(dscore)) %>%
mutate(nnf = cumsum(nfa),
AFPR = nnf/nf,
ADR = nf/sum(fa),
volume = nnf/sum(nfa)) %>%
select(-fa, -nfa)
message("Calculating Cases per day")
nr.of.days = as.integer(max(trx$ts) - min(trx$ts))
results <- results1 %>%
left_join(perf2, by = "dscore") %>%
mutate(dscore = dscore/scorebins,
volperday= (nf+nnf)/nr.of.days) %>%
rename(threshold = dscore)
message("Done! See 'results'")
# plots
plt.theme <- theme(axis.text=element_text(size=14),
axis.title=element_text(size=14,face="bold"),
plot.title=element_text(size=16, face="bold"),
legend.title=element_text(size=14, face="bold"),
legend.text=element_text(size=12))
afpr.max = 20
score.min <- (results %>%
filter(AFPR<=afpr.max) %>%
arrange(threshold))[['threshold']][1]
volperday.max <- (results %>%
filter(AFPR<=afpr.max) %>%
arrange(threshold))[['volperday']][1]
# ADR-AFPR
print(ggplot(results) +
geom_line(aes(AFPR, ADR), size=2, colour='darkblue') +
plt.theme +
ggtitle("Account-Level Fraud Detection") +
xlab("Account False-Positive Ratio") +
ylab("Account Detection Rate") +
xlim(0,afpr.max))
# VDR-AFPR
print(ggplot(results %>%
gather(vdr0, vdr1, vdr5, vdr10, vdr15, key='delay', value='VDR')) +
geom_line(aes(x=AFPR, y=VDR, color=delay), size=2) +
plt.theme +
ggtitle("Potential Loss Prevention") +
xlab("Account False-Positive Ratio") +
ylab("Value Detection Rate") +
xlim(0,afpr.max) +
scale_colour_discrete(breaks=c('vdr0', 'vdr1', 'vdr5', 'vdr10', 'vdr15'),
labels=paste(c('0', '1', '5', '10', '15'), "min"))
)
# Cases per day
print(ggplot(results) +
geom_line(aes(threshold, volperday), size=2, colour='darkblue') +
plt.theme +
xlab("Score Threshold") +
ylab("Case Volume Per Day") +
ggtitle("Score Distribution") +
xlim(1, score.min) +
ylim(0, volperday.max * 1.2)
)
# Score-AFPR
print(ggplot(results) +
geom_line(aes(threshold, AFPR), size=2, colour='darkblue') +
plt.theme +
xlab("Score Threshold") +
ylab("Account False-Positive Ratio") +
ggtitle("Score Calibration") +
xlim(1, score.min) +
ylim(0, afpr.max * 1.2)
)
# Score-VDR
print(ggplot(results %>%
gather(vdr0, vdr1, vdr5, vdr10, vdr15, key='delay', value='VDR')) +
geom_line(aes(x=threshold, y=VDR, color=delay), size=2) +
plt.theme +
ggtitle("Potential Loss Prevention") +
xlab("Score Threshold") +
ylab("Value Detection Rate") +
xlim(1, score.min) +
scale_colour_discrete(breaks=c('vdr0', 'vdr1', 'vdr5', 'vdr10', 'vdr15'),
labels=paste(c('0', '1', '5', '10', '15'), "min"))
)
return(results)
}
@Eezzeldin

This comment has been minimized.

Copy link

@Eezzeldin Eezzeldin commented May 21, 2019

Error in seq.default(from = best$lmin, to = best$lmax, by = best$lstep) :
'from' must be of length 1
In addition: Warning messages:
1: Removed 498 rows containing missing values (geom_path).
2: Removed 2490 rows containing missing values (geom_path).

Can you please share data that goes into this code?

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment