jspoelstra/fraudperf.R

## fraudperf.R
# Utility function to use within AzureML for calculating useful account-level performance metrics
# for transaction fraud detection models. Each metric is calculated as a function of the score-cutoff
# used. These include:
#
#   Account Detection Rate (ADR): what fraction of the fraud accounts received a score above the
#                                 threshold at some point after the fraud started
#   Value Detection Rate (VDR):   From the point in time that the fist transaction scored above
#                                 the threshold, what fraction of the losses could have been prevented.
#
# This code specifically calculates the effect of delayed action: it will calculate VDR for various scenarios,
# each assuming a specified delay in taking action after the transaction took place.
#
# Expected data example:

# accountID,Timestamp,Amount,Label,Score
# Acct01,8/11/2016 12:44:46 PM,0,0,0.009517137
# Acct01,8/11/2016 2:56:51 PM,10,1,0.0080635994
# Acct01,8/11/2016 4:02:01 PM,12.5,1,0.00806359
# Acct02,8/11/2016 4:05:11 PM,2090,0,0.00806359
# Acct02,9/18/2016 8:47:18 AM,100,0,0.009517137
# ...
#
# Typical use in AzureML Execute R module:
#
# source('src/fraudEval/fraudEval.R')
# trx <- maml.mapInputPort(1)
# results <- evaluatePerf(trx)
# maml.mapOutputPort("results")
#
#
# Jacob Spoelstra

library(tidyr)
library(dplyr)
library(ggplot2)
library(lubridate)


evaluatePerf <- function(trx, sample.nonfraud = 0.1, scorebins = 500){
  # Parameters
  # sample.nonfraud = 0.1  # Sample rate for non-fraud accounts. Assuming that Frauds are not sampled
  # scorebins = 500  # Granularity of perfromance tables

  delay.range = c(0, 1, 5, 10, 15) # Delays (in minutes) between event and action to simulate. Fixed for now

  trx <- trx %>%
    rename(ts = Timestamp)

  # By account, enforce monotonically rising scores
  # Select only fraud transactions
  message('Extracting fraud transactions')
  # All trx on fraud accounts
  frd_a <- trx %>%
    group_by(accountID) %>%
    filter(max(Label) > 0.5) %>%
    arrange(accountID, ts) %>%
    mutate(isfirst = rank(ts)==1)
  # Only fraud trx, enforce monitonically rising score
  frd_f <- frd_a %>%
    group_by(accountID) %>%
    filter(Label > 0.5) %>%
    arrange(accountID, ts) %>%
    mutate(hiScr = floor(cummax(Score) * scorebins))
  # For non-fraud trx, set amount to 0 to not influence VDR calc.
  frd_n <- frd_a %>%
    group_by(accountID) %>%
    filter(Label < 0.5) %>%
    mutate(hiScr = floor(Score * scorebins),
           Amount = 0)
  # combine and sort
  frd <- frd_f %>%
    rbind(frd_n) %>%
    arrange(accountID, ts)


  # Need this to cover scores that don't occur in data
  scrref <- data_frame(dscore = (scorebins-1):0)

  message("Calculating performance based on different delays")
  results1 <- scrref
  for(delay.minutes in delay.range){
    message("Working on delay of ", delay.minutes, " minutes")
    frd$dscore <- 0
    delay <- make_difftime(minute = delay.minutes)

    # by account, Update scores based on delay
    for(r in 1:dim(frd)[1]){
      if(frd$isfirst[r]){
        last.ts <- frd$ts[r]
        last.score <- frd$hiScr[r]
        next
      }
      # find most recent entry older than delay
      score.effective <- last.score[last.ts <= frd$ts[r] - delay]
      if(length(score.effective)){
        frd$dscore[r] <- tail(score.effective, 1)
      }
      last.ts <- append(last.ts, frd$ts[r])
      last.score <- append(last.score, frd$hiScr[r])
    }

    # Calculate $-weighted recall
    perf <- frd %>%
      group_by(dscore) %>%
      summarise(v = sum(Amount)) %>%
      right_join(scrref, by = 'dscore') %>%
      mutate(v = ifelse(is.na(v), 0, v)) %>%
      arrange(desc(dscore)) %>%
      mutate(vdr = cumsum(v)/sum(v))


    # Save in results frame
    cname.vdr <- paste0("vdr", delay.minutes)
    results1[cname.vdr] <- perf$vdr
  }

  # calculate ADR
  perf1 <- frd %>%
    group_by(accountID) %>%
    summarize(dscore = floor(max(Score)*scorebins)) %>%
    group_by(dscore) %>%
    summarise(fa = n()) %>%
    right_join(scrref, by = 'dscore') %>%
    mutate(fa = ifelse(is.na(fa), 0, fa)) %>%
    arrange(desc(dscore)) %>%
    mutate(nf = cumsum(fa))

  # Calculate approximate nr false-positives by day
  # Filter out all non-fraud accounts
  message("Extracting non-fraud transactions")
  nfrd <- trx %>%
    group_by(accountID) %>%
    filter(max(Label) < 0.5) %>%
    arrange(accountID, ts) %>%
    mutate(day = yday(ts)) %>%
    group_by(accountID, day) %>%
    summarize(dscore = floor(max(Score)*scorebins))

  message("Calculating AFPR")
  perf2 <- nfrd %>%
    group_by(dscore) %>%
    summarise(nfa = n()/sample.nonfraud) %>%
    right_join(perf1, by = 'dscore') %>%
    mutate(nfa = ifelse(is.na(nfa), 0, nfa)) %>%
    arrange(desc(dscore)) %>%
    mutate(nnf = cumsum(nfa),
           AFPR = nnf/nf,
           ADR = nf/sum(fa),
           volume = nnf/sum(nfa)) %>%
    select(-fa, -nfa)

  message("Calculating Cases per day")
  nr.of.days = as.integer(max(trx$ts) - min(trx$ts))
  results <- results1 %>%
    left_join(perf2, by = "dscore") %>%
    mutate(dscore = dscore/scorebins,
           volperday= (nf+nnf)/nr.of.days) %>%
    rename(threshold = dscore)

  message("Done! See 'results'")

  # plots
  plt.theme <- theme(axis.text=element_text(size=14),
                     axis.title=element_text(size=14,face="bold"),
                     plot.title=element_text(size=16, face="bold"),
                     legend.title=element_text(size=14, face="bold"),
                     legend.text=element_text(size=12))
  afpr.max = 20
  score.min <- (results %>%
                  filter(AFPR<=afpr.max) %>%
                  arrange(threshold))[['threshold']][1]
  volperday.max <- (results %>%
                  filter(AFPR<=afpr.max) %>%
                  arrange(threshold))[['volperday']][1]

    # ADR-AFPR
  print(ggplot(results) +
          geom_line(aes(AFPR, ADR), size=2, colour='darkblue') +
          plt.theme +
          ggtitle("Account-Level Fraud Detection") +
          xlab("Account False-Positive Ratio") +
          ylab("Account Detection Rate") +
          xlim(0,afpr.max))

  # VDR-AFPR
  print(ggplot(results %>%
                 gather(vdr0, vdr1, vdr5, vdr10, vdr15, key='delay', value='VDR')) +
          geom_line(aes(x=AFPR, y=VDR, color=delay), size=2) +
          plt.theme +
          ggtitle("Potential Loss Prevention") +
          xlab("Account False-Positive Ratio") +
          ylab("Value Detection Rate") +
          xlim(0,afpr.max) +
          scale_colour_discrete(breaks=c('vdr0', 'vdr1', 'vdr5', 'vdr10', 'vdr15'),
                                labels=paste(c('0', '1', '5', '10', '15'), "min"))
  )

  # Cases per day
  print(ggplot(results) +
          geom_line(aes(threshold, volperday), size=2, colour='darkblue') +
          plt.theme +
          xlab("Score Threshold") +
          ylab("Case Volume Per Day") +
          ggtitle("Score Distribution") +
          xlim(1, score.min) +
          ylim(0, volperday.max * 1.2)
  )

  # Score-AFPR
  print(ggplot(results) +
          geom_line(aes(threshold, AFPR), size=2, colour='darkblue') +
          plt.theme +
          xlab("Score Threshold") +
          ylab("Account False-Positive Ratio") +
          ggtitle("Score Calibration") +
          xlim(1, score.min) +
          ylim(0, afpr.max * 1.2)
  )

  # Score-VDR
  print(ggplot(results %>%
                 gather(vdr0, vdr1, vdr5, vdr10, vdr15, key='delay', value='VDR')) +
          geom_line(aes(x=threshold, y=VDR, color=delay), size=2) +
          plt.theme +
          ggtitle("Potential Loss Prevention") +
          xlab("Score Threshold") +
          ylab("Value Detection Rate") +
          xlim(1, score.min) +
          scale_colour_discrete(breaks=c('vdr0', 'vdr1', 'vdr5', 'vdr10', 'vdr15'),
                                labels=paste(c('0', '1', '5', '10', '15'), "min"))
  )
  return(results)
}
	# Utility function to use within AzureML for calculating useful account-level performance metrics
	# for transaction fraud detection models. Each metric is calculated as a function of the score-cutoff
	# used. These include:
	#
	# Account Detection Rate (ADR): what fraction of the fraud accounts received a score above the
	# threshold at some point after the fraud started
	# Value Detection Rate (VDR): From the point in time that the fist transaction scored above
	# the threshold, what fraction of the losses could have been prevented.
	#
	# This code specifically calculates the effect of delayed action: it will calculate VDR for various scenarios,
	# each assuming a specified delay in taking action after the transaction took place.
	#
	# Expected data example:

	# accountID,Timestamp,Amount,Label,Score
	# Acct01,8/11/2016 12:44:46 PM,0,0,0.009517137
	# Acct01,8/11/2016 2:56:51 PM,10,1,0.0080635994
	# Acct01,8/11/2016 4:02:01 PM,12.5,1,0.00806359
	# Acct02,8/11/2016 4:05:11 PM,2090,0,0.00806359
	# Acct02,9/18/2016 8:47:18 AM,100,0,0.009517137
	# ...
	#
	# Typical use in AzureML Execute R module:
	#
	# source('src/fraudEval/fraudEval.R')
	# trx <- maml.mapInputPort(1)
	# results <- evaluatePerf(trx)
	# maml.mapOutputPort("results")
	#
	#
	# Jacob Spoelstra

	library(tidyr)
	library(dplyr)
	library(ggplot2)
	library(lubridate)


	evaluatePerf <- function(trx, sample.nonfraud = 0.1, scorebins = 500){
	# Parameters
	# sample.nonfraud = 0.1 # Sample rate for non-fraud accounts. Assuming that Frauds are not sampled
	# scorebins = 500 # Granularity of perfromance tables

	delay.range = c(0, 1, 5, 10, 15) # Delays (in minutes) between event and action to simulate. Fixed for now

	trx <- trx %>%
	rename(ts = Timestamp)

	# By account, enforce monotonically rising scores
	# Select only fraud transactions
	message('Extracting fraud transactions')
	# All trx on fraud accounts
	frd_a <- trx %>%
	group_by(accountID) %>%
	filter(max(Label) > 0.5) %>%
	arrange(accountID, ts) %>%
	mutate(isfirst = rank(ts)==1)
	# Only fraud trx, enforce monitonically rising score
	frd_f <- frd_a %>%
	group_by(accountID) %>%
	filter(Label > 0.5) %>%
	arrange(accountID, ts) %>%
	mutate(hiScr = floor(cummax(Score) * scorebins))
	# For non-fraud trx, set amount to 0 to not influence VDR calc.
	frd_n <- frd_a %>%
	group_by(accountID) %>%
	filter(Label < 0.5) %>%
	mutate(hiScr = floor(Score * scorebins),
	Amount = 0)
	# combine and sort
	frd <- frd_f %>%
	rbind(frd_n) %>%
	arrange(accountID, ts)


	# Need this to cover scores that don't occur in data
	scrref <- data_frame(dscore = (scorebins-1):0)

	message("Calculating performance based on different delays")
	results1 <- scrref
	for(delay.minutes in delay.range){
	message("Working on delay of ", delay.minutes, " minutes")
	frd$dscore <- 0
	delay <- make_difftime(minute = delay.minutes)

	# by account, Update scores based on delay
	for(r in 1:dim(frd)[1]){
	if(frd$isfirst[r]){
	last.ts <- frd$ts[r]
	last.score <- frd$hiScr[r]
	next
	}
	# find most recent entry older than delay
	score.effective <- last.score[last.ts <= frd$ts[r] - delay]
	if(length(score.effective)){
	frd$dscore[r] <- tail(score.effective, 1)
	}
	last.ts <- append(last.ts, frd$ts[r])
	last.score <- append(last.score, frd$hiScr[r])
	}

	# Calculate $-weighted recall
	perf <- frd %>%
	group_by(dscore) %>%
	summarise(v = sum(Amount)) %>%
	right_join(scrref, by = 'dscore') %>%
	mutate(v = ifelse(is.na(v), 0, v)) %>%
	arrange(desc(dscore)) %>%
	mutate(vdr = cumsum(v)/sum(v))


	# Save in results frame
	cname.vdr <- paste0("vdr", delay.minutes)
	results1[cname.vdr] <- perf$vdr
	}

	# calculate ADR
	perf1 <- frd %>%
	group_by(accountID) %>%
	summarize(dscore = floor(max(Score)*scorebins)) %>%
	group_by(dscore) %>%
	summarise(fa = n()) %>%
	right_join(scrref, by = 'dscore') %>%
	mutate(fa = ifelse(is.na(fa), 0, fa)) %>%
	arrange(desc(dscore)) %>%
	mutate(nf = cumsum(fa))

	# Calculate approximate nr false-positives by day
	# Filter out all non-fraud accounts
	message("Extracting non-fraud transactions")
	nfrd <- trx %>%
	group_by(accountID) %>%
	filter(max(Label) < 0.5) %>%
	arrange(accountID, ts) %>%
	mutate(day = yday(ts)) %>%
	group_by(accountID, day) %>%
	summarize(dscore = floor(max(Score)*scorebins))

	message("Calculating AFPR")
	perf2 <- nfrd %>%
	group_by(dscore) %>%
	summarise(nfa = n()/sample.nonfraud) %>%
	right_join(perf1, by = 'dscore') %>%
	mutate(nfa = ifelse(is.na(nfa), 0, nfa)) %>%
	arrange(desc(dscore)) %>%
	mutate(nnf = cumsum(nfa),
	AFPR = nnf/nf,
	ADR = nf/sum(fa),
	volume = nnf/sum(nfa)) %>%
	select(-fa, -nfa)

	message("Calculating Cases per day")
	nr.of.days = as.integer(max(trx$ts) - min(trx$ts))
	results <- results1 %>%
	left_join(perf2, by = "dscore") %>%
	mutate(dscore = dscore/scorebins,
	volperday= (nf+nnf)/nr.of.days) %>%
	rename(threshold = dscore)

	message("Done! See 'results'")

	# plots
	plt.theme <- theme(axis.text=element_text(size=14),
	axis.title=element_text(size=14,face="bold"),
	plot.title=element_text(size=16, face="bold"),
	legend.title=element_text(size=14, face="bold"),
	legend.text=element_text(size=12))
	afpr.max = 20
	score.min <- (results %>%
	filter(AFPR<=afpr.max) %>%
	arrange(threshold))[['threshold']][1]
	volperday.max <- (results %>%
	filter(AFPR<=afpr.max) %>%
	arrange(threshold))[['volperday']][1]

	# ADR-AFPR
	print(ggplot(results) +
	geom_line(aes(AFPR, ADR), size=2, colour='darkblue') +
	plt.theme +
	ggtitle("Account-Level Fraud Detection") +
	xlab("Account False-Positive Ratio") +
	ylab("Account Detection Rate") +
	xlim(0,afpr.max))

	# VDR-AFPR
	print(ggplot(results %>%
	gather(vdr0, vdr1, vdr5, vdr10, vdr15, key='delay', value='VDR')) +
	geom_line(aes(x=AFPR, y=VDR, color=delay), size=2) +
	plt.theme +
	ggtitle("Potential Loss Prevention") +
	xlab("Account False-Positive Ratio") +
	ylab("Value Detection Rate") +
	xlim(0,afpr.max) +
	scale_colour_discrete(breaks=c('vdr0', 'vdr1', 'vdr5', 'vdr10', 'vdr15'),
	labels=paste(c('0', '1', '5', '10', '15'), "min"))
	)

	# Cases per day
	print(ggplot(results) +
	geom_line(aes(threshold, volperday), size=2, colour='darkblue') +
	plt.theme +
	xlab("Score Threshold") +
	ylab("Case Volume Per Day") +
	ggtitle("Score Distribution") +
	xlim(1, score.min) +
	ylim(0, volperday.max * 1.2)
	)

	# Score-AFPR
	print(ggplot(results) +
	geom_line(aes(threshold, AFPR), size=2, colour='darkblue') +
	plt.theme +
	xlab("Score Threshold") +
	ylab("Account False-Positive Ratio") +
	ggtitle("Score Calibration") +
	xlim(1, score.min) +
	ylim(0, afpr.max * 1.2)
	)

	# Score-VDR
	print(ggplot(results %>%
	gather(vdr0, vdr1, vdr5, vdr10, vdr15, key='delay', value='VDR')) +
	geom_line(aes(x=threshold, y=VDR, color=delay), size=2) +
	plt.theme +
	ggtitle("Potential Loss Prevention") +
	xlab("Score Threshold") +
	ylab("Value Detection Rate") +
	xlim(1, score.min) +
	scale_colour_discrete(breaks=c('vdr0', 'vdr1', 'vdr5', 'vdr10', 'vdr15'),
	labels=paste(c('0', '1', '5', '10', '15'), "min"))
	)
	return(results)
	}