traversc/fastAUC.r

## fastAUC.r
fastROC <- function(probs, class) {
    class_sorted <- class[order(probs, decreasing=T)]
    TPR <- cumsum(class_sorted) / sum(class)
    FPR <- cumsum(class_sorted == 0) / sum(class == 0)
    return(list(tpr=TPR, fpr=FPR))
}

# Helpful function adapted from: https://stat.ethz.ch/pipermail/r-help/2005-September/079872.html
fastAUC <- function(probs, class) {
    x <- probs
    y <- class
    x1 = x[y==1]; n1 = length(x1);
    x2 = x[y==0]; n2 = length(x2);
    r = rank(c(x1,x2))
    auc = (sum(r[1:n1]) - n1*(n1+1)/2) / n1 / n2
    return(auc)
}

####################################################
### Some tests on random datasets


#small test dataset
probs <- runif(50000)
class <- sample(c(1,0), 50000, replace=T)

system.time(pROC_auc_results <- pROC::auc(pROC::roc(class, probs)))
#Elapsed: 17.906s

system.time(fast_auc_results <- fastAUC(probs, class))
#Elapsed: 0.022s

system.time(ROCR_auc_results <- ROCR::performance(ROCR::prediction(probs,class), measure="auc"))
#Elapsed: 0.208s

#Check that the results are the same -- True up to some float precision
abs(as.numeric(pROC_auc_results) - fast_auc_results) < 1e-12
abs(ROCR_auc_results@y.values[[1]] - fast_auc_results) < 1e-12

#Larger test dataset
probs <- runif(5e6)
class <- sample(c(1,0), 5e6, replace=T)
probs <- ifelse(class==1, probs+0.01, probs-0.01)

system.time(fast_auc_results <- fastAUC(probs, class))
#Elapsed: 3.687s

system.time(ROCR_auc_results <- ROCR::performance(ROCR::prediction(probs,class), measure="auc"))
#Elapsed: 16.006

#Check that the results are the same -- True up to some float precision
abs(ROCR_auc_results@y.values[[1]] - fast_auc_results) < 1e-12
	fastROC <- function(probs, class) {
	class_sorted <- class[order(probs, decreasing=T)]
	TPR <- cumsum(class_sorted) / sum(class)
	FPR <- cumsum(class_sorted == 0) / sum(class == 0)
	return(list(tpr=TPR, fpr=FPR))
	}

	# Helpful function adapted from: https://stat.ethz.ch/pipermail/r-help/2005-September/079872.html
	fastAUC <- function(probs, class) {
	x <- probs
	y <- class
	x1 = x[y==1]; n1 = length(x1);
	x2 = x[y==0]; n2 = length(x2);
	r = rank(c(x1,x2))
	auc = (sum(r[1:n1]) - n1*(n1+1)/2) / n1 / n2
	return(auc)
	}

	####################################################
	### Some tests on random datasets


	#small test dataset
	probs <- runif(50000)
	class <- sample(c(1,0), 50000, replace=T)

	system.time(pROC_auc_results <- pROC::auc(pROC::roc(class, probs)))
	#Elapsed: 17.906s

	system.time(fast_auc_results <- fastAUC(probs, class))
	#Elapsed: 0.022s

	system.time(ROCR_auc_results <- ROCR::performance(ROCR::prediction(probs,class), measure="auc"))
	#Elapsed: 0.208s

	#Check that the results are the same -- True up to some float precision
	abs(as.numeric(pROC_auc_results) - fast_auc_results) < 1e-12
	abs(ROCR_auc_results@y.values[[1]] - fast_auc_results) < 1e-12

	#Larger test dataset
	probs <- runif(5e6)
	class <- sample(c(1,0), 5e6, replace=T)
	probs <- ifelse(class==1, probs+0.01, probs-0.01)

	system.time(fast_auc_results <- fastAUC(probs, class))
	#Elapsed: 3.687s

	system.time(ROCR_auc_results <- ROCR::performance(ROCR::prediction(probs,class), measure="auc"))
	#Elapsed: 16.006

	#Check that the results are the same -- True up to some float precision
	abs(ROCR_auc_results@y.values[[1]] - fast_auc_results) < 1e-12