Created
June 24, 2013 19:44
-
-
Save nathanvan/5852927 to your computer and use it in GitHub Desktop.
Sample R code from the June 2013 Stat Bytes talk
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
## Stat Bytes supplement 6/21/2013 | |
## Nathan VanHoudnos | |
## Quick demo to test Rprof | |
Rprof("profile1.out", line.profiling=TRUE) | |
source('simple.data.R') | |
Rprof(NULL) | |
summaryRprof("profile1.out", lines = "show")$by.line | |
## And to play with Rcpp | |
## install.packages(c('inline','Rcpp','RcppArmadillo')) | |
require(Rcpp) | |
require(RcppArmadillo) | |
require(inline) | |
## Here is a pure R version of a function to calculate | |
## a log likelihood in a 2PL IRT model | |
log.prob.apply.r <- function(U.data, th, a, b, applyBy) { | |
P.persons <- nrow(U.data) | |
term.1 <- outer(th, a) | |
term.2 <- matrix(rep(a*b, P.persons), nrow=P.persons, | |
byrow=TRUE) | |
P.prob <- 1/(1 + exp(term.2 - term.1)) | |
## Use the inverse CDF trick | |
log.bernoulli <- U.data*log(P.prob) + (1-U.data)*log(1-P.prob) | |
return(apply(log.bernoulli,applyBy,sum)) | |
} | |
## Write the C++ function in a string | |
## N.B. You can also write it in a separate | |
## file, then load it into a string if you | |
## prefer. See: | |
## fileName <- 'log.prob.apply.cpp.src.cpp' | |
## log.prob.apply.cpp.src <- readChar(fileName, file.info(fileName)$size) | |
log.prob.apply.cpp.src <- ' | |
// Import the R objects passed to the function | |
arma::mat U = Rcpp::as<arma::mat>(UdataR); | |
arma::colvec theta = Rcpp::as<arma::colvec>(thetaR); | |
arma::rowvec a = Rcpp::as<arma::rowvec>(aR); | |
arma::rowvec b = Rcpp::as<arma::rowvec>(bR); | |
int appBy = as<int>(appByS); | |
// | |
// Find out the dimensions of the U matrix | |
int Ppersons = U.n_rows; | |
int Iitems = U.n_cols; | |
// | |
// Declare variables to use | |
arma::mat term1(Ppersons,Iitems); | |
arma::mat term2(Ppersons,Iitems); | |
arma::mat Pprob(Ppersons,Iitems); | |
arma::mat logBernoulli(Ppersons,Iitems);; | |
// | |
// Calculate the terms for the logit | |
term1 = theta*a; //Outer product | |
// | |
// Fill term2 with elementwise multiplication | |
// (every row is the same) | |
for (int row=0; row<Ppersons; row++) { | |
term2.row(row) = a%b; //Schur product (element wise) | |
} | |
// | |
// Calculate the logit | |
Pprob = 1/(1+exp(term2-term1)); | |
// | |
// Use the inverse CDF trick | |
logBernoulli = U%log(Pprob) + (1-U)%log(1-Pprob); | |
// | |
// Return the likelihood | |
if ( appBy == 1 ) { | |
logBernoulli = cumsum(logBernoulli, 1); | |
theta = logBernoulli.col(Iitems-1); | |
return(Rcpp::wrap( theta )); | |
} else { | |
logBernoulli = cumsum(logBernoulli, 0); | |
a = logBernoulli.row(Ppersons-1); | |
return(Rcpp::wrap( a )); | |
} | |
' | |
## C++ version | |
log.prob.apply.cpp <- cxxfunction( | |
signature( | |
UdataR="numeric", | |
thetaR="numeric", aR="numeric", bR="numeric", | |
appByS="int"), | |
body=log.prob.apply.cpp.src, plugin="RcppArmadillo") | |
## Use the 100 datasets from the profiling example (see 'sample.data.R') | |
system.time( | |
a.disc.lik.r <- lapply( tmp, function(holder){ | |
log.prob.apply.r(U.data=holder$U, | |
th=holder$theta.abl, | |
a=holder$a.disc, | |
b=holder$b.diff, | |
applyBy=2) | |
} ) ) ## 1.1413 seconds | |
system.time( | |
a.disc.lik.cpp <- lapply( tmp, function(holder){ | |
as.numeric(log.prob.apply.cpp(UdataR=holder$U, | |
thetaR=holder$theta.abl, | |
aR=holder$a.disc, | |
bR=holder$b.diff, | |
appByS=2)) | |
} ) ) ## 0.888 seconds | |
stopifnot(all.equal(a.disc.lik.r, a.disc.lik.cpp)) | |
## So a 22% percent improvement by compiling the code | |
## Not bad, but not an order of magnitude either. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
## Stat Bytes supplement 6/21/2013 -- Nathan VanHoudnos | |
## Set the random-number generator seed, | |
## to make the results reproducible | |
set.seed(314159) | |
make.data <- function(I.items=30, P.persons=2000) { | |
# set the number of items I and persons P | |
# I.items <- 30 | |
# P.persons <- 2000 | |
# set the fixed item and population parameters | |
a.disc <- 1 + runif(I.items,-0.5,0.5) | |
b.diff <- seq(-3,3,length=I.items) | |
mean.theta <- 0 | |
sig2.theta <- (1.25)^2 | |
# generate thetas and the I x P matrix of response probabilities | |
theta.abl <- rnorm(P.persons, mean=mean.theta, sd=sqrt(sig2.theta)) | |
term.1 <- outer(theta.abl, a.disc) | |
term.2 <- matrix(rep(a.disc*b.diff, P.persons), nrow=P.persons, | |
byrow=TRUE) | |
## Using lower level functions improves speed, but costs clarity | |
P.prob.1 <- plogis(term.1-term.2) | |
P.prob.2 <- 1/(1 + exp(term.2 - term.1)) | |
stopifnot(all.equal(P.prob.1, P.prob.2)) | |
## ifelse is linear time, casting is constant time | |
U.flip <- runif(I.items*P.persons) | |
U.1 <- ifelse(U.flip < P.prob.1,1,0) | |
U.2 <- as.numeric(U.flip < P.prob.2) | |
dim(U.2) <- c(P.persons, I.items) | |
stopifnot(all.equal(U.1, U.2)) | |
return(list(U=U.1, theta.abl=theta.abl, a.disc=a.disc, b.diff=b.diff)) | |
} | |
tmp <- lapply(1:100,function(ii){make.data()}) | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment