Skip to content

Instantly share code, notes, and snippets.

View jeffwong's full-sized avatar

Jeffrey Wong jeffwong

  • San Francisco, CA
View GitHub Profile
formatDataTable = function(test) {
colnames = names(test)
# Fix numerics
pattern = '`%s` := as.numeric( `%s` )'
for ( field in colnames[classes == 'numeric'])
test[ , eval( parse( text = sprintf( pattern, field,field ) ) ) ]
# Fix dates
# Can throw a warning when trying to parse NA
@jeffwong
jeffwong / jdk7
Last active August 29, 2015 14:01
make rJava run JDK 7
# Settings for reconfiguring R's Java support against the JDK 1.7.0_55
# installation under /Library/Java/JavaVirtualMachines.
# Source: http://stackoverflow.com/questions/13610293/how-to-get-rjava-0-9-3-to-work-on-os-x-10-7-4-with-oracle-java-1-7
# Directory expected to contain the JVM shared library (jre/lib/server of the JDK).
JAVA_LD_LIBRARY_PATH=/Library/Java/JavaVirtualMachines/jdk1.7.0_55.jdk/Contents/Home/jre/lib/server
# Linker flags: search the same directory and link against libjvm.
JAVA_LIBS='-L/Library/Java/JavaVirtualMachines/jdk1.7.0_55.jdk/Contents/Home/jre/lib/server -ljvm'
# Preprocessor flags: the JDK's include directory plus its darwin subdirectory.
JAVA_CPPFLAGS='-I/Library/Java/JavaVirtualMachines/jdk1.7.0_55.jdk/Contents/Home/include -I/Library/Java/JavaVirtualMachines/jdk1.7.0_55.jdk/Contents/Home/include/darwin'
# NOTE(review): as written these are plain shell assignments without `export`,
# so a child process would not inherit them. Presumably they are meant to be
# exported, or placed on the same command line as `R CMD javareconf` -- confirm
# against the linked answer before relying on this snippet.
R CMD javareconf
@jeffwong
jeffwong / logline.R
Created April 12, 2014 19:08
Approximating log(x) curve with a line
require(ggplot2)
require(reshape2)
# Point of tangency: the line below touches the log curve at x = m.
m = 2
# Evaluation grid from 0.1 to 5 in steps of 0.1.
x = seq(from=0.1, to=5, by = .1)
# Natural log at every grid point. This variable shadows the name `log`, but
# the call log(m) below still reaches the function: when a name is used in
# call position, R skips bindings that are not functions.
log = log(x)
# Tangent line to log(x) at x = m: slope 1/m, passing through (m, log(m)).
line = 1/m*(x - m) + log(m)
# One row per grid point, with the curve and its linear approximation side by side.
df = data.frame(x = x,
log = log,
line = line)
@jeffwong
jeffwong / ggplot.ccf.R
Created March 30, 2014 02:31
ggplot2 equivalent of R's ccf function
ggplot.ccf = function(x, y, lag.min=NULL, lag.max=NULL) {
ccf.data = ccf(x,y,plot=F)
indices = which(ccf.data$lag[,1,1] %in% lag.min:lag.max)
ccf.df = data.frame(lag = ccf.data$lag[indices,1,1],
correlation = ccf.data$acf[indices,1,1])
ggplot(ccf.df,
aes(x = lag, y = correlation)) +
geom_bar(stat = 'identity') +
@jeffwong
jeffwong / fieller.R
Created March 27, 2014 08:21
Confidence Interval for the ratio of two means. http://en.wikipedia.org/wiki/Fieller%27s_theorem
fieller = function(x,y) {
x.mean = mean(x)
y.mean = mean(y)
x.sd = sd(x)
y.sd = sd(y)
xy.cov = cov(x,y)
print(sprintf("x has mean %s, y has mean %s", x.mean, y.mean))
print(sprintf("ratio of means is %s", x.mean/y.mean))
@jeffwong
jeffwong / ratio.R
Created March 27, 2014 06:36
How two random Gaussian processes can each grow independently at a steady rate while their ratio keeps changing
require(ggplot2)
require(reshape2)
set.seed(100)
# Draw `n` Gaussian samples with mean 5 and standard deviation 1.
#
# n: number of samples to draw.
# Returns a numeric vector of length `n`.
smallnoise <- function(n) {
  rnorm(n = n, mean = 5, sd = 1)
}
bignoise = function(n) {
if(rnorm(1) > 0) rnorm(n,50,10)
@jeffwong
jeffwong / benchmark.R
Last active August 29, 2015 13:57
Benchmarking parallel strategies in R
# Sequential baseline for the benchmark: run 10,000 iterations with foreach's
# %do% operator, each drawing 500 standard-normal samples.
#
# This function does not load foreach itself; the package must already be
# attached by the caller.
#
# Returns whatever the foreach loop yields (with no .combine given, foreach's
# documented default is a list with one element per iteration).
naiveTask.local = function() {
  # Spell out TRUE: `T` is an ordinary variable that can be reassigned.
  foreach(i = 1:10000, .inorder = TRUE) %do% {
    rnorm(500)
  }
}
# Parallel counterpart of the naive benchmark: register the doMC backend and
# run 10,000 iterations with foreach's %dopar% operator, each drawing 500
# standard-normal samples.
#
# Side effect: attaches doMC and registers it as the foreach parallel backend.
#
# Returns whatever the foreach loop yields (with no .combine given, foreach's
# documented default is a list with one element per iteration).
naiveTask.parallel = function() {
  # library() stops with a clear error when doMC is not installed; require()
  # would only return FALSE and the failure would surface later, at
  # registerDoMC(), as a confusing "could not find function" error.
  library(doMC)
  registerDoMC()
  # Spell out TRUE: `T` is an ordinary variable that can be reassigned.
  foreach(i = 1:10000, .inorder = TRUE) %dopar% {
    rnorm(500)
  }
}
batchTask.local = function() {
@jeffwong
jeffwong / getFactorGrid.R
Last active January 4, 2016 05:19
Compute all observed combinations of factor-variable values in a data frame
# Return the distinct combinations of values that occur in the character and
# factor columns of `df`.
#
# Only combinations actually present in the data are returned (this is
# unique() over the selected columns, not a full cross product of levels).
#
# df: a data.table or a plain data.frame.
# Returns an object of the same kind as `df`, restricted to the columns
#   reported by getFactors(), with duplicate rows removed.
getFactorGrid = function(df) {
  factors = getFactors(df)
  if (inherits(df, "data.table")) {
    # data.table needs with = FALSE to treat `factors` as a vector of column
    # names. FALSE is spelled out because `F` can be reassigned.
    df.factors = df[, factors, with = FALSE]
  } else {
    # `with` is not an argument of `[.data.frame`; drop = FALSE keeps a data
    # frame even when only one column is selected.
    df.factors = df[, factors, drop = FALSE]
  }
  unique(df.factors)
}
# Names of the columns of `df` whose data.class() is "character" or "factor".
#
# df: a data frame (or data.table).
# Returns a character vector of column names, in column order; empty when no
#   column qualifies.
getFactors <- function(df) {
  col.classes <- vapply(df, data.class, character(1))
  is.categorical <- col.classes %in% c('character', 'factor')
  colnames(df)[is.categorical]
}
@jeffwong
jeffwong / is_ec2.R
Created January 21, 2014 07:35
Determine whether a machine is an EC2 instance
# Guess whether a host is an EC2 machine from the shape of its hostname.
#
# The test is purely lexical: the hostname must start with an alphanumeric
# character and contain "-i-" followed by an alphanumeric character.
# NOTE(review): presumably this targets hostnames that embed an instance id --
# confirm against the naming scheme actually used.
#
# hostname: hostname(s) to test. Defaults to the output of the `hostname`
#   command on the current machine, so is_ec2() behaves as before; the
#   argument is evaluated lazily, so no command is run when a value is given.
# Returns a logical vector, one element per hostname.
is_ec2 = function(hostname = system('hostname', intern = TRUE)) {
  grepl("^[A-Za-z0-9].*-i-[A-Za-z0-9].*$", hostname)
}
# Shift a date back by `n` calendar years by rewriting the year field of its
# "YYYY-MM-DD" text form.
#
# t: a Date, or anything whose as.character() form starts with "YYYY-MM-DD".
# n: number of years to subtract.
# Returns a Date parsed with format "%Y-%m-%d"; text that does not parse as a
#   valid date yields NA.
nYearsAgo <- function(t, n) {
  iso <- as.character(t)
  shifted.year <- as.numeric(substr(iso, 1, 4)) - n
  month.day <- substr(iso, 5, 10)
  shifted <- sprintf("%s%s", shifted.year, month.day)
  as.Date(shifted, format = "%Y-%m-%d")
}