Skip to content

Instantly share code, notes, and snippets.

View jeffwong's full-sized avatar

Jeffrey Wong jeffwong

  • San Francisco, CA
View GitHub Profile
@jeffwong
jeffwong / install_jupyterhub.sh
Created March 29, 2015 17:52
JupyterHub install
# Install the prerequisites for JupyterHub on Ubuntu and fetch its sources.

# Python 3 tooling and the native libraries needed to build the dependencies.
sudo aptitude install python3-setuptools
sudo apt-get install python3-dev libzmq3-dev libcurl4-openssl-dev

# Fetch the JupyterHub sources; a bare URL is not a command, so clone
# explicitly to create the directory entered on the next line.
git clone https://github.com/jupyter/jupyterhub.git
cd jupyterhub

# Node.js and the HTTP proxy package, installed globally through npm.
sudo apt-get install npm nodejs-legacy
sudo npm install -g configurable-http-proxy

# pip for Python 3.
sudo easy_install3 pip
@jeffwong
jeffwong / hashStrings.R
Last active August 29, 2015 14:14
Hash and Unhash strings
#' Encode strings as reversible hexadecimal identifiers
#'
#' Each element of `x` is converted to its raw bytes with `charToRaw()`,
#' the two-digit hex codes are concatenated, and the result is prefixed
#' with "hash_".
#'
#' @param x A character vector.
#' @return A character vector with one encoded string per element of `x`.
#'   When `x` has no names, the result is named by the elements of `x`.
hashStrings = function(x) {
  # vapply (unlike sapply) always returns a character vector, including for
  # zero-length input, where sapply would return an empty list.
  vapply(x, function(i) {
    sprintf("hash_%s", paste0(charToRaw(i), collapse = ""))
  }, character(1))
}
unhashStrings = function(x) {
sapply(x, function(i) {
i = gsub("hash_", "", i)
i.split = strsplit(i, "")[[1]]
@jeffwong
jeffwong / RML.R
Last active August 29, 2015 14:10
Convert R data structures to text files for VW, FM, LIBSVM
#' Write a numeric matrix to a text file in LIBSVM sparse format
#'
#' Every row of `data` becomes one line: the label, followed by
#' space-separated `index:value` pairs for the non-zero entries of the
#' remaining columns. Indices are the column positions in `data`; the label
#' column is skipped but the other columns are not renumbered.
#'
#' @param data A numeric matrix, or an object `as.matrix()` can convert.
#' @param filename Path of the file to write.
#' @param class Column position of the label within `data`.
#' @return `NULL`, invisibly; called for its side effect of writing the file.
write.libsvm = function(data, filename = "out.dat", class = 1) {
  data = as.matrix(data)

  # Build all lines before opening the file so that a failure while
  # formatting does not leave a truncated file behind.
  lines = vapply(seq_len(nrow(data)), function(i) {
    row = data[i, ]
    # Drop the label column by position. Removing the class-th *non-zero*
    # entry instead would discard a real feature whenever the label is 0.
    feature.idx = setdiff(which(row != 0), class)
    features = if (length(feature.idx) > 0) {
      paste0(feature.idx, ":", row[feature.idx])
    } else {
      character(0)
    }
    # Collapse label and features as one vector so the label appears once,
    # rather than being recycled in front of every feature.
    paste(c(row[class], features), collapse = " ")
  }, character(1))

  out = file(filename, open = "wt")
  # Release the connection even if writing fails.
  on.exit(close(out), add = TRUE)
  writeLines(lines, out)
  invisible(NULL)
}
#' x should be a model matrix
#' y is a response vector
write.vw = function(data, outcomeName, weightName = '', classification=F, file = 'vw.txt') {
#Getting sigopt to run on Ubuntu
# Build GLPK 4.39 from source and install it with `make install`.
sudo apt-get install gcc-multilib
wget http://ftp.gnu.org/gnu/glpk/glpk-4.39.tar.gz
tar -xzf glpk-4.39.tar.gz
cd glpk-4.39/
./configure
make
sudo make install
# Export the variable so child processes can see it, and keep any
# directories already present on the library search path.
export LD_LIBRARY_PATH=/usr/local/lib${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}
@jeffwong
jeffwong / ipython.sh
Last active August 29, 2015 14:06
ipython data science
# Download the Anaconda 2.1.0 installer for 64-bit Linux.
wget http://09c8d0b2229f813c1b93-c95ac804525aac4b6dba79b00b39d1d3.r79.cf1.rackcdn.com/Anaconda-2.1.0-Linux-x86_64.sh
# Run the installer; the file name must match the one downloaded above.
bash Anaconda-2.1.0-Linux-x86_64.sh
#http://thomassileo.com/blog/2012/11/19/setup-a-remote-ipython-notebook-server-with-numpyscipymaltplotlibpandas-in-a-virtualenv-on-ubuntu-server/
@jeffwong
jeffwong / ggplot_utils.R
Last active August 29, 2015 14:05
ggplot utility functions
# Multiple plot function
#
# ggplot objects can be passed in ..., or to plotlist (as a list of ggplot objects)
# - cols: Number of columns in layout
# - layout: A matrix specifying the layout. If present, 'cols' is ignored.
#
# If the layout is something like matrix(c(1,2,3,3), nrow=2, byrow=TRUE),
# then plot 1 will go in the upper left, 2 will go in the upper right, and
# 3 will go all the way across the bottom.
#
@jeffwong
jeffwong / rhive.R
Created August 4, 2014 21:04
R to Hive utils
dbBuildTableDefinition.hive =
function (databasename, tablename, df, field.types = NULL, s3path)
{
if (!is.data.frame(df))
df <- as.data.frame(df)
if (is.null(field.types)) {
field.types <- lapply(df, hiveDataType)
}
flds <- paste(names(field.types), field.types)
base = sprintf("CREATE EXTERNAL TABLE %s.%s
@jeffwong
jeffwong / ggplot.coefs.R
Created June 22, 2014 22:47
ggplot for glmnet
plotCoefs = function(model, nonzero = T, subset.condition, ...) {
stopifnot(require(ggplot2))
coefs = coef(model)[,1]
coef.labels = attr(coefs, "names")
isLogistic = model$name == "Binomial Deviance"
if (isLogistic) coefs = exp(coefs)
coefs.df = data.frame(label = coef.labels, value = coefs)
coefs.df = coefs.df[order(coefs.df$value),]
coefs.df$label = factor(coefs.df$label, levels = coefs.df$label)
@jeffwong
jeffwong / prestoR.R
Created June 11, 2014 07:33
PrestoR
# System prerequisite; run this in a shell, not in R
# (a bare shell command in an R script is a parse error):
#   sudo apt-get install libcurl4-openssl-dev

# R packages to install.
install.packages(c('RCurl', 'jsonlite'))
@jeffwong
jeffwong / facetAdjust.R
Created June 6, 2014 17:56
Fix formatting problems when using facet_wrap
facetAdjust <- function(x, pos = c("up", "down"))
{
pos <- match.arg(pos)
p <- ggplot_build(x)
gtable <- ggplot_gtable(p); dev.off()
dims <- apply(p$panel$layout[2:3], 2, max)
nrow <- dims[1]
ncol <- dims[2]
panels <- sum(grepl("panel", names(gtable$grobs)))
space <- ncol * nrow