Skip to content

Instantly share code, notes, and snippets.

View jeffwong's full-sized avatar

Jeffrey Wong jeffwong

  • San Francisco, CA
View GitHub Profile
@jeffwong
jeffwong / flare.json
Created November 17, 2012 23:43
my pack layout
{
"name": "flare",
"children": [
{
"name": "analytics",
"children": [
{
"name": "cluster",
"children": [
{"name": "AgglomerativeCluster", "size": 3938},
@jeffwong
jeffwong / mybins.R
Last active December 13, 2015 21:59
Binning numerics in R
mybins = function(x, numbreaks, weights, ordered=T) {
if (missing(weights)) x.w = x
else x.w = rep(x, times=weights)
breaks = quantile(x.w, seq(0,1,length.out=numbreaks+1))
bins = findInterval(x, breaks, rightmost.closed=T)
bins.f = factor(bins, levels = 1:numbreaks, ordered=ordered)
list(breaks = breaks, bins = bins.f, numbreaks = numbreaks, ordered = ordered)
}
mybins.assign = function(bins, newx) {
# Multiple plot function
#
# ggplot objects can be passed in ..., or to plotlist (as a list of ggplot objects)
# - cols: Number of columns in layout
# - layout: A matrix specifying the layout. If present, 'cols' is ignored.
#
# If the layout is something like matrix(c(1,2,3,3), nrow=2, byrow=TRUE),
# then plot 1 will go in the upper left, 2 will go in the upper right, and
# 3 will go all the way across the bottom.
#
@jeffwong
jeffwong / strstrip.R
Created March 6, 2013 21:12
Delete trailing whitespace from a string in R
strstrip = function(x) gsub("^\\s+|\\s+$", "", x)
@jeffwong
jeffwong / myscale.R
Last active December 14, 2015 19:29
Scale data frame with both continuous and binary variables
isBinaryVariable = function(x) {
apply(x, 2, function(j) {
return (identical(range(unique(j)), c(0,1)))
})
}
scaleBinary = function(j) {
p = mean(j)
j.sd = sqrt(p*(1-p))
(j - p) / j.sd
@jeffwong
jeffwong / myload.R
Last active December 15, 2015 02:59
Load an R object without overwriting things in the current workspace
myload = function(filename) {
tryCatch({
local({
objects = load(filename)
if (length(objects) == 1) return (get(objects))
else return (lapply(sapply, get))
})
}, error = function(e) {cat("No such file")})
}
@jeffwong
jeffwong / chunk.sh
Last active December 16, 2015 12:59
Break a file into chunks
#!/bin/bash
if [ $# -lt 3 ] ;
then
echo "Usage: $0 inputfile output_prefix lines_per_chunk [header=1]"
exit
fi
header=1
if [ $# -eq 4 ] ;
@jeffwong
jeffwong / hideNewLevels.R
Last active December 17, 2015 10:19
Hide new factor levels in a test set
require(data.table)
#Note that this permanently modifies test.data
hideNewLevels = function(train.data, test.data, replacement=NA, colnames.factor=NULL) {
if(is.null(colnames.factor)) colnames.factor = extractFactors(test.data)
for (col in colnames.factor) {
levels.test = levels(test.data[,get(col)])
levels.train = levels(train.data[,get(col)])
badlevels.indices = which(!levels.test %in% levels.train)
@jeffwong
jeffwong / getMemory.R
Last active December 17, 2015 10:19
Get memory usage for R objects loaded in the current workspace
.ls.objects <- function (pos = 1, pattern, head=FALSE, n=5) {
napply <- function(names, fn) sapply(names, function(x)
fn(get(x, pos = pos)))
names <- ls(pos = pos, pattern = pattern)
obj.class <- napply(names, function(x) as.character(class(x))[1])
obj.mode <- napply(names, mode)
obj.type <- ifelse(is.na(obj.class), obj.mode, obj.class)
obj.size <- napply(names, object.size)
obj.prettysize <- sapply(obj.size, function(r) prettyNum(r, big.mark = ",") )
obj.dim <- t(napply(names, function(x)
@jeffwong
jeffwong / bootstrapForecast.R
Last active December 17, 2015 17:19
Use multiple ARIMA models to bootstrap a confidence interval for the total
bootstrapForecast = function(..., newxreglist=NULL, inv.transform=NULL, runs=1000, n.ahead=90, CI.level = .95) {
models = list(...)
nummodels = length(models)
sims = vapply(1:runs, function(i) {
sim = vapply(1:nummodels, function(j) {
if (is.data.frame(newxreglist)) newxreg = newxreglist
else newxreg = newxreglist[[j]]
if (!is.null(inv.transform)) {
if(is.list(inv.transform)) inv = inv.transform[[j]]
else inv = inv.transform