Skip to content

Instantly share code, notes, and snippets.

Zachary M. Jones zmjones

Block or report user

Report or block zmjones

Hide content and notifications from this user.

Learn more about blocking users

Contact Support about this user’s behavior.

Learn more about reporting abuse

Report abuse
View GitHub Profile
@zmjones
zmjones / decay.R
Last active Oct 12, 2015
time since event and decaying cumulative sums for binary time series
View decay.R
decay <- function(yvar, d) {
# yvar: a binary variable
# d: number of periods
# returns: the cumulative sum of yvar at each point
# where each addition to the count decays away after d periods
yvar[is.na(yvar)] <- 0
run <- cumsum(yvar)
tvar <- seq_along(yvar)
run <- 0
sum <- 0
@zmjones
zmjones / cv.R
Last active Dec 31, 2015
k-fold cross-validation using generic fitting and loss functions
View cv.R
require(parallel)
# Repeated k-fold cross-validation using generic fitting and loss functions.
#
# df: data.frame to resample; a `folds` column is added to a local copy,
#   so the data passed to `model` and `loss` carries that extra column.
# folds: number of folds (k).
# resamples: number of times the random fold assignment is redrawn.
# model: function(train_df) returning a fitted object.
# loss: function(fit, test_df) returning the loss on the held-out fold.
# cores: number of cores handed to mclapply (defaults to 1).
# returns: a list of length `resamples`; each element is a list of length
#   `folds` holding whatever `loss` returned for each held-out fold.
validate.cv <- function(df, folds, resamples, model, loss, cores = 1) {
  # seq_len() instead of 1:n so a zero count yields no iterations
  # rather than iterating over c(1, 0).
  mclapply(seq_len(resamples), function(x) {
    # Draw a fresh random fold assignment of near-equal sizes for this resample.
    df$folds <- sample(rep(seq_len(folds), length.out = nrow(df)))
    lapply(seq_len(folds), function(test) {
      fit <- model(df[df$folds != test, ])
      loss(fit, df[df$folds == test, ])
    })
  }, mc.cores = cores)  # was `CORES`, an undefined symbol (R is case-sensitive)
}
@zmjones
zmjones / pss2013.py
Last active Jan 2, 2016
scrape all of the Peace Science presentations from the 2013 meeting
View pss2013.py
import requests
from bs4 import BeautifulSoup
def find_links(url, find_file=False):
soup = BeautifulSoup(requests.get(url).content)
try:
links = soup.find('div', id='content-core').find_all('a')
if find_file:
links = [link['href'] + '/' + link.contents[2].strip()
@zmjones
zmjones / futurama.R
Last active Jan 2, 2016
scrapes and visualizes ratings for Futurama from IMDB
View futurama.R
require(ggplot2)
require(plyr)
# Read the ratings file with every column kept as character.
df <- read.csv("futurama.csv", colClasses = "character")
# Split df$episode on a literal "." and store the pieces as the season and
# episode columns (assumes every value splits into exactly two pieces -- TODO confirm).
df[, c("season", "episode")] <- ldply(strsplit(as.character(df$episode), ".", fixed = TRUE))
# For each season, return the row names of its rows sorted by integer episode.
ind <- by(df, list(df$season), function(x) {
x <- x[order(as.integer(x$episode)), ]
row.names(x)
})
# Turn title into a factor whose levels follow the row order collected above;
# the row names are converted to integers and used as positions into df$title.
df$title <- factor(df$title, levels = df$title[as.integer(unlist(ind))])
@zmjones
zmjones / maddison-new.R
Last active Jan 2, 2016
reshape the Maddison historical GDP and population data
View maddison-new.R
pkgs <- c("reshape2", "gdata", "countrycode")
invisible(lapply(pkgs, require, character.only = TRUE))
# Read the workbook from the URL, skipping the first two lines and keeping
# the column names unmodified (check.names = FALSE).
mpd <- read.xls("http://www.ggdc.net/maddison/maddison-project/data/mpd_2013-01.xlsx",
skip = 2, check.names = FALSE)
# Drop every column whose values are all NA.
mpd <- mpd[, !apply(mpd, 2, function(x) all(is.na(x)))]
# Rename columns by position: the first becomes "year"; column 124 gets a
# single underscore-joined name.
# NOTE(review): the index 124 is hard-coded and depends on the layout of this file.
names(mpd)[1] <- "year"
names(mpd)[124] <- "Byzantium_Ottoman_Empire_Turkey"
# Strip leading digits, dots, parentheses, "&amp;" and apostrophes from the
# column names, then trim() the result (trim presumably comes from gdata -- confirm).
colnames(mpd) <- trim(gsub("^[0-9]+|\\.|\\(|\\)|(&amp;)|'", "", colnames(mpd)))
# Collapse "-" or "/" followed by whitespace into a single "-".
colnames(mpd) <- gsub("-\\s+|/\\s+", "-", colnames(mpd))
@zmjones
zmjones / parses3logs.py
Created Feb 7, 2014
parse and clean log files from AWS S3
View parses3logs.py
import csv
import os
import re
import dateutil
import pandas as pd
from urlparse import urlparse
log_path = ''
# parsing code: http://ferrouswheel.me/2010/01/python_tparse-fields-in-s3-logs/
log_entries = []
@zmjones
zmjones / partial_dependence_party.R
Last active Aug 29, 2015
parallel calculation of marginal or joint dependence of explanatory variables from a party random forest
View partial_dependence_party.R
pkgs <- c("party", "parallel")
invisible(lapply(pkgs, require, character.only = TRUE))
partial_dependence <- function(fit, ivar, cores = 1, ...) {
## calculates the partial dependence of the response on explanatory variable(s)
## fit must be a party object
## ivar must be a character vector of length >= 1 all of which
## exist in the dataframe used to fit the model
## if the length of ivar > 1, joint dependence is calculated
df <- data.frame(get("input", fit@data@env), get("response", fit@data@env))
@zmjones
zmjones / example.R
Last active Aug 29, 2015
shiny/ggvis version of plotLearningCurve
View example.R
library(mlr) ## head
library(shiny)
# Generate learning-curve data for two classifiers (rpart and knn) on
# sonar.task at percs 0.2, 0.4, ..., 1.0, recording tpr, fpr, fn and fp,
# resampled with the "Subsample" method for 5 iterations; show.info = FALSE
# is passed to turn off info output.
r = generateLearningCurve(list("classif.rpart", "classif.knn"),
task = sonar.task, percs = seq(0.2, 1, by = 0.2),
measures = list(tpr, fpr, fn, fp),
resampling = makeResampleDesc(method = "Subsample", iters = 5),
show.info = FALSE)
# Plot the result with interactive = TRUE (the shiny/ggvis version, per the
# gist description).
plotLearningCurve(r, interactive = TRUE)
@zmjones
zmjones / example.R
Last active Aug 29, 2015
partial dependence for supervised methods
View example.R
library(mlr)
library(checkmate)
# Regression: train an rpart learner on bh.task, then generate partial
# prediction data for the features "lstat" and "chas" from the task's data.
fr = train("regr.rpart", bh.task)
dr = generatePartialPredictionData(fr, getTaskData(bh.task), c("lstat", "chas"))
# Plot the partial predictions, faceting on "chas".
plotPartialPrediction(dr, facet = "chas")
# Classification: same steps on iris.task for the two petal features.
fc = train("classif.rpart", iris.task)
# The trailing function returns the proportion of each distinct value in x;
# presumably it is used to aggregate the predictions -- confirm against the
# generatePartialPredictionData signature.
dc = generatePartialPredictionData(fc, getTaskData(iris.task), c("Petal.Width", "Petal.Length"),
function(x) table(x) / length(x))
@zmjones
zmjones / example.R
Last active Aug 29, 2015
calibration plots for MLR
View example.R
# Two classifiers (rpart and nnet), both created with predict.type = "prob".
lrns = list(makeLearner("classif.rpart", predict.type = "prob"),
makeLearner("classif.nnet", predict.type = "prob"))
# Train each learner on iris.task and predict on that same task.
fit = lapply(lrns, train, task = iris.task)
pred = lapply(fit, predict, task = iris.task)
# Name the predictions after their learners, in the order of lrns.
names(pred) = c("rpart", "nnet")
# Build calibration data from the named predictions and plot it.
out = generateCalibrationData(pred)
plotCalibration(out)
# Repeat training and prediction on sonar.task, overwriting fit and pred.
fit = lapply(lrns, train, task = sonar.task)
pred = lapply(fit, predict, task = sonar.task)
You can’t perform that action at this time.