Skip to content

Instantly share code, notes, and snippets.

@johnjosephhorton
johnjosephhorton / bls_data_get.py
Created October 2, 2011 03:23
Get data from a poorly formatted BLS table using Python
import urllib2
import csv
FIRST_LINE = 11
LAST_LINE = 38
def get_level(l):
    """Return the number of leading spaces in the line ``l``.

    The original loop located the first non-space character but then fell
    off the end of the function, so the computed position was discarded and
    callers always received ``None``.

    :param l: a line of text (any sequence of one-character strings).
    :returns: the index of the first character that is not a space; the
        length of ``l`` when it is empty or consists only of spaces.
    """
    for i, char in enumerate(l):
        if char != " ":
            return i
    # No non-space character was found: every character is indentation.
    return len(l)
def f(x):
    """Return ``x`` raised to the power of two."""
    return x ** 2
@johnjosephhorton
johnjosephhorton / nfl_mashup.R
Created October 8, 2011 18:05
NFL Mashup with R
library(lme4)
library(ggplot2)
library(XML)
# Download the 2011 NFL stats table and add `delta`, the per-row score
# difference ScoreOff - ScoreDef (described by the author as Home - Away).
nfl.raw <- read.csv(
  file = "http://www.repole.com/sun4cast/stats/nfl2011stats.csv"
)
nfl.raw[["delta"]] <- nfl.raw[["ScoreOff"]] - nfl.raw[["ScoreDef"]]

# Mixed model for the score difference with a random intercept for each
# team (TeamName) and for each opponent (Opponent).
m <- lmer(delta ~ (1 | TeamName) + (1 | Opponent), data = nfl.raw)
@johnjosephhorton
johnjosephhorton / get_machine_learning_wages_on_odesk.py
Created November 26, 2011 19:22
Get the hourly wages of machine learning contractors on oDesk
# John Horton
# www.john-joseph-horton.com
# Description: Answer to Quora question about machine learning hourly rates
# "http://www.quora.com/Machine-Learning/What-do-contractors-in-machine-learning-charge-by-the-hour"
from BeautifulSoup import BeautifulSoup
import urllib2
def contractors(skill, offset):
@johnjosephhorton
johnjosephhorton / optimal_apps.R
Created January 27, 2012 21:48
Optimal number of applications to send
# Optimal number of applications for each row of `df` (defined elsewhere):
#   s.star = log(-c / log(1 - q)) / log(1 - q)
# where q is the probability that an application is successful and c is a
# column of `df` (presumably the per-application cost -- confirm).
df$s.star <- with(df, {
  log.fail <- log(1 - q)
  log(-c / log.fail) / log.fail
})

# A negative number of applications is not meaningful; floor it at zero.
df$s.star[which(df$s.star < 0)] <- 0

# One line per distinct value of c, coloured by that value.
g.optimal <- ggplot(df, aes(x = q, y = s.star, colour = factor(c))) +
  geom_line(aes(group = factor(c))) +
  labs(
    x = "Probability that an application is successful",
    y = "Optimal number of applications to send"
  )

# Render the plot to a PNG file in the working directory.
png(filename = "optimal_apps.png")
print(g.optimal)
dev.off()
@johnjosephhorton
johnjosephhorton / stereotypes_about_animals.py
Created February 4, 2012 21:02
Stereotypes about animals
import networkx as nx
import matplotlib.pyplot as plt
relationships = {
'cats':['cute', 'clean', 'curious', 'lazy'],
'children':['cruel', 'happy', 'mean', 'stupid'],
'cows':['fat', 'sacred to hindus', 'stupid', 'sacred'],
'dogs':['loyal', 'cute', 'loving', 'happy'],
'frogs':['happy', 'slimy', 'important', 'sensitive to pollution'],
'goldfish':['dirty', 'good', 'addicting', 'hard to keep alive'],
@johnjosephhorton
johnjosephhorton / .emacs
Created February 5, 2012 06:09
My .emacs file
;; Make the personal Lisp directory and the local Org checkout loadable.
(add-to-list 'load-path "~/.emacs.d/")
(add-to-list 'load-path (expand-file-name "~/elisp/org-mode/lisp"))
;; Open .org, .org_archive and .txt files in Org mode.  The alternatives
;; must be separated by "\\|"; the previous "\\ |" matched a literal space
;; followed by "|", so neither .org nor .org_archive files matched.
(add-to-list 'auto-mode-alist '("\\.\\(org\\|org_archive\\|txt\\)$" . org-mode))
;; Files scanned when building the agenda.
(setq org-agenda-files '("/tmp/test.org"))
(require 'org-install)
(require 'org-habit)
;; Global Org key bindings: C-c l stores a link, C-c a opens the agenda.
(global-set-key "\C-cl" 'org-store-link)
(global-set-key "\C-ca" 'org-agenda)
@johnjosephhorton
johnjosephhorton / india.R
Created March 6, 2012 21:35
Code for making a plot of contractor locations
library(ggplot2)
library(sqldf)
# Uses the data frame `raw`, which is a list of contractors by latitude and
# longitude (columns LocLat and LocLong); it must already exist in the
# session, because sqldf() queries it by name.
# Count the contractors at each distinct coordinate pair.
df <- sqldf("SELECT COUNT(*) AS num, LocLat, LocLong
FROM raw GROUP BY LocLat, LocLong")

# One point per location, coloured by the count and sized by its log.
# Built with ggplot() + geom_point() because qplot() is deprecated in
# current ggplot2 releases.
g <- ggplot(df, aes(x = LocLong, y = LocLat, colour = num, size = log(num))) +
  geom_point()

# Render the plot to a PNG file in the working directory.
png("india.png")
print(g)
dev.off()
@johnjosephhorton
johnjosephhorton / sql_to_R.R
Created March 7, 2012 02:20
Code snippet for turning PostgreSQL stdout of a table into an R data frame
# Read "|"-separated table text (such as the output psql prints for a
# query) into a data frame.
#
# The first line of the input is used as the header.  The first and the
# last data rows that were read are then dropped -- presumably the
# "----+----" separator line and the "(N rows)" footer of psql output.
#
# Args:
#   file:  file name or connection, passed to read.table().
#   nrows: maximum number of rows to read; -1 (the default) reads them all.
#          NOTE(review): when `nrows` cuts the input short, the last row
#          read is still dropped even though it is not the footer.
#
# Returns:
#   A data frame holding the remaining rows, plus an integer column
#   `index` with each row's position in the input as read.
get.sql.out <- function(file, nrows = -1) {
  raw <- read.table(file, header = TRUE, nrows = nrows, sep = "|",
                    fill = TRUE, strip.white = TRUE)
  num.rows <- nrow(raw)
  # seq_len() gives an empty index for a zero-row table; 1:num.rows would
  # give c(1, 0) there and make the column assignment fail.
  raw$index <- seq_len(num.rows)
  raw[raw$index != 1 & raw$index != num.rows, , drop = FALSE]
}
@johnjosephhorton
johnjosephhorton / db.sh
Created April 13, 2012 18:20
Bash script to copy a file to your public Dropbox folder and put its URL on the clipboard
#!/bin/bash
# Copy a file into ~/Dropbox/Public/ and put its public URL on the X
# clipboard.
#
# Usage: db.sh FILE
#
# Replace "<your id>" in the URL below with your own Dropbox user id.

if [ "$#" -lt 1 ]; then
    echo "usage: $(basename -- "$0") FILE" >&2
    exit 1
fi

# Quote the argument so names with spaces or glob characters survive, and
# stop if the copy fails so a dead link never reaches the clipboard.
cp -- "$1" ~/Dropbox/Public/ || exit 1

# The file lands directly in Public/, so only its base name belongs in the
# URL even when the argument included a directory part.
file_name=$(basename -- "$1")
echo "http://dl.dropbox.com/u/<your id>/${file_name}" | xclip -i -selection clipboard