Skip to content

Instantly share code, notes, and snippets.

View seanjtaylor's full-sized avatar

Sean J. Taylor seanjtaylor

View GitHub Profile
library(rvest)
library(dplyr)
# Fetch and parse the article page.
html.doc <- read_html("http://www.footballoutsiders.com/stat-analysis/2016/quarterbacks-and-progression-air-yards")
# Extract table: html_table() is applied to the whole document and the
# first element of its result is kept.
raw.table <- first(html_table(html.doc))
library(rvest)
library(dplyr)
library(stringr)
library(ggplot2)
# Parse the tables found on the Wikipedia page.
tbls <- html_table(
  read_html("https://en.wikipedia.org/wiki/List_of_serial_killers_by_number_of_victims")
)
# First table: keep four columns under short names and coerce the victim
# count to character.
t1 <- mutate(
  select(
    tbls[[1]],
    name = Name,
    country = Country,
    years = `Years active`,
    victims = `Proven victims`
  ),
  victims = as.character(victims)
)
# Second table: same column selection; the victims column is left as parsed.
t2 <- select(
  tbls[[2]],
  name = Name,
  country = Country,
  years = `Years active`,
  victims = `Proven victims`
)
@seanjtaylor
seanjtaylor / air_yards_plot.R
Created October 1, 2016 21:03
Plotting Air Yards Auto-correlation
library(dplyr)
library(ggplot2)
library(rvest)
library(tidyr)
html.doc <- read_html('http://www.footballoutsiders.com/stat-analysis/2016/quarterbacks-and-progression-air-yards')
# Extract table
raw.table <- html.doc %>%
html_table() %>%
@seanjtaylor
seanjtaylor / gamling_odds_implied_wins.R
Created September 8, 2016 15:26
Analysis of Season Win O/Us
library(rvest)
library(stringr)
library(dplyr)
library(ggplot2)
library(tidyr)
# Squared gap between a target probability and the binomial CDF.
#
# The value is zero exactly when pbinom(thresh, n.games, p) equals
# thresh.prob, so minimising it over p finds the success probability that
# matches the target.
#
# Args:
#   p:           per-trial success probability handed to pbinom().
#   thresh.prob: target value for the cumulative probability (required).
#   thresh:      quantile at which the binomial CDF is evaluated (required).
#   n.games:     number of binomial trials. Defaults to 16, the value that
#                used to be hard-coded, so existing calls are unaffected.
#
# Returns:
#   (thresh.prob - pbinom(thresh, n.games, p))^2, vectorised over p.
min.fun <- function(p, thresh.prob = NULL, thresh = NULL, n.games = 16) {
  # With the NULL defaults the arithmetic below would quietly collapse to
  # numeric(0); fail loudly instead so the caller sees what is missing.
  if (is.null(thresh.prob) || is.null(thresh)) {
    stop("both `thresh.prob` and `thresh` must be supplied", call. = FALSE)
  }
  (thresh.prob - pbinom(thresh, n.games, p))^2
}
@seanjtaylor
seanjtaylor / nfl_officials_analysis.R
Created June 15, 2016 16:04
Quick analysis of NFL Officials
library(rvest)
library(stringr)
library(readr)
library(ggplot2)
library(dplyr)
library(tidyr)
library(broom)
library(lubridate)
# Site root for pro-football-reference.com (note the trailing slash).
base.url <- "http://www.pro-football-reference.com/"
@seanjtaylor
seanjtaylor / btl_example.R
Last active August 29, 2015 14:25
How to estimate a BTL model in R.
# Fixed ordering of the items; it determines the column order of every
# indicator matrix built below.
levels <- c("US", "CA", "MX")

# One row per pairwise comparison: `higher` and `lower` name the two items.
df <- data.frame(
  higher = c("US", "CA", "MX"),
  lower = c("CA", "MX", "MX")
)

# Intercept-free indicator matrices: one column per level, with a 1 marking
# which item appears in that row's `higher` (X.l) or `lower` (X.r) slot.
X.r <- model.matrix(~ 0 + factor(lower, levels = levels), data = df)
X.l <- model.matrix(~ 0 + factor(higher, levels = levels), data = df)

# Difference of the two: +1 for the `higher` item, -1 for the `lower` item,
# 0 elsewhere (a row naming the same item twice is all zeros).
X <- X.l - X.r
# makes it easier to interpret regression output
colnames(X) <- levels
@seanjtaylor
seanjtaylor / gist:568141f04a16d518be24
Created February 11, 2015 01:46
Reshaping a Pandas dataframe into a sparse matrix
import pandas as pd
import scipy.sparse as sps
df = pd.DataFrame({'tag1': ['sean', 'udi', 'bogdan'], 'tag2': ['sean', 'udi', 'udi'], 'freq': [1,2,3]})
# tag1 -> rows, tag2 -> columns
df = df.set_index(['tag1', 'tag2'])
# MultiIndex.codes gives, per index level, the integer position of each row's
# label within df.index.levels[i]. It replaces the `.labels` attribute, which
# was deprecated in pandas 0.24 and removed in 1.0.
row_codes, col_codes = df.index.codes[0], df.index.codes[1]
# COO triplets: value `freq` at (row code, column code).
mat = sps.coo_matrix((df['freq'].to_numpy(), (row_codes, col_codes)))
print(mat.todense())
library(mgcv)
library(ggplot2)
library(dplyr)
library(XML)
library(weatherData)
us.airports.url <- 'http://www.world-airport-codes.com/us-top-40-airports.html'
us.airports <- readHTMLTable(us.airports.url)[[1]] %>%
filter(!is.na(IATA)) %>%
@seanjtaylor
seanjtaylor / index.html
Last active December 23, 2015 07:59
Creds Viz take 2
<html>
<head>
<style>
rect.background {
fill: #EEE;
}
line.rule {
stroke: #FFF;
}
text.labels {