Skip to content

Instantly share code, notes, and snippets.

View MichaelChirico's full-sized avatar

Michael Chirico MichaelChirico

View GitHub Profile
library(data.table)
# Address of the raw annual.csv gist file; the literal is split in two only
# to keep the source lines short.
URL <- paste0(
  "https://gist.githubusercontent.com/MichaelChirico/",
  "67d8dcec64ea6cb7caf2/raw/22f7eb750c1ba8175463ae48901285934a5dadf8/annual.csv"
)
annual <- fread(URL)
annual.m <-
melt(annual, id.vars = "year"
)[ , paste0("V", 1:3) :=
@MichaelChirico
MichaelChirico / get_cor.R
Created March 29, 2016 17:50
cor from matrix data.frame
library(data.table)
# Numeric-only copy of the built-in ChickWeight data: every column is pushed
# through as.numeric() so that cor() accepts the whole table.
DT <- as.data.table(ChickWeight)[, lapply(.SD, as.numeric)]
DF <- as.data.frame(cor(DT))
# ^ DF plays the role of the correlation table you have from Stata

# Switch to a matrix so the triangle helpers can be used for indexing
M <- as.matrix(DF)
# Drop the lower triangle (diagonal included) by marking those cells missing
is.na(M) <- lower.tri(M, diag = TRUE)
@MichaelChirico
MichaelChirico / philly_private_schools.R
Last active March 30, 2016 22:38
scraping private school demographic info
library(rvest)
library(data.table)
# First results page of the school listing; the query string sets all three
# "Public ..." filters to 0.
URL1 <- paste0(
  "http://greatphillyschools.org/",
  "schools?Public%20Special%20Admission=0&",
  "Public%20District=0&Public%20Charter=0"
)
# Second results page: same query with page=2 spliced in directly after
# "schools?". The pattern is matched literally (fixed = TRUE) and occurs once.
URL2 <- sub("schools?", "schools?page=2&", URL1, fixed = TRUE)
urls1 <- html(URL1) %>%
DT <- structure(list(sport = c("NHL", "NHL", "NHL", "NHL", "NHL", "NHL",
"NHL", "NHL", "NHL", "NHL", "NHL", "NHL", "NHL", "NHL", "NHL",
"NHL", "NHL", "NHL", "NHL", "NHL", "NHL", "NHL", "NHL", "NHL",
"NHL", "NHL", "NHL", "NHL", "NHL", "NHL", "NHL", "NHL", "NHL",
"NHL", "NHL", "NHL", "NHL", "NHL", "NHL", "NHL", "NHL", "NHL",
"NHL", "NHL", "NHL", "NHL", "NHL", "NHL", "NHL", "NHL", "NHL",
"NHL", "NHL", "NHL", "NHL", "NHL", "NHL", "NHL", "NHL", "NHL",
"NHL", "NHL", "NHL", "NHL", "NHL", "NHL", "NHL", "NHL", "NHL",
"NHL", "NHL", "NHL", "NHL", "NHL", "NHL", "NHL", "NHL", "NHL",
"NHL", "NHL", "NHL", "NHL", "NHL", "NHL", "NHL", "NHL", "NHL",
@MichaelChirico
MichaelChirico / iv_pred_sim.R
Last active September 12, 2016 00:43
Simulating ways to get predictions from a fitted IV regression
# Sample size and a fixed seed so every run draws the same values
nn <- 1e6
set.seed(9112016)

# Fake data: four independent standard-normal draws of length nn.
# The draw order (z, v, err1, err2) fixes which part of the random stream
# each variable receives, so it must not be rearranged.
z <- rnorm(nn)
v <- rnorm(nn)
err1 <- rnorm(nn)
err2 <- rnorm(nn)

# x is driven by both z and v, plus its own noise term
x <- 2 + 3 * z + 2 * v + err1
@MichaelChirico
MichaelChirico / test.R
Created November 22, 2016 16:48
testing quantiles
set.seed(1234)
library(data.table)

# 200 random returns; year (10 values) and group (20 values) are shorter than
# ret and are recycled to fill the 200 rows.
annual_data <- data.table(year = 1991:2000, ret = rnorm(200), group = 1:20)

# Add decile2 by reference, computed separately within each year.
# NOTE(review): presumably create_quantiles(ret, 10) returns a decile bucket
# per row -- confirm against the funchir documentation.
annual_data[
  ,
  decile2 := funchir::create_quantiles(ret, 10),
  by = year
]

# One row per year holding the 10%, 20%, ..., 100% quantiles of ret as columns
yearly_deciles <- annual_data[
  ,
  data.table(t(quantile(ret, probs = seq(from = .1, to = 1, length.out = 10)))),
  by = year
]
find_decile <- function(thisReturn,year,decile_table){
for(i in 2:ncol(decile_table)){
library(data.table)
# System word list, read one entry per line and lower-cased
dict.orig <- tolower(readLines("/usr/share/dict/american-english"))
# Split every word into single characters, one column per position; words
# shorter than the longest are padded with "" for simpler retrieval
dictDT <- setDT(tstrsplit(dict.orig, split = "", fill = ""))
#lookup table for conversion
lookup = data.table(num = c(rep(2L, 3), rep(3L, 3), rep(4L, 3),
rep(5L, 3), rep(6L, 3), rep(7L, 4),
rep(8L, 3), rep(9L, 4)),
We can make this file beautiful and searchable if this error is corrected: It looks like row 9 should actually have 23 columns, instead of 11. in line 8.
EST,Max TemperatureF,Mean TemperatureF,Min TemperatureF,Max Dew PointF,MeanDew PointF,Min DewpointF,Max Humidity, Mean Humidity, Min Humidity, Max Sea Level PressureIn, Mean Sea Level PressureIn, Min Sea Level PressureIn, Max VisibilityMiles, Mean VisibilityMiles, Min VisibilityMiles, Max Wind SpeedMPH, Mean Wind SpeedMPH, Max Gust SpeedMPH,PrecipitationIn, CloudCover, Events, WindDirDegrees
2017-1-1,51,42,34,29,26,24,75,48,30,30.42,30.19,29.94,10,10,10,18,12,25,0.00,4,,274
2017-1-2,43,38,32,41,37,25,96,88,66,30.45,30.35,30.19,10,6,2,15,8,25,0.16,8,Rain,52
2017-1-3,48,44,41,46,42,39,97,93,89,30.18,29.79,29.53,10,4,2,15,11,28,0.20,8,Rain,31
2017-1-4,54,45,35,46,36,10,96,74,14,29.83,29.58,29.48,10,6,2,24,12,34,0.01,6,Rain,250
2017-1-5,34,31,28,27,17,8,92,58,23,30.05,29.98,29.85,10,8,1,21,11,28,0.03,7,Snow,253
2017-1-6,32,29,26,28,21,9,93,70,31,30.31,30.05,29.92,10,7,0,13,5,17,0.04,8,Snow,336
2017-1-7,24,22,19,18,13,4,89,68,33,30.33,30.24,30.18,10,5,0,17,13,25,0.08,7,Fog-Snow,1
2017-1-8,24,20,15,7,3,-1,57,46,29,
CATEGORY CALL_GROUP N
1: STREET CRIMES DISORDER 216246
2: STREET CRIMES PERSON CRIME 68546
3: OTHER SUSPICIOUS 116233
4: OTHER DISORDER 170351
5: OTHER NON CRIMINAL/ADMIN 173143
6: OTHER PERSON CRIME 10314
7: OTHER PROPERTY CRIME 137229
8: OTHER TRAFFIC 93484
9: MOTOR VEHICLE THEFT PROPERTY CRIME 14946
@MichaelChirico
MichaelChirico / bug.csv
Created February 23, 2017 16:56
segfault bug
ID V
16227 0
16228 0
16229 0
16230 0
16232 0
16234 0
16235 0
16236 0
16237 0