Skip to content

Instantly share code, notes, and snippets.

# Here's where I extract the database IDs and repeat them 50 times to make the column long enough for
# my new long-form dataset (596,100 rows)
client.data.new = rep(client.data[,1],50)
for (i in 2:32){
# for each column in the first 31 after the ID column, find the 49 matching columns
# to the right and stack them using melt
stacked.data = melt(client.data, id.vars="CnBio_ID", measure.vars=seq(i,(i+(31*49)),31), value.name=names(client.data)[i])
@inkhorn
inkhorn / toronto_casino.r
Created May 2, 2013 01:01
Casino Analysis
library(ff)
library(ffbase)
library(stringr)
library(ggplot2)
library(ggthemes)
library(reshape2)
library(RgoogleMaps)
# Loading 2 copies of the same data set so that I can convert one and have the original for its text values
casino = read.csv("/home/inkhorn/Downloads/casino_survey_results20130325.csv")
@inkhorn
inkhorn / toronto.casino.glm.r
Created May 17, 2013 18:57
toronto casino glm results
Call:
glm(formula = casino$Q6 == "City of Toronto" ~ GoBigorGoHome +
TechnicalDetails + Soc.Env.Issues, family = binomial(logit),
data = casino)
Deviance Residuals:
Min 1Q Median 3Q Max
-3.6426 -0.4745 -0.1156 0.4236 3.4835
Coefficients:
@inkhorn
inkhorn / adj.mun.cacsino.glm.r
Created May 17, 2013 18:59
adjacent municipality casino glm
Call:
glm(formula = casino$Q6 == "Adjacent Municipality" ~ GoBigorGoHome +
TechnicalDetails + Soc.Env.Issues, family = binomial(logit),
data = casino)
Deviance Residuals:
Min 1Q Median 3Q Max
-1.0633 -0.7248 -0.5722 -0.3264 2.7136
Coefficients:
@inkhorn
inkhorn / neither.casino.glm.r
Created May 17, 2013 19:00
neither casino glm
Call:
glm(formula = casino$Q6 == "Neither" ~ GoBigorGoHome + TechnicalDetails +
Soc.Env.Issues, family = binomial(logit), data = casino)
Deviance Residuals:
Min 1Q Median 3Q Max
-2.4090 -0.7344 -0.3934 0.8966 2.7194
Coefficients:
Estimate Std. Error z value Pr(>|z|)
@inkhorn
inkhorn / casino.geo.r
Last active October 30, 2023 12:44
not in my backyard casino analysis
library(ff)
library(ggthemes)
ffload(file="casino", overwrite=TRUE)
casino.orig$Outside.of.Toronto = as.ff(ifelse(casino.orig[,"City"] == "Toronto",0,1))
casino.in.toronto = glm(casino.orig[,"Q6"] == "City of Toronto" ~ Outside.of.Toronto, data=casino.orig, family=binomial(logit))
casino.outside.toronto = glm(casino.orig[,"Q6"] == "Adjacent Municipality" ~ Outside.of.Toronto, data=casino.orig, family=binomial(logit))
summary(casino.in.toronto)
@inkhorn
inkhorn / estimate_age.R
Last active December 20, 2015 09:19
Estimate Age from First Name in R
library(stringr)
library(plyr)
# We're assuming you've downloaded the SSA files into your R project directory.
file_listing = list.files()[3:135]
for (f in file_listing) {
year = str_extract(f, "[0-9]{4}")
if (year == "1880") { # Initializing the very long dataframe
name_data = read.csv(f, header=FALSE)
@inkhorn
inkhorn / ebike.r
Created September 13, 2013 00:31
E-bike Survey Analysis
library(rpart)
library(plyr)
library(rpart.plot)
ebike = read.csv("E-Bike_Survey_Responses.csv")
# This next part is strictly to change any blank responses into NAs
ebike[,2:10][ebike[,2:10] == ''] = NA
# In this section we use mapvalues from the plyr package to get rid of blanks, but also
@inkhorn
inkhorn / daycares.R
Created October 17, 2013 02:18
Daycare Analysis
library(ff)
library(ffbase)
library(RgoogleMaps)
library(plyr)
addTrans <- function(color,trans)
{
# This function adds transparency to a color.
# Define transparency with an integer between 0 and 255
# 0 being fully transparent and 255 being fully visible
@inkhorn
inkhorn / enron processing.py
Last active February 5, 2017 18:12
Script to read, filter, and output all enron emails into many files in one directory
docs = []
from os import listdir, chdir
import re
# Here's my attempt at coming up with regular expressions to filter out
# parts of the Enron emails that I deem useless.
email_pat = re.compile(".+@.+")
to_pat = re.compile("To:.+\n")