Drew Conway drewconway

## frum_viz.R
# Visualize data and save: render each survey-question density plot to its own
# PNG file.
#
# Each ggplot object is wrapped in print(). ggplot2 only draws a plot when the
# object is printed, and top-level values are not auto-printed when a script is
# run through source(), which would leave the PNG files empty.
#
# NOTE(review): opts() and theme_text() belong to the older ggplot2 API; later
# releases use ggtitle()/theme() with element_text(). Confirm against the
# installed ggplot2 version before running.

# Question 1: density of the tax1 responses, with a vertical line at 31.5
# labelled "Actual Value".
png("Tax_percentage.png", height = 1000, width = 1000, res = 100)
print(
  ggplot(frum_data, aes(tax1)) +
    stat_density() +
    geom_vline(aes(xintercept = 31.5, colour = "Actual Value")) +
    opts(title = "In approximate percentage terms, how much is the U.S. (federal)
    government currently taking out of the U.S. economy in taxation?", plot.title = theme_text(size = 12)) +
    xlab("Simulated Tea Party Survey Response Distribution") +
    ylab("Density")
)
dev.off()

# Question 2: density of the tax2 responses, with a vertical line at 7.5
# labelled "Actual Value".
png("Tax_family.png", height = 1000, width = 1000, res = 100)
print(
  ggplot(frum_data, aes(tax2)) +
    stat_density() +
    geom_vline(aes(xintercept = 7.5, colour = "Actual Value")) +
    opts(title = "How much federal income tax do you think a typical family
    earning $50,000 pays (in 1,000 dollars)?", plot.title = theme_text(size = 12)) +
    xlab("Simulated Tea Party Survey Response Distribution") +
    ylab("Density")
)
dev.off()

## frum_data_gen.R
# From the FrumForum.com Tea Party survey we see the following result for
# the questions:

# Question 1: In approximate percentage terms, how much is the U.S. (federal)
# government currently taking out of the U.S. economy in taxation?
#
# Mean:     42.06%
# STD:      19.06%
# Actual:   31.5%
#

## nyc_meetup_timeseries.R
### Meetup activity history ###

# Load the three Meetup exports: group joins, RSVPs, and total/active members.
joins <- read.csv(
  "New_York_R_Statistical_Programming_Meetup_Groups_Joins.csv"
)
rsvp <- read.csv(
  "New_York_R_Statistical_Programming_Meetup_RSVPs.csv"
)
activity <- read.csv(
  "New_York_R_Statistical_Programming_Meetup_Total_and_Active_Members.csv"
)

# Merge the data into a single frame: a full outer join on the "Date" column,
# folded left to right (joins with rsvp first, then the result with activity).
# NOTE(review): the result name `all` masks base::all() for the rest of the
# script; it is kept because later code may refer to it.
all <- Reduce(
  function(left, right) merge(left, right, by = "Date", all = TRUE),
  list(joins, rsvp, activity)
)

# Plain (attribute-free) vector of the merged Date column.
all_dates <- as.vector(all[["Date"]])

## nyc_meetup_wordclous.R
library(ggplot2)
library(XML)

### Meetup topics word cloud ###

# Get the raw meetup descriptions as a character vector of distinct values.
# descriptions.txt is read with newline as the field separator, so each line
# lands in column V1, and the factor levels of that column are kept.
# stringsAsFactors is passed explicitly: since R 4.0.0 read.table() no longer
# converts strings to factors by default, in which case levels() returns NULL.
raw_desc <- levels(
  read.table("descriptions.txt", sep = "\n", stringsAsFactors = TRUE)$V1
)
clean_strings<-function(s){
    low<-tolower(s)
    clean<-gsub("[[:punct:]\n]","",low)

## haiti_analysis.R
library(geoR)
library(geoRglm)

# Perform MCMC simulations

# Model specification handed to glsm.mcmc(): covariance parameters c(1, 1),
# beta of 1, and a Poisson family.
model <- list(
  cov.pars = c(1, 1),
  beta = 1,
  family = "poisson"
)

# Sampler settings: proposal scale 0.45, no thinning (thin = 1).
mcmc.test <- mcmc.control(S.scale = 0.45, thin = 1)

# Run the sampler on the haiti.geo data with the model and settings above.
test.tune <- glsm.mcmc(haiti.geo, model = model, mcmc.input = mcmc.test)

# Convert the sampler output with prepare.likfit.glsm()
# (presumably for a later likelihood fit -- confirm against the rest of the script).
haiti.mcmc <- prepare.likfit.glsm(test.tune)

# Prior settings with phi held fixed at 0.1.
prior <- prior.glm.control(phi.prior = "fixed", phi = 0.1)

## Scotch_Pref.R
# File-Name:       Scotch_Pref.R
# Date:            2009-11-29
# Author:          Drew Conway
# Purpose:         Display one-dimensional item response for scotch whiskey preference
# Data Used:       whiskey, package=flexmix
# Packages Used:   zelig,ggplot
# Output File:     scotch_pref.png
# Data Output:
# Machine:         Drew Conway's MacBook

## cpi_oprobit.R
# File-Name:       cpi_oprobit.R
# Date:            2009-11-17
# Author:          Drew Conway
# Purpose:         Quick ordered probit analysis of the Corruption Perceptions Index 2009
#                   to check for effect of number of surveys used on CPI scores
# Data Used:       corruption_index.csv
#  available here: http://www.drewconway.com/zia/wp-content/uploads/2009/11/corruption_index.csv
# Packages Used:   foreign,Zelig
# Output File:
# Data Output:

## currency_converter.r
# File-Name:       currency_converter.R
# Date:            2009-11-17
# Author:          Drew Conway
# Purpose:         Convert currency data
# Data Used:       vc_invests.csv
# Packages Used:   foreign,XML
# Output File:     vc_invests_USD.csv
# Data Output:
# Machine:         Drew Conway's MacBook


## gist:137954
def write_data(data,path,new_path):
# Takes data dict and writes new data to a new file
    reader=csv.reader(open(path,'U'),delimiter=',')
    writer=csv.writer(open(new_path,"w"))
    row_num=0
    for row in reader:
        if row_num<1:
        # Keep the same column headers as before, so we simply
        # re-write the first row.
            writer.writerow(row)

## gist:137653
import html5lib
from html5lib import treebuilder

def parse_data(player_urls):
# Returns a dict of player data parse trees indexed by player name
    # Create a dict indexed by player names
    player_data=dict.fromkeys(player_urls.keys())
    # Download player profile data and parse using html5lib
    for name in player_urls.keys():
    # html5lib integrates the easy-to-use BeautifulSoup parse tree using the treebuilders library.
	# Visualize data and save: render each survey-question density plot to its
	# own PNG file.
	#
	# Each ggplot object is wrapped in print(). ggplot2 only draws a plot when the
	# object is printed, and top-level values are not auto-printed when a script
	# is run through source(), which would leave the PNG files empty.
	#
	# NOTE(review): opts() and theme_text() belong to the older ggplot2 API; later
	# releases use ggtitle()/theme() with element_text(). Confirm against the
	# installed ggplot2 version before running.

	# Question 1: density of the tax1 responses, with a vertical line at 31.5
	# labelled "Actual Value".
	png("Tax_percentage.png", height = 1000, width = 1000, res = 100)
	print(
	  ggplot(frum_data, aes(tax1)) +
	    stat_density() +
	    geom_vline(aes(xintercept = 31.5, colour = "Actual Value")) +
	    opts(title = "In approximate percentage terms, how much is the U.S. (federal)
	government currently taking out of the U.S. economy in taxation?", plot.title = theme_text(size = 12)) +
	    xlab("Simulated Tea Party Survey Response Distribution") +
	    ylab("Density")
	)
	dev.off()

	# Question 2: density of the tax2 responses, with a vertical line at 7.5
	# labelled "Actual Value".
	png("Tax_family.png", height = 1000, width = 1000, res = 100)
	print(
	  ggplot(frum_data, aes(tax2)) +
	    stat_density() +
	    geom_vline(aes(xintercept = 7.5, colour = "Actual Value")) +
	    opts(title = "How much federal income tax do you think a typical family
	earning $50,000 pays (in 1,000 dollars)?", plot.title = theme_text(size = 12)) +
	    xlab("Simulated Tea Party Survey Response Distribution") +
	    ylab("Density")
	)
	dev.off()
	# From the FrumForum.com Tea Party survey we see the following result for
	# the questions:

	# Question 1: In approximate percentage terms, how much is the U.S. (federal)
	# government currently taking out of the U.S. economy in taxation?
	#
	# Mean: 42.06%
	# STD: 19.06%
	# Actual: 31.5%
	#
	### Meetup activity history ###

	# Load the three Meetup exports: group joins, RSVPs, and total/active members.
	joins <- read.csv(
	  "New_York_R_Statistical_Programming_Meetup_Groups_Joins.csv"
	)
	rsvp <- read.csv(
	  "New_York_R_Statistical_Programming_Meetup_RSVPs.csv"
	)
	activity <- read.csv(
	  "New_York_R_Statistical_Programming_Meetup_Total_and_Active_Members.csv"
	)

	# Merge the data into a single frame: a full outer join on the "Date" column,
	# folded left to right (joins with rsvp first, then the result with activity).
	# NOTE(review): the result name `all` masks base::all() for the rest of the
	# script; it is kept because later code may refer to it.
	all <- Reduce(
	  function(left, right) merge(left, right, by = "Date", all = TRUE),
	  list(joins, rsvp, activity)
	)

	# Plain (attribute-free) vector of the merged Date column.
	all_dates <- as.vector(all[["Date"]])
	library(ggplot2)
	library(XML)

	### Meetup topics word cloud ###

	# Get the raw meetup descriptions as a character vector of distinct values.
	# descriptions.txt is read with newline as the field separator, so each line
	# lands in column V1, and the factor levels of that column are kept.
	# stringsAsFactors is passed explicitly: since R 4.0.0 read.table() no longer
	# converts strings to factors by default, in which case levels() returns NULL.
	raw_desc <- levels(
	  read.table("descriptions.txt", sep = "\n", stringsAsFactors = TRUE)$V1
	)
	clean_strings<-function(s){
	low<-tolower(s)
	clean<-gsub("[[:punct:]\n]","",low)
	library(geoR)
	library(geoRglm)

	# Perform MCMC simulations

	# Model specification handed to glsm.mcmc(): covariance parameters c(1, 1),
	# beta of 1, and a Poisson family.
	model <- list(
	  cov.pars = c(1, 1),
	  beta = 1,
	  family = "poisson"
	)

	# Sampler settings: proposal scale 0.45, no thinning (thin = 1).
	mcmc.test <- mcmc.control(S.scale = 0.45, thin = 1)

	# Run the sampler on the haiti.geo data with the model and settings above.
	test.tune <- glsm.mcmc(haiti.geo, model = model, mcmc.input = mcmc.test)

	# Convert the sampler output with prepare.likfit.glsm()
	# (presumably for a later likelihood fit -- confirm against the rest of the script).
	haiti.mcmc <- prepare.likfit.glsm(test.tune)

	# Prior settings with phi held fixed at 0.1.
	prior <- prior.glm.control(phi.prior = "fixed", phi = 0.1)
	# File-Name: Scotch_Pref.R
	# Date: 2009-11-29
	# Author: Drew Conway
	# Purpose: Display one-dimensional item response for scotch whiskey preference
	# Data Used: whiskey, package=flexmix
	# Packages Used: zelig,ggplot
	# Output File: scotch_pref.png
	# Data Output:
	# Machine: Drew Conway's MacBook
	# File-Name: cpi_oprobit.R
	# Date: 2009-11-17
	# Author: Drew Conway
	# Purpose: Quick ordered probit analysis of the Corruption Perceptions Index 2009
	# to check for effect of number of surveys used on CPI scores
	# Data Used: corruption_index.csv
	# available here: http://www.drewconway.com/zia/wp-content/uploads/2009/11/corruption_index.csv
	# Packages Used: foreign,Zelig
	# Output File:
	# Data Output:
	# File-Name: currency_converter.R
	# Date: 2009-11-17
	# Author: Drew Conway
	# Purpose: Convert currency data
	# Data Used: vc_invests.csv
	# Packages Used: foreign,XML
	# Output File: vc_invests_USD.csv
	# Data Output:
	# Machine: Drew Conway's MacBook
	def write_data(data,path,new_path):
	# Takes data dict and writes new data to a new file
	reader=csv.reader(open(path,'U'),delimiter=',')
	writer=csv.writer(open(new_path,"w"))
	row_num=0
	for row in reader:
	if row_num<1:
	# Keep the same column headers as before, so we simply
	# re-write the first row.
	writer.writerow(row)
	import html5lib
	from html5lib import treebuilder

	def parse_data(player_urls):
	# Returns a dict of player data parse trees indexed by player name
	# Create a dict indexed by player names
	player_data=dict.fromkeys(player_urls.keys())
	# Download player profile data and parse using html5lib
	for name in player_urls.keys():
	# html5lib integrates the easy-to-use BeautifulSoup parse tree using the treebuilders library.