Skip to content

Instantly share code, notes, and snippets.

# Visualize data and save
#
# Two density charts of the survey columns `tax1` and `tax2` of `frum_data`,
# each with a vertical reference line labelled "Actual Value", written to
# 1000x1000 PNG files.
#
# opts() and theme_text() no longer exist in current ggplot2 releases; the
# title is set with ggtitle() and sized through theme()/element_text() instead.
# Each plot is wrapped in print() so that it is actually drawn on the PNG
# device when this script is run through source(), where top-level values are
# not auto-printed.
png("Tax_percentage.png", height = 1000, width = 1000, res = 100)
print(
  ggplot(frum_data, aes(tax1)) +
    stat_density() +
    geom_vline(aes(xintercept = 31.5, colour = "Actual Value")) +
    ggtitle("In approximate percentage terms, how much is the U.S. (federal)\ngovernment currently taking out of the U.S. economy in taxation?") +
    theme(plot.title = element_text(size = 12)) +
    xlab("Simulated Tea Party Survey Response Distribution") +
    ylab("Density")
)
dev.off()
png("Tax_family.png", height = 1000, width = 1000, res = 100)
print(
  ggplot(frum_data, aes(tax2)) +
    stat_density() +
    geom_vline(aes(xintercept = 7.5, colour = "Actual Value")) +
    ggtitle("How much federal income tax do you think a typical family\nearning $50,000 pays (in 1,000 dollars)?") +
    theme(plot.title = element_text(size = 12)) +
    xlab("Simulated Tea Party Survey Response Distribution") +
    ylab("Density")
)
dev.off()
# From the FrumForum.com Tea Party survey we see the following result for
# the questions:
# Question 1: In approximate percentage terms, how much is the U.S. (federal)
# government currently taking out of the U.S. economy in taxation?
#
# Mean: 42.06%
# STD: 19.06%
# Actual: 31.5%
#
### Meetup activity history ###
# Load the three exported Meetup CSV files from the working directory.
joins <- read.csv("New_York_R_Statistical_Programming_Meetup_Groups_Joins.csv")
rsvp <- read.csv("New_York_R_Statistical_Programming_Meetup_RSVPs.csv")
activity <- read.csv(
  "New_York_R_Statistical_Programming_Meetup_Total_and_Active_Members.csv"
)
# Merge the data into a single frame: fold the tables together left to right
# on "Date", keeping rows that appear in any of them (all = TRUE).
# NOTE(review): the result is stored as `all`, which masks base::all() as a
# variable name; kept because other code may refer to it.
all <- Reduce(
  function(left, right) merge(left, right, by = "Date", all = TRUE),
  list(joins, rsvp, activity)
)
# Plain-vector copy of the merged Date column.
all_dates <- as.vector(all[["Date"]])
library(ggplot2)
library(XML)
### Meetup topics word cloud ###
# Get the raw meetup descriptions: read 'descriptions.txt' with newline as the
# field separator and keep the factor levels of its single column (V1).
# stringsAsFactors is set explicitly because levels() only works on a factor:
# since R 4.0.0 read.table() returns character columns by default, which would
# make levels() return NULL here.
raw_desc <- levels(
  read.table('descriptions.txt', sep = "\n", stringsAsFactors = TRUE)$V1
)
clean_strings<-function(s){
low<-tolower(s)
clean<-gsub("[[:punct:]\n]","",low)
library(geoR)
library(geoRglm)
# Perform MCMC simulations
# Model specification handed to glsm.mcmc(): Poisson family with fixed
# starting values for the covariance parameters and beta.
model <- list(
  cov.pars = c(1, 1),
  beta = 1,
  family = "poisson"
)
# Sampler controls (proposal scale and thinning interval).
mcmc.test <- mcmc.control(
  S.scale = 0.45,
  thin = 1
)
# Run the sampler on haiti.geo (defined elsewhere), then convert its output
# into the form expected by the likelihood-fitting step.
test.tune <- glsm.mcmc(
  haiti.geo,
  model = model,
  mcmc.input = mcmc.test
)
haiti.mcmc <- prepare.likfit.glsm(test.tune)
# Prior settings with phi held fixed at 0.1.
prior <- prior.glm.control(
  phi.prior = "fixed",
  phi = 0.1
)
# File-Name: Scotch_Pref.R
# Date: 2009-11-29
# Author: Drew Conway
# Purpose: Display one-dimensional item response for scotch whiskey preference
# Data Used: whiskey, package=flexmix
# Packages Used: zelig,ggplot
# Output File: scotch_pref.png
# Data Output:
# Machine: Drew Conway's MacBook
# File-Name: cpi_oprobit.R
# Date: 2009-11-17
# Author: Drew Conway
# Purpose: Quick ordered probit analysis of the Corruption Perceptions Index 2009
# to check for effect of number of surveys used on CPI scores
# Data Used: corruption_index.csv
# available here: http://www.drewconway.com/zia/wp-content/uploads/2009/11/corruption_index.csv
# Packages Used: foreign,Zelig
# Output File:
# Data Output:
# File-Name: currency_converter.R
# Date: 2009-11-17
# Author: Drew Conway
# Purpose: Convert currency data
# Data Used: vc_invests.csv
# Packages Used: foreign,XML
# Output File: vc_invests_USD.csv
# Data Output:
# Machine: Drew Conway's MacBook
def write_data(data,path,new_path):
# Takes data dict and writes new data to a new file
reader=csv.reader(open(path,'U'),delimiter=',')
writer=csv.writer(open(new_path,"w"))
row_num=0
for row in reader:
if row_num<1:
# Keep the same column headers as before, so we simply
# re-write the first row.
writer.writerow(row)
import html5lib
from html5lib import treebuilder
def parse_data(player_urls):
# Returns a dict of player data parse trees indexed by player name
# Create a dict indexed by player names
player_data=dict.fromkeys(player_urls.keys())
# Download player profile data and parse using html5lib
for name in player_urls.keys():
# html5lib integrates the easy-to-use BeautifulSoup parse tree using the treebuilders library.