Daniel Marcelino dmarcelinobr

## simulation.r
# Simulation settings.
# The original `N < - 8619170` parsed as the comparison `N < -8619170`
# rather than an assignment.
N <- 8619170               # population size of voters
ss <- c(1000, 1200, 3000)  # sample sizes
p <- 0.27                  # true population proportion
                           # (.27 Serra, .25 Haddad, .19 Russomano)
nsim <- 100                # number of simulations

pop.prob <- list()
for (i in 1:length(ss)) {
  n <- ss[i]
  x <- rhyper(nsim, N * p, N * (1 - p), n)
  pop.prob[[i]] <- x / n

## mapbr.r
# Regress PVOTOS on PGASTOS plus a squared term, using only the rows of
# `dados` where p == 5, t == 2006 and j == "AC".
# The original used typographic quotes around AC, which R cannot parse.
# NOTE(review): the squared term uses `PGASTOSc`, not `PGASTOS` --
# presumably a centred copy of the same variable; confirm.
AC <- lm(
  PVOTOS ~ PGASTOS + I(PGASTOSc^2),
  data = subset(dados, p == 5 & t == 2006 & j == "AC"),
  na.action = na.omit
)

# Coefficient table of the fit as a data frame.
ac <- as.data.frame(summary(AC)$coefficients)

# Drop rows 1 and 3 of the coefficient table (presumably the intercept and
# the squared term, leaving the PGASTOS row -- verify if terms are aliased).
ac <- ac[-c(1, 3), ]

## plots.R
# Fake data: 100 draws with replacement from 1..10, and 100 standard normals.
y <- sample(10, 100, replace = TRUE)
x <- rnorm(100)

# Plots
par(mfrow = c(2, 3))  # lay panels out on a 2 x 3 grid
plot(x, y)            # first plot: all defaults
title("Default plot")

plot(x, y, axes = FALSE)  # second plot: axes suppressed

## parallel.R
# To make the final table, the size n was changed by hand for every trial
# (1e3, 1e4, 1e5, 1e6, 1e7), and the benchmark object 'res' was changed too.
n <- 1e7
set.seed(51)
process <- data.frame(
  id = sample(100, n, replace = TRUE),
  x = rnorm(n),
  y = runif(n),
  z = rpois(n, 1),
  # NOTE(review): the original had neither a comma nor a column name before
  # pexp(); the name `w` is a guess -- confirm. The single value is recycled
  # to n rows by data.frame().
  w = pexp(2, rate = 1 / 3)
)

# Core count. `multicore` is no longer on CRAN; detectCores() lives in the
# `parallel` package that ships with R.
all <- parallel::detectCores(all.tests = TRUE)

if(!require(rbenchmark)){
install.packages("rbenchmark")
} else{

## border.R
#-------------------------------------------------------------------------------
# Generic panel border (can set any combination of left/right/top/bottom)
#-------------------------------------------------------------------------------

theme_border <- function(
        type = c("left", "right", "bottom", "top", "none"),
        colour = "black", size = 1, linetype = 1) {
    # use with e.g.: ggplot(...) + opts( panel.border=theme_border(type=c("bottom","left")) ) + ...
    type <- match.arg(type, several.ok=TRUE)
    structure(

## stat_density2d.R

# Two independent standard-normal coordinates per observation
nObs <- 5000
myData <- data.frame(
  X = rnorm(n = nObs),
  Y = rnorm(n = nObs)
)

# Run k-means with a fixed number of centers and store each row's
# cluster assignment as a factor column
nClusters <- 7
kMeans <- kmeans(x = myData, centers = nClusters)
myData[["Cluster"]] <- as.factor(kMeans[["cluster"]])

# Plot points colored by cluster
p1 <- ggplot(myData,

## Latent_Class_Analysis.R
# Packages used by this script. With doInstall TRUE they are (re)installed
# on every run before being attached.
doInstall <- TRUE
toInstall <- c("ggplot2", "poLCA", "reshape2")
if (doInstall) {
  install.packages(toInstall, repos = "http://cran.us.r-project.org")
}
lapply(toInstall, library, character.only = TRUE)

ANES <- read.csv("http://www.oberlin.edu/faculty/cdesante/assets/downloads/ANES.csv")
# Limit to just 2008 respondents and remove some non-helpful variables
# (columns 1, 11 and 17).
ANES <- ANES[ANES$year == 2008, -c(1, 11, 17)]
head(ANES)
# Adjust so that 1 is the minimum value for each variable.
# Columns are shifted one at a time with lapply(); the original apply() call
# first coerced the whole data frame to a matrix.
ANES[] <- lapply(ANES, function(cc) {
  cc - min(cc, na.rm = TRUE) + 1
})

## Logit.R
# http://en.wikipedia.org/wiki/Logit#Definition

# Logit: maps a probability in (0, 1) to the real line (log-odds)
# Logistic (inverse logit): maps a real number back to a probability

# Log-odds of p: log(p / (1 - p)). Vectorized over p.
Logit <- function(p) {
  odds <- p / (1 - p)
  log(odds)
}
# Inverse logit: maps x on the real line to a value in [0, 1].
# Written as 1 / (1 + exp(-x)) because exp(x) / (1 + exp(x)) evaluates to
# Inf / Inf = NaN once exp(x) overflows (x above roughly 709).
Logistic <- function(x) {
  1 / (1 + exp(-x))
}

## sdfBeta.R
# See: http://stats.stackexchange.com/questions/12148/looking-for-a-name-for-a-mean-influencing-statistic
# Somewhat personalized

sdfBeta <- function(numer, denom = 1){
  if(identical(1, denom)){denom <- rep(1, length(numer))}
  Bj <- sum(numer, na.rm = T) / sum(denom, na.rm = T)
  Bjni <- (sum(numer, na.rm = T) - numer) / (sum(denom, na.rm = T) - denom)
  StdError <- sd(Bjni, na.rm = T)
  Value <- (Bj - Bjni) / StdError
  names(Value) <- names(numer)

## SpaceTrim.R
# Remove runs of space characters at the start and end of each element of x.
# Only the space character is matched; tabs and newlines are left alone.
spaceTrim <- function(x) {
  edgeSpaces <- "(^ +)|( +$)"
  gsub(pattern = edgeSpaces, replacement = "", x = x)
}
	# Simulation settings.
	# The original `N < - 8619170` parsed as the comparison `N < -8619170`
	# rather than an assignment.
	N <- 8619170 # population size of voters
	ss <- c(1000, 1200, 3000) # sample sizes
	p <- 0.27 # true population proportion (.27 Serra, .25 Haddad, .19 Russomano)
	nsim <- 100 # number of simulations

	pop.prob <- list()
	for (i in 1:length(ss)) {
	n <- ss[i]
	x <- rhyper(nsim, N * p, N * (1 - p), n)
	pop.prob[[i]] <- x / n
	# Regress PVOTOS on PGASTOS plus a squared term, using only the rows of
	# `dados` where p == 5, t == 2006 and j == "AC".
	# The original used typographic quotes around AC, which R cannot parse.
	# NOTE(review): the squared term uses `PGASTOSc`, not `PGASTOS` --
	# presumably a centred copy of the same variable; confirm.
	AC <- lm(
	  PVOTOS ~ PGASTOS + I(PGASTOSc^2),
	  data = subset(dados, p == 5 & t == 2006 & j == "AC"),
	  na.action = na.omit
	)

	# Coefficient table of the fit as a data frame.
	ac <- as.data.frame(summary(AC)$coefficients)

	# Drop rows 1 and 3 of the coefficient table (presumably the intercept and
	# the squared term, leaving the PGASTOS row -- verify if terms are aliased).
	ac <- ac[-c(1, 3), ]
	# Fake data: 100 draws with replacement from 1..10, and 100 standard normals.
	y <- sample(10, 100, replace = TRUE)
	x <- rnorm(100)

	# Plots
	par(mfrow = c(2, 3)) # lay panels out on a 2 x 3 grid
	plot(x, y) # first plot: all defaults
	title("Default plot")

	plot(x, y, axes = FALSE) # second plot: axes suppressed
	# To make the final table, the size n was changed by hand for every trial
	# (1e3, 1e4, 1e5, 1e6, 1e7), and the benchmark object 'res' was changed too.
	n <- 1e7
	set.seed(51)
	process <- data.frame(
	  id = sample(100, n, replace = TRUE),
	  x = rnorm(n),
	  y = runif(n),
	  z = rpois(n, 1),
	  # NOTE(review): the original had neither a comma nor a column name before
	  # pexp(); the name `w` is a guess -- confirm. The single value is recycled
	  # to n rows by data.frame().
	  w = pexp(2, rate = 1 / 3)
	)

	# Core count. `multicore` is no longer on CRAN; detectCores() lives in the
	# `parallel` package that ships with R.
	all <- parallel::detectCores(all.tests = TRUE)

	if(!require(rbenchmark)){
	install.packages("rbenchmark")
	} else{
	#-------------------------------------------------------------------------------
	# Generic panel border (can set any combination of left/right/top/bottom)
	#-------------------------------------------------------------------------------

	theme_border <- function(
	type = c("left", "right", "bottom", "top", "none"),
	colour = "black", size = 1, linetype = 1) {
	# use with e.g.: ggplot(...) + opts( panel.border=theme_border(type=c("bottom","left")) ) + ...
	type <- match.arg(type, several.ok=TRUE)
	structure(

	# Two independent standard-normal coordinates per observation
	nObs <- 5000
	myData <- data.frame(
	  X = rnorm(n = nObs),
	  Y = rnorm(n = nObs)
	)

	# Run k-means with a fixed number of centers and store each row's
	# cluster assignment as a factor column
	nClusters <- 7
	kMeans <- kmeans(x = myData, centers = nClusters)
	myData[["Cluster"]] <- as.factor(kMeans[["cluster"]])

	# Plot points colored by cluster
	p1 <- ggplot(myData,
	# Packages used by this script. With doInstall TRUE they are (re)installed
	# on every run before being attached.
	doInstall <- TRUE
	toInstall <- c("ggplot2", "poLCA", "reshape2")
	if (doInstall) {
	  install.packages(toInstall, repos = "http://cran.us.r-project.org")
	}
	lapply(toInstall, library, character.only = TRUE)

	ANES <- read.csv("http://www.oberlin.edu/faculty/cdesante/assets/downloads/ANES.csv")
	# Limit to just 2008 respondents and remove some non-helpful variables
	# (columns 1, 11 and 17).
	ANES <- ANES[ANES$year == 2008, -c(1, 11, 17)]
	head(ANES)
	# Adjust so that 1 is the minimum value for each variable.
	# Columns are shifted one at a time with lapply(); the original apply() call
	# first coerced the whole data frame to a matrix.
	ANES[] <- lapply(ANES, function(cc) {
	  cc - min(cc, na.rm = TRUE) + 1
	})
	# http://en.wikipedia.org/wiki/Logit#Definition

	# Logit: maps a probability in (0, 1) to the real line (log-odds)
	# Logistic (inverse logit): maps a real number back to a probability

	# Log-odds of p: log(p / (1 - p)). Vectorized over p.
	Logit <- function(p) {
	  odds <- p / (1 - p)
	  log(odds)
	}
	# Inverse logit: maps x on the real line to a value in [0, 1].
	# Written as 1 / (1 + exp(-x)) because exp(x) / (1 + exp(x)) evaluates to
	# Inf / Inf = NaN once exp(x) overflows (x above roughly 709).
	Logistic <- function(x) {
	  1 / (1 + exp(-x))
	}
	# See: http://stats.stackexchange.com/questions/12148/looking-for-a-name-for-a-mean-influencing-statistic
	# Somewhat personalized

	sdfBeta <- function(numer, denom = 1){
	if(identical(1, denom)){denom <- rep(1, length(numer))}
	Bj <- sum(numer, na.rm = T) / sum(denom, na.rm = T)
	Bjni <- (sum(numer, na.rm = T) - numer) / (sum(denom, na.rm = T) - denom)
	StdError <- sd(Bjni, na.rm = T)
	Value <- (Bj - Bjni) / StdError
	names(Value) <- names(numer)