Skip to content

Instantly share code, notes, and snippets.

View dmarcelinobr's full-sized avatar
💭
I may be slow to respond

Daniel Marcelino dmarcelinobr

💭
I may be slow to respond
View GitHub Profile
# Simulation settings for drawing voter samples without replacement.
# Note: the assignment arrow must be written `<-`; `N < - 8619170` is parsed
# as the comparison N < (-8619170) and leaves N undefined.
N <- 8619170 # population size of voters
ss <- c(1000, 1200, 3000) # sample sizes to compare
# True population proportion (.27 Serra, .25 Haddad, .19 Russomano)
p <- 0.27
nsim <- 100 # number of simulations
# One slot per sample size; each slot is filled by index in the loop below.
pop.prob <- vector("list", length(ss))
for (i in 1:length(ss)) {
n <- ss[i]
x <- rhyper(nsim, N * p, N * (1 - p), n)
pop.prob[[i]] <- x / n
# Quadratic regression of PVOTOS on PGASTOS, fitted on the rows of `dados`
# where p == 5, t == 2006 and j == "AC"; rows with missing values are dropped.
# The string must use plain ASCII quotes: typographic quotes are not valid
# string delimiters in R and make the call fail to parse.
# NOTE(review): the squared term uses PGASTOSc while the linear term uses
# PGASTOS -- presumably a centred copy of the same variable; confirm.
AC <- lm(
  PVOTOS ~ PGASTOS + I(PGASTOSc^2),
  data = subset(dados, p == 5 & t == 2006 & j == "AC"),
  na.action = na.omit
)
# Coefficient table as a data frame. Rows 1 and 3 are removed, which
# (when no term is aliased) leaves only the PGASTOS row.
ac <- as.data.frame(summary(AC)$coefficients)
ac <- ac[-c(1, 3), ]
@dmarcelinobr
dmarcelinobr / plots.R
Last active December 15, 2015 00:39
Example of how to manipulate axis values in R
# Fake data: 100 draws with replacement from 1..10, and 100 standard normals
y <- sample(10, 100, replace = TRUE)
x <- rnorm(100)
# Plots
par(mfrow = c(2, 3)) # lay out the following plots on a 2 x 3 grid
plot(x, y) # First plot
title("Default plot")
plot(x, y, axes = FALSE) # Second plot: drawn without axes
# To build the final table, n was changed manually on every trial
# (1e3, 1e4, 1e5, 1e6, 1e7), and the benchmark object 'res' was changed as well.
n <- 1e7
# Fixed seed so the simulated columns below are reproducible.
set.seed(51)
# Simulated data: id sampled from 1..100 (rep=T partially matches `replace`),
# x standard normal, y uniform, z Poisson with mean 1.
# NOTE(review): there is no comma or operator between rpois(n, 1) and
# pexp(2, rate=1/3), so this line does not parse as written -- confirm
# whether pexp(...) was meant to be a further column or should be removed.
process <- data.frame(id=sample(100, n, rep=T), x=rnorm(n), y=runif(n), z=rpois(n, 1) pexp(2, rate=1/3) )
# Core count as reported by detectCores(); the variable shares its name with
# the base R function all().
# NOTE(review): this needs the multicore package to be installed;
# parallel::detectCores() may be the maintained equivalent -- confirm.
all <- multicore:::detectCores(all.tests=TRUE)
if(!require(rbenchmark)){
install.packages("rbenchmark")
} else{
#-------------------------------------------------------------------------------
# Generic panel border (can set any combination of left/right/top/bottom)
#-------------------------------------------------------------------------------
theme_border <- function(
type = c("left", "right", "bottom", "top", "none"),
colour = "black", size = 1, linetype = 1) {
# use with e.g.: ggplot(...) + opts( panel.border=theme_border(type=c("bottom","left")) ) + ...
type <- match.arg(type, several.ok=TRUE)
structure(
# Settings: number of simulated points and number of clusters to fit
nObs <- 5000
nClusters <- 7
# Two independent standard-normal coordinates per observation
myData <- data.frame(X = rnorm(nObs), Y = rnorm(nObs))
# Run k-means on the coordinates and store each point's label as a factor
kMeans <- kmeans(myData, centers = nClusters)
myData[["Cluster"]] <- as.factor(kMeans[["cluster"]])
# Plot points colored by cluster
p1 <- ggplot(myData,
# Package setup: set doInstall to FALSE to skip installation.
doInstall <- TRUE
toInstall <- c("ggplot2", "poLCA", "reshape2")
if (doInstall) {
  install.packages(toInstall, repos = "http://cran.us.r-project.org")
}
lapply(toInstall, library, character.only = TRUE)

# Download the ANES survey data (requires network access).
ANES <- read.csv("http://www.oberlin.edu/faculty/cdesante/assets/downloads/ANES.csv")
# Limit to just 2008 respondents and remove some non-helpful variables
# (columns 1, 11 and 17). which() is used so that rows whose year is NA are
# dropped; a bare logical index would return them as all-NA rows.
ANES <- ANES[which(ANES$year == 2008), -c(1, 11, 17)]
head(ANES)
# Adjust so that 1 is the minimum value for each variable:
ANES <- data.frame(apply(ANES, 2, function(cc) {
  cc - min(cc, na.rm = TRUE) + 1
}))
# http://en.wikipedia.org/wiki/Logit#Definition
# Logit: maps a probability to the real line (log-odds).
# Logistic (inverse logit): maps a real number back to a probability.

# p: numeric vector of probabilities.
# Returns log(p / (1 - p)) elementwise.
Logit <- function(p) {
  log(p / (1 - p))
}

# x: numeric vector.
# Returns exp(x) / (1 + exp(x)) elementwise. Computed with stats::plogis()
# because the literal formula evaluates to Inf / Inf = NaN once exp(x)
# overflows (x above roughly 709).
Logistic <- function(x) {
  stats::plogis(x)
}
# See: http://stats.stackexchange.com/questions/12148/looking-for-a-name-for-a-mean-influencing-statistic
# Somewhat personalized
sdfBeta <- function(numer, denom = 1){
if(identical(1, denom)){denom <- rep(1, length(numer))}
Bj <- sum(numer, na.rm = T) / sum(denom, na.rm = T)
Bjni <- (sum(numer, na.rm = T) - numer) / (sum(denom, na.rm = T) - denom)
StdError <- sd(Bjni, na.rm = T)
Value <- (Bj - Bjni) / StdError
names(Value) <- names(numer)
@dmarcelinobr
dmarcelinobr / SpaceTrim.R
Created May 30, 2013 15:16
remove leading and trailing spaces from a string
# Strip runs of space characters from the start and the end of each element
# of x. Only the space character is removed; tabs and newlines are kept.
spaceTrim <- function(x) {
  no_leading <- sub("^ +", "", x)
  sub(" +$", "", no_leading)
}