Note: This post is a summary of information paraphrased from an excellent blog post by Christian Sepulveda.
Create the app and download the necessary dependencies.
# Basic arithmetic examples: each line below is a bare top-level expression.
# An addition
5 + 5
# A subtraction
5 - 5
# A multiplication
3 * 5
# A division
# NOTE(review): the division expression is cut off in this excerpt.
# Placeholder initialised to a single NA.
# NOTE(review): how it is filled is not visible in this excerpt.
storage.vector <- NA
# Function that assigns treatment/control depending on | |
# propensity scores (assignment probabilities) | |
experiment <- function(vector.of.probabilities = NULL) { | |
k = 0 | |
for (i in 1:length(vector.of.probabilities)) { | |
if( | |
sample(x = c(1,0), size = 1, prob = c(vector.of.probabilities[i], | |
1 - vector.of.probabilities[i])) == 1) { |
################ PRELIMINARIES
library(MASS)
data(Pima.tr)
library(tree)
library(randomForest)

## STEP 1: Logistic regression ##
# Basic model: regress `type` on every other column of Pima.tr with a
# binomial GLM.
logistic_reg <- glm(type ~ ., data = Pima.tr, family = binomial)
# Predicted probabilities (TRAINING SET): no `newdata` is supplied, so
# predict() returns fitted values for the rows the model was trained on.
predict_logistic.tr <- predict(logistic_reg, type = "response")
# EXERCISE TO BUILD INTUITION FOR CORRELATED VS. UNCORRELATED DATA
# PLEASE FOCUS ON UNDERSTANDING THE BELOW
### DO NOT JUST EXECUTE ALL THE CODE IN ONE BATCH--RUN IT LINE BY LINE...

### Simulation of analysis on correlated data
# Fix the RNG seed so that random draws made after this point are reproducible.
set.seed(1314)
# NOTE(review): presumably the number of simulation replications; the code
# that consumes it is outside this excerpt.
nsims <- 10000
library(boot) | |
# Estimate the mean via bootstrapping.
#' Mean of an indexed subset of a vector
#'
#' @param data A vector of observations.
#' @param index Indices selecting which elements of `data` to average.
#' @return The value of `mean(data[index])`.
# NOTE(review): the (data, index) signature looks intended for the `statistic`
# argument of boot::boot(); the call site is outside this excerpt -- confirm.
boot.mean <- function(data, index) {
  mean(data[index])
}
#calculate the CI via t-distribution | |
t.dist.ci <- function(samp) { | |
df <- length(samp) - 1 | |
factors <- qt(c(0.025, 0.975), df = df) | |
samp.mean <- mean(samp) |
library(haven)
library(arm)

# Read the Stata turnout data.
# NOTE(review): absolute, machine-specific path -- this only runs on a machine
# where the file exists at exactly this location.
df <- read_dta("C:/Users/Vinic/Downloads/turnout.dta")

# Open the spreadsheet-style viewer only in an interactive session, so a
# non-interactive run does not try to open a viewer.
if (interactive()) {
  View(df)
}

# Inspect the first cell and the first row.
df[1, 1]
df[1, ]

# Logistic regression of `turnout` on every other column in the data.
# (The object is named `lm2` but it is a binomial GLM, not a linear model.)
lm2 <- glm(turnout ~ ., data = df, family = binomial)
summary(lm2)
# Fix the RNG seed so that random draws made after this point are reproducible.
set.seed(20181001)

### LOOCV
# Load packages and data
library(Matching)
library(boot)
data(lalonde)

# Train your model on ALL the data -- Use glm instead of lm
# NOTE(review): the model-fitting code is cut off in this excerpt.
library(haven)

# Read the Stata turnout data.
# NOTE(review): absolute, machine-specific path -- this only runs on a machine
# where the file exists at exactly this location.
df <- read_dta("C:/Users/Vinic/Downloads/turnout.dta")

# Open the spreadsheet-style viewer only in an interactive session, so a
# non-interactive run does not try to open a viewer.
if (interactive()) {
  View(df)
}

# Inspect the first cell and the first row.
df[1, 1]
df[1, ]

# Logistic regression of `turnout` on every other column in the data.
# (The object is named `lm2` but it is a binomial GLM, not a linear model.)
lm2 <- glm(turnout ~ ., data = df, family = binomial)
summary(lm2)
Create the app and download the necessary dependencies.