mtramba

## Lalonde3Ways
library(Matching)
data("lalonde")
# attach() is kept only so that any later code relying on bare column names
# keeps working; the subsetting below no longer depends on it.
attach(lalonde)
set.seed(1)

# Separate the data depending on the value of nodegr.
# The column is referenced explicitly through lalonde$ so that a global
# variable that happens to be called `nodegr` cannot mask the data column.
nodegree <- lalonde[which(lalonde$nodegr == 1), ]
degree <- lalonde[which(lalonde$nodegr == 0), ]

#Random Forests

## gist:a779718790ebacf4593b7588df1bb3c0
# Ten cities labelled "A" through "J" and the turnout observed in each.
city.names <- LETTERS[1:10]
observed.turnout <- c(17, 30, 13, 55, 26, 29, 48, 43, 17, 30)

# Difference in means: cities at even positions minus cities at odd positions.
observed.diffmeans <-
  mean(observed.turnout[seq(2, 10, by = 2)]) -
  mean(observed.turnout[seq(1, 9, by = 2)])

print(observed.diffmeans)


# Collect labels and turnout side by side in one data frame.
foo <- data.frame(
  city.names = city.names,
  observed.turnout = observed.turnout
)

## Causal Inference Assignment
#1
# Read the control and treated tables from their text files.
nswre74_controls <- read.table("nswre74_control.txt")
nswre74_treated <- read.table("nswre74_treated.txt")

# Both tables receive the same ten column labels, so assign them in one
# chained statement instead of repeating the vector.
names(nswre74_controls) <- names(nswre74_treated) <- c(
  "treat", "age", "education", "black", "hispanic",
  "married", "nodegree", "re74", "re75", "re78"
)

# Stack the control rows on top of the treated rows.
nswre74 <- rbind(nswre74_controls, nswre74_treated)

# Difference in Means Treatment Effect: mean re78 of treated minus controls.
treat.effect <-
  mean(nswre74_treated[["re78"]]) - mean(nswre74_controls[["re78"]])

## CS112 Final
library(foreign)
data2 <- read.dta("peace.dta")

# Removing observations with missing data: rows 47 and 19 are dropped by
# position in one step (same result as dropping 47 first and then 19).
data2 <- data2[-c(19, 47), ]

# Logistic regression of pbs2s3 on the covariates listed in the formula.
rg <- glm(
  pbs2s3 ~ wartype + logcost + wardur + factnum + factnum2 + trnsfcap +
    develop + exp + decade + treaty + untype4,
  family = binomial(link = "logit"),
  data = data2
)
rg$coefficients

## CS146 3.1 preclass
import numpy as np
import matplotlib.pyplot as plt
from scipy import stats
%matplotlib inline

'''
Function definitions for the normal-inverse-gamma distribution. The parameters
of the distribution, namely mu, lambda / nu, alpha, beta, are as defined here:

  https://en.wikipedia.org/wiki/Normal-inverse-gamma_distribution

## CS146 3.2 preclass
import numpy as np
import matplotlib.pyplot as plt
from scipy import stats
%matplotlib inline

# Given the data, a binomial likelihood is appropriate because its support (non-negative counts) matches the data.
# The unobserved parameter is the proportion of treated patients who improve with the vaccination.

# A beta prior is appropriate for this proportion. Because we really don't know what percentage of the population improves,
# we use the prior hyperparameters a = 1 and b = 1.

## CS146 5.1 preclass
##Task 1

# Run the sampler on the control-group data and summarise the posterior of p.
stan_results_c = stan_model.sampling(data=eczema_data['control'])
print(stan_results_c.stansummary(pars=['p'], probs=[0.025, 0.5, 0.975]))
posterior_samples_c = stan_results_c.extract()
print(
    "Posterior 95% confidence interval for p:",
    np.percentile(posterior_samples_c['p'], [2.5, 97.5]))

# Plot the control-group samples extracted above. The previous code passed
# `posterior_samples`, a name this snippet never assigns, so it either raised
# NameError or plotted samples from some other fit.
plt.hist(posterior_samples_c['p'], bins=50, density=True)

## CS146 5.2 preclass
import numpy as np
from scipy import stats
import matplotlib.pyplot as plt
import pandas as pd
%matplotlib inline

# Load the CSV into a DataFrame and echo it for inspection.
social_data = pd.read_csv(
    'socialmobility.csv'
)
print(social_data)

#establish alphas_0 and prior

## CS146 6.2 preclass
%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from scipy import stats

# Open figure number 1 with a 12 x 10 canvas.
plt.figure(num=1, figsize=(12, 10))
# NOTE(review): presumably a sample/trial count and a probability; confirm
# against the code that consumes n and p.
n = 1000
p = 0.1


## CS146 7.1
from scipy import stats
import numpy as np
import matplotlib.pyplot as plt
import pystan


electoral_votes = {
    'Alabama': 9,
    'Alaska': 3,
    'Arizona': 11,
	library(Matching)
	data("lalonde")
	# attach() is kept only so that any later code relying on bare column names
	# keeps working; the subsetting below no longer depends on it.
	attach(lalonde)
	set.seed(1)

	# Separate the data depending on the value of nodegr.
	# The column is referenced explicitly through lalonde$ so that a global
	# variable that happens to be called `nodegr` cannot mask the data column.
	nodegree <- lalonde[which(lalonde$nodegr == 1), ]
	degree <- lalonde[which(lalonde$nodegr == 0), ]

	#Random Forests
	# Ten cities labelled "A" through "J" and the turnout observed in each.
	city.names <- LETTERS[1:10]
	observed.turnout <- c(17, 30, 13, 55, 26, 29, 48, 43, 17, 30)

	# Difference in means: cities at even positions minus cities at odd positions.
	observed.diffmeans <-
	  mean(observed.turnout[seq(2, 10, by = 2)]) -
	  mean(observed.turnout[seq(1, 9, by = 2)])

	print(observed.diffmeans)


	# Collect labels and turnout side by side in one data frame.
	foo <- data.frame(
	  city.names = city.names,
	  observed.turnout = observed.turnout
	)
	#1
	# Read the control and treated tables from their text files.
	nswre74_controls <- read.table("nswre74_control.txt")
	nswre74_treated <- read.table("nswre74_treated.txt")

	# Both tables receive the same ten column labels, so assign them in one
	# chained statement instead of repeating the vector.
	names(nswre74_controls) <- names(nswre74_treated) <- c(
	  "treat", "age", "education", "black", "hispanic",
	  "married", "nodegree", "re74", "re75", "re78"
	)

	# Stack the control rows on top of the treated rows.
	nswre74 <- rbind(nswre74_controls, nswre74_treated)

	# Difference in Means Treatment Effect: mean re78 of treated minus controls.
	treat.effect <-
	  mean(nswre74_treated[["re78"]]) - mean(nswre74_controls[["re78"]])
	library(foreign)
	data2 <- read.dta("peace.dta")

	# Removing observations with missing data: rows 47 and 19 are dropped by
	# position in one step (same result as dropping 47 first and then 19).
	data2 <- data2[-c(19, 47), ]

	# Logistic regression of pbs2s3 on the covariates listed in the formula.
	rg <- glm(
	  pbs2s3 ~ wartype + logcost + wardur + factnum + factnum2 + trnsfcap +
	    develop + exp + decade + treaty + untype4,
	  family = binomial(link = "logit"),
	  data = data2
	)
	rg$coefficients
	import numpy as np
	import matplotlib.pyplot as plt
	from scipy import stats
	%matplotlib inline

	'''
	Function definitions for the normal-inverse-gamma distribution. The parameters
	of the distribution, namely mu, lambda / nu, alpha, beta, are as defined here:

	https://en.wikipedia.org/wiki/Normal-inverse-gamma_distribution
	##Task 1

	# Run the sampler on the control-group data and summarise the posterior of p.
	stan_results_c = stan_model.sampling(data=eczema_data['control'])
	print(stan_results_c.stansummary(pars=['p'], probs=[0.025, 0.5, 0.975]))
	posterior_samples_c = stan_results_c.extract()
	print(
	"Posterior 95% confidence interval for p:",
	np.percentile(posterior_samples_c['p'], [2.5, 97.5]))

	# Plot the control-group samples extracted above. The previous code passed
	# `posterior_samples`, a name this snippet never assigns, so it either raised
	# NameError or plotted samples from some other fit.
	plt.hist(posterior_samples_c['p'], bins=50, density=True)