Sebastian Daza sdaza

## create_descriptive_tables.R
# author: sebastian daza
# web: sdaza.com


# libraries
library(data.table)
library(xtable)


# create descriptive tables

## instrumental-variable-simulation.R
# intro to instrumental variables
# author: sebastian daza


# load libraries
library(simsem)
library(data.table)
library(texreg)
library(ivpack)
library(lme4)

## sqlToCSV.java
public void sqlToCSV (ResultSet rs, String filename) {
    try {
        FileWriter fw = new FileWriter(filename + ".csv");
        int cols = rs.getMetaData().getColumnCount();
        for(int i = 1; i <= cols; i ++){
            fw.append(rs.getMetaData().getColumnLabel(i));
            if(i < cols) fw.append(',');
            else fw.append('\n');
        }
        while (rs.next()) {

## sampling_example.R
# stratified sampling example and weighted means

set.seed(1)
library(data.table)

dat =  data.table(
  ID = 1:100,
  A = sample(c("AA", "BB", "CC", "DD", "EE"), 100, replace = TRUE),
  B = rnorm(100), C = abs(round(rnorm(100), digits=1)),
  D = sample(c("CA", "NY", "TX"), 100, replace = TRUE),

## testing.py
import re
from pyspark.sql import Row
from pyspark.ml.classification import LogisticRegression
from pyspark.ml.feature import HashingTF, Tokenizer
from pyspark.sql.types import DoubleType
from operator import add
from lxml import etree
# from pyspark.sql import SQLContext
# sqlContext = SQLContext(sc)

## miceMultilevelExample.R
# load data
# NOTE(review): `ex`, used below, is not created in this snippet; presumably
# it is one of the objects restored from ex.Rdata -- confirm.
load("ex.Rdata")

###################################
# example only using mice package
###################################

library(mice)

# call mice() on `ex` with maxit = 0 and keep the result in `imp`
# NOTE(review): looks like a zero-iteration dry run used to obtain the default
# setup before customizing it -- confirm against the mice documentation.
imp <- mice(ex, maxit = 0)

## samplesize.R

# SAMPLING ERROR

serr <- function(n, deff=1, rr=1, N=NULL, cl=.95, p=0.5, relative=FALSE)  {

# validation

if (sum(n==0)>=1) {
  stop("n vector contains 0 values")
}

## ape.R
mycoefplot <- function(coef) {

# reverse coefficients
coef <- apply(coef, 2, rev)
nvar <- length(rownames(coef))

# dotplot
dotplot(1:nvar~coef[,1],
	xlim=c(min(coef[,1]-2*coef[,2])-.02, max(coef[,1]+2*coef[,2])+.02),
	xlab='Average predictive comparison', ylab=" ",

## APCex.R
# AVERAGE PREDICTIVE COMPARISON

# loading libraries
library(arm)

# data
# the example data set is read directly from a remote URL, so this line
# needs network access and fails if the link is no longer available
dat <- read.csv("http://dl.getdropbox.com/u/18116710/example.csv")

# variables described by the original author:
# id: individual identifier
# wave: 1 to 7

## lookvar.R
# dat has to be a data.frame or data.table
# varnames should be a vector with the variable names you are looking for, e.g., c("hc", "hv"), or regular expressions
# the result is a vector of variable names

lookvar  <- function(dat, varnames) {
  n  <- names(dat)

  nn  <- list()
    for (i in 1:length(varnames)) {
      nn[[i]]  <- grep(varnames[i],n)
	# author: sebastian daza
	# web: sdaza.com


	# libraries
	library(data.table)
	library(xtable)


	# create descriptive tables
	# intro to instrumental variables
	# author: sebastian daza


	# load libraries
	library(simsem)
	library(data.table)
	library(texreg)
	library(ivpack)
	library(lme4)
	public void sqlToCSV (ResultSet rs, String filename) {
	try {
	FileWriter fw = new FileWriter(filename + ".csv");
	int cols = rs.getMetaData().getColumnCount();
	for(int i = 1; i <= cols; i ++){
	fw.append(rs.getMetaData().getColumnLabel(i));
	if(i < cols) fw.append(',');
	else fw.append('\n');
	}
	while (rs.next()) {
	# stratified sampling example and weighted means

	set.seed(1)
	library(data.table)

	dat = data.table(
	ID = 1:100,
	A = sample(c("AA", "BB", "CC", "DD", "EE"), 100, replace = TRUE),
	B = rnorm(100), C = abs(round(rnorm(100), digits=1)),
	D = sample(c("CA", "NY", "TX"), 100, replace = TRUE),
	import re
	from pyspark.sql import Row
	from pyspark.ml.classification import LogisticRegression
	from pyspark.ml.feature import HashingTF, Tokenizer
	from pyspark.sql.types import DoubleType
	from operator import add
	from lxml import etree
	# from pyspark.sql import SQLContext
	# sqlContext = SQLContext(sc)
	# load data
	load("ex.Rdata")

	###################################
	# example only using mice package
	###################################

	library(mice)

	imp <- mice(ex, maxit = 0)

	# SAMPLING ERROR

	serr <- function(n, deff=1, rr=1, N=NULL, cl=.95, p=0.5, relative=FALSE) {

	# validation

	if (sum(n==0)>=1) {
	stop("n vector contains 0 values")
	}
	mycoefplot <- function(coef) {

	# reverse coefficients
	coef <- apply(coef, 2, rev)
	nvar <- length(rownames(coef))

	# dotplot
	dotplot(1:nvar~coef[,1],
	xlim=c(min(coef[,1]-2*coef[,2])-.02, max(coef[,1]+2*coef[,2])+.02),
	xlab='Average predictive comparison', ylab=" ",
	# AVERAGE PREDICTIVE COMPARISON

	# loading libraries
	library(arm)

	# data
	dat <- read.csv("http://dl.getdropbox.com/u/18116710/example.csv")

	# id: individual identifier
	# wave: 1 to 7
	# dat has to be a data.frame or data.table
	# varnames should be a vector with the variable names you are looking for, e.g., c("hc", "hv"), or regular expressions
	# the result is a vector of variable names

	lookvar <- function(dat, varnames) {
	n <- names(dat)

	nn <- list()
	for (i in 1:length(varnames)) {
	nn[[i]] <- grep(varnames[i],n)