Skip to content

Instantly share code, notes, and snippets.

View sdaza's full-sized avatar

Sebastian Daza sdaza

View GitHub Profile
# author: sebastian daza
# web: sdaza.com
# libraries
library(data.table)
library(xtable)
# create descriptive tables
@sdaza
sdaza / instrumental-variable-simulation.R
Last active February 16, 2021 09:54
Intro to instrumental variables
# intro to instrumental variables
# author: sebastian daza
# load libraries
library(simsem)
library(data.table)
library(texreg)
library(ivpack)
library(lme4)
@sdaza
sdaza / sqlToCSV.java
Last active November 1, 2020 15:28
From SQL to CSV
public void sqlToCSV (ResultSet rs, String filename) {
try {
FileWriter fw = new FileWriter(filename + ".csv");
int cols = rs.getMetaData().getColumnCount();
for(int i = 1; i <= cols; i ++){
fw.append(rs.getMetaData().getColumnLabel(i));
if(i < cols) fw.append(',');
else fw.append('\n');
}
while (rs.next()) {
# stratified sampling example and weighted means
set.seed(1)
library(data.table)
dat = data.table(
ID = 1:100,
A = sample(c("AA", "BB", "CC", "DD", "EE"), 100, replace = TRUE),
B = rnorm(100), C = abs(round(rnorm(100), digits=1)),
D = sample(c("CA", "NY", "TX"), 100, replace = TRUE),
@sdaza
sdaza / testing.py
Last active October 28, 2017 02:19
Spark
import re
from pyspark.sql import Row
from pyspark.ml.classification import LogisticRegression
from pyspark.ml.feature import HashingTF, Tokenizer
from pyspark.sql.types import DoubleType
from operator import add
from lxml import etree
# from pyspark.sql import SQLContext
# sqlContext = SQLContext(sc)
@sdaza
sdaza / miceMultilevelExample.R
Last active May 22, 2017 13:55
Multilevel imputation using MICE
# load data
load("ex.Rdata")
###################################
# example only using mice package
###################################
library(mice)
imp <- mice(ex, maxit = 0)
@sdaza
sdaza / samplesize.R
Last active December 30, 2015 22:49
Function for sample size and error
# SAMPLING ERROR
serr <- function(n, deff=1, rr=1, N=NULL, cl=.95, p=0.5, relative=FALSE) {
# validation
if (sum(n==0)>=1) {
stop("n vector contains 0 values")
}
@sdaza
sdaza / ape.R
Last active December 30, 2015 11:59
ape
mycoefplot <- function(coef) {
# reverse coefficients
coef <- apply(coef, 2, rev)
nvar <- length(rownames(coef))
# dotplot
dotplot(1:nvar~coef[,1],
xlim=c(min(coef[,1]-2*coef[,2])-.02, max(coef[,1]+2*coef[,2])+.02),
xlab='Average predictive comparison', ylab=" ",
@sdaza
sdaza / APCex.R
Last active December 29, 2015 08:19
Average predictive comparisons
# AVERAGE PREDICTIVE COMPARISON
# loading libraries
library(arm)
# data
dat <- read.csv("http://dl.getdropbox.com/u/18116710/example.csv")
# id: individual identifier
# wave: 1 to 7
@sdaza
sdaza / lookvar.R
Last active December 11, 2015 21:18
lookvar
# dat has to be a data.frame or data.table
# varnames should be a vector with the variable names you are looking for, e.g., c("hc", "hv"), or regular expressions
# the result would be a vector with variable names
lookvar <- function(dat, varnames) {
n <- names(dat)
nn <- list()
for (i in 1:length(varnames)) {
nn[[i]] <- grep(varnames[i],n)