Ricardo Pietrobon rpietro

## test_sqlite_sqldf
# Build the benchmark dataset (about 1.8 GB once written as CSV): 4e7 rows
# with one random-letter column and two normally distributed columns.
# NOTE(review): setwd() changes the session's working directory; it is kept
# because the relative "bigdf.csv" paths below depend on it.
setwd("~/Desktop")
bigdf <- data.frame(
  dim = sample(letters, size = 4e7, replace = TRUE),
  fact1 = rnorm(4e7),
  fact2 = rnorm(4e7, mean = 20, sd = 50)
)
# TRUE/FALSE are spelled out: T and F are ordinary variables that can be
# reassigned, whereas TRUE and FALSE are reserved words.
write.csv(bigdf, "bigdf.csv", quote = FALSE)

# Read the CSV back through sqldf and measure how long it takes.
# setwd() is repeated so this half can be run on its own in a fresh session.
setwd("~/Desktop")
library(sqldf)
f <- file("bigdf.csv")
# dbname = tempfile() hands sqldf a temporary file path for its database.
system.time(
  bigdf <- sqldf(
    "select * from f",
    dbname = tempfile(),
    file.format = list(header = TRUE, row.names = FALSE)
  )
)

## twang_examples.r
#this documented script is based on the article "Toolkit for Weighting and Analysis of Nonequivalent Groups: A tutorial for the twang package" http://goo.gl/2xYBX as well as the package documentation at http://goo.gl/BWaLH

library(twang)
library(lattice)
set.seed(1)
data(lalonde)
ps.lalonde <- ps(treat ~ age + educ + black + hispan + nodegree + married + re74 + re75, data = lalonde, n.trees=5000, #n.trees is the maximum number of iterations that gbm will run
                 interaction.depth=2, #The maximum depth of variable interactions. 1 implies an additive model, 2 implies a model with up to 2-way interactions, etc.
                 shrinkage=0.01, #ps() will issue a warning if the estimated optimal number of iterations is too close to the bound selected in this argument because it indicates that balance may improve if more complex models (i.e., those with more trees) are considered. The user should increase n.trees or decrease shrinkage if this warning appears.
                 perm.test.iters=0, #spec

## r_sqldf_tutorial.r
#Load sqldf package, which will load all others necessary
#By default, SQLite runs in the background to do the processing; you could use other DB engines if you wanted

library("sqldf")

#Import employees data

employees  <- structure(list(id = 1:20, lastname = structure(c(5L, 14L, 13L, 15L, 6L, 16L, 9L, 1L, 3L, 12L, 10L, 8L, 12L, 3L, 11L, 13L, 10L, 7L, 2L, 4L), .Label = c("a", "b", "c", "f", "g", "h", "i", "j", "n", "o", "p", "r", "s", "t", "w", "z"), class = "factor"), firstname = structure(c(12L, 6L, 5L, 12L, 11L, 15L, 9L, 18L, 17L, 7L, 8L, 10L, 4L, 14L, 19L, 16L, 1L, 13L, 2L, 3L), .Label = c("chris", "dima", "drew", "eric", "hila", "jason", "jeremy", "joe", "jon", "jowanza", "lashanda", "matt", "michael", "michelle", "randy", "rudi", "solon", "stewart", "tim"), class = "factor"), gender = structure(c(2L, 2L, 1L, 2L, 1L, 2L, 2L, 2L, 1L, 2L, 2L, 2L, 2L, 1L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("f", "m"), class = "factor")), .Names = c("id", "lastname", "firstname", "gender"), class = "data.frame", row.names = c(NA, -20

## dput_example.R
# Suppose your data lives in an object called BOD (BOD is in fact a dataset
# that ships with R). Call dput() on that object:
dput(BOD)
# dput() prints an R expression that represents the dataset, e.g.:
#   structure(list(Time = c(1, 2, 3, 4, 5, 7), demand = c(8.3, 10.3, 19, 16, 15.6, 19.8)), .Names = c("Time", "demand"), row.names = c(NA, -6L), class = "data.frame", reference = "A1.4, p. 270")
# When you ask for help, begin the reproducible example by assigning that
# printed expression to an object:
BOD <- structure(
  list(
    Time = c(1, 2, 3, 4, 5, 7),
    demand = c(8.3, 10.3, 19, 16, 15.6, 19.8)
  ),
  .Names = c("Time", "demand"),
  row.names = c(NA, -6L),
  class = "data.frame",
  reference = "A1.4, p. 270"
)
# After that, add the code you ran and the error message it produced.

## webScrape.r
# Read the HTML tables from the ISM report page and keep the first one as a
# data frame whose first column is named "Year".
# NOTE(review): the next line deletes every object in the calling workspace;
# it is kept to preserve the script's behavior, but consider removing it.
rm(list = ls(all = TRUE)) # CLEAR WORKSPACE
library(quantmod)

# Scrape data from the website
library(XML)
rawPMI <- readHTMLTable("http://www.ism.ws/ISMReport/content.cfm?ItemNumber=10752")
rawPMI
# Stop with a clear message when the page yielded no tables; otherwise
# rawPMI[[1]] fails with an opaque "subscript out of bounds" error.
if (length(rawPMI) == 0) {
  stop("No HTML tables were found on the ISM report page", call. = FALSE)
}
PMI <- data.frame(rawPMI[[1]])
PMI
names(PMI)[1] <- "Year"

## catR_example.r
# Call catR's randomCAT() with trueTheta = 1, then print and plot the result.
library(catR)

# Item bank handed to randomCAT() below.
Bank <- createItemBank(
  items = 500,
  model = "3PL",
  thMin = -4,
  thMax = 4,
  step = 0.05
)

# Option lists for the start, test, stop and final arguments of randomCAT().
Start <- list(nrItems = 1, theta = 0, startSelect = "MFI")
Test <- list(method = "WL", itemSelect = "MFI")
Stop <- list(rule = "classification", thr = 2, alpha = 0.05)
Final <- list(method = "WL", alpha = 0.05)

res <- randomCAT(
  trueTheta = 1,
  itemBank = Bank,
  start = Start,
  test = Test,
  stop = Stop,
  final = Final
)
res
# classThr = 2 matches the thr value used in the Stop list above.
plot(res, ci = TRUE, trueTh = TRUE, classThr = 2)

## README.md

      
              2 files
            
          
              0 forks
            
          
              0 comments
            
          
              0 stars
            
          
                rpietro
                / README.md
            
            
              Created
              May 13, 2013 20:30
                — forked from jbroadway/Slimdown.md
            
          
    Slimdown

A very basic regex-based Markdown parser. Supports the
following elements (and can be extended via Slimdown::add_rule()):

Headers
Links
Bold
Emphasis
Deletions


## perc.r
```{r echo=FALSE, error=TRUE}
# Percentage breakdown of `var`: tabulate the values with table() and express
# each count as a percentage of the total count.
perc <- function(var) {
  counts <- table(var)
  100 * counts / sum(counts)
}

```

## bifactor.r
# This code comes straight from three posts by Joel Cadwell on his outstanding blog Engaging Market Research at http://joelcadwell.blogspot.com/ . Specifically, the posts are http://goo.gl/Rcsn4 , http://goo.gl/dDPnV and http://goo.gl/8C9Aj

library(psych)
library(lavaan)
library(mvtnorm)
library(qgraph)

# The goal is to show all the R code that you would need
# to reproduce everything that has been reported.
# We will use the mvtnorm package in order to randomly

## sublime_text_useful_shortcuts.mdown

      
              1 file
            
          
              0 forks
            
          
              0 comments
            
          
              0 stars
            
          
                rpietro
                / sublime_text_useful_shortcuts.mdown
            
            
              Last active
              December 19, 2015 18:09
                — forked from nuxlli/sublime_text_2_useful_shortcuts.md
            
          
    Sublime Text 2 - Useful Shortcuts

Tested in Mac OS X: super == command
Open/Goto


super+t: go to file
super+ctrl+p: go to project
super+r: go to methods
	# Build the benchmark dataset (about 1.8 GB once written as CSV): 4e7 rows
	# with one random-letter column and two normally distributed columns.
	# NOTE(review): setwd() changes the session's working directory; it is kept
	# because the relative "bigdf.csv" paths below depend on it.
	setwd("~/Desktop")
	bigdf <- data.frame(
	  dim = sample(letters, size = 4e7, replace = TRUE),
	  fact1 = rnorm(4e7),
	  fact2 = rnorm(4e7, mean = 20, sd = 50)
	)
	# TRUE/FALSE are spelled out: T and F are ordinary variables that can be
	# reassigned, whereas TRUE and FALSE are reserved words.
	write.csv(bigdf, "bigdf.csv", quote = FALSE)

	# Read the CSV back through sqldf and measure how long it takes.
	# setwd() is repeated so this half can be run on its own in a fresh session.
	setwd("~/Desktop")
	library(sqldf)
	f <- file("bigdf.csv")
	# dbname = tempfile() hands sqldf a temporary file path for its database.
	system.time(
	  bigdf <- sqldf(
	    "select * from f",
	    dbname = tempfile(),
	    file.format = list(header = TRUE, row.names = FALSE)
	  )
	)
	#this documented script is based on the article "Toolkit for Weighting and Analysis of Nonequivalent Groups: A tutorial for the twang package" http://goo.gl/2xYBX as well as the package documentation at http://goo.gl/BWaLH

	library(twang)
	library(lattice)
	set.seed(1)
	data(lalonde)
	ps.lalonde <- ps(treat ~ age + educ + black + hispan + nodegree + married + re74 + re75, data = lalonde, n.trees=5000, #n.trees is the maximum number of iterations that gbm will run
	interaction.depth=2, #The maximum depth of variable interactions. 1 implies an additive model, 2 implies a model with up to 2-way interactions, etc.
	shrinkage=0.01, #ps() will issue a warning if the estimated optimal number of iterations is too close to the bound selected in this argument because it indicates that balance may improve if more complex models (i.e., those with more trees) are considered. The user should increase n.trees or decrease shrinkage if this warning appears.
	perm.test.iters=0, #spec
	#Load sqldf package, which will load all others necessary
	#By default, SQLite runs in the background to do the processing; you could use other DB engines if you wanted

	library("sqldf")

	#Import employees data

	employees <- structure(list(id = 1:20, lastname = structure(c(5L, 14L, 13L, 15L, 6L, 16L, 9L, 1L, 3L, 12L, 10L, 8L, 12L, 3L, 11L, 13L, 10L, 7L, 2L, 4L), .Label = c("a", "b", "c", "f", "g", "h", "i", "j", "n", "o", "p", "r", "s", "t", "w", "z"), class = "factor"), firstname = structure(c(12L, 6L, 5L, 12L, 11L, 15L, 9L, 18L, 17L, 7L, 8L, 10L, 4L, 14L, 19L, 16L, 1L, 13L, 2L, 3L), .Label = c("chris", "dima", "drew", "eric", "hila", "jason", "jeremy", "joe", "jon", "jowanza", "lashanda", "matt", "michael", "michelle", "randy", "rudi", "solon", "stewart", "tim"), class = "factor"), gender = structure(c(2L, 2L, 1L, 2L, 1L, 2L, 2L, 2L, 1L, 2L, 2L, 2L, 2L, 1L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("f", "m"), class = "factor")), .Names = c("id", "lastname", "firstname", "gender"), class = "data.frame", row.names = c(NA, -20
	# Suppose your data lives in an object called BOD (BOD is in fact a dataset
	# that ships with R). Call dput() on that object:
	dput(BOD)
	# dput() prints an R expression that represents the dataset, e.g.:
	#   structure(list(Time = c(1, 2, 3, 4, 5, 7), demand = c(8.3, 10.3, 19, 16, 15.6, 19.8)), .Names = c("Time", "demand"), row.names = c(NA, -6L), class = "data.frame", reference = "A1.4, p. 270")
	# When you ask for help, begin the reproducible example by assigning that
	# printed expression to an object:
	BOD <- structure(
	  list(
	    Time = c(1, 2, 3, 4, 5, 7),
	    demand = c(8.3, 10.3, 19, 16, 15.6, 19.8)
	  ),
	  .Names = c("Time", "demand"),
	  row.names = c(NA, -6L),
	  class = "data.frame",
	  reference = "A1.4, p. 270"
	)
	# After that, add the code you ran and the error message it produced.
	# Read the HTML tables from the ISM report page and keep the first one as a
	# data frame whose first column is named "Year".
	# NOTE(review): the next line deletes every object in the calling workspace;
	# it is kept to preserve the script's behavior, but consider removing it.
	rm(list = ls(all = TRUE)) # CLEAR WORKSPACE
	library(quantmod)

	# Scrape data from the website
	library(XML)
	rawPMI <- readHTMLTable("http://www.ism.ws/ISMReport/content.cfm?ItemNumber=10752")
	rawPMI
	# Stop with a clear message when the page yielded no tables; otherwise
	# rawPMI[[1]] fails with an opaque "subscript out of bounds" error.
	if (length(rawPMI) == 0) {
	  stop("No HTML tables were found on the ISM report page", call. = FALSE)
	}
	PMI <- data.frame(rawPMI[[1]])
	PMI
	names(PMI)[1] <- "Year"
	# Call catR's randomCAT() with trueTheta = 1, then print and plot the result.
	library(catR)

	# Item bank handed to randomCAT() below.
	Bank <- createItemBank(
	  items = 500,
	  model = "3PL",
	  thMin = -4,
	  thMax = 4,
	  step = 0.05
	)

	# Option lists for the start, test, stop and final arguments of randomCAT().
	Start <- list(nrItems = 1, theta = 0, startSelect = "MFI")
	Test <- list(method = "WL", itemSelect = "MFI")
	Stop <- list(rule = "classification", thr = 2, alpha = 0.05)
	Final <- list(method = "WL", alpha = 0.05)

	res <- randomCAT(
	  trueTheta = 1,
	  itemBank = Bank,
	  start = Start,
	  test = Test,
	  stop = Stop,
	  final = Final
	)
	res
	# classThr = 2 matches the thr value used in the Stop list above.
	plot(res, ci = TRUE, trueTh = TRUE, classThr = 2)
	```{r echo=FALSE, error=TRUE}
	# Percentage breakdown of `var`: tabulate the values with table() and express
	# each count as a percentage of the total count.
	perc <- function(var) {
	  counts <- table(var)
	  100 * counts / sum(counts)
	}

	```
	# This code comes straight from three posts by Joel Cadwell on his outstanding blog Engaging Market Research at http://joelcadwell.blogspot.com/ . Specifically, the posts are http://goo.gl/Rcsn4 , http://goo.gl/dDPnV and http://goo.gl/8C9Aj

	library(psych)
	library(lavaan)
	library(mvtnorm)
	library(qgraph)

	# The goal is to show all the R code that you would need
	# to reproduce everything that has been reported.
	# We will use the mvtnorm package in order to randomly