Skip to content

Instantly share code, notes, and snippets.

> library(MASS) # load the MASS package
> height.response = na.omit(survey$Height)
> n = length(height.response)
> s = sd(height.response) # sample standard deviation
> SE = s/sqrt(n); SE # standard error estimate
[1] 0.6811677
>
> print(paste("Standard Error:", SE))
[1] "Standard Error: 0.68116773214787"
>
> system.time({ caret.model <- train(carat ~ depth + table + price + x + y + z, data=diamonds, distribution="gaussian", method="gbm", trControl = trainControl(method="none", number=1), tuneLength=1, verbose=FALSE)})
user system elapsed
1.108 0.060 1.188
> system.time({ model <- gbm(carat ~ depth + table + price + x + y + z, data=diamonds, distribution="gaussian")})
user system elapsed
0.818 0.026 0.856
> install.packages("dplyr")
also installing the dependency ‘DBI’
trying URL 'https://cran.rstudio.com/bin/macosx/mavericks/contrib/3.3/DBI_0.5-1.tgz'
Content type 'application/x-gzip' length 353669 bytes (345 KB)
==================================================
downloaded 345 KB
trying URL 'https://cran.rstudio.com/bin/macosx/mavericks/contrib/3.3/dplyr_0.5.0.tgz'
Content type 'application/x-gzip' length 4841443 bytes (4.6 MB)
@earino
earino / foo
Created October 21, 2016 14:38
> class(hash_table)
[1] "hash"
attr(,"package")
[1] "hash"
> str(hash_table)
Formal class 'hash' [package "hash"] with 1 slot
..@ .xData:<environment: 0x24f72ec8>
> object_size(hash_table)
type: 17
Error: Unimplemented type
@earino
earino / evil_library.R
Created August 23, 2016 18:59
Replace the library() function so it logs
# Keep a handle on the stock library() exactly once: if this environment
# already holds a saved copy, leave it alone so re-running the script never
# overwrites it.
if (!exists("old_library", inherits = FALSE)) {
  old_library <- library
}
logging_library <- function(...) {
cat(as.character(substitute(...)),
sep="\n",
file="library_log.txt",
append=TRUE)
@earino
earino / foo.sql
Created September 17, 2015 19:55
merge example
MERGE INTO Inventory AS I
USING Changes AS C
ON I.Part = C.Part
WHEN MATCHED AND
I.Action = 'Mod'
THEN UPDATE
SET Qty = I.Qty + C.Qty
WHEN NOT MATCHED AND
I.Action = 'New'
THEN INSERT (Part, Qty)
@earino
earino / plot.R
Created July 6, 2015 16:53
visualization of cs100.1x participation
library(ggplot2)
library(gridExtra)
cs_100.1x <- data.frame(state=c("Enrolled",
"Active",
"Completed a Quiz",
"Installed the VM",
"Completed Lab 1",
"Completed Lab 2",
"Completed Lab 3",
@earino
earino / animate.R
Created June 21, 2015 01:23
Animate Santa Monica City Data
library(dplyr)
library(magrittr)
library(ggmap)
library(readr)
library(animation)
library(lubridate)
# Load the parking records with readr's read_csv(); the path is relative, so
# the script has to be run from the directory that contains csvs/.
parking <- read_csv("csvs/parking.csv")
parking %<>% mutate(wday=wday(Date.Time),
@earino
earino / plot.R
Last active August 29, 2015 14:19
plot.R
p <- ggplot(data.frame(x=c(2,4,8,16), y=c(1, 1.84, 3.7, 7.22)), aes(x=x, y=y)) +
ggtitle("Scaling of Spark Decision Trees") +
xlab("Machines") +
ylab("Speedup") +
geom_line(aes(color="actual")) +
geom_point() +
geom_text(aes(label=y), vjust=.5, hjust=-.15) +
geom_line(aes(y=2*y, color="implied")) +
@earino
earino / foo.R
Created April 15, 2015 20:21
expanded RF resamples
library(randomForest)
# Seed the RNG so the simulated data below is reproducible from run to run.
# R is case-sensitive: the function is set.seed(); `Set.seed` does not exist.
set.seed(12345)
genr_data <- function(n,p) {
X <- matrix(rnorm(n*p),n,p)
y <- as.factor(apply(X,1, function(x)
ifelse(sum(x^2)>qchisq(0.5,p),"+","-")))
## Hastie etal 10.2