Skip to content

Instantly share code, notes, and snippets.

View CerebralMastication's full-sized avatar
☠️
brilliantly executing terrible plans

JD Long CerebralMastication

☠️
brilliantly executing terrible plans
View GitHub Profile
rm(list = ls())
nchooser <- function(n, r) {
if (n == r) {
#cat("called nchooser(n = r)\n")
return(1)
} else if (r == 0) {
#cat("called nchooser(r = 0)\n")
return(1)
} else
#cat("called nchooser(", n", " r ") \n")
# Benchmark: time a two-key GROUP BY aggregation run through sqldf on a data
# frame of `sampleSize` random rows.
library(sqldf) # stop right away if sqldf is missing, not later at the query

sampleSize <- 1e6

# I() stores the sampled letters as-is, so they are not converted to factors.
bigDF <- data.frame(
  letter1 = I(sample(letters, sampleSize, replace = TRUE)),
  letter2 = I(sample(letters, sampleSize, replace = TRUE)),
  b = rnorm(sampleSize),
  c = rnorm(sampleSize)
)

# Average b and c within every (letter1, letter2) pair and report the timing.
system.time(
  sqldf(
    "select letter1, letter2, avg(b) as b, avg(c) as c from bigDF group by letter1, letter2"
  )
)
# Monte Carlo estimate of pi.
#
# Draws `numDraws` points uniformly from the square [-r, r] x [-r, r] and
# measures the share that lands strictly inside the inscribed circle of
# radius `r`. That share approximates pi / 4, so it is multiplied by 4.
#
# Args:
#   numDraws: number of random points to draw; a single number >= 1.
#   r:        circle radius (half the side of the sampling square); a single
#             positive finite number. Defaults to 0.5, the value that used to
#             be hard-coded.
#
# Returns:
#   A single numeric value, the estimate of pi.
#
# Stops with an error on invalid arguments (previously zero draws silently
# produced NaN).
estimatePi <- function(numDraws, r = 0.5) {
  stopifnot(
    is.numeric(numDraws), length(numDraws) == 1L, !is.na(numDraws),
    numDraws >= 1,
    is.numeric(r), length(r) == 1L, is.finite(r), r > 0
  )
  # x is drawn before y so the random number stream is consumed in the same
  # order as before and seeded runs stay reproducible.
  x <- runif(numDraws, min = -r, max = r)
  y <- runif(numDraws, min = -r, max = r)
  # Comparing squared distances avoids one square root per point, and the
  # mean of a logical vector is the fraction of TRUE values.
  mean(x^2 + y^2 < r^2) * 4
}
# First let's set up a big table and a little table.
# These lines build the big one: `bigTableRows` rows holding two integer
# dimension keys sampled with replacement from 0..500 and one normally
# distributed fact column.
bigTableRows <- 1e6
numbers <- 0:500
dim1 <- sample(numbers, bigTableRows, replace = TRUE)
dim2 <- sample(numbers, bigTableRows, replace = TRUE)
fact1 <- rnorm(bigTableRows)
# TRUE/FALSE are spelled out because T and F are ordinary variables that can
# be reassigned.
bigTable <- data.frame(dim1, dim2, fact1, stringsAsFactors = FALSE)
#!/bin/bash
# Debian R upgrade: register the CRAN "lenny-cran" apt repository, refresh the
# package lists, and install r-base and r-base-dev from that repository.

cran_repo="deb http://streaming.stat.iastate.edu/CRAN/bin/linux/debian lenny-cran/"
sources_list="/etc/apt/sources.list"

# Append the repository line only when that exact line is not already there,
# so running this script again does not pile up duplicate entries.
if ! grep -qxF "$cran_repo" "$sources_list"; then
  echo "$cran_repo" | sudo tee -a "$sources_list"
fi

sudo apt-get update
sudo apt-get -t lenny-cran install --yes --force-yes r-base r-base-dev
#! /usr/bin/env Rscript
# Start an EC2 instance with the EC2 command-line tools, wait for it to boot,
# then list the instances and split every output line on tabs.

# start an instance
startup <- system("ec2-run-instances ami-b232d0db -k ec2ApiTools", intern = TRUE)
Sys.sleep(45) # rather random but 45 seconds is typically long enough to boot

# query running instances
instances <- system("ec2-describe-instances", intern = TRUE)

# One character vector of tab-separated fields per output line, named by the
# line it came from. This is the same named list sapply(instances, strsplit,
# "\t") produced, but without sapply's input-dependent return type.
instancesParsed <- setNames(strsplit(instances, "\t"), instances)
# Looking at some data with @neilkod: download the CSV and plot kernel density
# estimates of its X1 column.
myData <- read.csv("http://www.neilkodner.com/10kreads.csv")
# Density of X1 across all rows.
plot(density(myData$X1))
# That's a very big spike at the low values. Let's look at the tail by
# keeping only the rows where X1 is above a cutoff (first 3, then 5).
plot(density(subset(myData, X1 > 3)$X1))
plot(density(subset(myData, X1 > 5)$X1))
# Example of how to use a Gaussian copula to create random draws from
# normally distributed data.
# library() stops with an error when a package is missing; require() would
# only warn and let the script fail later at the first missing function.
library("reshape")
library("plyr")
library("QRMlib")
library("Matrix")

# make this reproducible
set.seed(2)
# Multivariate normal example using the MASS package.
# library() stops with an error if MASS is missing instead of only warning.
library(MASS)

# make this reproducible
set.seed(2)

# how many draws in our starting data set?
n <- 1e4
# Multivariate normal example using Cholesky decomposition.
# library() stops with an error if Matrix is missing instead of only warning.
library(Matrix)

# make this reproducible
set.seed(2)

# how many draws in our starting data set?
n <- 1e4