# ipak function: install and load multiple R packages.
# check to see if packages are installed. Install them if they are not, then load them into the R session.
ipak <- function(pkg){
  new.pkg <- pkg[!(pkg %in% installed.packages()[, "Package"])]
  if (length(new.pkg))
    install.packages(new.pkg, dependencies = TRUE)
  sapply(pkg, require, character.only = TRUE)
}
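A minimal usage sketch: pass ipak() a character vector of package names (the packages below are only an illustration).

packages <- c("ggplot2", "plyr", "reshape2")
ipak(packages)  # installs any that are missing, then loads all of them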
## Correlation matrix with p-values. See http://goo.gl/nahmV for documentation of this function
cor.prob <- function(X, dfr = nrow(X) - 2) {
  R <- cor(X, use = "pairwise.complete.obs")  # pairwise correlations, ignoring NAs
  above <- row(R) < col(R)                    # index of the upper triangle
  r2 <- R[above]^2
  Fstat <- r2 * dfr / (1 - r2)                # F statistic for each correlation
  R[above] <- 1 - pf(Fstat, 1, dfr)           # replace upper triangle with p-values
  R[row(R) == col(R)] <- NA                   # blank out the diagonal
  R
}
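A quick illustrative call on a built-in dataset (the mtcars columns are chosen arbitrarily); the lower triangle holds the correlations and the upper triangle the corresponding p-values.

cor.prob(mtcars[, c("mpg", "hp", "wt")])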
benmarwick / grainAnalysis.R
Created December 28, 2011 00:16 — forked from Sharpie/grainAnalysis.R
R code related to graphical analysis of a sieved soil sample.
# Load Data
grainData <- read.csv('grainSize.csv', check.names = FALSE, na.strings = '--')

# Calculate Derived Sample Values
grainData[['Phi Diameter']] <- -log2(grainData[['Grain Diameter']])
totalWeight <- sum(grainData[['Sample Weight']])
grainData[['Percent Retained']] <- grainData[['Sample Weight']] / totalWeight
grainData[['Cumulative Percent']] <- cumsum(grainData[['Percent Retained']])
grainData[['Percent Finer']] <- 1 - grainData[['Cumulative Percent']]
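The gist goes on to plot the distribution graphically; a sketch of one such grain-size curve, assuming the columns computed above and the ggplot2 and scales packages, might look like this (illustrative only, not the original plotting code):

library(ggplot2)
ggplot(grainData, aes(x = `Phi Diameter`, y = `Percent Finer`)) +
  geom_line() +
  geom_point() +
  scale_y_continuous(labels = scales::percent) +
  labs(x = "Phi diameter", y = "Percent finer by weight")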
benmarwick / TAGS_Stats.R
Created January 22, 2012 04:27 — forked from psychemedia/TAGS_Stats.R
Tools for processing and visualising data from a TAGS archive
require(stringr)
require(RCurl)
require(ggplot2)

# Query a public Google Spreadsheet via the visualization API and read the result as a CSV data frame
gsqAPI <- function(key, query, gid = 0) {
  read.csv(paste(sep = "", 'http://spreadsheets.google.com/tq?', 'tqx=out:csv',
                 '&tq=', curlEscape(query), '&key=', key, '&gid=', gid))
}
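For reference, gsqAPI() takes a spreadsheet key and a visualization-API query string; the values below are placeholders rather than a real spreadsheet.

tags <- gsqAPI(key = "SPREADSHEET_KEY", query = "select *")
head(tags)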
# Remove the leading "@" from screen names
trim <- function(x) sub('@', '', x)

twCounts <- function(df){
  print("Counting @'d users")
  to.count <- data.frame(table(df$to))
# @author: Michael J Bommarito II
# @date: Feb 20, 2011
# @email: michael.bommarito@gmail.com
# @packages: gridExtra, ggplot2
library(gridExtra)
library(ggplot2)
library(plyr)   # needed for ldply() in many_kmeans() below
setwd('/data/workspace/blog/cn220/')
ks.default <- function(rows) seq(2, max(3, rows %/% 4))

many_kmeans <- function(x, ks = ks.default(nrow(x)), ...) {
  ldply(seq_along(ks), function(i) {
    cl <- kmeans(x, centers = ks[i], ...)
    data.frame(obs = seq_len(nrow(x)), i = i, k = ks[i], cluster = cl$cluster)
  })
}
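many_kmeans() runs kmeans() over a range of cluster counts and stacks the assignments into one data frame; a quick illustrative call on a built-in dataset (iris is used purely as an example):

clusterings <- many_kmeans(iris[, 1:4], ks = 2:5)
head(clusterings)  # one row per observation per value of k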
all_hclust <- function(x, ks = ks.default(nrow(x)), point.dist = "euclidean", cluster.dist = "ward") {
library(sqldf)

# Average beaver body temperature per day from the built-in beaver2 dataset
sqldf("SELECT day
            , avg(temp) AS avg_temp
       FROM beaver2
       GROUP BY day;")
# day avg_temp
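The same per-day summary can be computed without SQL; a base-R equivalent for comparison:

aggregate(temp ~ day, data = beaver2, FUN = mean)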
read.tps <- function(data) {
  # Reads the .tps file format produced by TPSDIG
  # (http://life.bio.sunysb.edu/morph/) into a single data frame
  # USAGE: R> read.tps("filename.tps")
  a <- readLines(data)      # read the raw lines so we can search and index them
  LM <- grep("LM", a)       # find the line numbers for LM
  ID.ind <- grep("ID", a)   # find the line numbers for ID
  # and the ID values, SCALE values, and image names
  ID <- gsub("(ID=)(.*)", "\\2", grep("ID", a, value = TRUE))
  SCALE <- gsub("(SCALE=)(.*)", "\\2", grep("SCALE", a, value = TRUE))