Skip to content

Instantly share code, notes, and snippets.

@benmarwick
benmarwick / grainAnalysis.R
Created December 28, 2011 00:16 — forked from Sharpie/grainAnalysis.R
R code related to graphical analysis of a sieved soil sample.
# Load the sieve data. check.names = FALSE keeps column headers that contain
# spaces (e.g. "Grain Diameter") verbatim; "--" entries are read as NA.
grainData <- read.csv("grainSize.csv", check.names = FALSE, na.strings = "--")

# Calculate derived sample values.
# Phi diameter: negative base-2 logarithm of the grain diameter.
grainData[["Phi Diameter"]] <- -log2(grainData[["Grain Diameter"]])

# Share of the total sample weight held by each row. Despite the column
# names, the values below are fractions in [0, 1], not percentages.
totalWeight <- sum(grainData[["Sample Weight"]])
grainData[["Percent Retained"]] <- grainData[["Sample Weight"]] / totalWeight

# Running total of the retained fraction, and its complement.
grainData[["Cumulative Percent"]] <- cumsum(grainData[["Percent Retained"]])
grainData[["Percent Finer"]] <- 1 - grainData[["Cumulative Percent"]]
@benmarwick
benmarwick / TAGS_Stats.R
Created January 22, 2012 04:27 — forked from psychemedia/TAGS_Stats.R
Tools for processing and visualising data from a TAGS archive
require(stringr)
require(RCurl)
require(ggplot2)
# Run a query against a Google spreadsheet and read the CSV response.
#
# Args:
#   key:   value placed in the `key` URL parameter (identifies the spreadsheet).
#   query: query text; it is URL-escaped with curlEscape() before being sent.
#   gid:   value placed in the `gid` URL parameter (default 0).
#
# Returns whatever read.csv() parses from the constructed URL.
gsqAPI <- function(key, query, gid = 0) {
  url <- paste0(
    "http://spreadsheets.google.com/tq?",
    "tqx=out:csv",
    "&tq=", curlEscape(query),
    "&key=", key,
    "&gid=", gid
  )
  read.csv(url)
}
# Remove the first "@" found in each element of x (later "@"s are kept).
trim <- function(x) {
  sub("@", "", x, fixed = TRUE)
}
twCounts=function(df){
print("Counting @'d users")
to.count=data.frame(table(df$to))
# @author: Michael J Bommarito II
# @date: Feb 20, 2011
# @email: michael.bommarito@gmail.com
# @packages: gridExtra, ggplot2
library(gridExtra)
library(ggplot2)
# NOTE(review): switches the session's working directory to a hard-coded
# absolute path; setwd() raises an error if that directory does not exist.
setwd('/data/workspace/blog/cn220/')
# Default cluster counts to try for a data set with `rows` observations:
# the sequence from 2 up to rows %/% 4, with the upper end never below 3.
ks.default <- function(rows) {
  upper <- max(3, rows %/% 4)
  seq(2, upper)
}
# Run k-means once for every cluster count in `ks` and stack the assignments.
#
# Args:
#   x:   data passed to kmeans(); nrow(x) is taken as the number of observations.
#   ks:  vector of cluster counts to try (defaults to ks.default(nrow(x))).
#   ...: further arguments forwarded to kmeans().
#
# Returns a data frame with one row per (observation, k) pair and columns
#   obs     - row index into x
#   i       - position of k within ks
#   k       - number of clusters requested
#   cluster - cluster assigned to the observation by kmeans()
# An empty `ks` yields an empty data frame.
many_kmeans <- function(x, ks = ks.default(nrow(x)), ...) {
  n_obs <- nrow(x)
  # Base-R replacement for plyr::ldply(): fit per k, then row-bind the pieces.
  fits <- lapply(seq_along(ks), function(i) {
    cl <- kmeans(x, centers = ks[i], ...)
    data.frame(
      obs = seq_len(n_obs), i = i, k = ks[i], cluster = cl$cluster,
      row.names = NULL
    )
  })
  if (length(fits) == 0) {
    return(data.frame())
  }
  out <- do.call(rbind, fits)
  # Keep plain 1..N row names regardless of any names carried by the pieces.
  rownames(out) <- NULL
  out
}
all_hclust <- function(x, ks = ks.default(nrow(x)), point.dist = "euclidean", cluster.dist = "ward") {
# Default cluster counts to try for a data set with `rows` observations:
# the sequence from 2 up to rows %/% 4, with the upper end never below 3.
ks.default <- function(rows) {
  upper <- max(3, rows %/% 4)
  seq(2, upper)
}
# Run k-means once for every cluster count in `ks` and stack the assignments.
#
# Args:
#   x:   data passed to kmeans(); nrow(x) is taken as the number of observations.
#   ks:  vector of cluster counts to try (defaults to ks.default(nrow(x))).
#   ...: further arguments forwarded to kmeans().
#
# Returns a data frame with one row per (observation, k) pair and columns
#   obs     - row index into x
#   i       - position of k within ks
#   k       - number of clusters requested
#   cluster - cluster assigned to the observation by kmeans()
# An empty `ks` yields an empty data frame.
many_kmeans <- function(x, ks = ks.default(nrow(x)), ...) {
  n_obs <- nrow(x)
  # Base-R replacement for plyr::ldply(): fit per k, then row-bind the pieces.
  fits <- lapply(seq_along(ks), function(i) {
    cl <- kmeans(x, centers = ks[i], ...)
    data.frame(
      obs = seq_len(n_obs), i = i, k = ks[i], cluster = cl$cluster,
      row.names = NULL
    )
  })
  if (length(fits) == 0) {
    return(data.frame())
  }
  out <- do.call(rbind, fits)
  # Keep plain 1..N row names regardless of any names carried by the pieces.
  rownames(out) <- NULL
  out
}
all_hclust <- function(x, ks = ks.default(nrow(x)), point.dist = "euclidean", cluster.dist = "ward") {
# Compute the average of `temp` for each `day` in `beaver2` by running an
# SQL query against the data frame through sqldf.
# NOTE(review): beaver2 is presumably the data set shipped in R's datasets
# package — confirm it is not a local object of the same name.
library(sqldf)
sqldf("SELECT
day
, avg(temp) as avg_temp
FROM beaver2
GROUP BY
day;")
# day avg_temp
read.tps = function(data) {
# Reads the .tps file format produced by TPSDIG
# (http://life.bio.sunysb.edu/morph/) into a single data frame
# USAGE: R> read.tps("filename.tps")
a = readLines(data) # so we can do some searching and indexing
LM = grep("LM", a) # find the line numbers for LM
ID.ind = grep("ID", a) # find the line numbers for ID
# and the ID values, SCALE values, and image names
ID = gsub("(ID=)(.*)", "\\2", grep("ID", a, value=T))
SCALE = gsub("(SCALE=)(.*)", "\\2", grep("SCALE", a, value=T))
#' An R function for creating simple D3 javascript directed network graphs.
#'
#' d3SimpleNetwork creates simple D3 javascript network graphs.
#'
#' @param data a data frame object with three columns. The first two are the names of the linked units. The third records an edge value. (Currently the third column doesn't affect the graph.)
#' @param Source character string naming the network source variable in the data frame. If \code{Source = NULL} then the first column of the data frame is treated as the source.
#' @param Target character string naming the network target variable in the data frame. If \code{Target = NULL} then the second column of the data frame is treated as the target.
#' @param height numeric height for the network graph's frame area.
#' @param width numeric width for the network graph's frame area.
#' @param file a character string of the file name to save the resulting graph. If a file name is given a standalone webpage is created, i.e. with a header and footer. If \code{file = NULL} then
# Get data from a published Google Sheet.
# library() stops with an error if RCurl is missing, whereas require() only
# returns FALSE and lets the script fail later at getURL().
library(RCurl)

# Session-wide RCurl options.
# NOTE(review): ssl.verifypeer = FALSE switches off TLS certificate
# verification for every RCurl request made after this line — confirm this is
# intended. Also, `capath` is given a bundle file here; a CA bundle file is
# normally supplied via `cainfo` — verify against the RCurl/libcurl docs.
options(RCurlOptions = list(capath = system.file("CurlSSL", "cacert.pem", package = "RCurl"), ssl.verifypeer = FALSE))

# In the Google spreadsheet: File -> Publish to web -> get link to publish
# to web -> get CSV file.
goog <- "https://docs.google.com/spreadsheet/pub?key=0As7CmPqGXTzldFRsVi1VZ2EyNXJ1ZEV5SG5GSExwRHc&single=true&gid=5&output=csv"

# Download the CSV text and parse it. read.csv(text = ...) creates and closes
# its own connection; the previous explicit textConnection() was never closed.
data <- read.csv(text = getURL(goog), stringsAsFactors = FALSE)
# extract just data for plotting: pH, SOM, CaCO3, MS-LF, MS-FD
plotting_data <- na.omit(data[,c('Sample.number',
# load required libraries
library(tm)
library(ggplot2)
library(lsa)
# 1. Prepare mock data
text <- c("transporting food by cars will cause global warming. so we should go local.",
"we should try to convince our parents to stop using cars because it will cause global warming.",
"some food, such as mongo, requires a warm weather to grow. so they have to be transported to canada.",
"a typical electronic circuit can be built with a battery, a bulb, and a switch.",