Skip to content

Instantly share code, notes, and snippets.

# Better alternatives to 3-D
# Here are some examples of what we don't want to do:
# http://stackoverflow.com/questions/12765497/create-a-ribbon-plot
doInstall <- TRUE # Change to FALSE if you don't want packages installed.
toInstall <- c("ggplot2")
if(doInstall){install.packages(toInstall, repos = "http://cran.r-project.org")}
lapply(toInstall, library, character.only = TRUE)
# Air passenger data. ts converted to long matrix:
# load required libraries
library(tm)
library(ggplot2)
library(lsa)
# 1. Prepare mock data
text <- c("transporting food by cars will cause global warming. so we should go local.",
"we should try to convince our parents to stop using cars because it will cause global warming.",
"some food, such as mongo, requires a warm weather to grow. so they have to be transported to canada.",
"a typical electronic circuit can be built with a battery, a bulb, and a switch.",
#get data from google sheet
# connect to google sheet
require(RCurl)
options(RCurlOptions = list(capath = system.file("CurlSSL", "cacert.pem", package = "RCurl"), ssl.verifypeer = FALSE))
#in google spreadsheet, go to file-> publish to web -> get link to publish to web -> get csv file
goog <- "https://docs.google.com/spreadsheet/pub?key=0As7CmPqGXTzldFRsVi1VZ2EyNXJ1ZEV5SG5GSExwRHc&single=true&gid=5&output=csv"
data <- read.csv(textConnection(getURL(goog)), stringsAsFactors = FALSE)
# extract just data for plotting: pH, SOM, CaCO3, MS-LF, MS-FD
plotting_data <- na.omit(data[,c('Sample.number',
#' An R function for creating simple D3 javascript directed network graphs.
#'
#' d3SimpleNetwork creates simple D3 javascript network graphs.
#'
#' @param data a data frame object with three columns. The first two are the names of the linked units. The third records an edge value. (Currently the third column doesn't affect the graph.)
#' @param Source character string naming the network source variable in the data frame. If \code{Source = NULL} then the first column of the data frame is treated as the source.
#' @param Target character string naming the network target variable in the data frame. If \code{Target = NULL} then the second column of the data frame is treated as the target.
#' @param height numeric height for the network graph's frame area.
#' @param width numeric width for the network graph's frame area.
#' @param file a character string of the file name to save the resulting graph. If a file name is given a standalone webpage is created, i.e. with a header and footer. If \code{file = NULL} then
read.tps = function(data) {
# Reads the .tps file format produced by TPSDIG
# (http://life.bio.sunysb.edu/morph/ into a single data frame
# USAGE: R> read.tps("filename.tps")
a = readLines(data) # so we can do some searching and indexing
LM = grep("LM", a) # find the line numbers for LM
ID.ind = grep("ID", a) # find the line numbers for ID
# and the ID values, SCALE values, and image names
ID = gsub("(ID=)(.*)", "\\2", grep("ID", a, value=T))
SCALE = gsub("(SCALE=)(.*)", "\\2", grep("SCALE", a, value=T))
library(sqldf)
sqldf("SELECT
day
, avg(temp) as avg_temp
FROM beaver2
GROUP BY
day;")
# day avg_temp
ks.default <- function(rows) seq(2, max(3, rows %/% 4))
many_kmeans <- function(x, ks = ks.default(nrow(x)), ...) {
ldply(seq_along(ks), function(i) {
cl <- kmeans(x, centers = ks[i], ...)
data.frame(obs = seq_len(nrow(x)), i = i, k = ks[i], cluster = cl$cluster)
})
}
all_hclust <- function(x, ks = ks.default(nrow(x)), point.dist = "euclidean", cluster.dist = "ward") {
ks.default <- function(rows) seq(2, max(3, rows %/% 4))
many_kmeans <- function(x, ks = ks.default(nrow(x)), ...) {
ldply(seq_along(ks), function(i) {
cl <- kmeans(x, centers = ks[i], ...)
data.frame(obs = seq_len(nrow(x)), i = i, k = ks[i], cluster = cl$cluster)
})
}
all_hclust <- function(x, ks = ks.default(nrow(x)), point.dist = "euclidean", cluster.dist = "ward") {
# @author: Michael J Bommarito II
# @date: Feb 20, 2011
# @email: michael.bommarito@gmail.com
# @packages: gridExtra, ggplot2
library(gridExtra)
library(ggplot2)
setwd('/data/workspace/blog/cn220/')
@benmarwick
benmarwick / TAGS_Stats.R
Created January 22, 2012 04:27 — forked from psychemedia/TAGS_Stats.R
Tools for processing and visualising data from a TAGS archive
require(stringr)
require(RCurl)
require(ggplot2)
gsqAPI = function(key,query,gid=0){ return( read.csv( paste( sep="",'http://spreadsheets.google.com/tq?', 'tqx=out:csv','&tq=', curlEscape(query), '&key=', key, '&gid=', gid) ) ) }
trim <- function (x) sub('@','',x)
twCounts=function(df){
print("Counting @'d users")
to.count=data.frame(table(df$to))