This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Load Data
# check.names = FALSE keeps the CSV headers verbatim (they contain spaces);
# "--" entries are read as NA.
grainData <- read.csv("grainSize.csv", check.names = FALSE, na.strings = "--")

# Calculate Derived Sample Values
# Phi scale: phi = -log2(diameter).
grainData[["Phi Diameter"]] <- -log2(grainData[["Grain Diameter"]])

# Share of the total sample weight held by each row. Despite the column
# names, these are fractions of the total, not values scaled by 100.
totalWeight <- sum(grainData[["Sample Weight"]])
grainData[["Percent Retained"]] <- grainData[["Sample Weight"]] / totalWeight
grainData[["Cumulative Percent"]] <- cumsum(grainData[["Percent Retained"]])
grainData[["Percent Finer"]] <- 1 - grainData[["Cumulative Percent"]]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# @author: Michael J Bommarito II
# @date: Feb 20, 2011
# @email: michael.bommarito@gmail.com
# @packages: gridExtra, ggplot2
library(gridExtra)
library(ggplot2)

# NOTE(review): hard-coded absolute path; adjust to the local workspace.
setwd("/data/workspace/blog/cn220/")
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Default candidate cluster counts for a data set with `rows` rows:
# the sequence from 2 up to a quarter of the row count (integer division),
# with the upper end never lower than 3.
ks.default <- function(rows) {
  seq(2, max(3, rows %/% 4))
}
# Run kmeans() once for every candidate cluster count and stack the results.
#
# x:   data handed to kmeans(); nrow(x) must be defined.
# ks:  vector of cluster counts to try (default: ks.default(nrow(x))).
# ...: further arguments forwarded unchanged to kmeans().
#
# Returns one row per observation per entry of `ks`, with columns
# obs (row index in x), i (position in ks), k (cluster count) and
# cluster (assignment reported by kmeans). Needs ldply() from plyr attached.
many_kmeans <- function(x, ks = ks.default(nrow(x)), ...) {
  ldply(seq_along(ks), function(i) {
    cl <- kmeans(x, centers = ks[i], ...)
    data.frame(
      obs = seq_len(nrow(x)),
      i = i,
      k = ks[i],
      cluster = cl$cluster
    )
  })
}
all_hclust <- function(x, ks = ks.default(nrow(x)), point.dist = "euclidean", cluster.dist = "ward") { |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Plotting the output of FactoMineR's PCA using ggplot2
#
# load libraries
library(FactoMineR)
library(ggplot2)
library(scales)
library(grid)
library(plyr)
library(gridExtra)
#
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Default candidate cluster counts for a data set with `rows` rows:
# the sequence from 2 up to a quarter of the row count (integer division),
# with the upper end never lower than 3.
ks.default <- function(rows) {
  seq(2, max(3, rows %/% 4))
}
# Run kmeans() once for every candidate cluster count and stack the results.
#
# x:   data handed to kmeans(); nrow(x) must be defined.
# ks:  vector of cluster counts to try (default: ks.default(nrow(x))).
# ...: further arguments forwarded unchanged to kmeans().
#
# Returns one row per observation per entry of `ks`, with columns
# obs (row index in x), i (position in ks), k (cluster count) and
# cluster (assignment reported by kmeans). Needs ldply() from plyr attached.
many_kmeans <- function(x, ks = ks.default(nrow(x)), ...) {
  ldply(seq_along(ks), function(i) {
    cl <- kmeans(x, centers = ks[i], ...)
    data.frame(
      obs = seq_len(nrow(x)),
      i = i,
      k = ks[i],
      cluster = cl$cluster
    )
  })
}
all_hclust <- function(x, ks = ks.default(nrow(x)), point.dist = "euclidean", cluster.dist = "ward") { |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# From http://cainarchaeology.weebly.com/r-package-for-seriation-via-ca.html
library(CAseriation)

# Load the sample dataset.
data("perfect_seriation")

# Plot the Correspondence Analysis scatterplot of the first 2 dimensions in
# order to inspect the data structure (e.g., to look for the horseshoe effect).
check.ca.plot(perfect_seriation, 1, 2)

# NOTE(review): presumably sorts the table along dimension 1 -- confirm
# against the CAseriation documentation.
sort.table(perfect_seriation, 1)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Set the working directory, i.e. the location of the JSTOR DfR CSV
# files on the computer.
setwd("C:\\some directory with JSTOR DfR CSV files")

# Create a list of all the CSV files. `pattern` is a regular expression, not
# a glob: anchor on a literal ".csv" extension and ignore case so that both
# ".csv" and ".CSV" match, without picking up names that merely contain "csv".
myFiles <- list.files(pattern = "\\.csv$", ignore.case = TRUE)

# Read all the CSV files into a list of data frames (one element per file).
myData <- lapply(myFiles, read.csv)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Set working directory
dir <- "C:\\" # adjust to suit
setwd(dir)

# Configure variables and filenames for MALLET
## here using MALLET's built-in example data and
## variables from http://programminghistorian.org/lessons/topic-modeling-and-mallet

# Folder containing txt files for MALLET to work on
importdir <- "C:\\mallet-2.0.7\\sample-data\\web\\en"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Speed tests of different parallel and non-parallel methods
# for iterating over different numbers of topics with
# topicmodels

# Clear workspace and stop any previous cluster instances
rm(list = ls(all.names = TRUE))
gc()

# sfStop() is not defined until the package providing it is attached
# (presumably snowfall, loaded further down -- confirm). Guard the call so a
# fresh session does not abort with "could not find function".
if (exists("sfStop", mode = "function")) {
  sfStop()
}

library(topicmodels)
OlderNewer