Skip to content

Instantly share code, notes, and snippets.

View mrdwab's full-sized avatar

Ananda Mahto mrdwab

View GitHub Profile
@mrdwab
mrdwab / Stratified.R
Created May 21, 2011 17:06
R stratified random sampling from a data frame
stratified = function(df, group, size) {
# USE: * Specify your data frame and grouping variable (as column
# number) as the first two arguments.
# * Decide on your sample size. For a sample proportional to the
# population, enter "size" as a decimal. For an equal number
# of samples from each group, enter "size" as a whole number.
#
# Example 1: To sample 10% of each group from a data frame named "z",
# where the grouping variable is the fourth variable, use:
#
@mrdwab
mrdwab / SampleSize.R
Created May 21, 2011 17:35
R sample size and confidence interval calculation
###############################################################################
# Sample Size and Confidence Interval Calculation #
# v 1.3 by "Ananda Mahto"/mrdwab/ananda@mahto.info #
# 2011 May 17 #
# --------------------------------------------------------------------------- #
# #
# Example usage: #
# * sample.size.table(c.lev = c(90, 95, 98, 99), population = 378) #
# * sample.size(c.lev = 98, population = 200) #
# * confidence.interval(c.lev = 95, p.ss = 80, population = 100) #
@mrdwab
mrdwab / readtps.R
Created March 17, 2012 16:51
read.tps function for R
read.tps = function(data) {
# Reads the .tps file format produced by TPSDIG
# (http://life.bio.sunysb.edu/morph/) into a single data frame
# USAGE: R> read.tps("filename.tps")
a = readLines(data) # so we can do some searching and indexing
LM = grep("LM", a) # find the line numbers for LM
ID.ind = grep("ID", a) # find the line numbers for ID
# and the ID values, SCALE values, and image names
ID = gsub("(ID=)(.*)", "\\2", grep("ID", a, value=T))
SCALE = gsub("(SCALE=)(.*)", "\\2", grep("SCALE", a, value=T))
@mrdwab
mrdwab / write.Hmisc.SPSS.R
Created April 21, 2012 19:10
Write an SPSS file from R with variable labels from the Hmisc package
write.Hmisc.SPSS = function(data, datafile, codefile) {
# EXAMPLE DATA (see: http://stackoverflow.com/q/10181730/1270695)
# df <- data.frame(id = c(1:6),
# p.code = c(1, 5, 4, NA, 0, 5),
# p.label = c('Optometrists', 'Nurses',
# 'Financial analysts', '<NA>',
# '0', 'Nurses'),
# foo = LETTERS[1:6])
# Add some variable labels using label from the Hmisc package
# require(Hmisc)
@mrdwab
mrdwab / df.sorter.R
Created April 23, 2012 18:26
Data frame sorter for R
df.sorter = function(data, var.order=names(data), col.sort=NULL ) {
# Sorts a data.frame by columns or rows or both.
# Can refer to variables either by name or by number.
# If referring to variables by number, and changing both the order
# of variables and the sorting within variables, refer to the
# variable numbers of the final data.frame
#
# === EXAMPLES ===
#
# library(foreign)
@mrdwab
mrdwab / concat.split.R
Created April 26, 2012 06:54
Split concatenated variables in R
concat.split = function(data, split.col, mode=NULL,
sep=",", drop.col=FALSE) {
# Takes a column with multiple values, splits the values into
# separate columns, and returns a new data.frame.
# 'data' is the source data.frame; 'split.col' is the variable that
# needs to be split; 'mode' can be either 'binary' or 'value'
# (where 'binary' is default and it recodes values to 1 or NA);
# 'sep' is the character separating each value (defaults to ',');
# and 'drop.col' is logical (whether to remove the original
# variable from the output or not).
@mrdwab
mrdwab / LinearizeNestedList.R
Created December 4, 2012 16:00
Un-nest a nested list in R
LinearizeNestedList <- function(NList, LinearizeDataFrames=FALSE,
NameSep="/", ForceNames=FALSE) {
# LinearizeNestedList:
#
# https://sites.google.com/site/akhilsbehl/geekspace/
# articles/r/linearize_nested_lists_in_r
#
# Akhil S Behl
#
# Implements a recursive algorithm to linearize nested lists up to any
@mrdwab
mrdwab / TDASample.R
Last active October 13, 2015 20:17
Sampling with string as a seed
## @knitr tdasample
TDASample <- function(inString, N, n, toFile = FALSE) {
if (is.factor(inString)) inString <- as.character(inString)
if (nchar(inString) <= 3) stop("inString must be > 3 characters")
string1 <- "jnt3g127rbfeqixkos 586d90pyal4chzmvwu"
string2 <- "2dyn0uxq ovalrpksieb3fhjw584cm9t7z16g"
instring <- chartr(string1, string2, tolower(inString))
t1 <- sd(c(suppressWarnings(sapply(strsplit(instring, ""),
as.numeric))), na.rm = TRUE)
cut2 <- function (x, breaks, labels = NULL, include.lowest = FALSE, right = TRUE,
dig.lab = 3, ordered_result = FALSE, ...)
{
if (!is.numeric(x))
stop("'x' must be numeric")
if (length(breaks) == 1L) {
if (is.na(breaks) || breaks < 2L)
stop("invalid number of intervals")
nb <- as.integer(breaks + 1)
dx <- diff(rx <- range(x, na.rm = TRUE))
@mrdwab
mrdwab / MERGE.R
Last active December 12, 2015 09:18
MERGE <- function (x, y, by = intersect(names(x), names(y)), by.x = by,
by.y = by, all = FALSE, all.x = all, all.y = all, sort = TRUE,
suffixes = c(".x", ".y"), incomparables = NULL, ...)
{
fix.by <- function(by, df) {
if (is.null(by))
by <- numeric()
by <- as.vector(by)
nc <- ncol(df)
if (is.character(by)) {