Skip to content

Instantly share code, notes, and snippets.

View mrdwab's full-sized avatar

Ananda Mahto mrdwab

View GitHub Profile
@mrdwab
mrdwab / cSplit.R
Last active March 14, 2023 05:03
The faster version of `concat.split` that makes use of `data.table` efficiency.
cSplit <- function(indt, splitCols, sep = ",", direction = "wide",
makeEqual = NULL, fixed = TRUE, drop = TRUE,
stripWhite = FALSE) {
message("`cSplit` is now part of the 'splitstackshape' package (V1.4.0)")
## requires data.table >= 1.8.11
require(data.table)
if (!is.data.table(indt)) setDT(indt)
if (is.numeric(splitCols)) splitCols <- names(indt)[splitCols]
if (any(!vapply(indt[, splitCols, with = FALSE],
is.character, logical(1L)))) {
@mrdwab
mrdwab / stratified.R
Last active January 29, 2023 03:28
Stratified random sampling from a `data.frame` in R
stratified <- function(df, group, size, select = NULL,
replace = FALSE, bothSets = FALSE) {
if (is.null(select)) {
df <- df
} else {
if (is.null(names(select))) stop("'select' must be a named list")
if (!all(names(select) %in% names(df)))
stop("Please verify your 'select' argument")
temp <- sapply(names(select),
function(x) df[[x]] %in% select[[x]])
@mrdwab
mrdwab / pdfWatermarkEncrypt.bat
Created August 8, 2014 10:33
Use PDFtk to add a "stamp" or a "background" to a PDF and restrict editing/copying.
@echo off
if [%1]==[] goto :eof
:loop
pdftk %1 stamp back.pdf output "%~dpn1_new%~x1" owner_pw somepasswordyouwant
shift
if not [%1]==[] goto loop
@mrdwab
mrdwab / SampleSize.R
Created May 21, 2011 17:35
R sample size and confidence interval calculation
###############################################################################
# Sample Size and Confidence Interval Calculation #
# v 1.3 by "Ananda Mahto"/mrdwab/ananda@mahto.info #
# 2011 May 17 #
# --------------------------------------------------------------------------- #
# #
# Example usage: #
# * sample.size.table(c.lev = c(90, 95, 98, 99), population = 378) #
# * sample.size(c.lev = 98, population = 200) #
# * confidence.interval(c.lev = 95, p.ss = 80, population = 100) #
@mrdwab
mrdwab / oo_cell_properties_macro.txt
Created July 25, 2014 02:19
Cell-properties as function-results
REM ***** BASIC *****
Function CELL_NOTE(vSheet,lRowIndex&,iColIndex%)
Dim v
v = getSheetCell(vSheet,lRowIndex&,iColIndex%)
if vartype(v) = 9 then
CELL_NOTE = v.Annotation.getText.getString
else
CELL_NOTE = v
endif
End Function
@mrdwab
mrdwab / LinearizeNestedList.R
Created December 4, 2012 16:00
Un-nest a nested list in R
LinearizeNestedList <- function(NList, LinearizeDataFrames=FALSE,
NameSep="/", ForceNames=FALSE) {
# LinearizeNestedList:
#
# https://sites.google.com/site/akhilsbehl/geekspace/
# articles/r/linearize_nested_lists_in_r
#
# Akhil S Bhel
#
# Implements a recursive algorithm to linearize nested lists upto any
upper_left <- function(n, diag = TRUE, byrow = FALSE) {
x <- seq.int(n)
tmp1 <- sequence(rev(x))
tmp2 <- rep(x, rev(x))
out <- if (byrow) {
cbind(row = tmp2, col = tmp1)
} else {
cbind(row = tmp1, col = tmp2)
}
if (diag) out else out[rowSums(out) != n + 1, ]
@mrdwab
mrdwab / advent_1.R
Last active December 18, 2020 22:09
fun_for <- function(x, target, n) {
if (!n %in% c(2, 3)) stop("The accounting Elves are crazy!")
if (n == 2) {
out <- x[(target - x) %in% x]
} else if (n == 3) {
out <- numeric(0)
for (i in seq_along(x)) {
s1 <- x + x[i]
for (j in seq_along(s1)) {
s2 <- s1 + x[j]
# Using `fread` and `fwrite` to paste together columns like `do.call(paste, ...)`
fpaste <- function(dt, sep = ",") {
x <- tempfile()
if (sep == "") {
data.table(V1 = do.call(stringi::stri_join, c(dt, sep = "")))
} else {
fwrite(dt, file = x, sep = sep, col.names = FALSE)
fread(x, sep = "\n", header = FALSE)
}
}
@mrdwab
mrdwab / 65227663.R
Last active December 10, 2020 05:27
Testing different approaches for replacing characters with integers. https://stackoverflow.com/q/65227663/1270695
## If your replacement is just a sequence of integers the length of the unique values being factored,
## you can create a function like this which should be quite fast.
fac2int <- function(x, levels, labels = levels, exclude = NA, ordered = is.ordered(x), nmax = NA) {
as.integer(factor(x, levels, labels, exclude, ordered, nmax))
}
### DIFFERENT APPROACHES TO TEST
fun_datamatrix <- function() {
df[] <- data.matrix(as.data.frame(lapply(df, factor, levels = df2$Group)))