Skip to content

Instantly share code, notes, and snippets.

View mrdwab's full-sized avatar

Ananda Mahto mrdwab

View GitHub Profile
# Reproducible sample vectors; the fixed seed pins every draw below.
set.seed(1)

# 100 draws, with replacement, from the integers 0..50.
data_pos <- sample(0:50, size = 100, replace = TRUE)

# 100 draws, with replacement, from the integers -50..-1.
data_neg <- sample(-50:-1, size = 100, replace = TRUE)

# A leading 0 followed by 100 draws, with replacement, from -50..50.
data_pos_neg <- c(0, sample(-50:50, size = 100, replace = TRUE))

# 50 uniform real values between -5 and 5.
x <- runif(n = 50, min = -5, max = 5)
grouped_stem <- function(invec, n = 2) {
if (!all(as.numeric(invec) == as.integer(invec))) stop("This function only works with integers")
invec <- sort(invec)
negative <- if (any(invec < 0)) TRUE else FALSE
myFun <- function(vec, find, replace) {
if (length(find) != length(replace)) stop("incompatible find/replace")
if (all(find %in% vec)) {
pos <- which(vec == find[1])
for (i in seq_along(pos)) {
ind <- pos[i]:(pos[i]+length(find)-1)
if (identical(vec[ind], find)) vec[ind] <- replace
}
} else {
message("nothing changed")
library(data.table)
library(SOfun)
ragged <- function(indt, keys, blank = "") {
require(data.table)
indt <- setkeyv(as.data.table(indt), keys)
vals <- setdiff(names(indt), keys)
nams <- paste0(keys, "_copy")
for (i in seq_along(nams)) {
indt[, (nams[i]) := c(as.character(get(key(indt)[i])[1]),
@mrdwab
mrdwab / subfix.r
Last active May 11, 2020 19:40
R script to fix manually edited SRT files before uploading them to YouTube. Specifically, it rewrites the timestamps in SRT or SBV files so that the end time of one subtitle and the start time of the next do not overlap.
#!/usr/local/bin/r
# Command-line preamble for subfix.r: attach the helper packages without
# their startup messages, then parse the command line against the usage text.
suppressMessages(library(docopt))
suppressMessages(library(glue))
# Make sQuote()/dQuote() emit plain ASCII quotes instead of typographic ones.
options(useFancyQuotes = FALSE)
# Usage text handed to docopt(): an optional FILE argument and a help flag.
doc <- "Usage: subfix.r [FILE] [-h]
-h --help show this help text"
# Result of parsing the command line against the usage text above.
opt <- docopt(doc)
# Read the exported subtitle file into a character vector, one element per line.
x <- readLines("~/Downloads/Andy - Career Talk/exported.srt")
# Print the first ten lines, separated by newlines; the `##` lines below
# record the output.
cat(x[1:10], sep = "\n")
## 1
## 00:00:00,640 --> 00:00:12,850
## [Music]
##
## 2
## 00:00:12,850 --> 00:00:19,910
## Hi, my name is Andy Liepman, and Partners
##
library(pdftools)
library(tidyverse)
library(SOfun) # https://github.com/mrdwab/SOfun
# Location of the source PDF.
data.loc <- "https://ww2.amstat.org/misc/StatsPhD2003-MostRecent.pdf"

# Extract the PDF text, split it into individual lines, then discard the
# last two lines and the first four lines.
data.full <- data.loc %>%
  pdf_text() %>%
  read_lines() %>%
  head(-2) %>%
  tail(-4)
#!/usr/local/bin/r
# Command-line preamble for pdf2png.r: attach the helper packages without
# their startup messages, then parse the command line against the usage text.
suppressMessages(library(docopt))
suppressMessages(library(glue))
# Usage text handed to docopt(): an optional EXT argument and a help flag.
doc <- "Usage: pdf2png.r [EXT] [-h]
-h --help show this help text"
# Result of parsing the command line against the usage text above.
opt <- docopt(doc)
#!/bin/bash
# Shrinks oversized JPEGs, optimizes them, and packs them into a .cbz archive.

# Name the output archive after the current working directory.
filename="$(basename "$PWD").cbz"

# Resize the JPEGs to 50% of their original dimensions.
# WARNING: this overwrites the existing files!
printf '%s\n' '>>>>> RESIZING JPEGS >>>>>'

Keybase proof

I hereby claim:

  • I am mrdwab on github.
  • I am mrdwab (https://keybase.io/mrdwab) on keybase.
  • I have a public key ASAV-O-osZ6SCQGHU-TpoJ_GM0tId8y8FyXoXxMHo8Gl-wo

To claim this, I am signing this object:

@mrdwab
mrdwab / cSplit_e.R
Created March 8, 2018 17:08
Faster versions of `cSplit_e`, `numMat`, and `charMat`.
cSplit_e_new <- function (indt, splitCols, sep = ",", mode = "binary", type = "numeric",
drop = FALSE, fixed = TRUE, fill = NULL) {
indt <- setDT(copy(indt))
if (is.numeric(splitCols)) splitCols <- names(indt)[splitCols]
if (length(sep) == 1) sep <- rep(sep, length(splitCols))
if (length(sep) != length(splitCols)) stop("Wrong number of sep supplied")
if (length(mode) == 1) mode <- rep(mode, length(splitCols))
if (length(mode) != length(mode)) stop("Wrong number of mode supplied")
if (any(!mode %in% c("binary", "value", "count"))) {