Skip to content

Instantly share code, notes, and snippets.

@jangorecki
jangorecki / bmerge-bench.R
Last active Jun 20, 2020
parallel bmerge
View bmerge-bench.R
## Sample `size` integers from 1..unq_n.
##
## unq_n  number of distinct values to draw from (values are 1..unq_n)
## size   length of the returned vector
## sort   if TRUE the result is returned in ascending order
##
## When unq_n > size the values are drawn without replacement, so all are distinct.
## Otherwise every value of 1..unq_n occurs at least once and the remaining
## size-unq_n slots are filled by sampling with replacement.
ssa = function(unq_n, size, sort=FALSE) {
  if (unq_n > size) {
    ## more distinct values available than requested
    ans = sample.int(unq_n, size)
  } else {
    unq_sub = seq_len(unq_n)
    ## guarantee each value once, pad with random repeats, then shuffle the whole vector
    ans = sample(c(unq_sub, sample(unq_sub, size=max(size-unq_n, 0), replace=TRUE)))
  }
  ## `sort` is honoured on both branches (the early return used to skip it)
  if (sort) sort(ans) else ans
}
## fix the RNG seed so that data sampled afterwards is the same on every run
set.seed(108)
library(data.table)
## widen printed output to 200 columns
options(width=200)
## turn off data.table automatic index creation and verbose messages
options(datatable.auto.index=FALSE, datatable.verbose=FALSE) ## not needed but just to be future proof if forder will setindex
@jangorecki
jangorecki / smerge.R
Last active Jun 10, 2020
sort merge benchmark
View smerge.R
## Sample `size` integers from 1..unq_n.
##
## unq_n  number of distinct values to draw from (values are 1..unq_n)
## size   length of the returned vector
## sort   if TRUE the result is returned in ascending order
##
## When unq_n > size the values are drawn without replacement, so all are distinct.
## Otherwise every value of 1..unq_n occurs at least once and the remaining
## size-unq_n slots are filled by sampling with replacement.
ssa = function(unq_n, size, sort=FALSE) {
  if (unq_n > size) {
    ## more distinct values available than requested
    ans = sample.int(unq_n, size)
  } else {
    unq_sub = seq_len(unq_n)
    ## guarantee each value once, pad with random repeats, then shuffle the whole vector
    ans = sample(c(unq_sub, sample(unq_sub, size=max(size-unq_n, 0), replace=TRUE)))
  }
  ## `sort` is honoured on both branches (the early return used to skip it)
  if (sort) sort(ans) else ans
}
## fix the RNG seed so that data sampled afterwards is the same on every run
set.seed(108)
library(data.table)
## widen printed output to 200 columns
options(width=200)
## turn off data.table automatic index creation and verbose messages
options(datatable.auto.index=FALSE, datatable.verbose=FALSE) ## not needed but just to be future proof if forder will setindex
@jangorecki
jangorecki / script.R
Created May 11, 2020
mergelist left-right
View script.R
library(data.table)
## NOTE(review): thread count is hard-coded to 40 - presumably sized for the benchmark machine, confirm
setDTthreads(40L)
test.data.table("mergelist.Rraw") ## warmup
## fix the RNG seed so the sampled region_id values below are reproducible
set.seed(108)
## base size; the tables below are built as fractions of N
N = 1e8L
## medium cardinality
## region: N/1e3 rows with sequential region_id, keyed on region_id
region = data.table(region_id=seq_len(N/1e3), key="region_id")
## division: N/1e2 rows with sequential division_id; each row gets a region_id
## sampled with replacement from 1..N/1e3; keyed on division_id
division = data.table(division_id=seq_len(N/1e2), region_id=sample(N/1e3, N/1e2, TRUE), key="division_id")
View bench-isna.R
library(data.table)
## fis.na is reached through `:::` - presumably an unexported function of the
## data.table build under test, confirm
fis.na = data.table:::fis.na
## length of the test vector
N = 1e8
set.seed(108)
## number of positions to overwrite with NA
N_na = 1e3
## NOTE(review): N_nan is not used in the visible part of the script - presumably
## the number of NaN values injected into a later double vector
N_nan = 1e3
# integer
## random permutation of 1..N
x = sample(N)
## set N_na randomly chosen positions to NA
x[sample(N, N_na)] = NA
View nanpar.c
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <math.h>
#include <time.h>
/*
 * Overlays a double with two unsigned int words so that the bits of the
 * floating-point value can be read or written as integers.
 * NOTE(review): assumes unsigned int is 32 bits wide so that word[2] covers
 * the whole 8-byte double; which word holds the high half depends on the
 * platform byte order - confirm for the target.
 */
typedef union {
double value;
unsigned int word[2];
} ieee_double;
@jangorecki
jangorecki / nan.c
Last active Sep 29, 2019
vectorized is.na and is.nan
View nan.c
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <math.h>
#include <time.h>
/*
 * Overlays a double with two unsigned int words so that the bits of the
 * floating-point value can be read or written as integers.
 * NOTE(review): assumes unsigned int is 32 bits wide so that word[2] covers
 * the whole 8-byte double; which word holds the high half depends on the
 * platform byte order - confirm for the target.
 */
typedef union {
double value;
unsigned int word[2];
} ieee_double;
@jangorecki
jangorecki / olap-operation-r.R
Created Oct 25, 2015
OLAP operations in base R
View olap-operation-r.R
## credits
# https://dzone.com/articles/olap-operation-r
# Setup the dimension tables
# State dimension: two-letter key, display name and country of each state/province.
state_table <- data.frame(
  key = c("CA", "NY", "WA", "ON", "QU"),
  name = c("California", "New York", "Washington", "Ontario", "Quebec"),
  country = c("USA", "USA", "USA", "Canada", "Canada")
)
# Month dimension: month number, three-letter English abbreviation and quarter.
# month.abb is the built-in vector "Jan".."Dec"; each quarter label covers 3 months.
month_table <- data.frame(
  key = 1:12,
  desc = month.abb,
  quarter = paste0("Q", rep(1:4, each = 3))
)
View fizzbuzz.R
## FizzBuzz for 1..100 using set operations on the multiples of 3 and 5.
## fb: multiples of both 3 and 5; f / b: multiples of only 3 / only 5
fb = intersect(seq(3, 100, 3), seq(5, 100, 5))
f = setdiff(seq(3, 100, 3), fb)
b = setdiff(seq(5, 100, 5), fb)
## start from the plain numbers and overwrite the positions of each group
x = replace(as.character(1:100), f, "Fizz")
x = replace(x, b, "Buzz")
x = replace(x, fb, "FizzBuzz")
cat(x, sep = "\n")
@jangorecki
jangorecki / compute_signal.R
Last active Sep 25, 2015
clean way to compute indicators and signals
View compute_signal.R
library(TTR)
library(xts)
library(data.table)
# - [x] use list element names instead of `label` argument
# - [x] allow reuse of preceding indicators, reducing the amount of indicator computation
# - [x] cleaner way of providing indicators and signals, provide R language object directly instead of `get` object by name
ma_crossover = function(ma_slow, ma_fast){
setDT(list(sig_buy = ma_fast > ma_slow, sig_sell = ma_fast < ma_slow))[sig_buy==TRUE, sig := 1L][sig_sell==TRUE, sig := -1L][!sig%in%c(1L,-1L), sig := 0L]$sig
@jangorecki
jangorecki / dynamic_i.R
Created May 25, 2015
programmatically filter data.table
View dynamic_i.R
# Name of the column to filter on and the value to compare against.
# NOTE(review): presumably both are substituted into the expression built after
# this setup - the rest of the script is not visible, confirm.
target <- "col1"
value <- 15L
library(data.table)
# fix the RNG seed so the sampled columns are reproducible
set.seed(123)
# number of rows in the example table
n <- 1e7
# n rows: col1 holds integers sampled with replacement from 1..30,
# col2..col4 hold lowercase letters sampled with replacement
dt <- data.table(col1 = sample(1L:30L, n, TRUE), col2 = sample(letters, n, TRUE), col3 = sample(letters, n, TRUE), col4 = sample(letters, n, TRUE))
cl <- substitute(
x <= y,