Skip to content

Instantly share code, notes, and snippets.

@jtilly
jtilly / xgb-missings.md
Last active Oct 11, 2018
Treatment of missing values with and without sparse matrices
View xgb-missings.md
library(xgboost)
library(dplyr)

# Settings defined up front for the model fit (presumably passed to xgboost
# as its parameter list and number of boosting rounds -- confirm below).
nrounds <- 1
params <- list(
  min_child_weight = 0.00001,
  lambda = 0
)

# sparse ---
@jtilly
jtilly / check-lgb-multiclass.R
Last active May 12, 2018
dim(preds) vs. dim(predict(...)): what the hell is LightGBM doing?
View check-lgb-multiclass.R
library(lightgbm)
library(tidyverse)
rm(list = ls())
# We load the default iris dataset shipped with R
data(iris)
iris = as_data_frame(iris) %>%
mutate(Species = as.numeric(factor(Species)) - 1) %>%
View test_xgboost_missings.R
library(xgboost)
# Simulated data for the missing-value experiment.
N <- 1000
set.seed(1234)
x1 <- runif(n = N)

# Feature with missingness: every draw at or below 0.2 is replaced by NA,
# all other draws are kept unchanged.
x <- x1
x[x1 <= 0.2] <- NA_real_

# Binary label (0/1 as double), derived from the fully observed x1 rather
# than from the masked feature x.
y <- ifelse(x1 >= 0.9, 1, 0)
bst <- xgboost(data = matrix(x, ncol=1), label = y,
objective = "binary:logistic", eval_metric = "logloss",
View textmessage.R
# Remove every (non-hidden) object from the current workspace before starting.
rm(list = ls())
# Read the word list from the URL below, one entry per line, and drop
# duplicate entries. Requires network access at run time.
dict.orig = unique(readLines("https://raw.githubusercontent.com/first20hours/google-10000-english/master/google-10000-english.txt"))
txt2numeric = function(word.orig, return.orig = TRUE) {
word = tolower(word.orig)
word = gsub("([^a-z]){1}", 1, word)
word = gsub("(a|b|c){1}", 2, word)
word = gsub("(d|e|f){1}", 3, word)
word = gsub("(g|h|i){1}", 4, word)
@jtilly
jtilly / print_mat.h
Created Nov 29, 2016
Simple helper function to print armadillo matrices
View print_mat.h
#ifndef print_mat_h
#define print_mat_h
void print_mat(arma::mat my_matrix) {
uint cols = my_matrix.n_cols;
uint rows = my_matrix.n_rows;
Rcout << "--------\n";
for(uint rX = 0; rX < rows; rX++) {
@jtilly
jtilly / timer.h
Created Nov 29, 2016
Simple C++ Profiler Class
View timer.h
// timer.h
#include <iostream>
#include <sstream>
#ifndef timer_h
#define timer_h
class timer {
private:
@jtilly
jtilly / robust_mixture.R
Created Nov 28, 2016
Use log/exp transformation to make the log likelihood computation of a simple mixture model more robust.
View robust_mixture.R
# making mixture models numerically robust
# Fix the RNG state, then clear all (non-hidden) objects from the workspace.
set.seed(4)
rm(list = ls())

# Number of observations and the constant `alpha`
# (presumably the mixture weight -- confirm against the likelihood below).
nobs <- 1000
alpha <- 0.1

# Two vectors of uniform(0, 1) draws, one value per observation.
# p is drawn before q, so the random stream is consumed in that order.
p <- runif(nobs)
q <- runif(nobs)
# naive computation of log-likelihood contribution
@jtilly
jtilly / image-only-pdf.sh
Created Nov 9, 2016
Convert a searchable PDF to an image only PDF using imagemagick
View image-only-pdf.sh
#!/bin/bash
# Convert a searchable PDF into an image-only PDF using ImageMagick.
#
# Usage: image-only-pdf.sh INPUT.pdf OUTPUT.pdf
#   $1  source PDF
#   $2  destination PDF, assembled from the rendered page images
set -euo pipefail

if [ "$#" -lt 2 ]; then
    echo "Usage: $0 INPUT.pdf OUTPUT.pdf" >&2
    exit 1
fi

# Use a private scratch directory instead of fixed names in /tmp, so stale
# pages from an earlier or concurrent run can never end up in the output.
# The trap removes it on every exit path, including failures.
tmpdir=$(mktemp -d)
trap 'rm -rf -- "$tmpdir"' EXIT

# Render each page as a 300 dpi JPEG. The zero-padded counter keeps the
# glob below in page order.
convert -density 300 "$1" "$tmpdir/page_%04d.jpg"

# Reassemble the page images into a single PDF. Paths are quoted so file
# names containing spaces or glob characters work.
convert "$tmpdir"/page_*.jpg "$2"
@jtilly
jtilly / build-latex.sh
Last active Aug 30, 2016
Continuously build tex file
View build-latex.sh
#!/bin/bash
# Continuously build a tex file as PDF with latexmk.
#
# Usage: build-latex.sh [FILE.tex]
#   $1  tex file to build (optional)
#
# The argument is passed quoted so paths with spaces or glob characters
# reach latexmk as one word. When no argument is given, nothing is passed
# (rather than an empty string), matching the behaviour of the unquoted form.
latexmk -pdf -pvc ${1:+"$1"}
@jtilly
jtilly / time
Created May 30, 2016
Check memory usage
View time
# Alias `time` to /usr/bin/time with a custom -f format that prints three
# labelled figures on one line after the command's output:
# CPU (%U, suffixed "s"), Real (%e, suffixed "s") and RAM (%M, suffixed "KB").
alias time='/usr/bin/time -f "\nCPU: %Us\tReal: %es\tRAM: %MKB"'
You can’t perform that action at this time.