Skip to content

Instantly share code, notes, and snippets.

@jtilly
jtilly / install_r.sh
Last active June 16, 2016 17:57
Install R and libcurl from source
# This script installs libcurl and R 3.3.0 on a Linux server
# when you can't use apt-get. Note that libcurl is a requirement
# for R 3.3.0.
# Also see: http://pj.freefaculty.org/blog/?p=315

# Put the local prefix ($HOME/packages) first on the search paths so that
# binaries and shared libraries found there take precedence.
export PATH="$HOME/packages/bin:$PATH"
# Append the previous LD_LIBRARY_PATH only when it is non-empty: a trailing
# ":" would create an empty entry, which the dynamic loader interprets as
# the current working directory.
export LD_LIBRARY_PATH="$HOME/packages/lib${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}"
# Point the compiler and linker at headers and libraries under the prefix.
export CFLAGS="-I$HOME/packages/include"
export LDFLAGS="-L$HOME/packages/lib"
@jtilly
jtilly / build-latex.sh
Last active August 30, 2016 15:03
Continuously build tex file
#!/bin/bash
# Continuously rebuild a LaTeX document as a PDF.
#
# Usage: build-latex.sh [FILE.tex]
#
#   -pdf  ask latexmk to produce PDF output
#   -pvc  keep latexmk running and rebuild whenever the sources change
#
# ${1:+"$1"} expands to the quoted first argument when a non-empty one was
# given and to nothing otherwise, so a path containing spaces or glob
# characters reaches latexmk as a single word, while calling the script
# without an argument still passes no file name at all.
latexmk -pdf -pvc ${1:+"$1"}
@jtilly
jtilly / image-only-pdf.sh
Created November 9, 2016 20:37
Convert a searchable PDF to an image only PDF using imagemagick
#!/bin/bash
# Convert a searchable PDF into an image-only PDF using ImageMagick.
#
# Usage: image-only-pdf.sh INPUT.pdf OUTPUT.pdf
#
# Every page of INPUT is rasterized to a JPEG at 300 dpi and the JPEGs are
# then reassembled, in page order, into OUTPUT.

# Stop at the first failing command instead of assembling a partial result.
set -euo pipefail

if [ "$#" -lt 2 ]; then
  echo "Usage: $0 INPUT.pdf OUTPUT.pdf" >&2
  exit 1
fi

# Use a private scratch directory rather than fixed names in /tmp, so stale
# page_*.jpg files or a concurrent run cannot leak pages into the output.
# The trap removes the directory on every exit path, including failures.
workdir=$(mktemp -d)
trap 'rm -rf "$workdir"' EXIT

# Zero-padded page numbers keep the glob below in page order.
convert -density 300 "$1" "$workdir/page_%04d.jpg"
convert "$workdir"/page_*.jpg "$2"
@jtilly
jtilly / robust_mixture.R
Created November 28, 2016 21:31
Use log/exp transformation to make the log likelihood computation of a simple mixture model more robust.
# Making mixture models numerically robust: simulation setup.

# Fix the RNG state so the draws below are reproducible.
set.seed(4)
# Clear all visible objects from the workspace (destructive when this file
# is sourced into an interactive session).
rm(list = ls())

# Number of simulated observations.
nobs <- 1000
# Scalar constant used by the mixture computation.
alpha <- 0.1

# Two independent vectors of nobs uniform draws on [0, 1].
p <- runif(nobs, min = 0.0, max = 1.0)
q <- runif(nobs, min = 0.0, max = 1.0)
# naive computation of log-likelihood contribution
@jtilly
jtilly / print_mat.h
Created November 29, 2016 17:38
Simple helper function to print armadillo matrices
#ifndef print_mat_h
#define print_mat_h
void print_mat(arma::mat my_matrix) {
uint cols = my_matrix.n_cols;
uint rows = my_matrix.n_rows;
Rcout << "--------\n";
for(uint rX = 0; rX < rows; rX++) {
# Clear all visible objects from the workspace before starting.
rm(list = ls())

# Download the word list (one entry per line) and drop duplicate entries.
dict.orig <- unique(
  readLines(
    "https://raw.githubusercontent.com/first20hours/google-10000-english/master/google-10000-english.txt"
  )
)
txt2numeric = function(word.orig, return.orig = TRUE) {
word = tolower(word.orig)
word = gsub("([^a-z]){1}", 1, word)
word = gsub("(a|b|c){1}", 2, word)
word = gsub("(d|e|f){1}", 3, word)
word = gsub("(g|h|i){1}", 4, word)
library(xgboost)
# Reproducible toy data: a single uniform feature with injected missing values.
set.seed(1234)
N <- 1000
x1 <- runif(N)

# Feature seen by the model: every draw at or below 0.2 is marked missing.
x <- x1
x[x1 <= 0.2] <- NA_real_

# Binary label: 1 when the underlying (unmasked) draw is at least 0.9.
y <- as.numeric(x1 >= 0.9)
bst <- xgboost(data = matrix(x, ncol=1), label = y,
objective = "binary:logistic", eval_metric = "logloss",
@jtilly
jtilly / install_dep.R
Last active February 3, 2018 14:18
Install Package Dependencies in R
#' Install Dependent Packages
#'
#' @param pkg.dir is the package directory that contains the
#' \code{DESCRIPTION} file
#' @param dependencies defines which dependencies of the dependent packages are
#' to be installed
#' @param repos is the (CRAN) repository used to install dependencies
#' @param lib is the library to which packages are installed
install_dep = function(pkg.dir = ".", dependencies = TRUE, repos = getOption("repos")[1], lib = .libPaths()[1]) {
@jtilly
jtilly / check-lgb-multiclass.R
Last active May 12, 2018 08:16
dim(preds) vs. dim(predict(...)): what the hell is LightGBM doing?
library(lightgbm)
library(tidyverse)
rm(list = ls())
# We load the default iris dataset shipped with R
data(iris)
iris = as_data_frame(iris) %>%
mutate(Species = as.numeric(factor(Species)) - 1) %>%
@jtilly
jtilly / timer.h
Created November 29, 2016 16:00
Simple C++ Profiler Class
// timer.h
#include <iostream>
#include <sstream>
#ifndef timer_h
#define timer_h
class timer {
private: