Skip to content

Instantly share code, notes, and snippets.

@olooney
olooney / pnorm.sql
Created June 13, 2018 23:28
PostgreSQL pnorm() function that calculates the c.d.f. of the standard normal (Gaussian) distribution. This function matches R's built-in pnorm() function to within +/- 2e-7 over the entire real line. However, it's a constant 1/0 above/below z=+7/-7.
-- pnorm(z): approximate c.d.f. of the standard normal distribution.
--
-- For z >= 0 the result is 1 - P(z)^(-16) / 2, where P is a degree-6
-- polynomial with constant term 1, evaluated in Horner form.
-- (The coefficients appear to be those of Abramowitz & Stegun 26.2.19 --
-- confirm against the reference before relying on the stated error bound.)
-- For z < 0 the symmetry pnorm(z) = 1 - pnorm(-z) is applied via a single
-- recursive call on the mirrored argument.
--
-- STRICT: a NULL argument yields NULL without evaluating the body.
CREATE OR REPLACE FUNCTION pnorm(z double precision) RETURNS double precision AS $$
    SELECT CASE
        -- Lower half: reflect onto the upper half.
        WHEN $1 < 0 THEN 1 - pnorm(-$1)
        -- Upper half: Horner evaluation, highest-order coefficient first.
        ELSE 1 - POWER(
            ((((((0.000005383 * $1
                + 0.0000488906) * $1
                + 0.0000380036) * $1
                + 0.0032776263) * $1
                + 0.0211410061) * $1
                + 0.049867347) * $1
                + 1),
            -16) / 2
    END;
$$ LANGUAGE SQL IMMUTABLE STRICT;
@olooney
olooney / xgboost_keras_mp.py
Created June 1, 2018 01:47
Compare XGBoost and Keras head-to-head with parallel processing
# MP setup *must* happen at the very start of the process!
# Otherwise you'll get errors or it will be ignored.
import multiprocessing
multiprocessing.set_start_method('forkserver')
import os
os.environ["OMP_NUM_THREADS"] = "10"
import numpy as np
import pandas as pd
import pickle
@olooney
olooney / platform_dump.py
Created May 22, 2018 20:47
Use Python's 2.7+ platform package to quickly and consistently characterize an environment
import platform
# Names of the zero-argument `platform` functions to report, in output order.
_PLATFORM_QUERIES = (
    "architecture",
    "machine",
    "node",
    "platform",
    "processor",
    "python_build",
    "python_compiler",
    "python_branch",
)

# Emit one "<name> = <repr of result>" line per query.
for _query in _PLATFORM_QUERIES:
    print("{} = {!r}".format(_query, getattr(platform, _query)()))
//g++ -std=c++17 -O3 -I eigen main.cpp -o main && time ./main
//g++ -std=c++17 -fopenmp -O3 -I eigen main.cpp -o main && time ./main
//g++ -std=c++17 -pg -fopenmp -O3 -I eigen main.cpp -o main && time ./main && gprof main gmon.out > profile.txt
#include <iostream>
#include <Eigen/Dense>
#include <cmath>
using namespace Eigen;
float logistic(float x) {
# Minimal Kafka smoke test: publish to topic 'oran', subscribe a consumer to
# it, then pull and print the next two records the consumer yields.
import kafka
# Producer and consumer both talk to the broker at kafka:9092.
p = kafka.KafkaProducer(bootstrap_servers='kafka:9092')
# NOTE(review): auto_offset_reset='earliest' presumably makes a consumer with
# no committed offset start from the oldest available record, so the message
# sent before subscribe() is not missed -- confirm against kafka-python docs.
c = kafka.KafkaConsumer(bootstrap_servers='kafka:9092', auto_offset_reset='earliest')
# First message is sent before the consumer subscribes to the topic.
p.send('oran', b'cool')
c.subscribe('oran')
# Second message is sent after the subscription is in place.
# NOTE(review): send() looks asynchronous and no flush() is called here --
# verify delivery is guaranteed before the reads below.
p.send('oran', b'neat')
# Pull two records from the consumer's iterator; each next() presumably
# blocks until a record is available -- TODO confirm.
v1 = next(c)
v2 = next(c)
print(v1)
print(v2)
# apt-get install tesseract-ocr
# pip3 install Pillow pytesseract
from PIL import Image, ImageDraw, ImageFont
import os, sys
import pytesseract as t
def round_trip(text, font_size=32):
font_filename = '/usr/share/fonts/truetype/dejavu/DejaVuSans-Bold.ttf'
font = ImageFont.truetype(font_filename, size=font_size)
text_size = font.getsize(text)
@olooney
olooney / central_limit_theorem.R
Created December 20, 2017 15:46
Illustrate CLT convergence for non-skewed and highly skewed random variables.
compare_histograms <- function(sn) {
sample_means <- function(r,n) { sqrt(n)*rowMeans(matrix(r(n*10000), ncol=n)) }
center_scale <- function(v) (v - mean(v))/sd(v)
exp_sample <- center_scale(sample_means(rexp, sn))
unif_sample <- center_scale(sample_means(runif, sn))
max_break = ceiling(max(c(unif_sample, exp_sample)))
min_break = floor(min(c(unif_sample, exp_sample)))
breaks = seq(min_break,max_break,length.out=(max_break-min_break)*8+1)
@olooney
olooney / requirements.txt
Created December 11, 2017 22:33
Python 2.6.6 requirements ("pip freeze") for the pyspark environment
Markdown==2.6.10
numpy==1.6.1
ordereddict==1.2
pandas==0.14.1
patsy==0.4.1
python-dateutil==2.6.1
pytz==2017.3
requests==2.18.4
six==1.11.0
@olooney
olooney / xeign.cpp
Created November 30, 2017 18:35
xtensor eigenpair example
#include <iostream>
#include <vector>
#include <tuple>
#include <algorithm>
#include "xtensor/xarray.hpp"
#include "xtensor/xio.hpp"
#include "xtensor/xmath.hpp"
#include "xtensor/xrandom.hpp"
#include "xtensor/xbuilder.hpp"
@olooney
olooney / gp_zip_tables.sql
Created October 26, 2017 13:05
find large tables and convert them to compressed tables in Greenplum
Select
sum(size) AS total_size_on_disk,
sum(size) / (select count(*) from individual) AS per_individual
FROM (
SELECT
pg_relation_size(C.oid) AS "size"
FROM pg_class C
LEFT JOIN pg_namespace N ON (N.oid = C.relnamespace)
WHERE 1=1 -- relname in (...)
ORDER BY pg_relation_size(C.oid) DESC