View binomial_prob.sql
-- Public Domain CC0 license. https://creativecommons.org/publicdomain/zero/1.0/ | |
-- Calculate the probability of k successes for n trials with probability of success p, | |
-- using the binomial distribution. | |
-- Calculate the binomial coefficient using the "multiplicative formula" | |
CREATE OR REPLACE FUNCTION functions.binomial_coef(n INT64, k INT64) AS (( | |
-- n!/(k!*(n-k)!) | |
-- We're going to have a hard time doing factorials here, | |
-- but based on the "multiplicative formula" in Wiki, it should be possible: |
View paintings_crawl.py
# Public Domain CC0 license. https://creativecommons.org/publicdomain/zero/1.0/ | |
# | |
# Download images from The Painting Dataset: https://www.robots.ox.ac.uk/~vgg/data/paintings/painting_dataset_2018.xlsx | |
# The image urls are outdated in the Excel sheet but the painting urls are not, | |
# so this script re-crawls those images and downloads them locally. | |
# It works as of July 2020. | |
# | |
# Run this first with: | |
# $ scrapy runspider paintings_crawl.py -o paintings.json | |
# Images are stored in 'out/raw' |
View ft_extract.py
# Public Domain CC0 license. https://creativecommons.org/publicdomain/zero/1.0/ | |
# Run this file first, e.g.: | |
# $ scrapy runspider ft_extract.py -o members.json | |
# | |
# It will probably stop working if they change their urls for the contact list of course. | |
# Worked in Spring of 2019 | |
import scrapy | |
import re | |
from urllib.parse import urlparse, urlunparse |
View tokenizer.js
// Public Domain CC0 license. https://creativecommons.org/publicdomain/zero/1.0/ | |
class Tokenizer { | |
constructor(config = {}) { | |
this.filters = config.filters || /[\\.,/#!$%^&*;:{}=\-_`~()]/g; | |
this.lower = typeof config.lower === 'undefined' ? true : config.lower; | |
// Primary indexing methods. Word to index and index to word. | |
this.wordIndex = {}; | |
this.indexWord = {}; |
View oneliners_matplotlib.py
# Matplotlib | |
# Creating a list of colors (e.g. for a bar chart) | |
# "Blues" is the colormap. It can be any colormap | |
# https://matplotlib.org/examples/color/colormaps_reference.html | |
colors = [matplotlib.colors.to_hex(c) for c in plt.cm.Blues(np.linspace(0, 1, len(some_dataframe.index)))] | |
# Globally adjusting DPI and figure size | |
matplotlib.rcParams['figure.dpi'] = 100 | |
matplotlib.rcParams['figure.figsize'] = [6.0, 4.0] |
View keras_embedding_onehot.py
# Public Domain CC0 license. https://creativecommons.org/publicdomain/zero/1.0/ | |
# Create a Keras embedding layer with an initial one-hot encoding by using identity initializer | |
import tensorflow as tf | |
import numpy as np | |
# Input sequence consisting of four features (e.g. words) | |
# Let's pretend this is "hello world hello everyone else" | |
# Where hello is then mapped to 1, world = 0, everyone = 2, else = 3, | |
a = np.array([[1, 0, 1, 2, 3]]) |
View perl.sh
# Convert a Unix timestamp in milliseconds in a column of a CSV to a date | |
cat thefile.csv | perl -MPOSIX -pe 's/(^\d+),/strftime("%F,", localtime($1\/1000))/ge' |
View mongodbio.py
# Public Domain CC0 license. https://creativecommons.org/publicdomain/zero/1.0/ | |
"""MongoDB Apache Beam IO utilities. | |
Tested with google-cloud-dataflow package version 2.0.0 | |
""" | |
__all__ = ['ReadFromMongo'] | |
import datetime |
View redis.go
package services | |
import ( | |
"os" | |
"time" | |
log "github.com/Sirupsen/logrus" | |
"github.com/garyburd/redigo/redis" | |
) |
View cache.py
"""Global LRU caching utility. For that little bit of extra speed. | |
The caching utility provides a single wrapper function that can be used to | |
provide a bit of extra speed for some often used function. The cache is an LRU | |
cache including a key timeout. | |
Usage:: | |
import cache | |
@cache.memoize |
NewerOlder