Skip to content

Instantly share code, notes, and snippets.

@cs224
cs224 / toplevel-levenshtein-cps-memoize.clj
Created March 22, 2011 06:57
toplevel-levenshtein-cps-memoize.clj
(defn min-cps
  "Continuation-passing `min`: computes the minimum of the collection
  `args` and hands the result to the continuation `ret`. Returns whatever
  `ret` returns."
  [args ret]
  (let [smallest (apply min args)]
    (ret smallest)))
(defn plus-cps
  "Continuation-passing `+`: sums the collection `args` and hands the
  result to the continuation `ret`. Returns whatever `ret` returns."
  [args ret]
  (-> (apply + args)
      ret))
(defn multiply-cps
  "Continuation-passing `*`: multiplies the elements of the collection
  `args` and hands the product to the continuation `ret`. Returns whatever
  `ret` returns."
  [args ret]
  (->> args
       (apply *)
       ret))
(defn equals-cps
  "Continuation-passing `=`: compares `a` and `b` for equality and hands
  the boolean result to the continuation `ret`. Returns whatever `ret`
  returns."
  [a b ret]
  (let [same? (= a b)]
    (ret same?)))
(defn toplevel-levenshtein-cps-memoize [x y]
@cs224
cs224 / levenshtein-allison.clj
Created March 27, 2011 16:36
levenshtein-allison.clj
;;; implementation in clojure of the levenshtein allison algorithm as defined here:
;;; http://www.csse.monash.edu.au/~lloyd/tildeFP/Haskell/1998/Edit01/
(defn min3
  "Returns `w` when `w` is strictly less than `nw`; otherwise returns the
  smaller of `nw` and `n`.

  NOTE(review): this is not a general three-way minimum -- `n` is never
  examined when `w < nw`. Presumably this mirrors the short-circuiting
  min3 of the referenced Allison algorithm; confirm before replacing it
  with a plain `(min w nw n)`."
  [w nw n]
  (cond
    (< w nw) w
    :else    (min nw n)))
(defn generate-diagonale [a b nw fn-diag-above fn-diag-below start]
(if start
(lazy-cat (list nw) (generate-diagonale a b nw fn-diag-above fn-diag-below false))
(if (or (empty? a) (empty? b)) '()
@cs224
cs224 / gist:3066392
Created July 7, 2012 12:57
datomic-clojure-relational-algebra-2012-07-07
(ns mynamespace.dt
(:use clojure.set)
(:use [datomic.api :only [q db] :as d])
(:use clojure.pprint)
)
;;; http://www.lshift.net/blog/2010/08/21/some-relational-algebra-with-datatypes-in-clojure-12
;;; Record type with the four fields number, name, status and city.
(defrecord Supplier [number name status city])
@cs224
cs224 / gist:4570937
Created January 19, 2013 05:38
How to query for duplicate attributes across entities via datomic.
(defn datomic-group-fds-by-fd-id
  "Runs a Datomic query over the relation built by `(maps->rel fd-v [id-fn])`
  and returns its result: each `?fd-id` paired with the `?fd` values
  aggregated through `(into [] ?fd)`.

  `id-fn` is an optional keyword argument and defaults to `:hash`.

  NOTE(review): presumably `maps->rel` yields [fd id] tuples so that the
  `[$fd-v ?fd ?fd-id]` clause binds the map first and its id second --
  confirm against the definition of `maps->rel`."
  [fd-v & {:keys [id-fn] :or {id-fn :hash}}]
  (let [relation (maps->rel fd-v [id-fn])
        query    '[:find ?fd-id (into [] ?fd)
                   :in $fd-v
                   :where
                   [$fd-v ?fd ?fd-id]]]
    (q query relation)))
(defn datomic-get-redundant-files [grouped-fds-by-fd-id & {:keys [id-fn] :or {id-fn :hash}}]
(q '[:find ?fd-v-g
;;; The solution relies on the following implementation of the partitions function:
;;; https://gist.github.com/ray1729/5830608
(defn partition-combination-sets [partition]
(let [partition (vec partition)
c (count partition)]
(for [i (range 1 (inc c))
combination (combinatorics/combinations (range c) i)]
;; the following (map partition combination) may be a bit confusing, because it does use the vector
;; defined above as a function and has nothing to do with the standard partition function of the clojure core language.
@cs224
cs224 / incanter-join.clj
Created October 13, 2013 05:17
$join does not work if the 1st element in the second dataset does not have a corresponding element in the first dataset
(ns experiment.incanter
(:require [incanter.core :as i]))
;;; Dataset with the columns :id and :vehicle-type and three rows (ids 1 to 3).
(def join-dataset-1 (i/dataset [:id :vehicle-type]
[[1 "car"]
[2 "motor-bike"]
[3 "train"]]))
;;;(i/view join-dataset-1)
(def join-dataset-2 (i/dataset [:id :number-wheels]
@cs224
cs224 / BasicXADataSourceUsageTest.java
Created August 12, 2014 07:50
BasicXADataSourceUsageTest for H2, HSQLDB and Derby
package xaresource;
import static org.hamcrest.CoreMatchers.is;
import static org.junit.Assert.assertThat;
import static org.junit.Assert.fail;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
@cs224
cs224 / CoIterator.java
Created September 20, 2014 14:53
Reimplementing the Matthias Mann Coroutine and CoIterator on top of Quasar.
package control.structures.continuations.quasarFiberChannelSameThread;
import java.io.Serializable;
import java.util.Iterator;
import java.util.NoSuchElementException;
import co.paralleluniverse.fibers.SuspendExecution;
import co.paralleluniverse.strands.Strand.State;
import co.paralleluniverse.strands.SuspendableCallable;
@cs224
cs224 / pyjags_rain_sprinkler_grass_simple_bayesian_network.py
Last active October 27, 2022 17:57
Simple Bayesian Network via Monte Carlo Markov Chain in PyMC3
import math
import pyjags
import numpy as np
import pandas as pd
# Seed NumPy's global random number generator with a fixed value so runs are repeatable.
np.random.seed(0)
# Print NumPy floating-point values with 3 digits of precision.
np.set_printoptions(precision=3)
def pyjags_trace():
@cs224
cs224 / 00_pymc3_mixture_experiments_shifted_gamma.py
Last active March 11, 2017 16:28
Experiments in implementing a PyMC3 mixture model with two shifted Gamma distributions
import numpy as np, pandas as pd, matplotlib.pyplot as plt, seaborn as sns
import scipy.stats as stats
from theano import tensor as tt
import pymc3 as pm
from pymc3.distributions.dist_math import bound, logpow, gammaln
from pymc3.distributions.distribution import draw_values, generate_samples
SEED = 5132290 # from random.org
# Seed NumPy's global random number generator with the fixed SEED so runs are repeatable.
np.random.seed(SEED)