Skip to content

Instantly share code, notes, and snippets.

def expected_fraction_of_corpus_understood(phrases):
    """Build an estimator for the understood fraction of a corpus.

    The corpus statistics are computed once from ``phrases`` and captured by
    the returned closure, so the closure itself is cheap to call repeatedly.

    Args:
        phrases: A sized collection of phrases; every phrase must support
            ``len()`` (presumably a sequence of words -- confirm with callers).

    Returns:
        A function ``result(n, s)`` that evaluates::

            (s + s / n * (N - n) * (1 - p_new)) / N

        where ``N = len(phrases)`` and ``p_new`` is
        ``p_phrase_has_new_word(omega, phrase_length_fit)``.
        NOTE(review): ``n`` looks like the number of phrases seen so far and
        ``s`` the number of those that were understood -- confirm.

    Raises:
        ZeroDivisionError: from ``result`` when ``phrases`` is empty or
            ``n == 0`` (with plain Python numbers).
    """
    N = len(phrases)
    num_total_words = sum(len(phrase) for phrase in phrases)
    phrase_length_fit = fit_phrase_length(phrases)
    # Only the first two fitted parameters (k, beta) are used below.
    ((k, beta, _), _) = fit_heaps_law(phrases)

    def result(n, s):
        # Scale the total word count by the share n / N of phrases.
        w = 1.0 * n / N * num_total_words
        # Derivative of k * w ** beta with respect to w.
        omega = k * beta * w ** (beta - 1)
        p_new = p_phrase_has_new_word(omega, phrase_length_fit)
        # s counts directly; the remaining N - n phrases contribute at the
        # observed rate s / n, discounted by the probability p_new.
        return 1.0 / N * (s + 1.0 * s / n * (N - n) * (1 - p_new))

    return result
import pickle
import random
from scipy.optimize import curve_fit
def shuffled(x):
    """Return a new list with the items of ``x`` in random order.

    ``x`` may be any iterable; it is copied into a list first, so the
    argument itself is never reordered.
    """
    res = list(x)
    # random.shuffle works in place, hence the copy above.
    random.shuffle(res)
    return res
def fit_heaps_law(phrases):
import nltk
import pickle
import random
# I cleaned up the data manually in Vim.
lines = list(open('movie_lines.tsv').readlines())
random.shuffle(lines)
tagged = [
# Split lines into sentences; split sentences into words; tag words with
# part of speech (POS).

Keybase proof

I hereby claim:

  • I am jkff on github.
  • I am jkff (https://keybase.io/jkff) on keybase.
  • I have a public key whose fingerprint is CE4D BD42 5FCD 7379 CE9A 7BD8 0AE0 2A9A AE99 C3CB

To claim this, I am signing this object:

{-# LANGUAGE BangPatterns #-}
import Control.Applicative
import Prelude hiding (minimum, sum)
import Data.List (permutations)
import Control.Monad.ST
import Data.Bifunctor (second)
import Data.Vector.Unboxed (Vector)
import qualified Data.Vector.Unboxed as U
import Data.Vector.Unboxed.Mutable (STVector)
import qualified Data.Vector.Unboxed.Mutable as UM
@jkff
jkff / gist:1388235
Created November 23, 2011 08:59
Binary tree postorder -> structure
#include <stdio.h>
void printTree(int *idx, int *post, int rootIdx, int n) {
int root = post[rootIdx];
printf("%d", root);
if(root == n-1 || idx[root+1] > rootIdx) return;
printf(" { ");
@jkff
jkff / Main.hs
Created November 1, 2011 08:13
Haskell IP filter
module Main where
import qualified Data.ByteString.Lazy.Char8 as B
import System.Environment (getArgs)
import Data.Word
import Data.List
import Data.Array.Base (unsafeAt)
import Data.Array.Unboxed
import Data.Monoid
import Blaze.ByteString.Builder
@jkff
jkff / AutoDispose.cs
Created October 15, 2011 19:03
C# autodispose from constructor
public class Guard : IDisposable
{
private List<IDisposable> toDispose = new List<IDisposable>();
public void Add(IDisposable d) { toDispose.Add(d); }
public void Dispose() { foreach(var x in toDispose) x.Dispose(); }
public void Discharge() { toDispose.Clear(); }
public static void Do(Action<Guard> a)
{
@jkff
jkff / StringBinarySearch.java
Created May 28, 2011 04:11
String binary search
import java.util.*;
public class StringBinarySearch {
/**
 * Dictionary backed by an in-memory list: position i of the dictionary is
 * element i of the list handed to the constructor.
 */
private static class ListDictionary implements Dictionary {
    // The list is stored by reference, not copied.
    private List<String> words;

    public ListDictionary(List<String> ss) {
        this.words = ss;
    }

    /**
     * Looks up the word at position i.
     *
     * @param i zero-based position
     * @return the word at i, or null when i is negative or not below the list size
     */
    public String getWordAt(int i) throws IndexOutOfBoundsException {
        // Out-of-range positions yield null rather than an exception.
        if (i < 0) {
            return null;
        }
        if (i >= words.size()) {
            return null;
        }
        return words.get(i);
    }
}
@jkff
jkff / ProblemK.hs
Created May 21, 2011 20:46
Problem K monadized
module ProblemK (Numeric, Table, showTable, evalTable, parseTable)
where
import Data.Char (isNumber, isSpace, isAlpha, toUpper)
import Control.Monad
import Data.Maybe (Maybe (..), isJust, maybeToList)
import qualified Data.Map as M (Map(), unions, fromList, lookup, findWithDefault, insert, toList)
import qualified Data.Set as S (Set (..), member, empty, insert)
import Data.List (intercalate)