I hereby claim:
- I am jkff on github.
- I am jkff (https://keybase.io/jkff) on keybase.
- I have a public key whose fingerprint is CE4D BD42 5FCD 7379 CE9A 7BD8 0AE0 2A9A AE99 C3CB
To claim this, I am signing this object:
def expected_fraction_of_corpus_understood(phrases):
    """Build an estimator of the fraction of ``phrases`` that is understood.

    The corpus is summarised once (phrase count, total word count, the result
    of ``fit_phrase_length`` and the ``k`` / ``beta`` values returned by
    ``fit_heaps_law``); the returned closure reuses those values on each call.

    Args:
        phrases: Sized collection of phrases. ``len(phrase)`` is used as the
            word count of each phrase. Must be non-empty, because the phrase
            count is used as a divisor.

    Returns:
        A function ``result(n, s)`` that returns a float. ``n`` must be
        non-zero because it is used as a divisor.
        NOTE(review): ``n`` looks like the number of phrases examined so far
        and ``s`` the number of those that were understood -- confirm against
        the caller.
    """
    N = len(phrases)
    num_total_words = sum(len(phrase) for phrase in phrases)
    phrase_length_fit = fit_phrase_length(phrases)
    # Only k and beta are needed from the fit; the other returned values are
    # discarded.
    ((k, beta, _), _) = fit_heaps_law(phrases)

    def result(n, s):
        # Word count scaled by the share n / N of the corpus.
        w = 1.0 * n / N * num_total_words
        # Derivative of k * w ** beta with respect to w; presumably the rate
        # at which previously unseen words appear after w words -- confirm.
        omega = k * beta * w ** (beta - 1)
        # s counts directly; the remaining N - n phrases contribute at the
        # observed ratio s / n, scaled by the probability that a phrase
        # contains no new word.
        p_no_new_word = 1 - p_phrase_has_new_word(omega, phrase_length_fit)
        return 1.0 / N * (s + 1.0 * s / n * (N - n) * p_no_new_word)

    return result
import pickle | |
import random | |
from scipy.optimize import curve_fit | |
def shuffled(x):
    """Return a new list with the items of ``x`` in random order.

    Args:
        x: Any iterable. It is copied into a list first, so the argument
            itself is never reordered.

    Returns:
        A freshly created list containing every item of ``x``, shuffled in
        place by ``random.shuffle``.
    """
    res = list(x)
    random.shuffle(res)
    return res
def fit_heaps_law(phrases): |
import nltk | |
import pickle | |
import random | |
# I cleaned up the data manually in Vim. | |
lines = list(open('movie_lines.tsv').readlines()) | |
random.shuffle(lines) | |
tagged = [ | |
# Split lines into sentences; split sentences into words; tag words with | |
# part of speech (POS). |
I hereby claim:
To claim this, I am signing this object:
{-# LANGUAGE BangPatterns #-} | |
import Control.Applicative | |
import Prelude hiding (minimum, sum) | |
import Data.List (permutations) | |
import Control.Monad.ST | |
import Data.Bifunctor (second) | |
import Data.Vector.Unboxed (Vector) | |
import qualified Data.Vector.Unboxed as U | |
import Data.Vector.Unboxed.Mutable (STVector) | |
import qualified Data.Vector.Unboxed.Mutable as UM |
#include <stdio.h> | |
void printTree(int *idx, int *post, int rootIdx, int n) { | |
int root = post[rootIdx]; | |
printf("%d", root); | |
if(root == n-1 || idx[root+1] > rootIdx) return; | |
printf(" { "); |
module Main where | |
import qualified Data.ByteString.Lazy.Char8 as B | |
import System.Environment (getArgs) | |
import Data.Word | |
import Data.List | |
import Data.Array.Base (unsafeAt) | |
import Data.Array.Unboxed | |
import Data.Monoid | |
import Blaze.ByteString.Builder |
public class Guard : IDisposable | |
{ | |
private List<IDisposable> toDispose = new List<IDisposable>(); | |
public void Add(IDisposable d) { toDispose.Add(d); } | |
public void Dispose() { foreach(var x in toDispose) x.Dispose(); } | |
public void Discharge() { toDispose.Clear(); } | |
public static void Do(Action<Guard> a) | |
{ |
import java.util.*; | |
public class StringBinarySearch { | |
private static class ListDictionary implements Dictionary { | |
private List<String> ss; | |
public ListDictionary(List<String> ss) { this.ss = ss; } | |
public String getWordAt(int i) throws IndexOutOfBoundsException { | |
return i < 0 || i >= ss.size() ? null : ss.get(i); | |
} | |
} |
module ProblemK (Numeric, Table, showTable, evalTable, parseTable) | |
where | |
import Data.Char (isNumber, isSpace, isAlpha, toUpper) | |
import Control.Monad | |
import Data.Maybe (Maybe (..), isJust, maybeToList) | |
import qualified Data.Map as M (Map(), unions, fromList, lookup, findWithDefault, insert, toList) | |
import qualified Data.Set as S (Set (..), member, empty, insert) | |
import Data.List (intercalate) |