Skip to content

Instantly share code, notes, and snippets.

@aaronj1335
Last active August 29, 2015 13:57
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save aaronj1335/9606615 to your computer and use it in GitHub Desktop.
Save aaronj1335/9606615 to your computer and use it in GitHub Desktop.
debugging slow code
(defn tokens-and-labels
  "Given a sequence of token/label pairs, return a map holding the set of
  distinct tokens under :tokens and the set of distinct labels under :labels.

  labeled-tokens: a (possibly lazy) seq of [token label] pairs.

  An empty input yields {:tokens #{} :labels #{}}."
  [labeled-tokens]
  ;; Both sets are accumulated in a single reduce. The more obvious form,
  ;;   {:tokens (set (map first labeled-tokens))
  ;;    :labels (set (map second labeled-tokens))}
  ;; walks the input twice.
  (reduce
    (fn [{:keys [tokens labels]} [token label]]
      {:tokens (conj tokens token)
       :labels (conj labels label)})
    {:tokens #{} :labels #{}}
    labeled-tokens))
(defn random-probs-for-tokens
  "Return a map that associates every element of `tokens` with its own fresh
  `(rand)` draw. The draws are independent; nothing here normalizes them."
  [tokens]
  (reduce
    (fn [probs token]
      (assoc probs token (rand)))
    {}
    tokens))
(defn random-hmm
  "Create an hmm with randomly initialized observation probabilities.

  more formally, a set of N+2 states:

    S = {s_0,..., s_N, s_F}

  and M observations:

    V = {v_1,..., v_M}

  where `q_t = s` means that a given sequence was in state `s` at time `t`, and
  the parameters:

    λ = {A, B}

  where A is the state transition probability distribution:

    a_ij = P(q_t+1 = s_j | q_t = s_i)

  and B is the observation probability distribution for each state:

    b_j(k) = P(v_k at t | q_t = s_j)

  input:

    - labeled-tokens: a lazy seq of token/label pairs. something like:
        (('person' 'NOUN') ('run' 'VERB') ...)

  returns: a structure something like:

    {:tokens #{... set of tokens}
     :label-set #{'foo' ... rest of labels}
     :labels {
       'foo' {'bar' 0.123 ... rest of token/probability pairs}
       ... rest of labels}}

  Only the per-label token table is built here; no transition table is
  produced. Each value is an independent `(rand)` draw made by
  `random-probs-for-tokens`, so the rows are not normalized."
  [labeled-tokens]
  (let [{:keys [tokens labels]} (tokens-and-labels labeled-tokens)]
    {:tokens tokens
     :label-set labels
     ;; one freshly drawn token->probability map per label
     :labels (into {} (map #(vector % (random-probs-for-tokens tokens)) labels))}))
def tokens_and_labels(tokens):
    """Collect the distinct tokens and distinct labels from (token, label) pairs.

    Args:
        tokens: any iterable of two-element (token, label) items; it is
            consumed exactly once, so a generator is fine.

    Returns:
        A ``(token_set, label_set)`` tuple of two sets.
    """
    seen_tokens, seen_labels = set(), set()
    for pair in tokens:
        token, label = pair
        seen_tokens.add(token)
        seen_labels.add(label)
    return seen_tokens, seen_labels
def random_hmm(tokens, labels):
    """Build an HMM-like dict with a random value for every (label, token) pair.

    Args:
        tokens: collection of tokens; it is iterated once per label, so it
            must be re-iterable (e.g. a set).
        labels: iterable of labels.

    Returns:
        A dict with three keys: ``'tokens'`` and ``'label-set'`` hold the
        arguments unchanged, and ``'labels'`` maps each label to a
        ``{token: random()}`` dict. The draws are independent and are not
        normalized.

    Note:
        ``random`` must already be in scope as a zero-argument callable
        (presumably ``random.random`` imported at file level).
    """
    per_label = {}
    for label in labels:
        per_label[label] = {token: random() for token in tokens}
    return {
        'tokens': tokens,
        'label-set': labels,
        'labels': per_label,
    }
if __name__ == '__main__':
    # `token_label_pairs` is a placeholder: bind it to some generator of
    # (token, label) tuples before running this script.
    token_set, label_set = tokens_and_labels(token_label_pairs)
    random_hmm(token_set, label_set)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment