Created
May 7, 2012 16:40
-
-
Save erochest/2628865 to your computer and use it in GitHub Desktop.
Versions of m
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
(defn m
  "Returns the Porter 'measure' of the stemmer's word: the number of
  vowel-sequence/consonant-sequence pairs found between the start of the
  word and position j, where j is (get-index stemmer). Writing c for a
  consonant sequence, v for a vowel sequence, and <...> for an optional
  part:

     <c><v>       -> 0
     <c>vc<v>     -> 1
     <c>vcvc<v>   -> 2
     <c>vcvcvc<v> -> 3
     ...
  "
  [stemmer]
  (let [end (get-index stemmer)]
    (letfn [;; Index of the first vowel at or after `start`, or nil once the
            ;; scan moves past `end`.
            (next-vowel [start]
              (loop [pos start]
                (cond (> pos end)          nil
                      (vowel? stemmer pos) pos
                      :else                (recur (inc pos)))))
            ;; Index of the first consonant at or after `start`, or nil once
            ;; the scan moves past `end`.
            (next-consonant [start]
              (loop [pos start]
                (cond (> pos end)              nil
                      (consonant? stemmer pos) pos
                      :else                    (recur (inc pos)))))]
      ;; Skip the optional leading consonant sequence. If no vowel is found
      ;; the measure is 0.
      (if-let [first-vowel (next-vowel 0)]
        (loop [total 0
               pos   (inc first-vowel)]
          ;; Each pass looks for the consonant that closes the current vowel
          ;; sequence; finding it completes one more vc pair.
          (if-let [c-pos (next-consonant pos)]
            (if-let [v-pos (next-vowel (inc c-pos))]
              (recur (inc total) (inc v-pos))
              (inc total))
            total))
        0))))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
(defn m
  "Measures the number of consonant sequences that follow a vowel sequence
  in the stemmer's word. If c is a consonant sequence and v a vowel
  sequence, and <...> indicates arbitrary presence,

     <c><v>       -> 0
     <c>vc<v>     -> 1
     <c>vcvc<v>   -> 2
     <c>vcvcvc<v> -> 3
     ...

  The one-argument arity is the entry point. The three-argument arity is the
  worker: it consumes the word one letter at a time via pop-word, where
  num-c is the length of the consonant run currently being scanned and
  num-cs is the number of completed sequences counted so far.

  NOTE(review): assumes (consonant? stemmer) tests the letter that pop-word
  would remove next, so the word is walked from its end towards its start --
  confirm against those helpers."
  ([stemmer]
   (m stemmer 0 0))
  ([stemmer num-c num-cs]
   (cond
     ;; Word exhausted. Any run still pending is the optional leading <c>,
     ;; which has no vowel in front of it and so must not be counted.
     (not (seq (:word stemmer)))
     num-cs

     ;; Still inside a consonant run: extend it and keep going.
     (consonant? stemmer)
     (recur (pop-word stemmer) (inc num-c) num-cs)

     ;; A vowel directly in front of a consonant run of ANY length (even a
     ;; single consonant) completes one vc pair.
     (pos? num-c)
     (recur (pop-word stemmer) 0 (inc num-cs))

     ;; A vowel with no pending consonant run: nothing to count.
     :else
     (recur (pop-word stemmer) 0 num-cs))))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
(defn m
  "Measures the number of consonant sequences between the start of a word
  and position j. If c is a consonant sequence and v a vowel sequence, and
  <...> indicates arbitrary presence,

     <c><v>       -> 0
     <c>vc<v>     -> 1
     <c>vcvc<v>   -> 2
     <c>vcvcvc<v> -> 3
     ...
  "
  [stemmer]
  (let [has-letters?   (fn [s] (seq (:word s)))
        ;; The stemmer after 0, 1, 2, ... applications of pop-word, stopping
        ;; before the first one whose :word is empty.
        prefixes       (take-while has-letters? (iterate pop-word stemmer))
        ;; Consecutive prefixes grouped by the result of consonant?, then
        ;; reversed so the group produced last comes first.
        runs           (reverse (partition-by consonant? prefixes))
        ;; Every member of a run shares the same consonant? result, so
        ;; testing the first member is enough.
        consonant-run? (fn [run] (consonant? (first run)))]
    ;; Discard the leading consonant run (the optional <c>), then count the
    ;; consonant runs that remain.
    (count (filter consonant-run? (drop-while consonant-run? runs)))))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Cool