;; See: http://codereview.stackexchange.com/q/6682/9032
(defn map-invert-preserve-dups
  "Inverts `m` without dropping keys that share a value.

  Returns a map from each value of `m` to a vector of every key that
  mapped to it, in the order the entries of `m` are traversed. Unlike
  `clojure.set/map-invert`, keys with duplicate values are all kept.
  An empty (or nil) `m` yields an empty map rather than nil."
  [m]
  (reduce (fn [inverted [k v]]
            ;; Start from an empty vector the first time a value is seen,
            ;; then append so keys keep their traversal order.
            (assoc inverted v (conj (get inverted v []) k)))
          {}
          m))
;; Report the 25 most frequent words of five or more characters in the
;; Pulp Fiction script as [count words] entries, highest count first.
(->> "http://www.weeklyscript.com/Pulp%20Fiction.txt"
     slurp                      ; read the whole script text from the URL
     (re-seq #"\w{5,}")         ; every run of 5+ word characters
     frequencies                ; word -> number of occurrences
     map-invert-preserve-dups   ; count -> all words with that count
     (sort-by key >)            ; counts are unique keys, so sort on them, descending
     (take 25))
;; => ([272 ("VINCENT")] [214 ("JULES")] [182 ("Butch")] [162 ("Vincent")] [147 ("BUTCH")] [94 ("Jules")] [92 ("FABIENNE")] [89 ("fuckin")] [78 ("Marsellus")] [77 ("gonna")] [58 ("LANCE")] [56 ("about")] [54 ("YOUNG")] [51 ("through")] [50 ("looks" "there")] [45 ("never")] [44 ("front")] [43 ("Jimmie" "little")] [42 ("their")] [41 ("right" "NIGHT" "takes")] [37 ("JIMMIE" "other")] [35 ("still")] [34 ("PUMPKIN")] [33 ("Fabienne")] [32 ("think" "watch" "table" "Maynard")])
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment