ericnormand/00 Anagrams.md

## 00 Anagrams.md

      
    Raw
  

              00 Anagrams.md
            
          
    anagrams
Two words are anagrams of each other if they have the same letters but in different orders.
This week's challenge is a two-parter.
Write a function to determine if two words are anagrams.
Given a dictionary of words (such as this one), find all of the anagrams for a target word.
Bonus points for efficiency.

  
## Bobby Towers.clj
(defn anagram? [word prospect]
  (and (not= word prospect)
       (= (sort word) (sort prospect))))

(defn anagrams-for [word prospect-list]
  (filter #(anagram? (str/lower-case word)
                     (str/lower-case %))
          prospect-list))

## Caio César.clj
(require '[clojure.java.io :refer [reader]])

;; (def url "http://wiki.puzzlers.org/pub/wordlists/unixdict.txt")
(def url "unixdict.txt")

(defn find-anagrams [word]
  (let [target (sort word)
        word-size (count word)
        words (reader url)]
    (filter (fn [line]
              (if (= (count line) word-size)
                (= target (sort line))
                false))
            (line-seq words))))


(println (time (find-anagrams "listen")))

## Denis Fuenzalida.clj
(ns unicode-anagrams.core
  (:gen-class))

(defn normalize-string
  "Normalize a string `s` before computing anagram information"
  [s]
  (-> s
      .toUpperCase
      (.replaceAll "\\s" "")))

;; I was curious about how hard it could be to match some variants of a letter
;; with the neutral version (eg. match a Ç with the letter C) and the following
;; function was good enough for all the *latin* char variants I could think of.

(defn letter-name
  "Return a vector of a size up to 4 representing the characters unicode name,
  causing latin letter variants to be equal, i.e.:
  (letter-name \"Ç\") ;; => [\"LATIN\" \"CAPITAL\" \"LETTER\" \"C\"]
  "
  [c]
  (let [fullname-parts (-> c str
                           (Character/codePointAt 0)
                           Character/getName
                           (clojure.string/split #"\s+"))]
    (vec (take 4 fullname-parts))))

(defn latin-index [s]
  (->> s normalize-string (map letter-name) frequencies))

(defn ascii-index [s]
  (->> s normalize-string frequencies))

(defn anagrams?
  "Checks if two given strings have the same frequencies of latin letter names"
  [s1 s2]
  (let [f1 (latin-index s1)
        f2 (latin-index s2)]
    (= f1 f2)))

(comment
  ;; A few tests...
  (mapv letter-name (normalize-string "Édúçâtìön"))
  (anagrams? "EDUCATION" "Édúçâtìön")
  (anagrams? "Acrid Avid Jam Shred" "Richard David James")
  (anagrams? "Wax The Nip" "Aphex Twin")
  )

(defonce words
  (->> (slurp "http://wiki.puzzlers.org/pub/wordlists/unixdict.txt")
       clojure.string/split-lines
       (remove clojure.string/blank?)))

(defn anagrams-in [xs]
  (let [indexer latin-index ;; or `ascii-index` for ascii-only, but faster
        groups  (group-by indexer xs)]
    (->> (vals groups)
         (remove #(= 1 (count %)))))) ;; remove groups of one word, those are not anagrams

;; (count (anagrams-in words)) ;; => 1303
;; (last (sort-by count (anagrams-in words))) ;; => ["angel" "angle" "galen" "glean" "lange"]

## Eric Normand.clj
(defn anagram? [w1 w2]
  (= (sort (clojure.string/lower-case w1))
     (sort (clojure.string/lower-case w2))))

(defn- ->index [word]
  (-> word
      clojure.string/lower-case
      frequencies))

(def dict (clojure.string/split-lines (slurp "http://wiki.puzzlers.org/pub/wordlists/unixdict.txt")))

(def dindex (group-by ->index dict))

(defn anagrams-of
  ([dict word]
   (filter #(anagram? word %) dict))
  ([word]
   (get dindex (->index word))))

## James Cash.clj
(ns anagrams.core
  (:gen-class)
  (:require
   [clojure.core.reducers :as r]
   [clojure.string :as string]
   [clojure.java.io :as io]))

(defn gen-primes
  ([] (gen-primes (iterate inc (bigint 2))))
  ([ps]
   (cons (first ps)
         (lazy-seq
           (gen-primes
             (remove #(= 0 (mod % (first ps)))
                     (rest ps)))))))

(def primes (gen-primes))

(def letters
  (into #{} (map char) (range (int \a) (inc (int \z)))))

(def char->prime
  (zipmap letters primes))

(def product (partial reduce * 1))

(defn word->number
  [s]
  (->> s
      (string/lower-case)
      seq
      (filter letters)
      (map char->prime)
      product))

(defn anagram?
  [s1 s2]
  (= (word->number s1)
     (word->number s2)))

(defn load-dict
  []
  (with-open [f (io/reader (io/resource "words.txt"))]
    (loop [line (.readLine f)
           words []]
      (if (nil? line)
        words
        (recur (.readLine f)
               (conj words [(word->number line) [line]]))))))

(defonce dict (load-dict))

(defn anagram-search
  [w]
  (let [needle (word->number w)
        anagramable? (fn [n] (and (<= n needle) (= 0 (mod needle n))))
        possible-words (filter (fn [[n _]] (anagramable? n)) dict)
        next-combos (fn [potentials]
                      (r/foldcat
                        (r/mapcat
                          (fn [[n words]]
                            (r/foldcat
                              (->> possible-words
                                  (r/map (fn [[m [w]]] [(* n m) (vec (sort (conj words w)))]))
                                  (r/filter (fn [[n*m _]] (anagramable? n*m))))))
                          potentials)))]
    ;; new approach: loop over current list of candidates, store any
    ;; that are exactly needle, discard any that are greater than
    ;; needle *or* don't factor it, then form new combinations of the
    ;; single values with the remaining values, until the remaining
    ;; values is empty
    (loop [candidates possible-words
           anagrams #{}]
      (prn "candidates" (count candidates))
      (if (empty? candidates)
        anagrams
        (let [{anas :ana possible :can}
              (->> candidates
                  (group-by (fn [[n words]]
                              (cond
                                (= n needle) :ana
                                (anagramable? n) :can
                                :else nil))))
              next-candidates (next-combos possible)]
          (recur next-candidates (into anagrams (map second anas))))))))

(defn -main
  [& args]
  (println "Anagrams of " (first args))
  (doseq [words (anagram-search (first args))]
    (println words)))

## Juraj Martinka.clj
(ns clojure-experiments.purely-functional.puzzles.0322-anagrams
  "Two parts:
  - Write a function to determine if two words are anagrams.
  - Given a dictionary of words (such as this one: http://wiki.puzzlers.org/pub/wordlists/unixdict.txt),
    find all of the anagrams for a target word.
  Bonus points for efficiency."
  (:require [clojure.java.io :as io]))

;;; Save the dictionary for later processing
(def dictionary-file (io/file "src/clojure_experiments/purely-functional/puzzles/0322-dictionary.txt"))
(defn- save-dictionary
  "Helper function to save dictionary for fast offline access."
  []
  (spit dictionary-file
        (slurp "http://wiki.puzzlers.org/pub/wordlists/unixdict.txt" )))
#_(save-dictionary)

(def my-dictionary (->> dictionary-file
                     slurp
                     (clojure.string/split-lines)))

#_(take 10 my-dictionary)

(defn anagrams? [w1 w2]
  (and (= (count w1) (count w2))
       (= (set w1) (set w2))))

(anagrams? "ahoj" "helo")
;; => false
(anagrams? "helo" "ehlo")
;; => true
(anagrams? "helo" "hello")
;; => false

(defn dictionary-anagrams [word dictionary]
  (reduce
   (fn [anagrams candidate]
     (if (anagrams? word candidate)
       (conj anagrams candidate)
       anagrams))
   []
   dictionary))

(comment
  (dictionary-anagrams "helo" my-dictionary)
  ;; => [hole]

  (dictionary-anagrams "car" my-dictionary)
  ;; => ["arc" "car" "rca"]

  (time (dictionary-anagrams "tire" my-dictionary))
  ;; => ["rite" "tier" "tire"]
  ;; "Elapsed time: 6.253211 msecs"

  ;; end
  )

## Lindsay Davies.clj
(ns anagrams.core
  (:require [clojure.string :as cstr]))

(defn ->lc
  "Slightly faster lower-case function, but only for ASCII text"
  [w]
  (let [sb (StringBuffer.)]
    (run! #(.append sb (char (bit-set (int %) 5))) w)
    (.toString sb)))

(defn word-hash [w] (sort w))

(def dict-location "resources/unixdict.txt")

(def lc
  ->lc               ; faster, but assumes ASCII
  ;#(.toLowerCase %) ; slower but more correct
  )

(defn read-dict
  "Read in the dictionary from path. Assumes one word per line."
  [dict-file-path]
  (->> (cstr/split (slurp dict-file-path) #"\n")
       (reduce #(assoc %1 (word-hash (lc %2)) (conj (get %1 (word-hash (lc %2)) []) (lc %2))) {})))


;---------------------------

(defn is-anagram?
  "Is word 1 an anagram of word 2?"
  [w1 w2]
  (let [w1' (lc w1)
        w2' (lc w2)]
    (and (= (word-hash w1') (word-hash w2'))
       (not= w1' w2'))))

(defn find-anagrams
  "Find anagrams of a word, or empty list if none in the current dictionary"
  ([w] (find-anagrams w (read-dict dict-location)))
  ([w dict]
   (let [w' (lc w) ]
     (filter #(not= w' %) (get dict (word-hash w'))))))

## Magnus Kvalevag.clj
(require '[clojure.string :as string])

(defn example-dict
  []
  (-> "http://wiki.puzzlers.org/pub/wordlists/unixdict.txt"
      (slurp)
      (string/split-lines)))


(defn anagram?
  "Same set of characters, but not equal."
  [s1 s2]
  (and (= (frequencies s1)
          (frequencies s2))
       (not= s1 s2)))


(defn anagrams1
  [s dict]
  (filter (partial anagram? s) dict))                       ; Can be sped up by memoizing `frequencies`


(defn anagrams2
  [s dict]
  (->> (get (group-by frequencies dict)                     ; Group by frequencies
            (frequencies s))
       (remove #{s})))


(defn- remove-letter
  [s letter]
  (let [[first-half last-half] (split-with (complement #{letter}) s)]
    (concat first-half (rest last-half))))

(defn- anagrams3*
  [[f & r] dict]
  (if f                                                     ; While there are more letters to check
    (->> dict
         (filter (fn [{:keys [rest]}] (some #{f} rest)))    ; Filter on words in dict containing first letter of search-string
         (map #(update % :rest remove-letter f))            ; Remove that letter from remaining words in dict
         (recur r))
    dict))

(defn anagrams3
  [s dict]
  (->> dict
       (filter #(= (count s) (count %)))                    ; Anagrams have the same length (not necessary, but speeds up the function quite a bit)
       (map (fn [word] {:word word :rest word}))            ; Preprocess dictionary
       (anagrams3* s)
       (map :word)                                          ; Get back original word
       (remove #{s})))                                      ; Remove the string being anagram'd

## Steve Miner.clj
(ns miner.ana
  (:require [clojure.java.io :as io]))

;;; Eric Normand's Clojure challenge: anagrams
;;; https://purelyfunctional.tv/?p=31286


(defn digest [w]
  (sort (seq w)))

(defn anagram? [a b]
  (and (not= a b)
       (= (digest a) (digest b))))

(defn anagram-search [word anagram-group]
  (let [group (get anagram-group (digest word))]
    (not-empty (remove #{word} group))))

;; assume text file with one word per line
(defn load-words [filename]
  (with-open [fi (io/reader filename)]
    (binding [*in* fi]
      (doall (take-while some? (repeatedly read-line))))))

(defn anagram-group [filename]
  (group-by digest (load-words filename)))


(def default-dict "http://wiki.puzzlers.org/pub/wordlists/unixdict.txt")

(def default-anagram-group (delay (anagram-group default-dict)))

(def local-dict "/usr/share/dict/words")

(def local-anagram-group (delay (anagram-group local-dict)))

;; `dict` can be either a file reference or a predigested anagram group (for better performance)
(defn anagrams
  ([word]
   (anagram-search word @default-anagram-group))
  ([word dict]
   (anagram-search word (if (map? dict) dict (anagram-group dict)))))


#_
(anagrams "crate")
;; ("caret" "carte" "cater" "trace")


#_
(apply max-key count (vals @local-anagram-group))
;; ["angor" "argon" "goran" "grano" "groan" "nagor" "orang" "organ" "rogan"]


#_
(pprint (sort-by first (filter #(>= (count %) 5) (vals @default-anagram-group))))
;; (["abel" "able" "bale" "bela" "elba"]
;;  ["alger" "glare" "lager" "large" "regal"]
;;  ["angel" "angle" "galen" "glean" "lange"]
;;  ["caret" "carte" "cater" "crate" "trace"]
;;  ["elan" "lane" "lean" "lena" "neal"]
;;  ["evil" "levi" "live" "veil" "vile"])

## Teodor Heggelund.clj
(ns scratch.anagrams
  (:require [clojure.string :as string]))

(defn anagrams? [& words]
  (reduce = (map sort words)))

(anagrams? "neo" "one")
;; => true
(anagrams? "jamesreeves" "weavejester")
;; => false

(defn load-dictionary [path]
  (-> path slurp string/split-lines))

(def unix-dict-path "/home/teodorlu/Downloads/unixdict.txt")
;; alternatively, (slurp "http://wiki.puzzlers.org/pub/wordlists/unixdict.txt")

(def unix-dict (load-dictionary unix-dict-path))

(->> unix-dict
     shuffle
     (take 5))
;; => ("inception" "mickey" "interest" "analogue" "discernible")


;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; TAKE 1: just filter through the words

(defn find-anagrams [dictionary word]
  (let [characters (sort word)]
    (filter (fn [dict-word]
              (= (sort dict-word) characters))
            dictionary)))

(def unix-anagrams (partial #'find-anagrams unix-dict))

(unix-anagrams "neo")
;; => ("one")

(time (unix-anagrams "clojure"))
;; => ()

(unix-anagrams "lisp")
;; => ("lisp" "slip")

(time (unix-anagrams "lisp"))
;; => ("lisp" "slip")
"Elapsed time: 0.123896 msecs"

(time (count (load-dictionary unix-dict-path)))
"Elapsed time: 20.51089 msecs"
"Elapsed time: 28.673577 msecs"
"Elapsed time: 25.327939 msecs"
"Elapsed time: 20.880587 msecs"
"Elapsed time: 24.631787 msecs"


;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; TAKE 2: Index on sorted words.

(defn load-anagram-index [path]
  (->> path
       slurp
       string/split-lines
       (group-by sort)))

(time (load-anagram-index unix-dict-path))
"Elapsed time: 81.89557 msecs"

(def unix-dict-index (load-anagram-index unix-dict-path))

(time (unix-dict-index (sort "lisp")))
;; => ["lisp" "slip"]
"Elapsed time: 0.209954 msecs"
"Elapsed time: 0.149922 msecs"
"Elapsed time: 0.135523 msecs"

(time (unix-dict-index (sort "neo")))
;; => ["one"]
"Elapsed time: 0.047577 msecs"
"Elapsed time: 0.191929 msecs"
"Elapsed time: 0.11953 msecs"

(time (dotimes [i 100]
        (unix-dict-index (sort "neo"))))
"Elapsed time: 1.173734 msecs"

(time (dotimes [i (* 10 1000)]
        (unix-dict-index (sort "neo"))))
"Elapsed time: 54.678017 msecs"

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; On efficiency
;;
;; There are two variables here:
;;
;;  - How many anagrams are you going to look up?
;;  - How large is your dictionary?
;;
;; In my case, I couldn't measure a speedup from using the index.
;;
;; Conclusion: plain filter is good for small problems. For larger problems, the
;; dictionary size and the number of words to be checked for whether they are
;; anagrams is relevant.

## Valentin Waeselynck.clj
(defn- to-bag-of-chars
  [w]
  (frequencies w))

(defn- word-signature
  "A fast hash on Strings which is invariant under re-ordering of the characters."
  ^long [^String s]
  (loop [i (int 0)
         sig (long 0)]
    (if (= i (.length s))
      sig
      (recur
        (unchecked-inc-int i)
        (+ sig (.codePointAt s i))))))

(defn anagrams?
  [w1 w2]
  (and
    (and  ;; for efficiency - will catch a lot of easy cases
      (= (count w1) (count w2))
      (= (word-signature w1) (word-signature w2)))
    (=
      (to-bag-of-chars w1)
      (to-bag-of-chars w2))))

(defn anagrams-of
  [dictionary-words target-word]
  (filter
    #(anagrams? target-word %)
    dictionary-words))

(comment
  (require '[clojure.string :as str])

  ;; Fetching our dictionary
  (def dict
    (str/split-lines
      (slurp "http://wiki.puzzlers.org/pub/wordlists/unixdict.txt")))

  ;; Let's have a look at a sample
  (->> dict shuffle (take 10))
  => ("yacht" "decade" "armchair" "audio" "gender" "carrel" "antiquated" "powers" "metro" "skull")

  ;; Example of searching for anagrams of one word
  (anagrams-of dict "art")
  => ("art" "rat" "tar")

  ;; What are the words with the most anagrams?
  (->> dict
    (group-by to-bag-of-chars)
    vals
    (sort-by count) reverse
    (take 10)
    vec)
  =>
  [["elan" "lane" "lean" "lena" "neal"]
   ["abel" "able" "bale" "bela" "elba"]
   ["alger" "glare" "lager" "large" "regal"]
   ["angel" "angle" "galen" "glean" "lange"]
   ["caret" "carte" "cater" "crate" "trace"]
   ["evil" "levi" "live" "veil" "vile"]
   ["beard" "bread" "debar" "debra"]
   ["esprit" "priest" "sprite" "stripe"]
   ["keats" "skate" "stake" "steak"]
   ["diet" "edit" "tide" "tied"]]

  *e)

## Venkatesh Pitta.clj
(ns anagram-challenge.core
  (:gen-class))

(defn anagram? [one two]
  (and (= (count one) (count two))
       (= (into #{} (clojure.string/lower-case one))
          (into #{} (clojure.string/lower-case two)))))

(defn fish-all-anagrams [hook pool]
  (filter #(anagram? hook %) pool))

(defn -main [& args]
  (when-not (empty? args)
    (with-open [rdr (clojure.java.io/reader "unixdict.txt")]
      (let [lines (reduce conj [] (line-seq rdr))]
        (println (fish-all-anagrams (first args) lines))))))
	(defn anagram? [word prospect]
	(and (not= word prospect)
	(= (sort word) (sort prospect))))

	(defn anagrams-for [word prospect-list]
	(filter #(anagram? (str/lower-case word)
	(str/lower-case %))
	prospect-list))
	(require '[clojure.java.io :refer [reader]])

	;; (def url "http://wiki.puzzlers.org/pub/wordlists/unixdict.txt")
	(def url "unixdict.txt")

	(defn find-anagrams [word]
	(let [target (sort word)
	word-size (count word)
	words (reader url)]
	(filter (fn [line]
	(if (= (count line) word-size)
	(= target (sort line))
	false))
	(line-seq words))))


	(println (time (find-anagrams "listen")))
	(ns unicode-anagrams.core
	(:gen-class))

	(defn normalize-string
	"Normalize a string `s` before computing anagram information"
	[s]
	(-> s
	.toUpperCase
	(.replaceAll "\\s" "")))

	;; I was curious about how hard it could be to match some variants of a letter
	;; with the neutral version (eg. match a Ç with the letter C) and the following
	;; function was good enough for all the latin char variants I could think of.

	(defn letter-name
	"Return a vector of a size up to 4 representing the characters unicode name,
	causing latin letter variants to be equal, i.e.:
	(letter-name \"Ç\") ;; => [\"LATIN\" \"CAPITAL\" \"LETTER\" \"C\"]
	"
	[c]
	(let [fullname-parts (-> c str
	(Character/codePointAt 0)
	Character/getName
	(clojure.string/split #"\s+"))]
	(vec (take 4 fullname-parts))))

	(defn latin-index [s]
	(->> s normalize-string (map letter-name) frequencies))

	(defn ascii-index [s]
	(->> s normalize-string frequencies))

	(defn anagrams?
	"Checks if two given strings have the same frequencies of latin letter names"
	[s1 s2]
	(let [f1 (latin-index s1)
	f2 (latin-index s2)]
	(= f1 f2)))

	(comment
	;; A few tests...
	(mapv letter-name (normalize-string "Édúçâtìön"))
	(anagrams? "EDUCATION" "Édúçâtìön")
	(anagrams? "Acrid Avid Jam Shred" "Richard David James")
	(anagrams? "Wax The Nip" "Aphex Twin")
	)

	(defonce words
	(->> (slurp "http://wiki.puzzlers.org/pub/wordlists/unixdict.txt")
	clojure.string/split-lines
	(remove clojure.string/blank?)))

	(defn anagrams-in [xs]
	(let [indexer latin-index ;; or `ascii-index` for ascii-only, but faster
	groups (group-by indexer xs)]
	(->> (vals groups)
	(remove #(= 1 (count %)))))) ;; remove groups of one word, those are not anagrams

	;; (count (anagrams-in words)) ;; => 1303
	;; (last (sort-by count (anagrams-in words))) ;; => ["angel" "angle" "galen" "glean" "lange"]
	(defn anagram? [w1 w2]
	(= (sort (clojure.string/lower-case w1))
	(sort (clojure.string/lower-case w2))))

	(defn- ->index [word]
	(-> word
	clojure.string/lower-case
	frequencies))

	(def dict (clojure.string/split-lines (slurp "http://wiki.puzzlers.org/pub/wordlists/unixdict.txt")))

	(def dindex (group-by ->index dict))

	(defn anagrams-of
	([dict word]
	(filter #(anagram? word %) dict))
	([word]
	(get dindex (->index word))))
	(ns anagrams.core
	(:gen-class)
	(:require
	[clojure.core.reducers :as r]
	[clojure.string :as string]
	[clojure.java.io :as io]))

	(defn gen-primes
	([] (gen-primes (iterate inc (bigint 2))))
	([ps]
	(cons (first ps)
	(lazy-seq
	(gen-primes
	(remove #(= 0 (mod % (first ps)))
	(rest ps)))))))

	(def primes (gen-primes))

	(def letters
	(into #{} (map char) (range (int \a) (inc (int \z)))))

	(def char->prime
	(zipmap letters primes))

	(def product (partial reduce * 1))

	(defn word->number
	[s]
	(->> s
	(string/lower-case)
	seq
	(filter letters)
	(map char->prime)
	product))

	(defn anagram?
	[s1 s2]
	(= (word->number s1)
	(word->number s2)))

	(defn load-dict
	[]
	(with-open [f (io/reader (io/resource "words.txt"))]
	(loop [line (.readLine f)
	words []]
	(if (nil? line)
	words
	(recur (.readLine f)
	(conj words [(word->number line) [line]]))))))

	(defonce dict (load-dict))

	(defn anagram-search
	[w]
	(let [needle (word->number w)
	anagramable? (fn [n] (and (<= n needle) (= 0 (mod needle n))))
	possible-words (filter (fn [[n _]] (anagramable? n)) dict)
	next-combos (fn [potentials]
	(r/foldcat
	(r/mapcat
	(fn [[n words]]
	(r/foldcat
	(->> possible-words
	(r/map (fn [[m [w]]] [(* n m) (vec (sort (conj words w)))]))
	(r/filter (fn [[nm _]] (anagramable? nm))))))
	potentials)))]
	;; new approach: loop over current list of candidates, store any
	;; that are exactly needle, discard any that are greater than
	;; needle or don't factor it, then form new combinations of the
	;; single values with the remaining values, until the remaining
	;; values is empty
	(loop [candidates possible-words
	anagrams #{}]
	(prn "candidates" (count candidates))
	(if (empty? candidates)
	anagrams
	(let [{anas :ana possible :can}
	(->> candidates
	(group-by (fn [[n words]]
	(cond
	(= n needle) :ana
	(anagramable? n) :can
	:else nil))))
	next-candidates (next-combos possible)]
	(recur next-candidates (into anagrams (map second anas))))))))

	(defn -main
	[& args]
	(println "Anagrams of " (first args))
	(doseq [words (anagram-search (first args))]
	(println words)))
	(ns clojure-experiments.purely-functional.puzzles.0322-anagrams
	"Two parts:
	- Write a function to determine if two words are anagrams.
	- Given a dictionary of words (such as this one: http://wiki.puzzlers.org/pub/wordlists/unixdict.txt),
	find all of the anagrams for a target word.
	Bonus points for efficiency."
	(:require [clojure.java.io :as io]))

	;;; Save the dictionary for later processing
	(def dictionary-file (io/file "src/clojure_experiments/purely-functional/puzzles/0322-dictionary.txt"))
	(defn- save-dictionary
	"Helper function to save dictionary for fast offline access."
	[]
	(spit dictionary-file
	(slurp "http://wiki.puzzlers.org/pub/wordlists/unixdict.txt" )))
	#_(save-dictionary)

	(def my-dictionary (->> dictionary-file
	slurp
	(clojure.string/split-lines)))

	#_(take 10 my-dictionary)

	(defn anagrams? [w1 w2]
	(and (= (count w1) (count w2))
	(= (set w1) (set w2))))

	(anagrams? "ahoj" "helo")
	;; => false
	(anagrams? "helo" "ehlo")
	;; => true
	(anagrams? "helo" "hello")
	;; => false

	(defn dictionary-anagrams [word dictionary]
	(reduce
	(fn [anagrams candidate]
	(if (anagrams? word candidate)
	(conj anagrams candidate)
	anagrams))
	[]
	dictionary))

	(comment
	(dictionary-anagrams "helo" my-dictionary)
	;; => [hole]

	(dictionary-anagrams "car" my-dictionary)
	;; => ["arc" "car" "rca"]

	(time (dictionary-anagrams "tire" my-dictionary))
	;; => ["rite" "tier" "tire"]
	;; "Elapsed time: 6.253211 msecs"

	;; end
	)
	(ns anagrams.core
	(:require [clojure.string :as cstr]))

	(defn ->lc
	"Slightly faster lower-case function, but only for ASCII text"
	[w]
	(let [sb (StringBuffer.)]
	(run! #(.append sb (char (bit-set (int %) 5))) w)
	(.toString sb)))

	(defn word-hash [w] (sort w))

	(def dict-location "resources/unixdict.txt")

	(def lc
	->lc ; faster, but assumes ASCII
	;#(.toLowerCase %) ; slower but more correct
	)

	(defn read-dict
	"Read in the dictionary from path. Assumes one word per line."
	[dict-file-path]
	(->> (cstr/split (slurp dict-file-path) #"\n")
	(reduce #(assoc %1 (word-hash (lc %2)) (conj (get %1 (word-hash (lc %2)) []) (lc %2))) {})))


	;---------------------------

	(defn is-anagram?
	"Is word 1 an anagram of word 2?"
	[w1 w2]
	(let [w1' (lc w1)
	w2' (lc w2)]
	(and (= (word-hash w1') (word-hash w2'))
	(not= w1' w2'))))

	(defn find-anagrams
	"Find anagrams of a word, or empty list if none in the current dictionary"
	([w] (find-anagrams w (read-dict dict-location)))
	([w dict]
	(let [w' (lc w) ]
	(filter #(not= w' %) (get dict (word-hash w'))))))
	(require '[clojure.string :as string])

	(defn example-dict
	[]
	(-> "http://wiki.puzzlers.org/pub/wordlists/unixdict.txt"
	(slurp)
	(string/split-lines)))


	(defn anagram?
	"Same set of characters, but not equal."
	[s1 s2]
	(and (= (frequencies s1)
	(frequencies s2))
	(not= s1 s2)))


	(defn anagrams1
	[s dict]
	(filter (partial anagram? s) dict)) ; Can be sped up by memoizing `frequencies`


	(defn anagrams2
	[s dict]
	(->> (get (group-by frequencies dict) ; Group by frequencies
	(frequencies s))
	(remove #{s})))


	(defn- remove-letter
	[s letter]
	(let [[first-half last-half] (split-with (complement #{letter}) s)]
	(concat first-half (rest last-half))))

	(defn- anagrams3*
	[[f & r] dict]
	(if f ; While there are more letters to check
	(->> dict
	(filter (fn [{:keys [rest]}] (some #{f} rest))) ; Filter on words in dict containing first letter of search-string
	(map #(update % :rest remove-letter f)) ; Remove that letter from remaining words in dict
	(recur r))
	dict))

	(defn anagrams3
	[s dict]
	(->> dict
	(filter #(= (count s) (count %))) ; Anagrams have the same length (not necessary, but speeds up the function quite a bit)
	(map (fn [word] {:word word :rest word})) ; Preprocess dictionary
	(anagrams3* s)
	(map :word) ; Get back original word
	(remove #{s}))) ; Remove the string being anagram'd
	(ns miner.ana
	(:require [clojure.java.io :as io]))

	;;; Eric Normand's Clojure challenge: anagrams
	;;; https://purelyfunctional.tv/?p=31286


	(defn digest [w]
	(sort (seq w)))

	(defn anagram? [a b]
	(and (not= a b)
	(= (digest a) (digest b))))

	(defn anagram-search [word anagram-group]
	(let [group (get anagram-group (digest word))]
	(not-empty (remove #{word} group))))

	;; assume text file with one word per line
	(defn load-words [filename]
	(with-open [fi (io/reader filename)]
	(binding [in fi]
	(doall (take-while some? (repeatedly read-line))))))

	(defn anagram-group [filename]
	(group-by digest (load-words filename)))


	(def default-dict "http://wiki.puzzlers.org/pub/wordlists/unixdict.txt")

	(def default-anagram-group (delay (anagram-group default-dict)))

	(def local-dict "/usr/share/dict/words")

	(def local-anagram-group (delay (anagram-group local-dict)))

	;; `dict` can be either a file reference or a predigested anagram group (for better performance)
	(defn anagrams
	([word]
	(anagram-search word @default-anagram-group))
	([word dict]
	(anagram-search word (if (map? dict) dict (anagram-group dict)))))


	#_
	(anagrams "crate")
	;; ("caret" "carte" "cater" "trace")


	#_
	(apply max-key count (vals @local-anagram-group))
	;; ["angor" "argon" "goran" "grano" "groan" "nagor" "orang" "organ" "rogan"]


	#_
	(pprint (sort-by first (filter #(>= (count %) 5) (vals @default-anagram-group))))
	;; (["abel" "able" "bale" "bela" "elba"]
	;; ["alger" "glare" "lager" "large" "regal"]
	;; ["angel" "angle" "galen" "glean" "lange"]
	;; ["caret" "carte" "cater" "crate" "trace"]
	;; ["elan" "lane" "lean" "lena" "neal"]
	;; ["evil" "levi" "live" "veil" "vile"])
	(ns scratch.anagrams
	(:require [clojure.string :as string]))

	(defn anagrams? [& words]
	(reduce = (map sort words)))

	(anagrams? "neo" "one")
	;; => true
	(anagrams? "jamesreeves" "weavejester")
	;; => false

	(defn load-dictionary [path]
	(-> path slurp string/split-lines))

	(def unix-dict-path "/home/teodorlu/Downloads/unixdict.txt")
	;; alternatively, (slurp "http://wiki.puzzlers.org/pub/wordlists/unixdict.txt")

	(def unix-dict (load-dictionary unix-dict-path))

	(->> unix-dict
	shuffle
	(take 5))
	;; => ("inception" "mickey" "interest" "analogue" "discernible")


	;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
	;; TAKE 1: just filter through the words

	(defn find-anagrams [dictionary word]
	(let [characters (sort word)]
	(filter (fn [dict-word]
	(= (sort dict-word) characters))
	dictionary)))

	(def unix-anagrams (partial #'find-anagrams unix-dict))

	(unix-anagrams "neo")
	;; => ("one")

	(time (unix-anagrams "clojure"))
	;; => ()

	(unix-anagrams "lisp")
	;; => ("lisp" "slip")

	(time (unix-anagrams "lisp"))
	;; => ("lisp" "slip")
	"Elapsed time: 0.123896 msecs"

	(time (count (load-dictionary unix-dict-path)))
	"Elapsed time: 20.51089 msecs"
	"Elapsed time: 28.673577 msecs"
	"Elapsed time: 25.327939 msecs"
	"Elapsed time: 20.880587 msecs"
	"Elapsed time: 24.631787 msecs"


	;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
	;; TAKE 2: Index on sorted words.

	(defn load-anagram-index [path]
	(->> path
	slurp
	string/split-lines
	(group-by sort)))

	(time (load-anagram-index unix-dict-path))
	"Elapsed time: 81.89557 msecs"

	(def unix-dict-index (load-anagram-index unix-dict-path))

	(time (unix-dict-index (sort "lisp")))
	;; => ["lisp" "slip"]
	"Elapsed time: 0.209954 msecs"
	"Elapsed time: 0.149922 msecs"
	"Elapsed time: 0.135523 msecs"

	(time (unix-dict-index (sort "neo")))
	;; => ["one"]
	"Elapsed time: 0.047577 msecs"
	"Elapsed time: 0.191929 msecs"
	"Elapsed time: 0.11953 msecs"

	(time (dotimes [i 100]
	(unix-dict-index (sort "neo"))))
	"Elapsed time: 1.173734 msecs"

	(time (dotimes [i (* 10 1000)]
	(unix-dict-index (sort "neo"))))
	"Elapsed time: 54.678017 msecs"

	;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
	;; On efficiency
	;;
	;; There are two variables here:
	;;
	;; - How many anagrams are you going to look up?
	;; - How large is your dictionary?
	;;
	;; In my case, I couldn't measure a speedup from using the index.
	;;
	;; Conclusion: plain filter is good for small problems. For larger problems, the
	;; dictionary size and the number of words to be checked for whether they are
	;; anagrams is relevant.