ericnormand/00 phrasal anagrams.md

## 00 phrasal anagrams.md

      
    Raw
  

              00 phrasal anagrams.md
            
          
    phrasal anagrams
Detecting is one word is an anagram of another is easy. This time, we’re going to do multi-word anagrams. Here’s the problem:
Two phrases are anagrams if they contain the same letters rearranged, ignoring spaces. You should also ignore case.
For instance:

“School master” is an anagram of “The classroom”
“Astronomer” is an anagram of “Moon starer”
“The eyes” is an anagram of “They see”

So, two parts:

Write a function to determine if two phrases are anagrams.
Given a dictionary of words (such as this one, different from last week), take a phrase and generate anagrams of that phrase that contain one or more words from the dictionary.

For instance, if I give you the phrase “funeral”, you should be able to generate, among others, “real fun”.
Remember, the generated anagrams have to use all the letters.

  
## Eric Normand.clj
(def dict
  (clojure.string/split-lines (slurp "https://gist.githubusercontent.com/ericnormand/8c0ccc095edaa64eb8e00f861f70b02c/raw/wordlist.txt")))

(defn to-letters [phrase]
  (-> phrase
      (clojure.string/replace #"\s" "")
      (clojure.string/lower-case)
      sort))

(defn pindex [phrase]
  (frequencies (to-letters phrase)))

(def dindex (group-by pindex dict))

(defn anagram? [p1 p2]
  (= (to-letters p1)
     (to-letters p2)))

(defn subset? [i1 i2]
  (every? (fn [[l n]]
            (<= n (get i2 l 0)))
          i1))

(defn minus [i1 i2]
  (merge-with - i1 i2))

(defn iempty? [pi]
  (every? #(zero? (val %)) pi))

(defn subsets-of [dict pi]
  (filter #(subset? (key %) pi) dict))

(defn anagrams-of*
  [pi]
  (let [f (memoize (fn [f pi]
                     (if (iempty? pi)
                       [[]]
                       (for [[px words] (subsets-of dindex pi)
                             :let [inext (minus pi px)
                                   remains (f f inext)]
                             word words
                             remain remains]
                         (into [word] remain)))))]
    (map #(clojure.string/join " " %) (f f pi))))

(defn anagrams-of [phrase]
  (anagrams-of* (pindex phrase)))

## Ilya Bernshteyn.clj
(ns clj-challenge.harder-anagrams
  (:require [clojure.set :as clj-set]
            [clojure.string :as cs]))

;;
;; Building blocks
;;

(def dictionary
  (-> (slurp "https://gist.githubusercontent.com/ericnormand/8c0ccc095edaa64eb8e00f861f70b02c/raw/01c33b3438bbab6bdd7e8dade55c1f5997ad8027/wordlist.txt")
      (cs/split-lines)
      (set)))

(def alphabet ["a" "b" "c" "d" "e" "f" "g" "h" "i" "j" "k" "l" "m" "n" "o" "p" "q" "r" "s" "t" "u" "v" "w" "x" "y" "z"])

(defn prime?
  "The goal of this problem isn't to find how to calculate prime numbers, so let's
  use Java's implementation."
  [x]
  (let [certainty 5] (.isProbablePrime (BigInteger/valueOf x) certainty)))

(def primes
  "Lazy sequence of prime numbers"
  (filter prime? (range)))

(def alphabet-primes
  "A map of each letter of the English alphabet mapped to a unique prime number"
  (into {} (map #(vector %1 %2) alphabet primes)))

;;
;; Utils
;;

(defn clean
  "Lower cases and removes all the spaces from a phrase"
  [phrase]
  (-> phrase
      (str)
      (cs/lower-case)
      (cs/replace #" |\'" "")))

(defn phrase->id
  "Converts a phrase to the product of the prime number values of each letter."
  [phrase]
  (->> (clean phrase)
       (map #(get alphabet-primes (cs/lower-case (str %))))
       (reduce * 1.0)))

;;
;; Let's find some anagrams
;;

(defn anagrams?
  "Checks if two phrases are anagrams"
  [phrase-one phrase-two]
  (let [p-one (clean phrase-one)
        p-two (clean phrase-two)]
    (= (phrase->id p-one) (phrase->id p-two))))

(defn make-anagrams
  "Takes a phrase and returns a vector of sets of anagrams."
  [phrase]
  (let [phrase-id (phrase->id phrase)]
    (loop [dict dictionary
           result []]
      (if (empty? dict)
        result
        (let [maybe-anagrams (reduce (fn [m candidate]
                                       (if (= 0.0 (mod (:phrase-id m) (phrase->id candidate)))
                                         (-> m
                                             (update :anagrams (fn [coll] (conj coll candidate)))
                                             (assoc :phrase-id (/ (:phrase-id m) (phrase->id candidate))))
                                         m))
                                     {:phrase-id phrase-id
                                      :anagrams #{}}
                                     dict)]
          (cond
            ;; We found a set of anagrams
            (= 1.0 (:phrase-id maybe-anagrams)) (recur (clj-set/difference dict (:anagrams maybe-anagrams))
                                                       (conj result (:anagrams maybe-anagrams)))
            ;; No anagrams found, we're done
            (= (:phrase-id maybe-anagrams) phrase-id) (recur [] result)
            ;; Found some candidate anagrams, but not enough, this is where things
            ;; don't go quite right and we remove some candidates that could group
            ;; with others to form an anagram
            (> (:phrase-id maybe-anagrams) 1.0) (recur (clj-set/difference dict (:anagrams maybe-anagrams))
                                                       result)))))))

;;
;; A few REPL expressions to try things out
;;
(comment
  (require 'clj-challenge.harder-anagrams :reload)
  (anagrams? "School master" "The classroom")
  (anagrams? "Astronomer" "Moon starer")
  (anagrams? "The eyes" "They see")
  (anagrams? "These are" "not anagrams")
  (make-anagrams "kcor")
  (make-anagrams "rock party")
  (prime? 13)
  (take 26 primes)
  alphabet-primes
  (phrase->id "rock")
  (phrase->id "somebody move")
  dictionary)

## Jos van Bakel.clj
(ns scratchpad.pftv
  (:require [clojure.string :as str]
            [clojure.test :refer :all]))

(defn letters [s]
  (->> s
     str/lower-case
     seq
     (remove #{\space})
     sort))

(defn anagram? [a b]
  (= (letters a) (letters b)))

(deftest test-anagram?
  (is (anagram? "School master" "The Classroom"))
  (is (anagram? "Astronomer" "Moon starer"))
  (is (anagram? "The eyes" "They see")))

(def dictionary-url "https://gist.githubusercontent.com/ericnormand/8c0ccc095edaa64eb8e00f861f70b02c/raw/01c33b3438bbab6bdd7e8dade55c1f5997ad8027/wordlist.txt")
(def dictionary (str/split-lines (slurp dictionary-url)))

(defn map-values [f m]
  (into {} (for [[k v] m] [k (f v)])))

(defn letter-freq [s]
  (->> s
     str/lower-case
     seq
     (remove #{\space})
     (group-by identity)
     (map-values count)))

(deftest test-anagrams-by-letter-freq
  (is (= (letter-freq "School master") (letter-freq "The Classroom")))
  (is (= (letter-freq "Astronomer") (letter-freq "Moon starer")))
  (is (= (letter-freq "The eyes") (letter-freq "They see"))))

(defn word-in-alphabet? [alphabet word]
  (reduce
   (fn [alphabet letter]
     (if (pos? (or (alphabet letter) 0))
       (update alphabet letter dec)
       (reduced nil)))
   alphabet
   (seq word)))

(defn empty-alphabet? [alphabet]
  (every? zero? (vals alphabet)))

(deftest test-word-in-alphabet?
  (is (empty-alphabet?
       (word-in-alphabet? {\r 1 \e 1 \a 1 \l 1 \f 1 \u 1 \n 1}
                          "funeral"))))

;; inspired by https://stackoverflow.com/a/26076145/248948
(defn generate-anagram-words [alphabet]
  (lazy-seq
   (apply concat
          (map (fn [word]
                 (if-let [new-alphabet (word-in-alphabet? alphabet word)]
                   (if (empty-alphabet? new-alphabet)
                     [[word]]
                     (map #(cons word %) (generate-anagram-words new-alphabet)))))
               dictionary))))

(defn generate-anagram-phrases [phrase]
  (map #(str/join \space %)
       (generate-anagram-words (letter-freq phrase))))

(defn random-words [num]
  (let [n (count dictionary)
        p (/ 1.0 n)]
    (shuffle (take num
                   (random-sample p (cycle dictionary))))))

(defn random-phrase [num-words]
  (str/join \space (random-words num-words)))

(deftest test-generate-anagram-phrases
  (let [input "hello world"
        anagrams (generate-anagram-phrases input)]
    (is (pos? (count anagrams)))
    (is (every? #(anagram? input %) anagrams))))

(run-tests)

## Keith Harper.clj
(defn anagrams? [words1 words2]
  (let [xf (comp sort
                 (partial re-seq #"[^\s]")
                 clojure.string/lower-case
                 str)]
    (= (xf words1)
       (xf words2))))

(filter (partial anagrams? "real fun")
        dictionary)
;;=> ("funeral")

(filter (partial anagrams? "stand under")
        dictionary)
;;=> ("understand")

## Magnus Kvalevag.clj
;; Create a trie of all phrases that are anagrams of the given phrase.
;; The paths from root of trie to each leaf is an anagram of the given phrase.

(require '[clojure.string :as string]
         '[clojure.zip :as zip])


(defn subset-word?
  "Is word1 a subset of word2? I.e.: Does word2 contain all the characters in word1?"
  [word1 word2]
  (let [word1-frequencies (frequencies word1)
        word2-frequencies (frequencies word2)]
    (every?
      (fn [[character frequency]]
        ; word2 has at least as many of the same character as word1
        (<= frequency (get word2-frequencies character 0)))
      word1-frequencies)))


(defn remove-letter
  "Remove the first occurrence of the letter from the word."
  [word letter]
  (let [[first-half last-half] (split-with (complement #{letter}) word)]
    (concat first-half (rest last-half))))


(defn remove-letters
  "Remove the letters of word2 from word1."
  [word1 word2]
  (->> (reduce remove-letter word1 word2)
       (apply str)))


(defrecord AnagramTrie [word children])

(defn anagram-trie
  "Create a trie where every path from root to any leaf is made from strings that when concatenated
   together creates a substring of word (has no letters not in original word)."
  ([word dict]
   (anagram-trie word "" dict))
  ([word subset-word dict]
   (letfn [(child-trie [subset-word]
             (anagram-trie
               (remove-letters word subset-word)            ; The recursive call only looks at the remaining characters of the word.
               subset-word
               (remove #{subset-word} dict)))]              ; Remove this word from the dictionary so that it can't be reused.
     (->> dict
          (filter #(<= (count %) (count word)))             ; Only words that are the same size or smaller
          (filter #(subset-word? % word))                   ; Only words that contains a subset of the characters
          (map child-trie)                                  ; Recursive call for each subset word
          (->AnagramTrie subset-word)))))


(defn AnagramTrie->Zipper
  [anagram-trie]
  (zip/zipper
    (partial instance? AnagramTrie)
    :children
    (fn [{:keys [word]} children]
      (->AnagramTrie word children))
    anagram-trie))


; From http://josf.info/blog/2014/04/14/seqs-of-clojure-zippers/#recursion-smell
(defn leafnodes
  "Return all leaf nodes in loc. "
  [loc]
  (filter (complement zip/branch?)                          ;filter only non-branch nodes
          (take-while (complement zip/end?)                 ;take until the :end
                      (iterate zip/next loc))))


(defn anagram-trie-paths
  [anagram-trie]
  (->> (leafnodes
         (AnagramTrie->Zipper anagram-trie))
       (map #(->> (zip/path %)
                  (map :word)
                  (rest)                                    ; The first one is always empty string because of arity-2 of anagram-trie
                  (into #{})))))


(defn example-dict
  []
  (-> "https://gist.githubusercontent.com/ericnormand/8c0ccc095edaa64eb8e00f861f70b02c/raw/01c33b3438bbab6bdd7e8dade55c1f5997ad8027/wordlist.txt"
      (slurp)
      (string/split-lines)))

(defn anagrams?
  [phrase1 phrase2]
  (= (frequencies (apply str phrase1))
     (frequencies (apply str phrase2))))

(defn anagrams
  [phrase dict]
  (let [word (-> (apply str phrase)                         ; We only care about the characters in the phrase
                 (string/lower-case)
                 (string/replace " " ""))
        dict (map #(-> (string/lower-case %)
                       (string/replace " " ""))
                  dict)]
    (->> (anagram-trie word dict)                           ; Create a trie of all phrases that are potential anagrams of the given phrase.
         (anagram-trie-paths)                               ; Take all the paths from root of trie to each leaf.
         (filter (partial anagrams? phrase))                ; Remove phrases that are not an actual anagram of the original word.
         (remove #(= (set phrase) (set %)))                 ; Remove the exact same phrase.
         (into #{}))))                                      ; Remove duplicates by putting them all into a set.

;; (anagrams "astronomer" (example-dict))

## Steve Miner.clj
(ns miner.ana2
  (:require [clojure.java.io :as io]
            [clojure.math.combinatorics :as mc]
            [clojure.string :as str]))

;; https://purelyfunctional.tv/issues/purelyfunctional-tv-newsletter-333-tool-rebel-readline/
;; Harder anagrams for phrases

;; My solution uses frequency maps of the characters of the dictionary words and compares
;; them to the frequency map of the source phrase.  As a first pass, I collect the words
;; that could individually match part of the phrase.  Then I test the longest words first
;; and search for a combination that works.  As a word matches, I save the whole remaining
;; word list on the "ana" stack.  If a sequence fails, I backtrack and start again with rest
;; of that word-list segment.  If a sequence succeeds, the anagram is taken from the first
;; of each word-list on the stack.  When pretty-printing the final results, I expand all the
;; permutations of the matching words.

;; Some executive decisions:
;;  1.  Don't allow anagram phrases to contain any words from the original phrase.
;;  2.  Skip dictionary words of fewer than three characters.
;;  3.  Add a few words to the source dictionary to make my test examples work.

(def eric-dict
  "https://gist.githubusercontent.com/ericnormand/8c0ccc095edaa64eb8e00f861f70b02c/raw/wordlist.txt")

;; A few extra words to make my test examples work
(def extra-words ["astronomer" "cinder" "fun" "master" "mint" "miser" "Roman" "severe"])

;; modified by SEM to add the extra words for my test examples
#_
(def sem-dict "resources/sem-wordlist.txt")


(defn word-digest [word]
  (-> word
      str/lower-case
      frequencies))

(defn phrase-digest [phrase]
  (-> phrase
      str/lower-case
      (str/replace #"[^a-z]" "")
      frequencies))

(defn phrase-words [phrase]
  (-> phrase
      str/lower-case
      (str/replace #"[^a-z]" " ")
      str/trim
      (str/split #" +")))

(defn ana-phrase? [phrase1 phrase2]
  (and (= (phrase-digest phrase1) (phrase-digest phrase2))
       (not= (sort (phrase-words phrase1)) (sort (phrase-words phrase2)))))

;; longest words first, = length by alphabetical order
(defn compare-word-length-alpha [^String a ^String b]
  (cond (and (nil? a) (nil? b)) 0
        (nil? a) -1
        (nil? b) 1
        (> (.length a) (.length b)) -1
        (< (.length a) (.length b)) 1
        :else (compare a b)))

;; assume text file with one word per line
;; Note: the file could contain capitalized and lowercase versions of the same spelling so we
;; need to dedupe after lower-casing.
(defn load-words [filename]
  (into []
        (comp (map str/lower-case) (dedupe))
        (with-open [rdr (io/reader filename)] (doall (line-seq rdr)))))

;; by default consider only words of 3 or more characters
(defn load-freqs
  ([filename] (load-freqs 3 [] filename))
  ([min extras filename]
   (reduce (fn [m w] (assoc m w (word-digest w)))
           {}
           (concat extras
                   (remove #(< (count %) min) (load-words filename))))))


;; subtract character counts from frequency map
;; dissoc character when count would be zero
;; returns nil for failure

(defn subtract-freq [working freq]
  (reduce-kv (fn [res ch cnt]
               (let [old (get res ch)]
                 (cond (nil? old) (reduced nil)
                       (= cnt old) (dissoc res ch)
                       (< cnt old) (update res ch - cnt)
                       :else (reduced nil))))
             working
             freq))

(defn add-freq [working freq]
  (merge-with + working freq))

(defn pprint-results [results]
  (when (seq results)
    (clojure.pprint/pprint (map #(str/join " " %) (mapcat mc/permutations results)))))


;; could be faster if you kept a letter count total to prune more words

;; ana is a vector of word-lists. Each word-list has a first which is the accepted word and
;; rest which has yet to be searched.


(defn search-freqs [phrase dict-freqs]
  (let [pdig (phrase-digest phrase)
        freqs (reduce dissoc dict-freqs (phrase-words phrase))
        xwords (sort compare-word-length-alpha
                     (filter #(subtract-freq pdig (get freqs %)) (keys freqs)))]
    (loop [ana [] remaining pdig ws xwords results []]
      (if (empty? remaining)
        ;; found an anagram, backtrack for more
        (recur (pop ana)
               (add-freq remaining (get freqs (first (peek ana))))
               (rest (peek ana))
               (conj results (map first ana)))
        (if (empty? ws)
          (if (empty? ana)
            ;; finished
            results
            ;; backtrack
            (recur (pop ana)
                   (add-freq remaining (get freqs (first (peek ana))))
                   (rest (peek ana))
                   results))
            (if-let [rem1 (subtract-freq remaining (get freqs (first ws)))]
              (recur (conj ana ws) rem1 (rest ws) results)
              (recur ana remaining (rest ws) results)))))))

(def default-freqs (load-freqs 3 extra-words eric-dict))

(defn anagram-lists
  ([phrase] (anagram-lists phrase default-freqs))
  ([phrase dict]
   (search-freqs phrase (if (map? dict) dict (load-freqs dict)))))

(defn pprint-anagram-phrases
  ([phrase] (pprint-anagram-phrases phrase default-freqs))
  ([phrase dict]  (pprint-results (anagram-lists phrase dict))))


(defn smoke-test-phrases []
  (assert (ana-phrase? "the classroom" "school master"))
  (assert (not (ana-phrase? "master school" "school master")))
  (assert (not (ana-phrase? "school master" "School  Master")))
  (assert (ana-phrase? "Astronomer" "Moon starer"))
  (assert (ana-phrase? "The Eyes" "They see"))
  (assert (ana-phrase? "Steve Miner" "event miser"))
  (assert (ana-phrase? "Steve Miner" "severe mint"))
  (assert (ana-phrase? "Eric Normand" "Roman cinder"))
  true)


(defn my-phrase-test []
  (let [results (set (anagram-lists "Steve Miner"))]
    (assert (not (contains? results ["even" "steven"])))
    (assert (contains? results '("event" "miser")))
    (assert (contains? results '("severe" "mint"))))
  true)

## wordlist.txt
a
ability
able
about
above
accept
according
account
across
act
action
activity
actually
add
address
administration
admit
adult
affect
after
again
against
age
agency
agent
ago
agree
agreement
ahead
air
all
allow
almost
alone
along
already
also
although
always
American
among
amount
analysis
and
animal
another
answer
any
anyone
anything
appear
apply
approach
area
argue
arm
around
arrive
art
article
artist
as
ask
assume
at
attack
attention
attorney
audience
author
authority
available
avoid
away
baby
back
bad
bag
ball
bank
bar
base
be
beat
beautiful
because
become
bed
before
begin
behavior
behind
believe
benefit
best
better
between
beyond
big
bill
billion
bit
black
blood
blue
board
body
book
born
both
box
boy
break
bring
brother
budget
build
building
business
but
buy
by
call
camera
campaign
can
cancer
candidate
capital
car
card
care
career
carry
case
catch
cause
cell
center
central
century
certain
certainly
chair
challenge
chance
change
character
charge
check
child
choice
choose
church
citizen
city
civil
claim
class
clear
clearly
close
coach
cold
collection
college
color
come
commercial
common
community
company
compare
computer
concern
condition
conference
Congress
consider
consumer
contain
continue
control
cost
could
country
couple
course
court
cover
create
crime
cultural
culture
cup
current
customer
cut
dark
data
daughter
day
dead
deal
death
debate
decade
decide
decision
deep
defense
degree
Democrat
democratic
describe
design
despite
detail
determine
develop
development
die
difference
different
difficult
dinner
direction
director
discover
discuss
discussion
disease
do
doctor
dog
door
down
draw
dream
drive
drop
drug
during
each
early
east
easy
eat
economic
economy
edge
education
effect
effort
eight
either
election
else
employee
end
energy
enjoy
enough
enter
entire
environment
environmental
especially
establish
even
evening
event
ever
every
everybody
everyone
everything
evidence
exactly
example
executive
exist
expect
experience
expert
explain
eye
face
fact
factor
fail
fall
family
far
fast
father
fear
federal
feel
feeling
few
field
fight
figure
fill
film
final
finally
financial
find
fine
finger
finish
fire
firm
first
fish
five
floor
fly
focus
follow
food
foot
for
force
foreign
forget
form
former
forward
four
free
friend
from
front
full
fund
funeral
future
game
garden
gas
general
generation
get
girl
give
glass
go
goal
good
government
great
green
ground
group
grow
growth
guess
gun
guy
hair
half
hand
hang
happen
happy
hard
have
he
head
health
hear
heart
heat
heavy
help
her
here
herself
high
him
himself
his
history
hit
hold
home
hope
hospital
hot
hotel
hour
house
how
however
huge
human
hundred
husband
I
idea
identify
if
image
imagine
impact
important
improve
in
include
including
increase
indeed
indicate
individual
industry
information
inside
instead
institution
interest
interesting
international
interview
into
investment
involve
issue
it
item
its
itself
job
join
just
keep
key
kid
kill
kind
kitchen
know
knowledge
land
language
large
last
late
later
laugh
law
lawyer
lay
lead
leader
learn
least
leave
left
leg
legal
less
let
letter
level
lie
life
light
like
likely
line
list
listen
little
live
local
long
look
lose
loss
lot
love
low
machine
magazine
main
maintain
major
majority
make
man
manage
management
manager
many
market
marriage
material
matter
may
maybe
me
mean
measure
media
medical
meet
meeting
member
memory
mention
message
method
middle
might
military
million
mind
minute
miss
mission
model
modern
moment
money
month
moon
more
morning
most
mother
mouth
move
movement
movie
Mr
Mrs
much
music
must
my
myself
name
nation
national
natural
nature
near
nearly
necessary
need
network
never
new
news
newspaper
next
nice
night
no
none
nor
north
not
note
nothing
notice
now
n't
number
occur
of
off
offer
office
officer
official
often
oh
oil
ok
old
on
once
one
only
onto
open
operation
opportunity
option
or
order
organization
other
others
our
out
outside
over
own
owner
page
pain
painting
paper
parent
part
participant
particular
particularly
partner
party
pass
past
patient
pattern
pay
peace
people
per
perform
performance
perhaps
period
person
personal
phone
physical
pick
picture
piece
place
plan
plant
play
player
PM
point
police
policy
political
politics
poor
popular
population
position
positive
possible
power
practice
prepare
present
president
pressure
pretty
prevent
price
private
probably
problem
process
produce
product
production
professional
professor
program
project
property
protect
prove
provide
public
pull
purpose
push
put
quality
question
quickly
quite
race
radio
raise
range
rate
rather
reach
read
ready
real
reality
realize
really
reason
receive
recent
recently
recognize
record
red
reduce
reflect
region
relate
relationship
religious
remain
remember
remove
report
represent
Republican
require
research
resource
respond
response
responsibility
rest
result
return
reveal
rich
right
rise
risk
road
rock
role
room
rule
run
safe
same
save
say
scene
school
science
scientist
score
sea
season
seat
second
section
security
see
seek
seem
sell
send
senior
sense
series
serious
serve
service
set
seven
several
sex
sexual
shake
share
she
shoot
short
shot
should
shoulder
show
side
sign
significant
similar
simple
simply
since
sing
single
sister
sit
site
situation
six
size
skill
skin
small
smile
so
social
society
soldier
some
somebody
someone
something
sometimes
son
song
soon
sort
sound
source
south
southern
space
speak
special
specific
speech
spend
sport
spring
staff
stage
stand
standard
star
stare
starer
start
state
statement
station
stay
step
still
stock
stop
store
story
strategy
street
strong
structure
student
study
stuff
style
subject
success
successful
such
suddenly
suffer
suggest
summer
support
sure
surface
system
table
take
talk
task
tax
teach
teacher
team
technology
television
tell
ten
tend
term
test
than
thank
that
the
their
them
themselves
then
theory
there
these
they
thing
think
third
this
those
though
thought
thousand
threat
three
through
throughout
throw
thus
time
to
today
together
tonight
too
top
total
tough
toward
town
trade
traditional
training
travel
treat
treatment
tree
trial
trip
trouble
true
truth
try
turn
TV
two
type
under
understand
unit
until
up
upon
us
use
usually
value
various
very
victim
view
violence
visit
voice
vote
wait
walk
wall
want
war
watch
water
way
we
weapon
wear
week
weight
well
west
western
what
whatever
when
where
whether
which
while
white
who
whole
whom
whose
why
wide
wife
will
win
wind
window
wish
with
within
without
woman
wonder
word
work
worker
world
worry
would
write
writer
wrong
yard
yeah
year
yes
yet
you
young
your
yourself
	(def dict
	(clojure.string/split-lines (slurp "https://gist.githubusercontent.com/ericnormand/8c0ccc095edaa64eb8e00f861f70b02c/raw/wordlist.txt")))

	(defn to-letters [phrase]
	(-> phrase
	(clojure.string/replace #"\s" "")
	(clojure.string/lower-case)
	sort))

	(defn pindex [phrase]
	(frequencies (to-letters phrase)))

	(def dindex (group-by pindex dict))

	(defn anagram? [p1 p2]
	(= (to-letters p1)
	(to-letters p2)))

	(defn subset? [i1 i2]
	(every? (fn [[l n]]
	(<= n (get i2 l 0)))
	i1))

	(defn minus [i1 i2]
	(merge-with - i1 i2))

	(defn iempty? [pi]
	(every? #(zero? (val %)) pi))

	(defn subsets-of [dict pi]
	(filter #(subset? (key %) pi) dict))

	(defn anagrams-of*
	[pi]
	(let [f (memoize (fn [f pi]
	(if (iempty? pi)
	[[]]
	(for [[px words] (subsets-of dindex pi)
	:let [inext (minus pi px)
	remains (f f inext)]
	word words
	remain remains]
	(into [word] remain)))))]
	(map #(clojure.string/join " " %) (f f pi))))

	(defn anagrams-of [phrase]
	(anagrams-of* (pindex phrase)))
	(ns clj-challenge.harder-anagrams
	(:require [clojure.set :as clj-set]
	[clojure.string :as cs]))

	;;
	;; Building blocks
	;;

	(def dictionary
	(-> (slurp "https://gist.githubusercontent.com/ericnormand/8c0ccc095edaa64eb8e00f861f70b02c/raw/01c33b3438bbab6bdd7e8dade55c1f5997ad8027/wordlist.txt")
	(cs/split-lines)
	(set)))

	(def alphabet ["a" "b" "c" "d" "e" "f" "g" "h" "i" "j" "k" "l" "m" "n" "o" "p" "q" "r" "s" "t" "u" "v" "w" "x" "y" "z"])

	(defn prime?
	"The goal of this problem isn't to find how to calculate prime numbers, so let's
	use Java's implementation."
	[x]
	(let [certainty 5] (.isProbablePrime (BigInteger/valueOf x) certainty)))

	(def primes
	"Lazy sequence of prime numbers"
	(filter prime? (range)))

	(def alphabet-primes
	"A map of each letter of the English alphabet mapped to a unique prime number"
	(into {} (map #(vector %1 %2) alphabet primes)))

	;;
	;; Utils
	;;

	(defn clean
	"Lower cases and removes all the spaces from a phrase"
	[phrase]
	(-> phrase
	(str)
	(cs/lower-case)
	(cs/replace #" \|\'" "")))

	(defn phrase->id
	"Converts a phrase to the product of the prime number values of each letter."
	[phrase]
	(->> (clean phrase)
	(map #(get alphabet-primes (cs/lower-case (str %))))
	(reduce * 1.0)))

	;;
	;; Let's find some anagrams
	;;

	(defn anagrams?
	"Checks if two phrases are anagrams"
	[phrase-one phrase-two]
	(let [p-one (clean phrase-one)
	p-two (clean phrase-two)]
	(= (phrase->id p-one) (phrase->id p-two))))

	(defn make-anagrams
	"Takes a phrase and returns a vector of sets of anagrams."
	[phrase]
	(let [phrase-id (phrase->id phrase)]
	(loop [dict dictionary
	result []]
	(if (empty? dict)
	result
	(let [maybe-anagrams (reduce (fn [m candidate]
	(if (= 0.0 (mod (:phrase-id m) (phrase->id candidate)))
	(-> m
	(update :anagrams (fn [coll] (conj coll candidate)))
	(assoc :phrase-id (/ (:phrase-id m) (phrase->id candidate))))
	m))
	{:phrase-id phrase-id
	:anagrams #{}}
	dict)]
	(cond
	;; We found a set of anagrams
	(= 1.0 (:phrase-id maybe-anagrams)) (recur (clj-set/difference dict (:anagrams maybe-anagrams))
	(conj result (:anagrams maybe-anagrams)))
	;; No anagrams found, we're done
	(= (:phrase-id maybe-anagrams) phrase-id) (recur [] result)
	;; Found some candidate anagrams, but not enough, this is where things
	;; don't go quite right and we remove some candidates that could group
	;; with others to form an anagram
	(> (:phrase-id maybe-anagrams) 1.0) (recur (clj-set/difference dict (:anagrams maybe-anagrams))
	result)))))))

	;;
	;; A few REPL expressions to try things out
	;;
	(comment
	(require 'clj-challenge.harder-anagrams :reload)
	(anagrams? "School master" "The classroom")
	(anagrams? "Astronomer" "Moon starer")
	(anagrams? "The eyes" "They see")
	(anagrams? "These are" "not anagrams")
	(make-anagrams "kcor")
	(make-anagrams "rock party")
	(prime? 13)
	(take 26 primes)
	alphabet-primes
	(phrase->id "rock")
	(phrase->id "somebody move")
	dictionary)
	(ns scratchpad.pftv
	(:require [clojure.string :as str]
	[clojure.test :refer :all]))

	(defn letters [s]
	(->> s
	str/lower-case
	seq
	(remove #{\space})
	sort))

	(defn anagram? [a b]
	(= (letters a) (letters b)))

	(deftest test-anagram?
	(is (anagram? "School master" "The Classroom"))
	(is (anagram? "Astronomer" "Moon starer"))
	(is (anagram? "The eyes" "They see")))

	(def dictionary-url "https://gist.githubusercontent.com/ericnormand/8c0ccc095edaa64eb8e00f861f70b02c/raw/01c33b3438bbab6bdd7e8dade55c1f5997ad8027/wordlist.txt")
	(def dictionary (str/split-lines (slurp dictionary-url)))

	(defn map-values [f m]
	(into {} (for [[k v] m] [k (f v)])))

	(defn letter-freq [s]
	(->> s
	str/lower-case
	seq
	(remove #{\space})
	(group-by identity)
	(map-values count)))

	(deftest test-anagrams-by-letter-freq
	(is (= (letter-freq "School master") (letter-freq "The Classroom")))
	(is (= (letter-freq "Astronomer") (letter-freq "Moon starer")))
	(is (= (letter-freq "The eyes") (letter-freq "They see"))))

	(defn word-in-alphabet? [alphabet word]
	(reduce
	(fn [alphabet letter]
	(if (pos? (or (alphabet letter) 0))
	(update alphabet letter dec)
	(reduced nil)))
	alphabet
	(seq word)))

	(defn empty-alphabet? [alphabet]
	(every? zero? (vals alphabet)))

	(deftest test-word-in-alphabet?
	(is (empty-alphabet?
	(word-in-alphabet? {\r 1 \e 1 \a 1 \l 1 \f 1 \u 1 \n 1}
	"funeral"))))

	;; inspired by https://stackoverflow.com/a/26076145/248948
	(defn generate-anagram-words [alphabet]
	(lazy-seq
	(apply concat
	(map (fn [word]
	(if-let [new-alphabet (word-in-alphabet? alphabet word)]
	(if (empty-alphabet? new-alphabet)
	[[word]]
	(map #(cons word %) (generate-anagram-words new-alphabet)))))
	dictionary))))

	(defn generate-anagram-phrases [phrase]
	(map #(str/join \space %)
	(generate-anagram-words (letter-freq phrase))))

	(defn random-words [num]
	(let [n (count dictionary)
	p (/ 1.0 n)]
	(shuffle (take num
	(random-sample p (cycle dictionary))))))

	(defn random-phrase [num-words]
	(str/join \space (random-words num-words)))

	(deftest test-generate-anagram-phrases
	(let [input "hello world"
	anagrams (generate-anagram-phrases input)]
	(is (pos? (count anagrams)))
	(is (every? #(anagram? input %) anagrams))))

	(run-tests)
	(defn anagrams? [words1 words2]
	(let [xf (comp sort
	(partial re-seq #"[^\s]")
	clojure.string/lower-case
	str)]
	(= (xf words1)
	(xf words2))))

	(filter (partial anagrams? "real fun")
	dictionary)
	;;=> ("funeral")

	(filter (partial anagrams? "stand under")
	dictionary)
	;;=> ("understand")
	;; Create a trie of all phrases that are anagrams of the given phrase.
	;; The paths from root of trie to each leaf is an anagram of the given phrase.

	(require '[clojure.string :as string]
	'[clojure.zip :as zip])


	(defn subset-word?
	"Is word1 a subset of word2? I.e.: Does word2 contain all the characters in word1?"
	[word1 word2]
	(let [word1-frequencies (frequencies word1)
	word2-frequencies (frequencies word2)]
	(every?
	(fn [[character frequency]]
	; word2 has at least as many of the same character as word1
	(<= frequency (get word2-frequencies character 0)))
	word1-frequencies)))


	(defn remove-letter
	"Remove the first occurrence of the letter from the word."
	[word letter]
	(let [[first-half last-half] (split-with (complement #{letter}) word)]
	(concat first-half (rest last-half))))


	(defn remove-letters
	"Remove the letters of word2 from word1."
	[word1 word2]
	(->> (reduce remove-letter word1 word2)
	(apply str)))


	(defrecord AnagramTrie [word children])

	(defn anagram-trie
	"Create a trie where every path from root to any leaf is made from strings that when concatenated
	together creates a substring of word (has no letters not in original word)."
	([word dict]
	(anagram-trie word "" dict))
	([word subset-word dict]
	(letfn [(child-trie [subset-word]
	(anagram-trie
	(remove-letters word subset-word) ; The recursive call only looks at the remaining characters of the word.
	subset-word
	(remove #{subset-word} dict)))] ; Remove this word from the dictionary so that it can't be reused.
	(->> dict
	(filter #(<= (count %) (count word))) ; Only words that are the same size or smaller
	(filter #(subset-word? % word)) ; Only words that contains a subset of the characters
	(map child-trie) ; Recursive call for each subset word
	(->AnagramTrie subset-word)))))


	(defn AnagramTrie->Zipper
	[anagram-trie]
	(zip/zipper
	(partial instance? AnagramTrie)
	:children
	(fn [{:keys [word]} children]
	(->AnagramTrie word children))
	anagram-trie))


	; From http://josf.info/blog/2014/04/14/seqs-of-clojure-zippers/#recursion-smell
	(defn leafnodes
	"Return all leaf nodes in loc. "
	[loc]
	(filter (complement zip/branch?) ;filter only non-branch nodes
	(take-while (complement zip/end?) ;take until the :end
	(iterate zip/next loc))))


	(defn anagram-trie-paths
	[anagram-trie]
	(->> (leafnodes
	(AnagramTrie->Zipper anagram-trie))
	(map #(->> (zip/path %)
	(map :word)
	(rest) ; The first one is always empty string because of arity-2 of anagram-trie
	(into #{})))))


	(defn example-dict
	[]
	(-> "https://gist.githubusercontent.com/ericnormand/8c0ccc095edaa64eb8e00f861f70b02c/raw/01c33b3438bbab6bdd7e8dade55c1f5997ad8027/wordlist.txt"
	(slurp)
	(string/split-lines)))

	(defn anagrams?
	[phrase1 phrase2]
	(= (frequencies (apply str phrase1))
	(frequencies (apply str phrase2))))

	(defn anagrams
	[phrase dict]
	(let [word (-> (apply str phrase) ; We only care about the characters in the phrase
	(string/lower-case)
	(string/replace " " ""))
	dict (map #(-> (string/lower-case %)
	(string/replace " " ""))
	dict)]
	(->> (anagram-trie word dict) ; Create a trie of all phrases that are potential anagrams of the given phrase.
	(anagram-trie-paths) ; Take all the paths from root of trie to each leaf.
	(filter (partial anagrams? phrase)) ; Remove phrases that are not an actual anagram of the original word.
	(remove #(= (set phrase) (set %))) ; Remove the exact same phrase.
	(into #{})))) ; Remove duplicates by putting them all into a set.

	;; (anagrams "astronomer" (example-dict))
	(ns miner.ana2
	(:require [clojure.java.io :as io]
	[clojure.math.combinatorics :as mc]
	[clojure.string :as str]))

	;; https://purelyfunctional.tv/issues/purelyfunctional-tv-newsletter-333-tool-rebel-readline/
	;; Harder anagrams for phrases

	;; My solution uses frequency maps of the characters of the dictionary words and compares
	;; them to the frequency map of the source phrase. As a first pass, I collect the words
	;; that could individually match part of the phrase. Then I test the longest words first
	;; and search for a combination that works. As a word matches, I save the whole remaining
	;; word list on the "ana" stack. If a sequence fails, I backtrack and start again with rest
	;; of that word-list segment. If a sequence succeeds, the anagram is taken from the first
	;; of each word-list on the stack. When pretty-printing the final results, I expand all the
	;; permutations of the matching words.

	;; Some executive decisions:
	;; 1. Don't allow anagram phrases to contain any words from the original phrase.
	;; 2. Skip dictionary words of fewer than three characters.
	;; 3. Add a few words to the source dictionary to make my test examples work.

	(def eric-dict
	"https://gist.githubusercontent.com/ericnormand/8c0ccc095edaa64eb8e00f861f70b02c/raw/wordlist.txt")

	;; A few extra words to make my test examples work
	(def extra-words ["astronomer" "cinder" "fun" "master" "mint" "miser" "Roman" "severe"])

	;; modified by SEM to add the extra words for my test examples
	#_
	(def sem-dict "resources/sem-wordlist.txt")


	(defn word-digest [word]
	(-> word
	str/lower-case
	frequencies))

	(defn phrase-digest [phrase]
	(-> phrase
	str/lower-case
	(str/replace #"[^a-z]" "")
	frequencies))

	(defn phrase-words [phrase]
	(-> phrase
	str/lower-case
	(str/replace #"[^a-z]" " ")
	str/trim
	(str/split #" +")))

	(defn ana-phrase? [phrase1 phrase2]
	(and (= (phrase-digest phrase1) (phrase-digest phrase2))
	(not= (sort (phrase-words phrase1)) (sort (phrase-words phrase2)))))

	;; longest words first, = length by alphabetical order
	(defn compare-word-length-alpha [^String a ^String b]
	(cond (and (nil? a) (nil? b)) 0
	(nil? a) -1
	(nil? b) 1
	(> (.length a) (.length b)) -1
	(< (.length a) (.length b)) 1
	:else (compare a b)))

	;; assume text file with one word per line
	;; Note: the file could contain capitalized and lowercase versions of the same spelling so we
	;; need to dedupe after lower-casing.
	(defn load-words [filename]
	(into []
	(comp (map str/lower-case) (dedupe))
	(with-open [rdr (io/reader filename)] (doall (line-seq rdr)))))

	;; by default consider only words of 3 or more characters
	(defn load-freqs
	([filename] (load-freqs 3 [] filename))
	([min extras filename]
	(reduce (fn [m w] (assoc m w (word-digest w)))
	{}
	(concat extras
	(remove #(< (count %) min) (load-words filename))))))


	;; subtract character counts from frequency map
	;; dissoc character when count would be zero
	;; returns nil for failure

	(defn subtract-freq [working freq]
	(reduce-kv (fn [res ch cnt]
	(let [old (get res ch)]
	(cond (nil? old) (reduced nil)
	(= cnt old) (dissoc res ch)
	(< cnt old) (update res ch - cnt)
	:else (reduced nil))))
	working
	freq))

	(defn add-freq [working freq]
	(merge-with + working freq))

	(defn pprint-results [results]
	(when (seq results)
	(clojure.pprint/pprint (map #(str/join " " %) (mapcat mc/permutations results)))))


	;; could be faster if you kept a letter count total to prune more words

	;; ana is a vector of word-lists. Each word-list has a first which is the accepted word and
	;; rest which has yet to be searched.


	(defn search-freqs [phrase dict-freqs]
	(let [pdig (phrase-digest phrase)
	freqs (reduce dissoc dict-freqs (phrase-words phrase))
	xwords (sort compare-word-length-alpha
	(filter #(subtract-freq pdig (get freqs %)) (keys freqs)))]
	(loop [ana [] remaining pdig ws xwords results []]
	(if (empty? remaining)
	;; found an anagram, backtrack for more
	(recur (pop ana)
	(add-freq remaining (get freqs (first (peek ana))))
	(rest (peek ana))
	(conj results (map first ana)))
	(if (empty? ws)
	(if (empty? ana)
	;; finished
	results
	;; backtrack
	(recur (pop ana)
	(add-freq remaining (get freqs (first (peek ana))))
	(rest (peek ana))
	results))
	(if-let [rem1 (subtract-freq remaining (get freqs (first ws)))]
	(recur (conj ana ws) rem1 (rest ws) results)
	(recur ana remaining (rest ws) results)))))))

	(def default-freqs (load-freqs 3 extra-words eric-dict))

	(defn anagram-lists
	([phrase] (anagram-lists phrase default-freqs))
	([phrase dict]
	(search-freqs phrase (if (map? dict) dict (load-freqs dict)))))

	(defn pprint-anagram-phrases
	([phrase] (pprint-anagram-phrases phrase default-freqs))
	([phrase dict] (pprint-results (anagram-lists phrase dict))))


	(defn smoke-test-phrases []
	(assert (ana-phrase? "the classroom" "school master"))
	(assert (not (ana-phrase? "master school" "school master")))
	(assert (not (ana-phrase? "school master" "School Master")))
	(assert (ana-phrase? "Astronomer" "Moon starer"))
	(assert (ana-phrase? "The Eyes" "They see"))
	(assert (ana-phrase? "Steve Miner" "event miser"))
	(assert (ana-phrase? "Steve Miner" "severe mint"))
	(assert (ana-phrase? "Eric Normand" "Roman cinder"))
	true)


	(defn my-phrase-test []
	(let [results (set (anagram-lists "Steve Miner"))]
	(assert (not (contains? results ["even" "steven"])))
	(assert (contains? results '("event" "miser")))
	(assert (contains? results '("severe" "mint"))))
	true)
	a
	ability
	able
	about
	above
	accept
	according
	account
	across
	act
	action
	activity
	actually
	add
	address
	administration
	admit
	adult
	affect
	after
	again
	against
	age
	agency
	agent
	ago
	agree
	agreement
	ahead
	air
	all
	allow
	almost
	alone
	along
	already
	also
	although
	always
	American
	among
	amount
	analysis
	and
	animal
	another
	answer
	any
	anyone
	anything
	appear
	apply
	approach
	area
	argue
	arm
	around
	arrive
	art
	article
	artist
	as
	ask
	assume
	at
	attack
	attention
	attorney
	audience
	author
	authority
	available
	avoid
	away
	baby
	back
	bad
	bag
	ball
	bank
	bar
	base
	be
	beat
	beautiful
	because
	become
	bed
	before
	begin
	behavior
	behind
	believe
	benefit
	best
	better
	between
	beyond
	big
	bill
	billion
	bit
	black
	blood
	blue
	board
	body
	book
	born
	both
	box
	boy
	break
	bring
	brother
	budget
	build
	building
	business
	but
	buy
	by
	call
	camera
	campaign
	can
	cancer
	candidate
	capital
	car
	card
	care
	career
	carry
	case
	catch
	cause
	cell
	center
	central
	century
	certain
	certainly
	chair
	challenge
	chance
	change
	character
	charge
	check
	child
	choice
	choose
	church
	citizen
	city
	civil
	claim
	class
	clear
	clearly
	close
	coach
	cold
	collection
	college
	color
	come
	commercial
	common
	community
	company
	compare
	computer
	concern
	condition
	conference
	Congress
	consider
	consumer
	contain
	continue
	control
	cost
	could
	country
	couple
	course
	court
	cover
	create
	crime
	cultural
	culture
	cup
	current
	customer
	cut
	dark
	data
	daughter
	day
	dead
	deal
	death
	debate
	decade
	decide
	decision
	deep
	defense
	degree
	Democrat
	democratic
	describe
	design
	despite
	detail
	determine
	develop
	development
	die
	difference
	different
	difficult
	dinner
	direction
	director
	discover
	discuss
	discussion
	disease
	do
	doctor
	dog
	door
	down
	draw
	dream
	drive
	drop
	drug
	during
	each
	early
	east
	easy
	eat
	economic
	economy
	edge
	education
	effect
	effort
	eight
	either
	election
	else
	employee
	end
	energy
	enjoy
	enough
	enter
	entire
	environment
	environmental
	especially
	establish
	even
	evening
	event
	ever
	every
	everybody
	everyone
	everything
	evidence
	exactly
	example
	executive
	exist
	expect
	experience
	expert
	explain
	eye
	face
	fact
	factor
	fail
	fall
	family
	far
	fast
	father
	fear
	federal
	feel
	feeling
	few
	field
	fight
	figure
	fill
	film
	final
	finally
	financial
	find
	fine
	finger
	finish
	fire
	firm
	first
	fish
	five
	floor
	fly
	focus
	follow
	food
	foot
	for
	force
	foreign
	forget
	form
	former
	forward
	four
	free
	friend
	from
	front
	full
	fund
	funeral
	future
	game
	garden
	gas
	general
	generation
	get
	girl
	give
	glass
	go
	goal
	good
	government
	great
	green
	ground
	group
	grow
	growth
	guess
	gun
	guy
	hair
	half
	hand
	hang
	happen
	happy
	hard
	have
	he
	head
	health
	hear
	heart
	heat
	heavy
	help
	her
	here
	herself
	high
	him
	himself
	his
	history
	hit
	hold
	home
	hope
	hospital
	hot
	hotel
	hour
	house
	how
	however
	huge
	human
	hundred
	husband
	I
	idea
	identify
	if
	image
	imagine
	impact
	important
	improve
	in
	include
	including
	increase
	indeed
	indicate
	individual
	industry
	information
	inside
	instead
	institution
	interest
	interesting
	international
	interview
	into
	investment
	involve
	issue
	it
	item
	its
	itself
	job
	join
	just
	keep
	key
	kid
	kill
	kind
	kitchen
	know
	knowledge
	land
	language
	large
	last
	late
	later
	laugh
	law
	lawyer
	lay
	lead
	leader
	learn
	least
	leave
	left
	leg
	legal
	less
	let
	letter
	level
	lie
	life
	light
	like
	likely
	line
	list
	listen
	little
	live
	local
	long
	look
	lose
	loss
	lot
	love
	low
	machine
	magazine
	main
	maintain
	major
	majority
	make
	man
	manage
	management
	manager
	many
	market
	marriage
	material
	matter
	may
	maybe
	me
	mean
	measure
	media
	medical
	meet
	meeting
	member
	memory
	mention
	message
	method
	middle
	might
	military
	million
	mind
	minute
	miss
	mission
	model
	modern
	moment
	money
	month
	moon
	more
	morning
	most
	mother
	mouth
	move
	movement
	movie
	Mr
	Mrs
	much
	music
	must
	my
	myself
	name
	nation
	national
	natural
	nature
	near
	nearly
	necessary
	need
	network
	never
	new
	news
	newspaper
	next
	nice
	night
	no
	none
	nor
	north
	not
	note
	nothing
	notice
	now
	n't
	number
	occur
	of
	off
	offer
	office
	officer
	official
	often
	oh
	oil
	ok
	old
	on
	once
	one
	only
	onto
	open
	operation
	opportunity
	option
	or
	order
	organization
	other
	others
	our
	out
	outside
	over
	own
	owner
	page
	pain
	painting
	paper
	parent
	part
	participant
	particular
	particularly
	partner
	party
	pass
	past
	patient
	pattern
	pay
	peace
	people
	per
	perform
	performance
	perhaps
	period
	person
	personal
	phone
	physical
	pick
	picture
	piece
	place
	plan
	plant
	play
	player
	PM
	point
	police
	policy
	political
	politics
	poor
	popular
	population
	position
	positive
	possible
	power
	practice
	prepare
	present
	president
	pressure
	pretty
	prevent
	price
	private
	probably
	problem
	process
	produce
	product
	production
	professional
	professor
	program
	project
	property
	protect
	prove
	provide
	public
	pull
	purpose
	push
	put
	quality
	question
	quickly
	quite
	race
	radio
	raise
	range
	rate
	rather
	reach
	read
	ready
	real
	reality
	realize
	really
	reason
	receive
	recent
	recently
	recognize
	record
	red
	reduce
	reflect
	region
	relate
	relationship
	religious
	remain
	remember
	remove
	report
	represent
	Republican
	require
	research
	resource
	respond
	response
	responsibility
	rest
	result
	return
	reveal
	rich
	right
	rise
	risk
	road
	rock
	role
	room
	rule
	run
	safe
	same
	save
	say
	scene
	school
	science
	scientist
	score
	sea
	season
	seat
	second
	section
	security
	see
	seek
	seem
	sell
	send
	senior
	sense
	series
	serious
	serve
	service
	set
	seven
	several
	sex
	sexual
	shake
	share
	she
	shoot
	short
	shot
	should
	shoulder
	show
	side
	sign
	significant
	similar
	simple
	simply
	since
	sing
	single
	sister
	sit
	site
	situation
	six
	size
	skill
	skin
	small
	smile
	so
	social
	society
	soldier
	some
	somebody
	someone
	something
	sometimes
	son
	song
	soon
	sort
	sound
	source
	south
	southern
	space
	speak
	special
	specific
	speech
	spend
	sport
	spring
	staff
	stage
	stand
	standard
	star
	stare
	starer
	start
	state
	statement
	station
	stay
	step
	still
	stock
	stop
	store
	story
	strategy
	street
	strong
	structure
	student
	study
	stuff
	style
	subject
	success
	successful
	such
	suddenly
	suffer
	suggest
	summer
	support
	sure
	surface
	system
	table
	take
	talk
	task
	tax
	teach
	teacher
	team
	technology
	television
	tell
	ten
	tend
	term
	test
	than
	thank
	that
	the
	their
	them
	themselves
	then
	theory
	there
	these
	they
	thing
	think
	third
	this
	those
	though
	thought
	thousand
	threat
	three
	through
	throughout
	throw
	thus
	time
	to
	today
	together
	tonight
	too
	top
	total
	tough
	toward
	town
	trade
	traditional
	training
	travel
	treat
	treatment
	tree
	trial
	trip
	trouble
	true
	truth
	try
	turn
	TV
	two
	type
	under
	understand
	unit
	until
	up
	upon
	us
	use
	usually
	value
	various
	very
	victim
	view
	violence
	visit
	voice
	vote
	wait
	walk
	wall
	want
	war
	watch
	water
	way
	we
	weapon
	wear
	week
	weight
	well
	west
	western
	what
	whatever
	when
	where
	whether
	which
	while
	white
	who
	whole
	whom
	whose
	why
	wide
	wife
	will
	win
	wind
	window
	wish
	with
	within
	without
	woman
	wonder
	word
	work
	worker
	world
	worry
	would
	write
	writer
	wrong
	yard
	yeah
	year
	yes
	yet
	you
	young
	your
	yourself