Skip to content

Instantly share code, notes, and snippets.

@jdar
Created September 17, 2009 16:30
Show Gist options
  • Save jdar/188565 to your computer and use it in GitHub Desktop.
Save jdar/188565 to your computer and use it in GitHub Desktop.
## words/word.py
import pdb
import re
import sys

import psycopg2
from psycopg2.extras import DictCursor
from psycopg2.extensions import adapt

import scriptutil as SU
try:
    db = psycopg2.connect(database="scrabble", user="python", password="python")
    cur = db.cursor(cursor_factory=DictCursor)
    # One-time schema setup, kept for reference:
    # cur.execute("CREATE TABLE words (name varchar, probability int, frequency int, catches varchar, hangs varchar);")
except psycopg2.Error:
    # BUG FIX: was a bare `except:` (which would also swallow KeyboardInterrupt
    # etc.), used the Python 2 print statement, and called the typo'd
    # `sys.ext()` from a `sys` module that was never imported.
    print("I am unable to connect to the database")
    sys.exit()

# NOTE(review): the original second try/except here evaluated two bare string
# literals ("trying to find a wordlist reference file" / the failure message)
# -- a no-op whose `try` body could never raise.  Removed; word-list lookups
# happen directly against ./words/*.txt inside class Word below.
class Word:
    """Legal scrabble words.

    A word is legal when it:
      1) appears in the official word lists, and
      2) has point/frequency attributes that are derived --- not from its
         own letters --- but from the point/prob sums of all the possible
         _derivative_ scrabble-legal words (its "catches").

    Raw data comes from the official scrabble lists (downloadable from
    hasbro into "./*.txt"); other dependencies: special word lists in
    "./words/*.txt" and python-scriptutil.py.
    """

    # Tile alphabet; "_" is the blank tile.
    letters = "_ a b c d e f g h i j k l m n o p q r s t u v w x y z".split()
    # Number of tiles of each letter in a standard set.
    frequencies = (2, 9, 2, 2, 4, 12, 2, 3, 2, 9, 1, 1, 4, 2, 6, 8, 2, 1, 6, 4, 6, 4, 2, 2, 1, 2, 1)
    # Face value of each letter.
    points = (0, 1, 3, 3, 2, 1, 4, 2, 4, 1, 8, 5, 1, 3, 1, 1, 3, 10, 1, 1, 1, 1, 4, 4, 8, 4, 10)
    letter_frequencies = dict(zip(letters, frequencies))
    # BUG FIX: this was dict(zip(letters, frequencies)), which made the
    # letter->points table identical to the letter->frequencies table and
    # left `points` unused.  Presumably a copy-paste slip -- confirm against
    # the intended "probability" semantics.
    letter_points = dict(zip(letters, points))

    def __init__(self, name, points=None, frequency=None, catches=None, hangs=None):
        """Build a word; the optional columns mirror a `words` table row.

        BUG FIX: the original guarded every assignment with
        `if <arg> is None:`, so any value actually supplied was silently
        dropped and the attribute was never set at all.  Assign
        unconditionally instead.

        NOTE(review): the `hangs` attribute shadows the hangs() method on
        instances (true of the original too, whenever `hangs` was None);
        reach the method via Word.hangs(instance).
        """
        self.name = name
        self.points = points
        self.frequency = frequency
        self.catches = catches
        self.hangs = hangs

    def calculate_probability(self):
        """Sum the per-letter point values over the characters of self.catches.

        NOTE(review): self.catches is stored as a space-joined string (see
        new()); a space character has no entry in letter_points, so
        multi-catch words would raise KeyError -- confirm intended input.
        """
        # BUG FIX: class attributes are not visible in method scope; the
        # original's bare `letter_points` raised NameError at call time.
        return sum(Word.letter_points[letter] for letter in self.catches)

    def calculate_frequencies(self):
        """Sum the per-letter tile frequencies over the characters of
        self.catches (same caveat about spaces as calculate_probability).
        """
        # BUG FIX: qualified the class-attribute lookup, as above.
        return sum(Word.letter_frequencies[letter] for letter in self.catches)

    @staticmethod
    def count(finder_sql=""):
        """Rails-style counter: number of rows matching the SQL fragment.

        NOTE(review): finder_sql is interpolated verbatim into the query --
        trusted callers only.
        """
        cur.execute("select * from words {0}".format(finder_sql))
        return cur.rowcount

    def hangs(self):
        """The one-letter-shorter word (final letter dropped)."""
        return self.name[0:-1]

    @staticmethod
    def find_or_create_all_by_name(names):
        """Route finder calls to the database AND/OR word lists ... or create.

        Returns the newly created words followed by the pre-existing matches.

        Alternative bulk strategy (not implemented):
            cur.copy_in( ... scratch ... )
            insert into words select * from
                (select distinct * from scratch) uniq
                where not exists
                (select 1 from words where words.name = uniq.name);
        """
        # MYTODO escape names ... learning exercise.
        # NOTE(review): a one-element `names` renders as ('x',) -- the
        # trailing comma is invalid SQL.
        matches = list(Word.find_all("""where words.name in {0}""".format(tuple(names))))
        unmatched = set(names) - set(w.name for w in matches)
        # BUG FIX: removed a leftover pdb.set_trace() debugging breakpoint.
        invalid_words = []
        created_words = []
        for n in unmatched:
            w = Word(n)
            try:
                w.new()
                created_words.append(w)
            except NameError:
                invalid_words.append(n)
        # MYTODO: hose invalid words over to the output somehow ... through a
        # logger, if nothing else.
        if created_words:
            db.commit()
        # BUG FIX: the original returned `created_words.extend(matches) or []`;
        # list.extend() returns None, so callers always received [].
        created_words.extend(matches)
        return created_words

    def new(self):
        """Vaguely rails-AR-like new().

        Validates, find-greps for catches, and pre-commits this instance to
        the db (the caller is responsible for db.commit()).

        Raises NameError when the word is not found in the local lists.

        #MYTODO: profiling. Is it worth it to split up the two grep searches?
        """
        self.validate_against_local_lists()
        grepd_catches = self.fgrep_catches_in_directories(("./words",))
        flat_catches = []
        for c in grepd_catches:
            flat_catches.extend(c)
        self.catches = " ".join(set(flat_catches))
        # BUG FIX / security: the original interpolated a Python tuple repr
        # straight into the INSERT statement; use driver-side parameter
        # binding instead.
        cur.execute(
            "INSERT INTO words VALUES (%s, %s, %s, %s, %s)",
            (
                self.name,
                self.calculate_probability(),
                self.calculate_frequencies(),
                self.catches,
                # hangs: the two one-letter-shorter derivatives.
                self.name[1:] + " " + self.name[:-1],
            ),
        )

    def validate_against_local_lists(self, lists=(".",)):
        """Raise NameError unless the word appears in a local .txt word list.

        This will also catch all the weird things people might throw.
        Like numbers.
        """
        if [self.name] not in self.fgrep_in_directories(lists):
            # BUG FIX: modern raise syntax (was Python-2 `raise NameError, "..."`);
            # also dropped the unreachable `pass` after the raise.
            raise NameError("not in ./words/*.txt. Look again, shall we?")

    def fgrep_in_directories(self, directories=(".",), search_string=None):
        """Grep for this word (or an explicit `search_string`) in the .txt
        files under each directory; returns one match-list per directory
        that produced any hits.
        """
        if search_string is None:
            search_tuple = (("^{0}$".format(self.name), re.I),)
        else:
            search_tuple = ((search_string, re.M),)
        results = [
            SU.ffindgrep(
                directory,
                namefs=(lambda s: s.endswith('.txt'),),
                regexl=search_tuple,
            ).values()
            for directory in directories
        ]
        # BUG FIX: `len(catch) is not 0` compared by object identity, which
        # is implementation-defined for ints; test emptiness with != .
        return [list(catch)[0] for catch in results if len(catch) != 0]

    def fgrep_catches_in_directories(self, directories=(".",)):
        """Find all _catches_: legal words formed by appending or prepending
        one letter to this word.

        NOTE(review): the `directories` parameter is accepted but ignored --
        the original hard-coded ("./words",) in both searches; preserved.
        """
        catches = []
        catches.extend(self.fgrep_in_directories(("./words",), "^{0}.$".format(self.name)))
        catches.extend(self.fgrep_in_directories(("./words",), "^.{0}$".format(self.name)))
        return catches

    @staticmethod
    def find_all(finder_sql=""):
        """Rails-style finder: every Word row matching the SQL fragment.

        NOTE(review): finder_sql is interpolated verbatim -- trusted callers
        only (see find_or_create_all_by_name's MYTODO about escaping).
        """
        cur.execute("select * from words {0}".format(finder_sql))
        # Return a real list (not a lazy map) so results can be traversed
        # more than once by callers.
        return [Word(*properties) for properties in cur.fetchall()]
def flatten(l):
    """Recursively flatten arbitrarily nested lists into one flat list;
    non-list values are returned unchanged.

    BUG FIX: the original tested `l is []`, which is always False (identity
    comparison against a fresh list object), and used `sum(map(flatten, l))`,
    which starts from 0 and therefore cannot concatenate list results (and
    silently ADDS numeric leaves instead of collecting them).
    """
    if not isinstance(l, list):
        return l
    flattened = []
    for item in l:
        result = flatten(item)
        if isinstance(result, list):
            flattened.extend(result)
        else:
            flattened.append(result)
    return flattened
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment