Skip to content

Instantly share code, notes, and snippets.

@shadda
Last active January 23, 2022 15:08
Show Gist options
  • Save shadda/f0b974108eae8afeb1ca80598e68b49e to your computer and use it in GitHub Desktop.
Save shadda/f0b974108eae8afeb1ca80598e68b49e to your computer and use it in GitHub Desktop.
#!/usr/bin/env python3
import sys
import re
import json
import urllib.request
# Location of the word list to download; the payload is a JSON list.
dictionary_url = "https://gist.githubusercontent.com/shadda/bd47678f63b421d070c13ee16eaeb1c9/raw/e532576f1919c13652605897e2898139b7835aa3/dictionary.json"

# Regex used to weed invalid entries out of the dictionary: it matches
# five consecutive ASCII letters.
bchars = r'[a-zA-Z]{5}'

# Letters known NOT to occur anywhere in the word.
nletters = 'welshbngovadmxu'

# Letters known to occur somewhere in the word.
wletters = 'ip'

# Positional template for the word, one fragment per letter slot.
# \w stands in for a slot whose letter is still unknown; adjust a fragment
# once the letter in that position is known.
wpattern = (
    r'^'
    r'\w'  # slot 1
    r'\w'  # slot 2
    r'\w'  # slot 3
    r'\w'  # slot 4
    r'\w'  # slot 5
    r'$'
)
def clean_words(raw, shape):
    """Return the usable words from *raw*, lower-cased, de-duplicated and sorted.

    An entry is kept only if it is a string of exactly five characters that
    matches the regex *shape*. Non-string entries (numbers, nulls, ...) are
    skipped instead of raising ``TypeError`` in ``len()``.
    """
    shape_re = re.compile(shape)
    unique = {
        entry.lower()
        for entry in raw
        if isinstance(entry, str) and len(entry) == 5 and shape_re.match(entry)
    }
    return sorted(unique)


def fetch_words(url, shape, timeout=30):
    """Download the JSON word list at *url* and return its cleaned entries.

    The response body is parsed as JSON and passed through ``clean_words``.
    *timeout* is in seconds and bounds the network wait so that an
    unresponsive host cannot hang the script forever. Network and JSON
    decoding errors propagate to the caller.
    """
    with urllib.request.urlopen(url, timeout=timeout) as rp:
        raw = json.load(rp)
    return clean_words(raw, shape)


def exclude_letters(words, letters):
    """Return the words that contain none of the characters in *letters*."""
    banned = set(letters)
    return [word for word in words if banned.isdisjoint(word)]


def require_letters(words, letters):
    """Return the words that contain every character in *letters*."""
    return [word for word in words if all(ch in word for ch in letters)]


def match_pattern(words, pattern):
    """Return the words that the regex *pattern* matches from their start."""
    pattern_re = re.compile(pattern)
    return [word for word in words if pattern_re.match(word)]


# Run the pipeline only when executed as a script, so importing this file
# does not trigger a network request. The intermediate results keep their
# original global names.
if __name__ == "__main__":
    # Valid dictionary words: lower-cased, duplicates removed, sorted
    data = fetch_words(dictionary_url, bchars)
    # Drop words that contain letters we know aren't in the word
    words = exclude_letters(data, nletters)
    # Keep words that definitely contain the letters we're interested in
    round2 = require_letters(words, wletters)
    # Keep words that fit the fixed letter positions we know
    round3 = match_pattern(round2, wpattern)
    # Any additional expressions
    round4 = [
        x for x in round3
        if x[1] != 'i' and x[3] != 'i' and x[4] != 'p' and x[2] != 'p'
    ]
    print(round4)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment