Skip to content

Instantly share code, notes, and snippets.

@cincodenada
Created October 3, 2018 01:18
Show Gist options
  • Save cincodenada/e4d685451eb1bbd124f6eccf08bdd6ec to your computer and use it in GitHub Desktop.
Save cincodenada/e4d685451eb1bbd124f6eccf08bdd6ec to your computer and use it in GitHub Desktop.
A quick script to find words whose letters are a subset of another word's letters (counting repeated letters, ignoring order). Designed to find country names that contain other countries, but should be generally applicable.
import fileinput
import re
from collections import defaultdict
class Word:
    """A word together with a tally of the letters A-Z it contains.

    Attributes:
        word: The text exactly as it was given.
        allcaps: ``word`` upper-cased with every character outside A-Z
            removed.
        counts: Dict mapping each lower-case letter to its number of
            occurrences in ``allcaps``.
    """

    def __init__(self, word):
        """Store *word* and eagerly compute its letter tally and signature."""
        self.word = word
        # Upper-case first, then drop everything that is not A-Z.
        self.allcaps = re.sub("[^A-Z]", "", word.upper())
        self.countified()

    def countified(self):
        """Return the letter signature, e.g. ``"a1b2"`` for ``"bab"``.

        The signature lists each letter in sorted order followed by its
        count, so two words made of the same letters share a signature.
        It is computed (along with ``self.counts``) on the first call and
        reused afterwards.
        """
        try:
            return self._countified
        except AttributeError:
            pass  # first call: fall through and build the cache

        tally = {}
        for letter in self.allcaps.lower():
            tally[letter] = tally.get(letter, 0) + 1
        self.counts = tally

        self._countified = "".join(
            letter.lower() + str(tally[letter]) for letter in sorted(tally)
        )
        return self._countified

    def contains(self, other):
        """Tell whether every letter of *other* is available in this word.

        Returns:
            ``None`` when *other* compares equal to ``self``; otherwise
            ``True`` if each letter of *other* occurs here at least as many
            times as it does in *other*, else ``False``.
        """
        if self == other:
            return None
        available = self.counts
        return all(
            letter in available and needed <= available[letter]
            for letter, needed in other.counts.items()
        )

    def __str__(self):
        """Return the original, unmodified text."""
        return self.word
# --- Read the input -------------------------------------------------------
# Group the input words by letter signature, so that words made of exactly
# the same letters (anagrams) end up in the same list.
unscramble = defaultdict(list)
allcaps = []
for ctry in fileinput.input():
    word = Word(ctry.replace('\n', ''))
    if not word.allcaps:
        # A line with no letters A-Z (e.g. a blank line) has an empty letter
        # tally, which every other word trivially "contains"; it would then
        # show up inside every spoiler and inflate every count. Skip it.
        continue
    allcaps.append(word.allcaps)
    unscramble[word.countified()].append(word)

# --- Find containment pairs -----------------------------------------------
# Only the first word of each signature group is compared; the other members
# of a group have identical letter tallies.
representatives = [group[0] for group in unscramble.values()]

# subwords: container signature -> representatives whose letters it contains.
subwords = defaultdict(list)
# included_count: word text -> number of containers that contain it.
included_count = defaultdict(int)
for container in representatives:
    for subword in representatives:
        # contains() returns None when comparing a word with itself, which is
        # falsy, so a word is never reported as containing itself.
        if container.contains(subword):
            subwords[container.countified()].append(subword)
            included_count[subword.word] += 1

# --- Report ---------------------------------------------------------------
# Containers with the most contained words come first. reversed(sorted(...))
# is kept on purpose: sorted(..., reverse=True) would order ties differently.
containers = reversed(sorted(subwords, key=lambda k: len(subwords[k])))
for k in containers:
    print("[spoiler={} ({})]".format(
        ','.join(w.word for w in unscramble[k]),
        len(subwords[k]),
    ))
    print('\n'.join(w.word for w in subwords[k]))
    # No newline after the closing tag: the next line of output follows it
    # directly on the same line.
    print("[/spoiler]", end="")

# Finally list every contained word, least frequently contained first.
included = sorted(included_count, key=lambda k: included_count[k])
for i in included:
    print("{} ({})".format(i, included_count[i]))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment