# kylemcdonald/yinxyz.py

## yinxyz.py
# run this as:
# python yinxyz.py > pairs.txt
# and in another shell:
# sort -rn pairs.txt | head -500 | cut -f2-

# word list is from http://norvig.com/ngrams/count_1w.txt
# Minimum length of the inner word y (original filter: len(x) > 4).
MIN_INNER_LEN = 5
# Minimum length of the outer word xyz (original filter: len(x) > 6).
MIN_OUTER_LEN = 7


def load_counts(path='count_1w.txt'):
    """Read a ``word<TAB>count`` file and return a dict of word -> int count.

    Blank lines are skipped. Any other line that does not hold exactly two
    tab-separated fields, or whose count is not an integer, raises
    ValueError.
    """
    counts = {}
    # 'with' closes the file even if a malformed line raises.
    with open(path) as word_file:
        for line in word_file:
            line = line.strip()
            if not line:
                continue
            word, freq = line.split('\t')
            counts[word] = int(freq)
    return counts


def find_pairs(count):
    """Yield ``(score, y, xyz)`` for every word y found strictly inside xyz.

    ``count`` maps each word to its frequency. y must have at least
    MIN_INNER_LEN characters, xyz at least MIN_OUTER_LEN, and y must occur
    in xyz with at least one character on either side. ``score`` is
    ``count[y] * count[xyz]``.

    Outer words are visited shortest first; for each outer word the inner
    words are yielded shortest first, ties keeping the order in which
    ``count`` iterates.
    """
    words = sorted(count, key=len)
    # Position of every candidate inner word in the length-sorted list. It
    # doubles as the membership table and as the sort key for the output.
    rank = {}
    for position, word in enumerate(words):
        if len(word) >= MIN_INNER_LEN:
            rank[word] = position

    for xyz in words:
        if len(xyz) < MIN_OUTER_LEN:
            continue
        # Dropping the first and last character forces a non-empty prefix
        # and suffix around any match.
        mid = xyz[1:-1]
        # Look up each substring of mid instead of testing every known word
        # against mid: a word has far fewer substrings than the list has
        # entries. The set keeps a y that occurs twice from being reported
        # twice.
        found = set()
        for start in range(len(mid) - MIN_INNER_LEN + 1):
            for end in range(start + MIN_INNER_LEN, len(mid) + 1):
                piece = mid[start:end]
                if piece in rank:
                    found.add(piece)
        for y in sorted(found, key=rank.get):
            yield count[y] * count[xyz], y, xyz


def main():
    """Print one ``score<TAB>y<TAB>xyz`` line per pair to stdout."""
    for score, y, xyz in find_pairs(load_counts()):
        # The one-argument call form prints the same on Python 2 and 3.
        print('\t'.join([str(score), y, xyz]))


if __name__ == '__main__':
    main()