asimihsan/!stdout

## !stdout
HOMOGENAKA
SPALLYILL
ENEUCHSTROKY
BOBACHUELESS
SYNAPTEPIERAGE
HUMBLEENCRATY
EMITTERSOMEDAY
BUGLETINNLESS
BEAUPAINTER
TOXEMIASHALLOW
SANDANBOXER
WARMFULTEASER
TERSESTINGY
MALEOBEGLOZE
KENNERCHOLLER
PROPLEXUNHUMAN
ATROPICGABY
SCENICSURGE
OPTICASHY
BETROTHONRUSH

## nsa_codewords.py
#!/usr/bin/env python

import contextlib
import cPickle as pickle
import gzip
import os
import random
import re
import sys

import nltk


def get_nsa_codewords(pickle_cache_file="nsa_codewords.pickle.gz",
                      token_min_length=3, token_max_length=7,
                      total_codewords=20):
    """Generate NSA-style code words.

    This function requires NLTK [1] to be installed.

    An NSA-style codeword is typically two singular nouns (NN), for
    example "HOWLERMONKEY". Less frequently it is an adjective (JJ)
    then a singular noun, for example "EGOTISTICALGIRAFFE" [2].
    Hence only two types of codeword are generated:

    1.   (NN, NN)
    2.   (JJ, NN)

    If no adjectives are available, only (NN, NN) codewords are
    generated.

    This is a generator: no work (including building or loading the
    cache) happens until it is first iterated.

    The first time you run this function it will take a long time to
    complete. However it will build a pickled cache file and re-use
    that to make subsequent runs faster.

    The cache file does not record the token length bounds it was
    built with. Words loaded from an existing cache are therefore
    re-checked against token_min_length and token_max_length, so
    generated tokens always respect the requested bounds. Words
    outside the bounds the cache was originally built with cannot be
    recovered this way; delete the cache file to rebuild it with
    wider bounds.

    Using NLTK's default part-of-speech (POS) tagger, a Maximum
    Entropy (maxent) tagger trained on the Penn Treebank corpus,
    isn't ideal. This is because we're passing in single words as-is
    without the context of a surrounding sentence, whereas the model
    is trained on sentences. [3] [4]

    param: pickle_cache_file: path to file to cache lists of
    adjectives and nouns. The file is unpickled, so it must come
    from a trusted source.
    param: token_min_length: minimum (inclusive) length of a token
    in a code word.
    param: token_max_length: maximum (inclusive) length of a token
    in a code word.
    param: total_codewords: total number of codewords to generate.

    yields: upper-case codeword strings, total_codewords of them.

    raises: IndexError (from random.choice) while iterating if no
    nouns are available.

    References

    [1] http://nltk.org/

    [2] NSA Codewords
    http://cpunks.wordpress.com/2013/10/12/nsa-codewords

    [3] A Maximum Entropy Model for Part-of-Speech Tagging
    http://acl.ldc.upenn.edu/W/W96/W96-0213.pdf

    [4] Learning to Classify Text, The NLTK Book
    http://nltk.org/book/ch06.html
    """

    if os.path.isfile(pickle_cache_file):
        # SECURITY: unpickling can execute arbitrary code. Only point
        # pickle_cache_file at a cache written by this function.
        with contextlib.closing(gzip.open(pickle_cache_file,
                                          "rb")) as f_in:
            words = pickle.load(f_in)

        # The cache may have been built with different bounds, so
        # re-apply the current ones instead of trusting it blindly.
        def in_bounds(word):
            return token_min_length <= len(word) <= token_max_length

        nouns = [word for word in words["nouns"] if in_bounds(word)]
        adjectives = [word for word in words["adjectives"]
                      if in_bounds(word)]
    else:
        # Compile once; the pattern is the same for every line.
        token_pattern = re.compile("^[a-z]{%s,%s}$" %
                                   (token_min_length,
                                    token_max_length))
        nouns = []
        adjectives = []
        with open("/usr/share/dict/words") as f_in:
            for line in f_in:
                word = line.strip()
                if not token_pattern.match(word):
                    continue
                # Tag the word on its own (no sentence context).
                tag = nltk.pos_tag([word])[0][1]
                if tag == "NN":
                    nouns.append(word)
                elif tag == "JJ":
                    adjectives.append(word)
        with contextlib.closing(gzip.open(pickle_cache_file,
                                          "wb")) as f_out:
            # -1 selects the highest pickle protocol available.
            pickle.dump({"nouns": nouns, "adjectives": adjectives},
                        f_out, -1)

    # range (rather than xrange) works on both Python 2 and 3.
    for _ in range(total_codewords):
        # random.random() is always drawn first so that the sequence
        # of random calls matches the original for a seeded run.
        if random.random() > 0.5 or not adjectives:
            first = random.choice(nouns)
        else:
            first = random.choice(adjectives)
        yield (first + random.choice(nouns)).upper()


if __name__ == "__main__":
    # Script entry point: print each generated codeword on its own
    # line of standard output, using the generator's defaults.
    sys.stdout.writelines("%s\n" % word
                          for word in get_nsa_codewords())
	HOMOGENAKA
	SPALLYILL
	ENEUCHSTROKY
	BOBACHUELESS
	SYNAPTEPIERAGE
	HUMBLEENCRATY
	EMITTERSOMEDAY
	BUGLETINNLESS
	BEAUPAINTER
	TOXEMIASHALLOW
	SANDANBOXER
	WARMFULTEASER
	TERSESTINGY
	MALEOBEGLOZE
	KENNERCHOLLER
	PROPLEXUNHUMAN
	ATROPICGABY
	SCENICSURGE
	OPTICASHY
	BETROTHONRUSH
	#!/usr/bin/env python

	import contextlib
	import cPickle as pickle
	import gzip
	import os
	import random
	import re
	import sys

	import nltk


	def get_nsa_codewords(pickle_cache_file="nsa_codewords.pickle.gz",
	                      token_min_length=3, token_max_length=7,
	                      total_codewords=20):
	    """Generate NSA-style code words.

	    This function requires NLTK [1] to be installed.

	    An NSA-style codeword is typically two singular nouns (NN), for
	    example "HOWLERMONKEY". Less frequently it is an adjective (JJ)
	    then a singular noun, for example "EGOTISTICALGIRAFFE" [2].
	    Hence only two types of codeword are generated:

	    1.   (NN, NN)
	    2.   (JJ, NN)

	    If no adjectives are available, only (NN, NN) codewords are
	    generated.

	    This is a generator: no work (including building or loading the
	    cache) happens until it is first iterated.

	    The first time you run this function it will take a long time to
	    complete. However it will build a pickled cache file and re-use
	    that to make subsequent runs faster.

	    The cache file does not record the token length bounds it was
	    built with. Words loaded from an existing cache are therefore
	    re-checked against token_min_length and token_max_length, so
	    generated tokens always respect the requested bounds. Words
	    outside the bounds the cache was originally built with cannot be
	    recovered this way; delete the cache file to rebuild it with
	    wider bounds.

	    Using NLTK's default part-of-speech (POS) tagger, a Maximum
	    Entropy (maxent) tagger trained on the Penn Treebank corpus,
	    isn't ideal. This is because we're passing in single words as-is
	    without the context of a surrounding sentence, whereas the model
	    is trained on sentences. [3] [4]

	    param: pickle_cache_file: path to file to cache lists of
	    adjectives and nouns. The file is unpickled, so it must come
	    from a trusted source.
	    param: token_min_length: minimum (inclusive) length of a token
	    in a code word.
	    param: token_max_length: maximum (inclusive) length of a token
	    in a code word.
	    param: total_codewords: total number of codewords to generate.

	    yields: upper-case codeword strings, total_codewords of them.

	    raises: IndexError (from random.choice) while iterating if no
	    nouns are available.

	    References

	    [1] http://nltk.org/

	    [2] NSA Codewords
	    http://cpunks.wordpress.com/2013/10/12/nsa-codewords

	    [3] A Maximum Entropy Model for Part-of-Speech Tagging
	    http://acl.ldc.upenn.edu/W/W96/W96-0213.pdf

	    [4] Learning to Classify Text, The NLTK Book
	    http://nltk.org/book/ch06.html
	    """

	    if os.path.isfile(pickle_cache_file):
	        # SECURITY: unpickling can execute arbitrary code. Only point
	        # pickle_cache_file at a cache written by this function.
	        with contextlib.closing(gzip.open(pickle_cache_file,
	                                          "rb")) as f_in:
	            words = pickle.load(f_in)

	        # The cache may have been built with different bounds, so
	        # re-apply the current ones instead of trusting it blindly.
	        def in_bounds(word):
	            return token_min_length <= len(word) <= token_max_length

	        nouns = [word for word in words["nouns"] if in_bounds(word)]
	        adjectives = [word for word in words["adjectives"]
	                      if in_bounds(word)]
	    else:
	        # Compile once; the pattern is the same for every line.
	        token_pattern = re.compile("^[a-z]{%s,%s}$" %
	                                   (token_min_length,
	                                    token_max_length))
	        nouns = []
	        adjectives = []
	        with open("/usr/share/dict/words") as f_in:
	            for line in f_in:
	                word = line.strip()
	                if not token_pattern.match(word):
	                    continue
	                # Tag the word on its own (no sentence context).
	                tag = nltk.pos_tag([word])[0][1]
	                if tag == "NN":
	                    nouns.append(word)
	                elif tag == "JJ":
	                    adjectives.append(word)
	        with contextlib.closing(gzip.open(pickle_cache_file,
	                                          "wb")) as f_out:
	            # -1 selects the highest pickle protocol available.
	            pickle.dump({"nouns": nouns, "adjectives": adjectives},
	                        f_out, -1)

	    # range (rather than xrange) works on both Python 2 and 3.
	    for _ in range(total_codewords):
	        # random.random() is always drawn first so that the sequence
	        # of random calls matches the original for a seeded run.
	        if random.random() > 0.5 or not adjectives:
	            first = random.choice(nouns)
	        else:
	            first = random.choice(adjectives)
	        yield (first + random.choice(nouns)).upper()


	if __name__ == "__main__":
	    # Script entry point: print each generated codeword on its own
	    # line of standard output, using the generator's defaults.
	    for codeword in get_nsa_codewords():
	        sys.stdout.write("%s\n" % codeword)