adamnew123456/case-gen.py

## case-gen.py
from collections import namedtuple
import random
import string
import sys

Node = namedtuple('Node', ['grammar'])
Output = namedtuple('Output', ['text'])

# All of these are easier to compute than to specify literally in the grammar
ALPHABET_NODES = [[Output(letter)] for letter in string.letters]

# This is a *very* liberal interpretation of the input grammar. It does say something along the lines
# of "all 128 ASCII characters except for <special-chars> and <SP>", and odd-beasts like NUL are
# definitely not in those sets.
CHAR_NODES = []
SPECIAL_CHARS = '\n \t<>()[]\\.,;:@"'
for byte in range(128):
    if chr(byte) not in SPECIAL_CHARS:
        CHAR_NODES.append([Output(chr(byte))])

DIGIT_NODES = [[Output(str(x))] for x in range(10)]

GRAMMAR_GRAPH = {
    'mail-from-cmd': [
        [Output('MAIL'), Node('whitespace'), Output('FROM:'),
         Node('nullspace'), Node('reverse-path'), Node('nullspace'),
         Node('CRLF')]
    ],
    'whitespace': [
        [Node('SP')],
        [Node('SP'), Node('whitespace')]
    ],
    'nullspace': [
        [],
        [Node('whitespace')]
    ],
    'reverse-path': [[Node('path')]],
    'path': [
        [Output('<'), Node('mailbox'), Output('>')]
    ],
    'mailbox': [
        [Node('local-part'), Output('@'), Node('domain')]
    ],
    'domain': [
        [Node('element')],
        [Node('element'), Output('.'), Node('domain')]
    ],
    'element': [[Node('name')]],
    'local-part': [[Node('string')]],
    'name': [
        [Node('a'), Node('let-dig-str')]
    ],
    'let-dig-str': [
        [Node('let-dig')],
        [Node('let-dig'), Node('let-dig-str')]
    ],
    'let-dig': [
        [Node('a')],
        [Node('d')]
    ],
    'string': [
        [Node('char')],
        [Node('char'), Node('string')]
    ],
    'char': [[Node('c')]],
    'CRLF': [[Output('\n')]],
    'SP': [
        [Output(' ')],
        [Output('\t')]
    ],
    'a': ALPHABET_NODES,
    'c': CHAR_NODES,
    'd': DIGIT_NODES
}

def generate_grammar(grammar, starting_node):
    """Returns a string in the given grammar.

    The grammar should be a dictionary, where keys are grammar production
    names and values are lists-of-lists. Each top-level list represents
    a group of alternative productions, while each inner list represents
    the series of actions in that alternative.

    Each action can be either 'Output' or 'Node' - 'Output' indicates that
    the given character should be output as-is, while 'Node' indicates that
    the generator should execute another production.
    """
    choices = grammar[starting_node]
    chosen = random.choice(choices)
    buffer = ''
    for action in chosen:
        if isinstance(action, Node):
            buffer += generate_grammar(grammar, action.grammar)
        elif isinstance(action, Output):
            buffer += action.text
        else:
            assert False, "Fail: %s" % (type(action),)

    return buffer

if __name__ == '__main__':
    # Feel free to change N to generate more cases
    N = 1000
    for x in range(N):
        email = generate_grammar(GRAMMAR_GRAPH, 'mail-from-cmd')
        sys.stdout.write(email)
	from collections import namedtuple
	import random
	import string
	import sys

	Node = namedtuple('Node', ['grammar'])
	Output = namedtuple('Output', ['text'])

	# All of these are easier to compute than to specify literally in the grammar
	ALPHABET_NODES = [[Output(letter)] for letter in string.letters]

	# This is a very liberal interpretation of the input grammar. It does say something along the lines
	# of "all 128 ASCII characters except for <special-chars> and <SP>", and odd-beasts like NUL are
	# definitely not in those sets.
	CHAR_NODES = []
	SPECIAL_CHARS = '\n \t<>()[]\\.,;:@"'
	for byte in range(128):
	if chr(byte) not in SPECIAL_CHARS:
	CHAR_NODES.append([Output(chr(byte))])

	DIGIT_NODES = [[Output(str(x))] for x in range(10)]

	GRAMMAR_GRAPH = {
	'mail-from-cmd': [
	[Output('MAIL'), Node('whitespace'), Output('FROM:'),
	Node('nullspace'), Node('reverse-path'), Node('nullspace'),
	Node('CRLF')]
	],
	'whitespace': [
	[Node('SP')],
	[Node('SP'), Node('whitespace')]
	],
	'nullspace': [
	[],
	[Node('whitespace')]
	],
	'reverse-path': [[Node('path')]],
	'path': [
	[Output('<'), Node('mailbox'), Output('>')]
	],
	'mailbox': [
	[Node('local-part'), Output('@'), Node('domain')]
	],
	'domain': [
	[Node('element')],
	[Node('element'), Output('.'), Node('domain')]
	],
	'element': [[Node('name')]],
	'local-part': [[Node('string')]],
	'name': [
	[Node('a'), Node('let-dig-str')]
	],
	'let-dig-str': [
	[Node('let-dig')],
	[Node('let-dig'), Node('let-dig-str')]
	],
	'let-dig': [
	[Node('a')],
	[Node('d')]
	],
	'string': [
	[Node('char')],
	[Node('char'), Node('string')]
	],
	'char': [[Node('c')]],
	'CRLF': [[Output('\n')]],
	'SP': [
	[Output(' ')],
	[Output('\t')]
	],
	'a': ALPHABET_NODES,
	'c': CHAR_NODES,
	'd': DIGIT_NODES
	}

	def generate_grammar(grammar, starting_node):
	"""Returns a string in the given grammar.

	The grammar should be a dictionary, where keys are grammar production
	names and values are lists-of-lists. Each top-level list represents
	a group of alternative productions, while each inner list represents
	the series of actions in that alternative.

	Each action can be either 'Output' or 'Node' - 'Output' indicates that
	the given character should be output as-is, while 'Node' indicates that
	the generator should execute another production.
	"""
	choices = grammar[starting_node]
	chosen = random.choice(choices)
	buffer = ''
	for action in chosen:
	if isinstance(action, Node):
	buffer += generate_grammar(grammar, action.grammar)
	elif isinstance(action, Output):
	buffer += action.text
	else:
	assert False, "Fail: %s" % (type(action),)

	return buffer

	if __name__ == '__main__':
	# Feel free to change N to generate more cases
	N = 1000
	for x in range(N):
	email = generate_grammar(GRAMMAR_GRAPH, 'mail-from-cmd')
	sys.stdout.write(email)