Created
January 17, 2017 21:18
-
-
Save adamnew123456/c2bcbb6a0a29db709158136140b4c7dd to your computer and use it in GitHub Desktop.
431 - HW1 Positive-Case Generator
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from collections import namedtuple | |
import random | |
import string | |
import sys | |
Node = namedtuple('Node', ['grammar']) | |
Output = namedtuple('Output', ['text']) | |
# All of these are easier to compute than to specify literally in the grammar | |
ALPHABET_NODES = [[Output(letter)] for letter in string.letters] | |
# This is a *very* liberal interpretation of the input grammar. It does say something along the lines | |
# of "all 128 ASCII characters except for <special-chars> and <SP>", and odd-beasts like NUL are | |
# definitely not in those sets. | |
CHAR_NODES = [] | |
SPECIAL_CHARS = '\n \t<>()[]\\.,;:@"' | |
for byte in range(128): | |
if chr(byte) not in SPECIAL_CHARS: | |
CHAR_NODES.append([Output(chr(byte))]) | |
DIGIT_NODES = [[Output(str(x))] for x in range(10)] | |
GRAMMAR_GRAPH = { | |
'mail-from-cmd': [ | |
[Output('MAIL'), Node('whitespace'), Output('FROM:'), | |
Node('nullspace'), Node('reverse-path'), Node('nullspace'), | |
Node('CRLF')] | |
], | |
'whitespace': [ | |
[Node('SP')], | |
[Node('SP'), Node('whitespace')] | |
], | |
'nullspace': [ | |
[], | |
[Node('whitespace')] | |
], | |
'reverse-path': [[Node('path')]], | |
'path': [ | |
[Output('<'), Node('mailbox'), Output('>')] | |
], | |
'mailbox': [ | |
[Node('local-part'), Output('@'), Node('domain')] | |
], | |
'domain': [ | |
[Node('element')], | |
[Node('element'), Output('.'), Node('domain')] | |
], | |
'element': [[Node('name')]], | |
'local-part': [[Node('string')]], | |
'name': [ | |
[Node('a'), Node('let-dig-str')] | |
], | |
'let-dig-str': [ | |
[Node('let-dig')], | |
[Node('let-dig'), Node('let-dig-str')] | |
], | |
'let-dig': [ | |
[Node('a')], | |
[Node('d')] | |
], | |
'string': [ | |
[Node('char')], | |
[Node('char'), Node('string')] | |
], | |
'char': [[Node('c')]], | |
'CRLF': [[Output('\n')]], | |
'SP': [ | |
[Output(' ')], | |
[Output('\t')] | |
], | |
'a': ALPHABET_NODES, | |
'c': CHAR_NODES, | |
'd': DIGIT_NODES | |
} | |
def generate_grammar(grammar, starting_node): | |
"""Returns a string in the given grammar. | |
The grammar should be a dictionary, where keys are grammar production | |
names and values are lists-of-lists. Each top-level list represents | |
a group of alternative productions, while each inner list represents | |
the series of actions in that alternative. | |
Each action can be either 'Output' or 'Node' - 'Output' indicates that | |
the given character should be output as-is, while 'Node' indicates that | |
the generator should execute another production. | |
""" | |
choices = grammar[starting_node] | |
chosen = random.choice(choices) | |
buffer = '' | |
for action in chosen: | |
if isinstance(action, Node): | |
buffer += generate_grammar(grammar, action.grammar) | |
elif isinstance(action, Output): | |
buffer += action.text | |
else: | |
assert False, "Fail: %s" % (type(action),) | |
return buffer | |
if __name__ == '__main__': | |
# Feel free to change N to generate more cases | |
N = 1000 | |
for x in range(N): | |
email = generate_grammar(GRAMMAR_GRAPH, 'mail-from-cmd') | |
sys.stdout.write(email) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment