Skip to content

Instantly share code, notes, and snippets.

@adamnew123456
Created January 17, 2017 21:18
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save adamnew123456/c2bcbb6a0a29db709158136140b4c7dd to your computer and use it in GitHub Desktop.
Save adamnew123456/c2bcbb6a0a29db709158136140b4c7dd to your computer and use it in GitHub Desktop.
431 - HW1 Positive-Case Generator
from collections import namedtuple
import random
import string
import sys
Node = namedtuple('Node', ['grammar'])
Output = namedtuple('Output', ['text'])
# All of these are easier to compute than to specify literally in the grammar
ALPHABET_NODES = [[Output(letter)] for letter in string.letters]
# This is a *very* liberal interpretation of the input grammar. It does say something along the lines
# of "all 128 ASCII characters except for <special-chars> and <SP>", and odd-beasts like NUL are
# definitely not in those sets.
CHAR_NODES = []
SPECIAL_CHARS = '\n \t<>()[]\\.,;:@"'
for byte in range(128):
if chr(byte) not in SPECIAL_CHARS:
CHAR_NODES.append([Output(chr(byte))])
DIGIT_NODES = [[Output(str(x))] for x in range(10)]
GRAMMAR_GRAPH = {
'mail-from-cmd': [
[Output('MAIL'), Node('whitespace'), Output('FROM:'),
Node('nullspace'), Node('reverse-path'), Node('nullspace'),
Node('CRLF')]
],
'whitespace': [
[Node('SP')],
[Node('SP'), Node('whitespace')]
],
'nullspace': [
[],
[Node('whitespace')]
],
'reverse-path': [[Node('path')]],
'path': [
[Output('<'), Node('mailbox'), Output('>')]
],
'mailbox': [
[Node('local-part'), Output('@'), Node('domain')]
],
'domain': [
[Node('element')],
[Node('element'), Output('.'), Node('domain')]
],
'element': [[Node('name')]],
'local-part': [[Node('string')]],
'name': [
[Node('a'), Node('let-dig-str')]
],
'let-dig-str': [
[Node('let-dig')],
[Node('let-dig'), Node('let-dig-str')]
],
'let-dig': [
[Node('a')],
[Node('d')]
],
'string': [
[Node('char')],
[Node('char'), Node('string')]
],
'char': [[Node('c')]],
'CRLF': [[Output('\n')]],
'SP': [
[Output(' ')],
[Output('\t')]
],
'a': ALPHABET_NODES,
'c': CHAR_NODES,
'd': DIGIT_NODES
}
def generate_grammar(grammar, starting_node):
"""Returns a string in the given grammar.
The grammar should be a dictionary, where keys are grammar production
names and values are lists-of-lists. Each top-level list represents
a group of alternative productions, while each inner list represents
the series of actions in that alternative.
Each action can be either 'Output' or 'Node' - 'Output' indicates that
the given character should be output as-is, while 'Node' indicates that
the generator should execute another production.
"""
choices = grammar[starting_node]
chosen = random.choice(choices)
buffer = ''
for action in chosen:
if isinstance(action, Node):
buffer += generate_grammar(grammar, action.grammar)
elif isinstance(action, Output):
buffer += action.text
else:
assert False, "Fail: %s" % (type(action),)
return buffer
if __name__ == '__main__':
# Feel free to change N to generate more cases
N = 1000
for x in range(N):
email = generate_grammar(GRAMMAR_GRAPH, 'mail-from-cmd')
sys.stdout.write(email)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment