FLamparski/README.md

## README.md

      
    Raw
  

              README.md
            
          
    I'm messing about with generative context-free grammars in Python.
The generator itself is in gen_grammar.py, and the test grammar used is
in grammar.json.
Program usage

$ ./gen_grammar.py grammar.json will generate a single sentence from
the given grammar.
Grammar format

The grammar format is JSON, and is fairly simple. Look at the
included grammr.json file for an example.
Grammars must have an S symbol, which is used to start the process. After
that, you can name your symbols how you like. Considering this simple grammar:
{
  "S": [
    {"weight": 0.6, "production": ["a", {"var": "S"}, "a"]},
    {"weight": 0.4, "production": ["b"]}
  ]
}
Formally, it may be defined as:

S → a S a | b

The weight parameter affects how likely a given rule is to be chosen. All rules
for a given symbol must have weights in order for weighting to be used, else
a simple random.choice() is used.
Please note that there are no safeguards as to recursion depth.

  
## gen_grammar.py
#!/usr/bin/env python3

# usage: gen_grammar.py grammar.json

from json import load
from random import uniform, choice
from sys import argv

def all(xs, f):
    return len(xs) == len([x for x in xs if f(x)])

def grammar_choice(rules):
    if (len(rules) == 1):
        return rules[0]

    if not all(rules, lambda r: 'weight' in r):
        return choice(rules)

    total = sum(r['weight'] for r in rules)
    v = uniform(0, total)
    upto = 0
    for r in rules:
        if upto + r['weight'] >= v:
            return r
        else:
            upto += r['weight']

class Grammar:
    def __init__(self, file):
        self._grammar = load(file)

    @classmethod
    def from_file(self, path):
        with open(path) as f:
            return Grammar(f)

    def generate(self):
        def on_token(t):
            return t if isinstance(t, str) else walk(t['var'])
        def walk(symbol):
            rule = grammar_choice(self._grammar[symbol])
            return ' '.join(on_token(t) for t in rule['production'])
        return walk('S')

if __name__ == '__main__':
    print(Grammar.from_file(argv[1]).generate())

## grammar.json
{
  "S": [
    { "weight": 0.7, "production": ["This", "is", "a", {"var": "TEST"}, "grammar"] },
    { "weight": 0.3, "production": ["This", "grammar", "is", "a", {"var": "TEST"}] }
  ],
  "TEST": [
    { "production": ["test"] },
    { "production": ["sample"] },
    { "production": ["work", "in", "progress"] }
  ]
}
	#!/usr/bin/env python3

	# usage: gen_grammar.py grammar.json

	from json import load
	from random import uniform, choice
	from sys import argv

	def all(xs, f):
	return len(xs) == len([x for x in xs if f(x)])

	def grammar_choice(rules):
	if (len(rules) == 1):
	return rules[0]

	if not all(rules, lambda r: 'weight' in r):
	return choice(rules)

	total = sum(r['weight'] for r in rules)
	v = uniform(0, total)
	upto = 0
	for r in rules:
	if upto + r['weight'] >= v:
	return r
	else:
	upto += r['weight']

	class Grammar:
	def __init__(self, file):
	self._grammar = load(file)

	@classmethod
	def from_file(self, path):
	with open(path) as f:
	return Grammar(f)

	def generate(self):
	def on_token(t):
	return t if isinstance(t, str) else walk(t['var'])
	def walk(symbol):
	rule = grammar_choice(self._grammar[symbol])
	return ' '.join(on_token(t) for t in rule['production'])
	return walk('S')

	if __name__ == '__main__':
	print(Grammar.from_file(argv[1]).generate())
	{
	"S": [
	{ "weight": 0.7, "production": ["This", "is", "a", {"var": "TEST"}, "grammar"] },
	{ "weight": 0.3, "production": ["This", "grammar", "is", "a", {"var": "TEST"}] }
	],
	"TEST": [
	{ "production": ["test"] },
	{ "production": ["sample"] },
	{ "production": ["work", "in", "progress"] }
	]
	}