@vrthra · Last active July 27, 2021
Count strings of length n
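Every snippet below uses the same grammar encoding: a Python dict maps each nonterminal (a '<...>' key) to a list of rules, and each rule is a list of tokens, where a token is either another nonterminal or a terminal string. The question answered is: how many strings of exactly length n does the grammar derive, and what are they? (Strings are counted per derivation, so an ambiguous grammar counts a string once for each of its derivations.) The recurrence is the same throughout: the count for a nonterminal is the sum over its rules, the count for a rule is the product of its tokens' counts summed over every way of splitting the length budget among the tokens, and a terminal contributes exactly one string (itself) when the budget equals its own length. Four variants follow: a memoized counter that also records the derivation forest and can list the strings, a plain recursive counter, the memoized version extended with a recount over the forest and direct indexing into the i-th string, and a naive enumerator that constructs every string directly. With the expression grammar used throughout, lengths 1, 2, and 3 should come out to 10, 100, and 1410 strings. First, the memoized counter: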
grammar = {
    '<start>': [['<expr>']],
    '<expr>': [['<term>', '+', '<expr>'], ['<term>', '-', '<expr>'], ['<term>']],
    '<term>': [['<fact>', '*', '<term>'], ['<fact>', '/', '<term>'], ['<fact>']],
    '<fact>': [['<digits>'], ['(', '<expr>', ')']],
    '<digits>': [['<digit>', '<digits>'], ['<digit>']],
    '<digit>': [[str(i)] for i in range(10)],
}
grammar1 = {
    '<start>': [['<digits>']],
    '<digits>': [['<digit>', '<digits>'], ['<digit>']],
    '<digit>': [[str(i)] for i in range(10)],
}
START = '<start>'
class RuleNode:
    def __init__(self, key, tail, l_str, count):
        self.key = key      # KeyNode for the first token of the rule
        self.tail = tail    # RuleNodes covering the remaining tokens of the rule
        self.l_str = l_str  # number of characters consumed by the key
        self.count = count  # number of strings this node can produce
        assert count

    def __str__(self):
        return "head: %s tail: (%s) <%d> count:%d" % (
            repr(self.key.token), repr(self.tail), self.l_str, self.count)

    def __repr__(self):
        return self.__str__()


class KeyNode:
    def __init__(self, token, l_str, count, rules):
        self.token = token  # the terminal or nonterminal this node stands for
        self.l_str = l_str  # length of the strings this node produces
        self.count = count  # number of strings of that length
        self.rules = rules  # RuleNodes, one per (rule, length split); [] for terminals

    def __str__(self):
        return "key: %s <%d> count:%d" % (repr(self.token), self.l_str, self.count)

    def __repr__(self):
        return self.__str__()
rule_strs = {}  # memo: (rule tuple, length) -> [RuleNode]
key_strs = {}   # memo: (key, length) -> KeyNode
EmptyKey = KeyNode(token=None, l_str=None, count=0, rules=None)
def get_def(key, grammar, l_str):
    if (key, l_str) in key_strs:
        return key_strs[(key, l_str)]
    if key not in grammar:
        # terminal symbol: exactly one string (itself), and only when the
        # length budget matches its own length
        if l_str == len(key):
            key_strs[(key, l_str)] = KeyNode(token=key, l_str=l_str, count=1, rules=[])
        else:
            key_strs[(key, l_str)] = EmptyKey
        return key_strs[(key, l_str)]
    # number of strings in a definition = sum of the number of strings in its rules
    rules = grammar[key]
    s = []
    count = 0
    for rule in rules:
        s_s = get_rules(rule, grammar, l_str)  # list of RuleNodes, one per length split
        for s_ in s_s:
            assert s_.count
            count += s_.count
            s.append(s_)
    key_strs[(key, l_str)] = KeyNode(token=key, l_str=l_str, count=count, rules=s)
    return key_strs[(key, l_str)]
def get_rules(rule_, g, l_str):
    rule = tuple(rule_)
    if not rule:
        return []
    if (rule, l_str) in rule_strs:
        return rule_strs[(rule, l_str)]
    token, *remaining_rules = rule
    sum_rule = []
    for l_str_x in range(0, l_str + 1):  # inclusive; epsilons do not contribute to the count (TODO)
        s_ = get_def(token, g, l_str_x)  # KeyNode for the first token at this length
        if not s_.count:
            continue
        if remaining_rules == []:
            if (l_str - l_str_x) != 0:
                continue  # the first token did not consume the whole budget -- no match here
            sum_rule.append(RuleNode(key=s_, tail=[], l_str=l_str_x, count=s_.count))
            continue
        # RuleNodes for the remaining tokens, consuming the rest of the budget
        rem = get_rules(remaining_rules, g, l_str - l_str_x)
        count_ = 0
        for r in rem:
            count_ += s_.count * r.count
        if count_:
            sum_rule.append(RuleNode(key=s_, tail=rem, l_str=l_str_x, count=count_))
    rule_strs[(rule, l_str)] = sum_rule
    return rule_strs[(rule, l_str)]
def extract_strings_rule(rule_node):
    # strings of a rule node = strings of its head key, each concatenated
    # with every string of each tail node
    strings = []
    s_k = extract_strings_key(rule_node.key)
    for rule in rule_node.tail:
        s_t = extract_strings_rule(rule)
        for s1 in s_k:
            for s2 in s_t:
                strings.append(s1 + s2)
    if not rule_node.tail:
        strings.extend(s_k)
    return strings

def extract_strings_key(key_node):
    # a key node is either a terminal (no rules) or has a set of rule nodes
    if not key_node.rules:
        return [key_node.token]
    strings = []
    for rule in key_node.rules:
        strings.extend(extract_strings_rule(rule))
    return strings
import sys
key_node = get_def('<start>', grammar, int(sys.argv[1]))
print(key_node.count)
strings = extract_strings_key(key_node)
print(strings)
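A quick way to exercise the snippet above without the command line. This is a minimal sketch, assuming the class and function definitions above (minus the sys.argv driver at the end) are in scope; the expected values are what the expression grammar should produce:

    key_node = get_def('<start>', grammar, 2)    # all derivations of length 2
    print(key_node.count)                        # expected: 100 (the two-digit strings)
    print(extract_strings_key(key_node)[:3])     # expected: ['00', '01', '02']

The next snippet strips the same recurrence down to a bare counter: no memoization and no derivation forest, just the number of strings of a given length.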
grammar = {
    '<start>': [['<expr>']],
    '<expr>': [['<term>', '+', '<expr>'], ['<term>', '-', '<expr>'], ['<term>']],
    '<term>': [['<fact>', '*', '<term>'], ['<fact>', '/', '<term>'], ['<fact>']],
    '<fact>': [['<digits>'], ['(', '<expr>', ')']],
    '<digits>': [['<digit>', '<digits>'], ['<digit>']],
    '<digit>': [[str(i)] for i in range(10)],
}
grammar1 = {
    '<start>': [['<digits>']],
    '<digits>': [['<digit>', '<digits>'], ['<digit>']],
    '<digit>': [[str(i)] for i in range(10)],
}
START = '<start>'
length_of_production = {}

def get_number_of_strings_of_length_in_definition(key, grammar, l_str):
    if key not in grammar:
        # terminal: one string (itself) if the length budget matches, else none
        if l_str == len(key):
            return 1
        else:
            return 0
    # number of strings in a definition = sum of the number of strings in its rules
    rules = grammar[key]
    s = 0
    for rule in rules:
        s += get_number_of_strings_of_length_in_rule(rule, grammar, l_str)
    return s
def get_number_of_strings_of_length_in_rule(rule, g, l_str):
    # number of strings in a rule = product of the counts of its tokens,
    # summed over every way of splitting the length budget among them;
    # the first token may consume anywhere from 0 to l_str characters
    if not rule:
        return 0
    token, *rule_rem = rule
    sum_rule = 0
    for l_str_x in range(l_str + 1):  # inclusive
        s_ = get_number_of_strings_of_length_in_definition(token, g, l_str_x)
        if s_ == 0:
            continue  # no dice
        if rule_rem == []:
            if (l_str - l_str_x) != 0:
                continue
            sum_rule += s_
            continue
        rem = get_number_of_strings_of_length_in_rule(rule_rem, g, l_str - l_str_x)
        sum_rule += s_ * rem
    return sum_rule
g = get_number_of_strings_of_length_in_definition('<start>', grammar, 5)
print(g)
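These two functions implement the recurrence directly, so they should agree with the memoized counter above. A minimal sanity check, assuming the definitions above are in scope:

    # 10 single digits; 100 two-digit strings; 1410 strings of length 3
    # (1000 three-digit numbers + 400 digit-operator-digit expressions + 10 parenthesized digits)
    for n, expected in [(1, 10), (2, 100), (3, 1410)]:
        got = get_number_of_strings_of_length_in_definition('<start>', grammar, n)
        assert got == expected, (n, got)

There is no caching here, so the running time grows quickly with the length; the memoized variants cache results per (key, length) and per (rule, length) pair instead. The next snippet returns to the memoized derivation forest and adds two things: a recount of the strings from the forest (key_get_len) and direct indexing into the i-th string without materializing the whole list (key_get_string_at). Its grammar1, unlike the earlier ones, restricts <digit> to 0 and 1, although only grammar is actually used.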
grammar = {
    '<start>': [['<expr>']],
    '<expr>': [['<term>', '+', '<expr>'], ['<term>', '-', '<expr>'], ['<term>']],
    '<term>': [['<fact>', '*', '<term>'], ['<fact>', '/', '<term>'], ['<fact>']],
    '<fact>': [['<digits>'], ['(', '<expr>', ')']],
    '<digits>': [['<digit>', '<digits>'], ['<digit>']],
    '<digit>': [[str(i)] for i in range(10)],
}
grammar1 = {
    '<start>': [['<digits>']],
    '<digits>': [['<digit>', '<digits>'], ['<digit>']],
    '<digit>': [[str(i)] for i in range(2)],
}
START = '<start>'
class RuleNode:
    def __init__(self, key, tail, l_str, count):
        self.key = key      # KeyNode for the first token of the rule
        self.tail = tail    # RuleNodes covering the remaining tokens of the rule
        self.l_str = l_str  # number of characters consumed by the key
        self.count = count  # number of strings this node can produce
        assert count

    def __str__(self):
        return "head: %s tail: (%s) <%d> count:%d" % (
            repr(self.key.token), repr(self.tail), self.l_str, self.count)

    def __repr__(self):
        return self.__str__()


class KeyNode:
    def __init__(self, token, l_str, count, rules):
        self.token = token  # the terminal or nonterminal this node stands for
        self.l_str = l_str  # length of the strings this node produces
        self.count = count  # number of strings of that length
        self.rules = rules  # RuleNodes, one per (rule, length split); [] for terminals

    def __str__(self):
        return "key: %s <%d> count:%d" % (repr(self.token), self.l_str, self.count)

    def __repr__(self):
        return self.__str__()
rule_strs = {}  # memo: (rule tuple, length) -> [RuleNode]
key_strs = {}   # memo: (key, length) -> KeyNode
EmptyKey = KeyNode(token=None, l_str=None, count=0, rules=None)

def get_def(key, grammar, l_str):
    if (key, l_str) in key_strs:
        return key_strs[(key, l_str)]
    if key not in grammar:
        # terminal symbol: exactly one string (itself), and only when the
        # length budget matches its own length
        if l_str == len(key):
            key_strs[(key, l_str)] = KeyNode(token=key, l_str=l_str, count=1, rules=[])
        else:
            key_strs[(key, l_str)] = EmptyKey
        return key_strs[(key, l_str)]
    # number of strings in a definition = sum of the number of strings in its rules
    rules = grammar[key]
    s = []
    count = 0
    for rule in rules:
        s_s = get_rules(rule, grammar, l_str)  # list of RuleNodes, one per length split
        for s_ in s_s:
            assert s_.count
            count += s_.count
            s.append(s_)
    key_strs[(key, l_str)] = KeyNode(token=key, l_str=l_str, count=count, rules=s)
    return key_strs[(key, l_str)]
def get_rules(rule_, g, l_str):
    rule = tuple(rule_)
    if not rule:
        return []
    if (rule, l_str) in rule_strs:
        return rule_strs[(rule, l_str)]
    token, *remaining_rules = rule
    sum_rule = []
    for l_str_x in range(0, l_str + 1):  # inclusive; epsilons do not contribute to the count (TODO)
        s_ = get_def(token, g, l_str_x)  # KeyNode for the first token at this length
        if not s_.count:
            continue
        if remaining_rules == []:
            if (l_str - l_str_x) != 0:
                continue  # the first token did not consume the whole budget -- no match here
            sum_rule.append(RuleNode(key=s_, tail=[], l_str=l_str_x, count=s_.count))
            continue
        # RuleNodes for the remaining tokens, consuming the rest of the budget
        rem = get_rules(remaining_rules, g, l_str - l_str_x)
        count_ = 0
        for r in rem:
            count_ += s_.count * r.count
        if count_:
            sum_rule.append(RuleNode(key=s_, tail=rem, l_str=l_str_x, count=count_))
    rule_strs[(rule, l_str)] = sum_rule
    return rule_strs[(rule, l_str)]
def rule_extract_strings(rule_node):
    # strings of a rule node = strings of its head key, each concatenated
    # with every string of each tail node
    strings = []
    s_k = key_extract_strings(rule_node.key)
    for rule in rule_node.tail:
        s_t = rule_extract_strings(rule)
        for s1 in s_k:
            for s2 in s_t:
                strings.append(s1 + s2)
    if not rule_node.tail:
        strings.extend(s_k)
    return strings

def key_extract_strings(key_node):
    # a key node is either a terminal (no rules) or has a set of rule nodes
    if not key_node.rules:
        return [key_node.token]
    strings = []
    for rule in key_node.rules:
        strings.extend(rule_extract_strings(rule))
    return strings
def key_get_len(key_node):
    # number of strings under a key node (should agree with key_node.count)
    if not key_node.rules:
        return 1
    slen = 0
    for rule in key_node.rules:
        slen += rule_get_len(rule)
    return slen

def rule_get_len(rule_node):
    s_k = key_get_len(rule_node.key)
    if not rule_node.tail:
        return s_k
    slen = 0
    for rule in rule_node.tail:
        # every head string pairs with every string of this tail node
        slen += s_k * rule_get_len(rule)
    return slen
def key_get_string_at(key_node, at):
    # return the at-th string of this key node, in the same order that
    # key_extract_strings enumerates them
    assert at < key_node.count
    if not key_node.rules:
        return key_node.token
    at_ = 0
    for rule in key_node.rules:
        if at < (at_ + rule.count):
            return rule_get_string_at(rule, at - at_)
        at_ += rule.count
    return None

def rule_get_string_at(rule_node, at):
    # the strings of a rule node are grouped first by tail node, then by head
    # string; within one (tail node, head string) block the remaining offset
    # selects the tail string
    assert at < rule_node.count
    if not rule_node.tail:
        return key_get_string_at(rule_node.key, at)
    len_s_k = rule_node.key.count
    at_ = 0
    for rule in rule_node.tail:
        for i in range(len_s_k):
            if at < (at_ + rule.count):
                s_k = key_get_string_at(rule_node.key, i)
                return s_k + rule_get_string_at(rule, at - at_)
            at_ += rule.count
    return None
import sys

at = int(sys.argv[2])
key_node = get_def('<start>', grammar, int(sys.argv[1]))
print("count:", key_node.count)
slen = key_get_len(key_node)
print("len(strings) = %d" % slen)
strings = key_extract_strings(key_node)
print("strings[%d]" % at, repr(strings[at]))
r = key_get_string_at(key_node, at)
print(r)
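Run with a length and an index, the script prints the total count, the recount from the forest, the indexed string taken from the fully extracted list, and the same string computed directly by key_get_string_at; the last two should always agree. A minimal sketch without the command line, assuming the definitions above (minus the sys.argv driver) are in scope:

    key_node = get_def('<start>', grammar, 3)
    assert key_node.count == key_get_len(key_node)          # forest recount agrees
    strings = key_extract_strings(key_node)
    assert key_get_string_at(key_node, 7) == strings[7]     # direct indexing agrees

The final snippet drops the counting machinery entirely and enumerates the strings of a given length directly, one nested cross product per rule.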
grammar = {
    '<start>': [['<expr>']],
    '<expr>': [['<term>', '+', '<expr>'], ['<term>', '-', '<expr>'], ['<term>']],
    '<term>': [['<fact>', '*', '<term>'], ['<fact>', '/', '<term>'], ['<fact>']],
    '<fact>': [['<digits>'], ['(', '<expr>', ')']],
    '<digits>': [['<digit>', '<digits>'], ['<digit>']],
    '<digit>': [[str(i)] for i in range(10)],
}
grammar1 = {
    '<start>': [['<digits>']],
    '<digits>': [['<digit>', '<digits>'], ['<digit>']],
    '<digit>': [[str(i)] for i in range(10)],
}
START = '<start>'
length_of_production = {}

def get_strings_of_length_in_definition(key, grammar, l_str):
    if key not in grammar:
        # terminal: the only string of matching length is the terminal itself
        if l_str == len(key):
            return [key]
        else:
            return []
    # strings of a definition = concatenation of the strings of its rules
    rules = grammar[key]
    s = []
    for rule in rules:
        s.extend(get_strings_of_length_in_rule(rule, grammar, l_str))
    return s
def get_strings_of_length_in_rule(rule, g, l_str):
    # strings of a rule = cross product of the strings of its tokens,
    # over every way of splitting the length budget among them;
    # the first token may consume anywhere from 0 to l_str characters
    if not rule:
        return []
    token, *rule_rem = rule
    sum_rule = []
    for l_str_x in range(l_str + 1):  # inclusive
        s_ = get_strings_of_length_in_definition(token, g, l_str_x)
        if not s_:
            continue  # no dice
        if rule_rem == []:
            if (l_str - l_str_x) != 0:
                continue
            sum_rule.extend(s_)
            continue
        rem = get_strings_of_length_in_rule(rule_rem, g, l_str - l_str_x)
        for s1 in s_:
            for sr in rem:
                sum_rule.append(s1 + sr)
    return sum_rule
g = get_strings_of_length_in_definition('<start>', grammar, 4)
print(g)
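This direct enumerator recomputes every sub-result from scratch, so it is only practical for small lengths, but its output should line up with the counters above. A minimal cross-check, assuming this snippet and the bare counter from the second snippet are pasted together:

    strings = get_strings_of_length_in_definition('<start>', grammar, 3)
    count = get_number_of_strings_of_length_in_definition('<start>', grammar, 3)
    assert len(strings) == count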