Last active
July 27, 2021 11:49
-
-
Save vrthra/6bb0701e08a5db3a8a1ac2be1ee3f837 to your computer and use it in GitHub Desktop.
Count strings of length n
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Example arithmetic-expression grammar: each key maps to a list of
# alternative rules; each rule is a list of tokens (keys or terminals).
grammar = {
    '<start>': [['<expr>']],
    '<expr>': [
        ['<term>', '+', '<expr>'],
        ['<term>', '-', '<expr>'],
        ['<term>']],
    '<term>': [
        ['<fact>', '*', '<term>'],
        ['<fact>', '/', '<term>'],
        ['<fact>']],
    '<fact>': [
        ['<digits>'],
        ['(', '<expr>', ')']],
    '<digits>': [['<digit>', '<digits>'], ['<digit>']],
    '<digit>': [[str(i)] for i in range(10)],
}

# Smaller grammar for experiments: plain digit strings.
grammar1 = {
    '<start>': [['<digits>']],
    '<digits>': [['<digit>', '<digits>'], ['<digit>']],
    '<digit>': [[str(i)] for i in range(10)],
}

START = '<start>'
class RuleNode:
    """One viable expansion: a head key plus nodes for the rest of the rule."""
    def __init__(self, key, tail, l_str, count):
        self.key = key      # KeyNode for the first token of the rule
        self.tail = tail    # RuleNodes covering the remaining tokens
        self.l_str = l_str  # characters consumed by the head key
        self.count = count  # distinct strings this node derives
        assert count

    def __repr__(self):
        return "head: %s tail: (%s) <%d> count:%d" % (
            repr(self.key.token), repr(self.tail), self.l_str, self.count)

    # str() and repr() intentionally produce the same text.
    __str__ = __repr__
class KeyNode:
    """A grammar key expanded to strings of one fixed length."""
    def __init__(self, token, l_str, count, rules):
        self.token = token  # the grammar key (or terminal) itself
        self.l_str = l_str  # target string length
        self.count = count  # distinct strings derivable at that length
        self.rules = rules  # RuleNodes for each viable alternative

    def __repr__(self):
        return "key: %s <%d> count:%d" % (
            repr(self.token), self.l_str, self.count)

    # str() and repr() intentionally produce the same text.
    __str__ = __repr__
# Memo tables: (rule, length) -> [RuleNode] and (key, length) -> KeyNode.
rule_strs = dict()
key_strs = dict()
# Shared sentinel for a key that derives no string of the requested length.
EmptyKey = KeyNode(token=None, l_str=None, count=0, rules=None)
def get_def(key, grammar, l_str):
    """Return the (memoized) KeyNode describing all strings of length
    `l_str` that `key` can derive in `grammar`.

    A terminal matches only when its own length equals `l_str`; a
    nonterminal's count is the sum of the counts of its rules.
    """
    if (key, l_str) in key_strs: return key_strs[(key, l_str)]
    if key not in grammar:
        # Terminal token: it derives exactly itself, of length len(key).
        if l_str == len(key):
            key_strs[(key, l_str)] = KeyNode(token=key, l_str=l_str, count=1, rules=[])
        else:
            key_strs[(key, l_str)] = EmptyKey
        return key_strs[(key, l_str)]
    # Number of strings in a definition = sum of number of strings in rules.
    rules = grammar[key]
    s = []
    count = 0
    for rule in rules:
        # (fixed: dropped the unused local `r_s` the original carried)
        s_s = get_rules(rule, grammar, l_str)  # list of RuleNodes
        for s_ in s_s:
            assert s_.count
            count += s_.count
            s.append(s_)
    key_strs[(key, l_str)] = KeyNode(token=key, l_str=l_str, count=count, rules=s)
    return key_strs[(key, l_str)]
def get_rules(rule_, g, l_str):
    """Return (memoized) RuleNodes for every way the token sequence
    `rule_` can derive a string of exactly `l_str` characters in
    grammar `g`.

    The first token takes `l_str_x` characters (0..l_str inclusive)
    and the remaining tokens must account for the rest.
    """
    rule = tuple(rule_)
    if not rule: return []
    if (rule, l_str) in rule_strs: return rule_strs[(rule, l_str)]
    token, *remaining_rules = rule
    sum_rule = []
    for l_str_x in range(0, l_str + 1):  # inclusive split point
        # BUG FIX: look the token up in `g`, the grammar we were given,
        # not the module-level `grammar` (the parameter was ignored).
        s_ = get_def(token, g, l_str_x)  # returns a KeyNode
        if not s_.count: continue
        if remaining_rules == []:
            if (l_str - l_str_x) != 0: continue  # leftover length: no match
            sum_rule.append(RuleNode(key=s_, tail=[], l_str=l_str_x, count=s_.count))
            continue
        rem = get_rules(remaining_rules, g, l_str - l_str_x)  # RuleNodes for the tail
        # Strings for this split = head strings x tail strings, summed
        # over every tail alternative.
        count_ = 0
        for r in rem:
            count_ += s_.count * r.count
        if count_:
            sum_rule.append(RuleNode(key=s_, tail=rem, l_str=l_str_x, count=count_))
    rule_strs[(rule, l_str)] = sum_rule
    return rule_strs[(rule, l_str)]
def extract_strings_rule(rule_node):
    """Enumerate every string derived by `rule_node`: each head string
    joined to each string of each tail alternative."""
    head = extract_strings_key(rule_node.key)
    if not rule_node.tail:
        return list(head)
    out = []
    for alt in rule_node.tail:
        tails = extract_strings_rule(alt)
        out.extend(h + t for h in head for t in tails)
    return out
def extract_strings_key(key_node):
    """Enumerate every string under `key_node`; a terminal (no rules)
    yields just its own token."""
    if not key_node.rules: return [key_node.token]
    return [s
            for rule in key_node.rules
            for s in extract_strings_rule(rule)]
import sys

# Usage: python script.py <length> -- count and list all grammar
# strings of exactly that length.
target_len = int(sys.argv[1])
key_node = get_def('<start>', grammar, target_len)
print(key_node.count)
strings = extract_strings_key(key_node)
print(strings)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Example arithmetic-expression grammar: each key maps to a list of
# alternative rules; each rule is a list of tokens (keys or terminals).
grammar = {
    '<start>': [['<expr>']],
    '<expr>': [
        ['<term>', '+', '<expr>'],
        ['<term>', '-', '<expr>'],
        ['<term>']],
    '<term>': [
        ['<fact>', '*', '<term>'],
        ['<fact>', '/', '<term>'],
        ['<fact>']],
    '<fact>': [
        ['<digits>'],
        ['(', '<expr>', ')']],
    '<digits>': [['<digit>', '<digits>'], ['<digit>']],
    '<digit>': [[str(i)] for i in range(10)],
}

# Smaller grammar for experiments: plain digit strings.
grammar1 = {
    '<start>': [['<digits>']],
    '<digits>': [['<digit>', '<digits>'], ['<digit>']],
    '<digit>': [[str(i)] for i in range(10)],
}

START = '<start>'

# Memo table for production lengths (currently unused).
length_of_production = {}
def get_number_of_strings_of_length_in_definition(key, grammar, l_str):
    """Count the strings of length `l_str` derivable from `key` in
    `grammar`."""
    if key not in grammar:
        # Terminal: exactly one string, of its own length.
        return 1 if l_str == len(key) else 0
    # A definition's count is the sum over its alternative rules.
    return sum(
        get_number_of_strings_of_length_in_rule(rule, grammar, l_str)
        for rule in grammar[key])
def get_number_of_strings_of_length_in_rule(rule, g, l_str):
    """Count the strings of length `l_str` derivable from the token
    sequence `rule` in grammar `g`.

    The first token may consume 0..l_str characters; the remaining
    tokens must consume exactly the rest.
    """
    if not rule: return 0
    token, *rule_rem = rule
    sum_rule = 0
    for l_str_x in range(l_str + 1):  # inclusive
        # BUG FIX: recurse with the grammar parameter `g`, not the
        # module-level `grammar` (the parameter was ignored).
        s_ = get_number_of_strings_of_length_in_definition(token, g, l_str_x)
        if s_ == 0: continue
        if rule_rem == []:
            if (l_str - l_str_x) != 0: continue  # leftover length: no match
            sum_rule += s_
            continue
        rem = get_number_of_strings_of_length_in_rule(rule_rem, g, l_str - l_str_x)
        sum_rule += s_ * rem
    return sum_rule
# Count the arithmetic-expression strings of exactly length 5.
n_strings = get_number_of_strings_of_length_in_definition('<start>', grammar, 5)
print(n_strings)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Example arithmetic-expression grammar: each key maps to a list of
# alternative rules; each rule is a list of tokens (keys or terminals).
grammar = {
    '<start>': [['<expr>']],
    '<expr>': [
        ['<term>', '+', '<expr>'],
        ['<term>', '-', '<expr>'],
        ['<term>']],
    '<term>': [
        ['<fact>', '*', '<term>'],
        ['<fact>', '/', '<term>'],
        ['<fact>']],
    '<fact>': [
        ['<digits>'],
        ['(', '<expr>', ')']],
    '<digits>': [['<digit>', '<digits>'], ['<digit>']],
    '<digit>': [[str(i)] for i in range(10)],
}

# Smaller grammar for experiments: binary digit strings only.
grammar1 = {
    '<start>': [['<digits>']],
    '<digits>': [['<digit>', '<digits>'], ['<digit>']],
    '<digit>': [[str(i)] for i in range(2)],
}

START = '<start>'
class RuleNode:
    """One viable expansion: a head key plus nodes for the rest of the rule."""
    def __init__(self, key, tail, l_str, count):
        self.key = key      # KeyNode for the first token of the rule
        self.tail = tail    # RuleNodes covering the remaining tokens
        self.l_str = l_str  # characters consumed by the head key
        self.count = count  # distinct strings this node derives
        assert count

    def __repr__(self):
        return "head: %s tail: (%s) <%d> count:%d" % (
            repr(self.key.token), repr(self.tail), self.l_str, self.count)

    # str() and repr() intentionally produce the same text.
    __str__ = __repr__
class KeyNode:
    """A grammar key expanded to strings of one fixed length."""
    def __init__(self, token, l_str, count, rules):
        self.token = token  # the grammar key (or terminal) itself
        self.l_str = l_str  # target string length
        self.count = count  # distinct strings derivable at that length
        self.rules = rules  # RuleNodes for each viable alternative

    def __repr__(self):
        return "key: %s <%d> count:%d" % (
            repr(self.token), self.l_str, self.count)

    # str() and repr() intentionally produce the same text.
    __str__ = __repr__
# Memo tables: (rule, length) -> [RuleNode] and (key, length) -> KeyNode.
rule_strs = dict()
key_strs = dict()
# Shared sentinel for a key that derives no string of the requested length.
EmptyKey = KeyNode(token=None, l_str=None, count=0, rules=None)
def get_def(key, grammar, l_str):
    """Return the (memoized) KeyNode describing all strings of length
    `l_str` that `key` can derive in `grammar`.

    A terminal matches only when its own length equals `l_str`; a
    nonterminal's count is the sum of the counts of its rules.
    """
    if (key, l_str) in key_strs: return key_strs[(key, l_str)]
    if key not in grammar:
        # Terminal token: it derives exactly itself, of length len(key).
        if l_str == len(key):
            key_strs[(key, l_str)] = KeyNode(token=key, l_str=l_str, count=1, rules=[])
        else:
            key_strs[(key, l_str)] = EmptyKey
        return key_strs[(key, l_str)]
    # Number of strings in a definition = sum of number of strings in rules.
    rules = grammar[key]
    s = []
    count = 0
    for rule in rules:
        # (fixed: dropped the unused local `r_s` the original carried)
        s_s = get_rules(rule, grammar, l_str)  # list of RuleNodes
        for s_ in s_s:
            assert s_.count
            count += s_.count
            s.append(s_)
    key_strs[(key, l_str)] = KeyNode(token=key, l_str=l_str, count=count, rules=s)
    return key_strs[(key, l_str)]
def get_rules(rule_, g, l_str):
    """Return (memoized) RuleNodes for every way the token sequence
    `rule_` can derive a string of exactly `l_str` characters in
    grammar `g`.

    The first token takes `l_str_x` characters (0..l_str inclusive)
    and the remaining tokens must account for the rest.
    """
    rule = tuple(rule_)
    if not rule: return []
    if (rule, l_str) in rule_strs: return rule_strs[(rule, l_str)]
    token, *remaining_rules = rule
    sum_rule = []
    for l_str_x in range(0, l_str + 1):  # inclusive split point
        # BUG FIX: look the token up in `g`, the grammar we were given,
        # not the module-level `grammar` (the parameter was ignored).
        s_ = get_def(token, g, l_str_x)  # returns a KeyNode
        if not s_.count: continue
        if remaining_rules == []:
            if (l_str - l_str_x) != 0: continue  # leftover length: no match
            sum_rule.append(RuleNode(key=s_, tail=[], l_str=l_str_x, count=s_.count))
            continue
        rem = get_rules(remaining_rules, g, l_str - l_str_x)  # RuleNodes for the tail
        # Strings for this split = head strings x tail strings, summed
        # over every tail alternative.
        count_ = 0
        for r in rem:
            count_ += s_.count * r.count
        if count_:
            sum_rule.append(RuleNode(key=s_, tail=rem, l_str=l_str_x, count=count_))
    rule_strs[(rule, l_str)] = sum_rule
    return rule_strs[(rule, l_str)]
def rule_extract_strings(rule_node):
    """Enumerate every string derived by `rule_node`: each head string
    joined to each string of each tail alternative."""
    head = key_extract_strings(rule_node.key)
    if not rule_node.tail:
        return list(head)
    out = []
    for alt in rule_node.tail:
        tails = rule_extract_strings(alt)
        out.extend(h + t for h in head for t in tails)
    return out
def key_extract_strings(key_node):
    """Enumerate every string under `key_node`; a terminal (no rules)
    yields just its own token."""
    if not key_node.rules: return [key_node.token]
    return [s
            for rule in key_node.rules
            for s in rule_extract_strings(rule)]
def key_get_len(key_node):
    """Number of strings under `key_node` (a terminal contributes one)."""
    if not key_node.rules: return 1
    return sum(rule_get_len(rule) for rule in key_node.rules)
def rule_get_len(rule_node):
    """Number of strings derivable from `rule_node`.

    Each tail alternative contributes (head strings x its strings);
    a node with no tail contributes just its head strings.
    """
    slen = 0
    s_k = key_get_len(rule_node.key)
    for rule in rule_node.tail:
        # BUG FIX: accumulate across tail alternatives instead of
        # overwriting `slen` each iteration; this mirrors the
        # `count_ += s_.count * r.count` computation in get_rules.
        slen += s_k * rule_get_len(rule)
    if not rule_node.tail:
        slen += s_k
    return slen
def key_get_string_at(key_node, at):
    """Return the `at`-th string of `key_node` without materializing
    the whole set; terminals return their own token."""
    assert at < key_node.count
    if not key_node.rules: return key_node.token
    offset = 0
    for rule in key_node.rules:
        # Each alternative owns a contiguous block of `rule.count` indices.
        if at < offset + rule.count:
            return rule_get_string_at(rule, at - offset)
        offset += rule.count
    return None
def rule_get_string_at(rule_node, at):
    """Return the `at`-th string of `rule_node`.

    Indices are ordered tail-alternative-major, then head string, then
    tail string -- matching the order of rule_extract_strings.
    """
    assert at < rule_node.count
    if not rule_node.tail:
        return key_get_string_at(rule_node.key, at)
    head_count = rule_node.key.count
    offset = 0
    for rule in rule_node.tail:
        for i in range(head_count):
            # Head string i pairs with a block of `rule.count` tail strings.
            if at < offset + rule.count:
                prefix = key_get_string_at(rule_node.key, i)
                return prefix + rule_get_string_at(rule, at - offset)
            offset += rule.count
    return None
import sys

# Usage: python script.py <length> <index> -- count strings of the
# given length, then fetch the <index>-th one both by full enumeration
# and by direct indexed lookup (the two should agree).
at = int(sys.argv[2])
key_node = get_def('<start>', grammar, int(sys.argv[1]))
print("count:", key_node.count)
slen = key_get_len(key_node)
# Fixed typo in output labels: "strting" -> "string(s)".
print("len(strings) = %d" % slen)
strings = key_extract_strings(key_node)
print("strings[%d]" % at, repr(strings[at]))
r = key_get_string_at(key_node, at)
print(r)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Example arithmetic-expression grammar: each key maps to a list of
# alternative rules; each rule is a list of tokens (keys or terminals).
grammar = {
    '<start>': [['<expr>']],
    '<expr>': [
        ['<term>', '+', '<expr>'],
        ['<term>', '-', '<expr>'],
        ['<term>']],
    '<term>': [
        ['<fact>', '*', '<term>'],
        ['<fact>', '/', '<term>'],
        ['<fact>']],
    '<fact>': [
        ['<digits>'],
        ['(', '<expr>', ')']],
    '<digits>': [['<digit>', '<digits>'], ['<digit>']],
    '<digit>': [[str(i)] for i in range(10)],
}

# Smaller grammar for experiments: plain digit strings.
grammar1 = {
    '<start>': [['<digits>']],
    '<digits>': [['<digit>', '<digits>'], ['<digit>']],
    '<digit>': [[str(i)] for i in range(10)],
}

START = '<start>'

# Memo table for production lengths (currently unused).
length_of_production = {}
def get_strings_of_length_in_definition(key, grammar, l_str):
    """Return all strings of length `l_str` derivable from `key` in
    `grammar`."""
    if key not in grammar:
        # Terminal: it is its own (only) expansion.
        return [key] if l_str == len(key) else []
    # A definition's strings are the union over its alternative rules.
    return [s
            for rule in grammar[key]
            for s in get_strings_of_length_in_rule(rule, grammar, l_str)]
def get_strings_of_length_in_rule(rule, g, l_str):
    """Return all strings of length `l_str` derivable from the token
    sequence `rule` in grammar `g`.

    The first token may consume 0..l_str characters; the remaining
    tokens must consume exactly the rest.
    """
    if not rule: return []
    token, *rule_rem = rule
    sum_rule = []
    for l_str_x in range(l_str + 1):  # inclusive
        # BUG FIX: recurse with the grammar parameter `g`, not the
        # module-level `grammar` (the parameter was ignored).
        s_ = get_strings_of_length_in_definition(token, g, l_str_x)
        if not s_: continue
        if rule_rem == []:
            if (l_str - l_str_x) != 0: continue  # leftover length: no match
            sum_rule.extend(s_)
            continue
        rem = get_strings_of_length_in_rule(rule_rem, g, l_str - l_str_x)
        # Cross product: every head string joined to every tail string.
        for s1 in s_:
            for sr in rem:
                sum_rule.append(s1 + sr)
    return sum_rule
# Enumerate all arithmetic-expression strings of exactly length 4.
results = get_strings_of_length_in_definition('<start>', grammar, 4)
print(results)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment