Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
substring probability, n successes in a row
#!/usr/bin/env python
import random
def id_mat(n):
return [[int(i == j) for j in xrange(n)] for i in xrange(n)]
def my_pow(x, n, mul_fun, one):
r = one
assert n >= 0 and int(n) == n
while n != 0:
if n % 2 == 1:
r = mul_fun(r, x)
x = mul_fun(x, x)
n = n // 2
return r
def mul(a, b):
sz = len(a)
assert sz == len(b)
return [
[ sum(a[i][k]*b[k][j] for k in xrange(sz)) for j in xrange(sz) ]
for i in xrange(sz)
]
def count_matrix(m, p, n):
res = my_pow(m, n, mul, id_mat(p))
return sum(row[0] for row in res)
def mul_num(m, b):
return [[b*x for x in row] for row in m]
def count_same(p, al, n):
m = [[al-1] * p] + [[int(j == i) for j in xrange(p)] for i in xrange(p-1)]
m = mul_num(m, 1.0 / al)
return count_matrix(m, p, n)
def count_dif(p, al, n):
m = [[al-2] * p] + [[1] * p] + [[int(j == i+1) for j in xrange(p)] for i in xrange(p-2)]
m[0][0] += 1
m = mul_num(m, 1.0 / al)
return count_matrix(m, p, n)
def make_word_matrix(w, al):
p = len(w)
m = [[0]*p for _i in xrange(p)]
#for every pair of prefixes (Pi, Pj), for every char 'c',
#write to matrix[i][j] number of transitions (Pj + C -> Pi),
#when full prefix w[0:len(w)] is forbidden
for j in xrange(p):
st = {}
end = 0
for i in xrange(1, j+2):
c = w[i-1]
end = (j == p-1) and (c == w[p-1])
if not end:
if (w[0:i-1] == w[j-i+1:j]):
st[c] = max(st.get(c, -1), i)
m[0][j] = al - (len(st) + end)
for c, i in st.iteritems():
m[i][j] = 1
return m
def count_word(w, al, n):
return count_matrix(make_word_matrix(w, al), len(w), n)
def count_word_prob(w, al, n):
return count_matrix(mul_num(make_word_matrix(w, al), 1.0/al), len(w), n)
def count_word_simple(w, al, n):
import itertools
def gen_alph(k):
return ''.join(chr(i) for i in xrange(ord('a'), ord('a') + k))
return sum((w not in ''.join(x)) for x in itertools.product(gen_alph(al), repeat=n))
def main():
#from math import log as ln
import os, sys
try:
_, word, alphabet_size, string_len = sys.argv
alphabet_size = int(alphabet_size)
string_len = int(string_len)
assert alphabet_size > 0
assert string_len >= 0
assert len(set(word)) <= alphabet_size, "there are more characters in word than in alphabet"
except:
print >> sys.stderr, (
'count number of word occurences in random strings, and probability\n'
'\n'
'usage: python %s word, alphabet_size, string_len\n'
'alphabet assumed to be "a, b, c, ..." of alphabet_size size\n'
'example: python %s "abab" 26 100\n' ) % tuple([os.path.basename(sys.argv[0])] * 2)
print >> sys.stderr
print >> sys.stderr
raise
print 1.0 - count_word_prob(word, alphabet_size, string_len)
#all = alphabet_size ** string_len
#print '%s/%s' % (all - count_word(word, alphabet_size, string_len), all)
#print 1-count_word_prob('abcd', 256, 2**32)
#print 1-count_word_prob('abab', 256, 2**32)
#print 1-count_word_prob('aaaa', 256, 2**32)
#test by trivial count_word_simple:
def gen_alph(k):
return ''.join(chr(i) for i in xrange(ord('a'), ord('a') + k))
import itertools
for wl in xrange(5):
for x in itertools.product(gen_alph(wl)):
w = ''.join(x)
assert count_word_simple(w, 4, 6) == count_word(w, 4, 6)
main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.