Skip to content

Instantly share code, notes, and snippets.

@DrSkippy
Last active January 12, 2016 20:50
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save DrSkippy/8d90e0c75c2a3072efbc to your computer and use it in GitHub Desktop.
Save DrSkippy/8d90e0c75c2a3072efbc to your computer and use it in GitHub Desktop.
Simulate patterns in sequences of of random symbols
#!/usr/bin/env python
# Scott Hendrickson
# @drskippy
# On average, how long do we have to wait for the various patterns in a random sequence
import random
import itertools
import string
import sys
import numpy as np
from multiprocessing import Pool
from collections import defaultdict
from operator import itemgetter
if len(sys.argv) < 4:
print "Usage: ./sim.py <# of trials> <# symbols> <# patttern length>"
sys.exit()
# parameters
n_procs = 4
values = int(sys.argv[2])
n_sequence = int(sys.argv[3])
trials = int(sys.argv[1])/n_procs
# setup
p_set = set()
for x in itertools.combinations_with_replacement(string.letters[:values], n_sequence):
for y in itertools.permutations(x):
p_set.add("".join(y))
print "patterns: {} with {} trials in {} processes".format(p_set, trials, n_procs)
patterns = {pattern:[] for pattern in list(p_set)}
prob = 1./values
def worker(_patterns, _prob):
for t in range(trials):
l = 0
sequence = ['.' for i in range(n_sequence)]
while any([len(y) < t for x,y in _patterns.items()]):
r = random.random()
i = values
while r < i*_prob:
i -= 1
f = string.letters[i]
sequence.append(f)
sequence.pop(0)
l += 1
test_p = ''.join(sequence)
for p in _patterns:
if p == test_p:
if len(_patterns[p]) < t:
_patterns[p].append(l)
return _patterns
if __name__ == '__main__':
pool = Pool(processes=n_procs)
workers = [ pool.apply_async(worker, (patterns, prob)) for i in range(n_procs) ]
# combining dictionaries with same keys
patterns = defaultdict(list)
for d in [w.get() for w in workers]:
for key, value in d.iteritems():
patterns[key].extend(value)
# calculate stats
data = []
for p in patterns:
data.append([p
, np.average(patterns[p])
, np.std(patterns[p])
, int(round(np.average(patterns[p]),0))*"#"])
# simple output, sorted by average time
data.sort(key=itemgetter(1), reverse=True)
for d in data:
print "{}: avg: {:03.6f} std: {:03.6f} | {}".format(*d)
@DrSkippy
Copy link
Author

$ ./sim.py 1000000 2 2
patterns: set(['aa', 'ab', 'ba', 'bb']) with 250000 trials in 4 processes
bb: avg: 6.001282 std: 4.698673 | ######
aa: avg: 5.995649 std: 4.688103 | ######
ba: avg: 4.000496 std: 1.997509 | ####
ab: avg: 4.000023 std: 2.000249 | ####

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment