Skip to content

Instantly share code, notes, and snippets.

@coffeemakr
Last active August 29, 2015 14:07
Show Gist options
  • Save coffeemakr/9265b0e9537d145498f4 to your computer and use it in GitHub Desktop.
Save coffeemakr/9265b0e9537d145498f4 to your computer and use it in GitHub Desktop.
Search for repetitions
from collections import OrderedDict
from math import sqrt
import re
encrypted = "Something or some thing?"
#encrypted = re.sub('[^A-Z0-9+/\']', '', encrypted)
#print(encrypted)
primes = len(encrypted)
def find_repetitions(s, ignore_space=True):
patterns = {}
for i in xrange(len(s)):
for l in xrange(1, len(s[i:])+1):
p = s[i:i+l]
if ignore_space:
if s[i] == ' ':
continue
p = p.rstrip()
if p and not p in patterns:
cnt = s[i:].count(p)
if cnt > 1:
patterns[p] = cnt
result = {}
for pat, cnt in patterns.items():
if not True in [(pat in p and p != pat) for p in patterns]:
result[pat] = cnt
return result
def get_distance(s, p):
result = [m.start() for m in re.finditer(re.escape(p), s)]
return [(result[i+1] - start) for i, start in enumerate(result[:-1])]
def factorise(n):
pFact, limit, check, num = [], int(sqrt(n)) + 1, 2, n
if n == 1:
return [1]
for check in xrange(2, limit):
while num % check == 0:
pFact.append(check)
num /= check
if num > 1:
pFact.append(num)
return pFact
res = find_repetitions(encrypted)
print(len(res))
print(res) # to compare
by_length = OrderedDict(sorted(res.items(), key=lambda x:len(x[0]),reverse=True))
by_count = OrderedDict(sorted(res.items(), key=lambda x:x[1]),reverse=True)
for s, c in by_length.items():
dist = get_distance(encrypted, s)
print("%-10r: %-5d distances: %-28s -- %s" % (s,c, dist, ' - '.join(map(str, map(factorise, dist)))))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment