Last active
August 16, 2016 09:27
-
-
Save gauteh/f3e761a14d529a05771e9802d92fa10d to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#! /usr/bin/python | |
# | |
# Author: Gaute Hope <eg@gaute.vetsj.com> / 2016-08-12 | |
# | |
# Find all substrings in a string with the number of occurences of a set of | |
# characters pre-defined. | |
# | |
# A prime number is assigned to each character in the key set, all other | |
# characters are assigned 1. The product of the key set will match the product | |
# of the substring. Search the string with a running product to find substring | |
# matching the correct product. | |
# | |
charcount = { 'a': 3, 'b' : 1 }; | |
str = "kjhdfsbabasdadaaaaasdkaaajbajerhhayeom" | |
def find (c, s): | |
Ns = len (s) | |
C = list (c.keys ()) | |
D = list (c.values ()) | |
# prime numbers assigned to the first 25 chars | |
prmsi = [ 2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47, 53, 59, 61, 67, 71, 73, 79, 83, 89 , 97] | |
# primes used in the key, all other set to 1 | |
prms = [] | |
Cord = [ord(c) - ord('a') for c in C] | |
for e,p in enumerate(prmsi): | |
if e in Cord: | |
prms.append (p) | |
else: | |
prms.append (1) | |
# Product of match | |
T = 1 | |
for c,d in zip(C,D): | |
p = prms[ord (c) - ord('a')] | |
T *= p**d | |
print ("T=", T) | |
t = 1 # product of current string | |
f = 0 | |
i = 0 | |
matches = [] | |
mi = 0 | |
mn = Ns | |
mm = 0 | |
while i < Ns: | |
k = prms[ord(s[i]) - ord ('a')] | |
t *= k | |
print ("testing:", s[f:i+1]) | |
if (t > T): | |
# included too many chars: move start | |
t /= prms[ord(s[f]) - ord('a')] # remove first char, usually division by 1 | |
f += 1 # increment start position | |
t /= k # will be retested, could be replaced with bool | |
elif t == T: | |
# found match | |
print ("FOUND match:", s[f:i+1]) | |
matches.append (s[f:i+1]) | |
if (i - f) < mn: | |
mm = mi | |
mn = i - f | |
mi += 1 | |
t /= prms[ord(s[f]) - ord('a')] # remove first matching char | |
# look for next match | |
i += 1 | |
f += 1 | |
else: | |
# no match yet, keep searching | |
i += 1 | |
return (mm, matches) | |
print (find (charcount, str)) | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment