Skip to content

Instantly share code, notes, and snippets.

@gauteh
Last active August 16, 2016 09:27
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save gauteh/f3e761a14d529a05771e9802d92fa10d to your computer and use it in GitHub Desktop.
Save gauteh/f3e761a14d529a05771e9802d92fa10d to your computer and use it in GitHub Desktop.
#! /usr/bin/python
#
# Author: Gaute Hope <eg@gaute.vetsj.com> / 2016-08-12
#
# Find all substrings in a string with the number of occurences of a set of
# characters pre-defined.
#
# A prime number is assigned to each character in the key set, all other
# characters are assigned 1. The product of the key set will match the product
# of the substring. Search the string with a running product to find substring
# matching the correct product.
#
charcount = { 'a': 3, 'b' : 1 };
str = "kjhdfsbabasdadaaaaasdkaaajbajerhhayeom"
def find (c, s):
Ns = len (s)
C = list (c.keys ())
D = list (c.values ())
# prime numbers assigned to the first 25 chars
prmsi = [ 2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47, 53, 59, 61, 67, 71, 73, 79, 83, 89 , 97]
# primes used in the key, all other set to 1
prms = []
Cord = [ord(c) - ord('a') for c in C]
for e,p in enumerate(prmsi):
if e in Cord:
prms.append (p)
else:
prms.append (1)
# Product of match
T = 1
for c,d in zip(C,D):
p = prms[ord (c) - ord('a')]
T *= p**d
print ("T=", T)
t = 1 # product of current string
f = 0
i = 0
matches = []
mi = 0
mn = Ns
mm = 0
while i < Ns:
k = prms[ord(s[i]) - ord ('a')]
t *= k
print ("testing:", s[f:i+1])
if (t > T):
# included too many chars: move start
t /= prms[ord(s[f]) - ord('a')] # remove first char, usually division by 1
f += 1 # increment start position
t /= k # will be retested, could be replaced with bool
elif t == T:
# found match
print ("FOUND match:", s[f:i+1])
matches.append (s[f:i+1])
if (i - f) < mn:
mm = mi
mn = i - f
mi += 1
t /= prms[ord(s[f]) - ord('a')] # remove first matching char
# look for next match
i += 1
f += 1
else:
# no match yet, keep searching
i += 1
return (mm, matches)
print (find (charcount, str))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment