gauteh/find_match_prime.py

## find_match_prime.py
#! /usr/bin/python
#
# Author: Gaute Hope <eg@gaute.vetsj.com> / 2016-08-12
#
# Find all substrings in a string with the number of occurences of a set of
# characters pre-defined.
#
# A prime number is assigned to each character in the key set, all other
# characters are assigned 1. The product of the key set will match the product
# of the substring. Search the string with a running product to find substring
# matching the correct product.
#


charcount = { 'a': 3, 'b' : 1 };
str = "kjhdfsbabasdadaaaaasdkaaajbajerhhayeom"

def find (c, s):
  Ns = len (s)

  C = list (c.keys ())
  D = list (c.values ())

  # prime numbers assigned to the first 25 chars
  prmsi = [ 2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47, 53, 59, 61, 67, 71, 73, 79, 83, 89 , 97]

  # primes used in the key, all other set to 1
  prms = []
  Cord = [ord(c) - ord('a') for c in C]

  for e,p in enumerate(prmsi):
    if e in Cord:
      prms.append (p)
    else:
      prms.append (1)

  # Product of match
  T = 1
  for c,d in zip(C,D):
    p = prms[ord (c) - ord('a')]
    T *= p**d

  print ("T=", T)

  t = 1 # product of current string
  f = 0
  i = 0

  matches = []
  mi = 0
  mn = Ns
  mm = 0

  while i < Ns:
    k = prms[ord(s[i]) - ord ('a')]
    t *= k

    print ("testing:", s[f:i+1])

    if (t > T):
      # included too many chars: move start
      t /= prms[ord(s[f]) - ord('a')] # remove first char, usually division by 1
      f += 1 # increment start position
      t /= k # will be retested, could be replaced with bool

    elif t == T:
      # found match
      print ("FOUND match:", s[f:i+1])
      matches.append (s[f:i+1])

      if (i - f) < mn:
        mm = mi
        mn = i - f

      mi += 1

      t /= prms[ord(s[f]) - ord('a')] # remove first matching char

      # look for next match
      i += 1
      f += 1

    else:
      # no match yet, keep searching
      i += 1

  return (mm, matches)


print (find (charcount, str))
	#! /usr/bin/python
	#
	# Author: Gaute Hope <eg@gaute.vetsj.com> / 2016-08-12
	#
	# Find all substrings in a string with the number of occurences of a set of
	# characters pre-defined.
	#
	# A prime number is assigned to each character in the key set, all other
	# characters are assigned 1. The product of the key set will match the product
	# of the substring. Search the string with a running product to find substring
	# matching the correct product.
	#


	charcount = { 'a': 3, 'b' : 1 };
	str = "kjhdfsbabasdadaaaaasdkaaajbajerhhayeom"

	def find (c, s):
	Ns = len (s)

	C = list (c.keys ())
	D = list (c.values ())

	# prime numbers assigned to the first 25 chars
	prmsi = [ 2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47, 53, 59, 61, 67, 71, 73, 79, 83, 89 , 97]

	# primes used in the key, all other set to 1
	prms = []
	Cord = [ord(c) - ord('a') for c in C]

	for e,p in enumerate(prmsi):
	if e in Cord:
	prms.append (p)
	else:
	prms.append (1)

	# Product of match
	T = 1
	for c,d in zip(C,D):
	p = prms[ord (c) - ord('a')]
	T = p*d

	print ("T=", T)

	t = 1 # product of current string
	f = 0
	i = 0

	matches = []
	mi = 0
	mn = Ns
	mm = 0

	while i < Ns:
	k = prms[ord(s[i]) - ord ('a')]
	t *= k

	print ("testing:", s[f:i+1])

	if (t > T):
	# included too many chars: move start
	t /= prms[ord(s[f]) - ord('a')] # remove first char, usually division by 1
	f += 1 # increment start position
	t /= k # will be retested, could be replaced with bool

	elif t == T:
	# found match
	print ("FOUND match:", s[f:i+1])
	matches.append (s[f:i+1])

	if (i - f) < mn:
	mm = mi
	mn = i - f

	mi += 1

	t /= prms[ord(s[f]) - ord('a')] # remove first matching char

	# look for next match
	i += 1
	f += 1

	else:
	# no match yet, keep searching
	i += 1

	return (mm, matches)


	print (find (charcount, str))