hyponymous/anagram_substring.py

## anagram_substring.py
import copy

# fun from Ernest!
#
# Given two strings s and t, determine whether some anagram of t is a substring of s.
# For example: if s = "udacity" and t = "ad", then the function returns True.
# Your function definition should look like: question1(s, t) and return a boolean True or False.

def q_mike(s, t):
    def add_to_hist(c, hist):
        hist[c] = 1 if c not in hist else hist[c] + 1

    def count_mismatches(a_hist, b_hist):
        return len([1 for c in all_chars if a_hist[c] != b_hist[c]])

    all_chars = {}
    for c in t:
        all_chars[c] = 0
    for c in s:
        all_chars[c] = 0

    t_hist = copy.deepcopy(all_chars)
    for c in t:
        add_to_hist(c, t_hist)

    window_hist = copy.deepcopy(all_chars)

    num_mismatches = 0
    for (i, c) in enumerate(s):
        # first len(t) characters: build histogram
        if i < len(t):
            add_to_hist(c, window_hist)
            if i == len(t) - 1:
                # initialize num_mismatches
                num_mismatches = count_mismatches(t_hist, window_hist)

        # sliding window phase
        else:
            if num_mismatches == 0:
                return True

            # shift sliding window
            left_c = s[i - len(t)]

            pre_left_match = t_hist[left_c] == window_hist[left_c]
            pre_right_match = t_hist[c] == window_hist[c]

            window_hist[left_c] -= 1
            window_hist[c] += 1

            post_left_match = t_hist[left_c] == window_hist[left_c]
            post_right_match = t_hist[c] == window_hist[c]

            if pre_left_match and not post_left_match:
                num_mismatches += 1
            elif not pre_left_match and post_left_match:
                num_mismatches -= 1
            if pre_right_match and not post_right_match:
                num_mismatches += 1
            elif not pre_right_match and post_right_match:
                num_mismatches -= 1

    return num_mismatches == 0

# approach by orion.
# thanks to steve for pointing out error in backing up.
# i think this is no longer O(Ns) because we back-up by O(Nt).
def q_orion(s, t):
  # look thru s for the start of a possible anagram
  looking_good = 0

  # turn t into a map where the key is the letter and the value is the number of occurrences
  t_map = {}
  for c in t:
    if not c in t_map:
      t_map[c] = 0
    t_map[c] += 1

  working_map = copy.deepcopy(t_map)

  n = 0
  while n < len(s):
    c = s[n]
    if c in working_map:
      looking_good += 1
      working_map[c] -= 1
      if working_map[c] == 0:
        del working_map[c]

      if len(working_map) == 0:
        return True
    else:
      if looking_good > 0:
        working_map = copy.deepcopy(t_map)
        n -= looking_good
      looking_good = 0
    n += 1

  return len(working_map) == 0

from collections import Counter

# approach by steve
def q_steve(s,t):
    # create frequency count (O(N))
    tCount = Counter(t)
    for i in range(len(s)-len(t)+1):
        if (Counter(s[i:i+len(t)]) == tCount):
            return True
    return False

examples = [
  ['udacity', 'ad'     ],
  ['udacity', 'adi'    ],
  ['udacity', 'ytic'   ],
  ['udacity', 'ytc'    ],
  ['udacity', 'udacity'],
  ['udacity', 'cityuda'],
  ['blep'   , 'ee'     ],
  ['bleeper', 'ee'     ],
  ['bleeper', 'eee'    ],
  ['bleeper', 'eeep'   ],
  ['eep'    , 'ep'     ],
  ['abcadef','bcad'    ],
]

for ex in examples:
  s = ex[0]
  t = ex[1]
  print("o %20s in %20s ? %s" % (t, s, q_orion(s, t)))
  print("s %20s in %20s ? %s" % (t, s, q_steve(s, t)))
  print("m %20s in %20s ? %s" % (t, s, q_mike(s, t)))

 # output
 #########################################
 #      ad in              udacity ? True
 #     adi in              udacity ? False
 #    ytic in              udacity ? True
 #     ytc in              udacity ? False
 # udacity in              udacity ? True
 # cityuda in              udacity ? True
 #      ee in                 blep ? False
 #      ee in              bleeper ? True
 #     eee in              bleeper ? False
 #    eeep in              bleeper ? True
 #      ep in                  eep ? True
	import copy

	# fun from Ernest!
	#
	# Given two strings s and t, determine whether some anagram of t is a substring of s.
	# For example: if s = "udacity" and t = "ad", then the function returns True.
	# Your function definition should look like: question1(s, t) and return a boolean True or False.

	def q_mike(s, t):
	def add_to_hist(c, hist):
	hist[c] = 1 if c not in hist else hist[c] + 1

	def count_mismatches(a_hist, b_hist):
	return len([1 for c in all_chars if a_hist[c] != b_hist[c]])

	all_chars = {}
	for c in t:
	all_chars[c] = 0
	for c in s:
	all_chars[c] = 0

	t_hist = copy.deepcopy(all_chars)
	for c in t:
	add_to_hist(c, t_hist)

	window_hist = copy.deepcopy(all_chars)

	num_mismatches = 0
	for (i, c) in enumerate(s):
	# first len(t) characters: build histogram
	if i < len(t):
	add_to_hist(c, window_hist)
	if i == len(t) - 1:
	# initialize num_mismatches
	num_mismatches = count_mismatches(t_hist, window_hist)

	# sliding window phase
	else:
	if num_mismatches == 0:
	return True

	# shift sliding window
	left_c = s[i - len(t)]

	pre_left_match = t_hist[left_c] == window_hist[left_c]
	pre_right_match = t_hist[c] == window_hist[c]

	window_hist[left_c] -= 1
	window_hist[c] += 1

	post_left_match = t_hist[left_c] == window_hist[left_c]
	post_right_match = t_hist[c] == window_hist[c]

	if pre_left_match and not post_left_match:
	num_mismatches += 1
	elif not pre_left_match and post_left_match:
	num_mismatches -= 1
	if pre_right_match and not post_right_match:
	num_mismatches += 1
	elif not pre_right_match and post_right_match:
	num_mismatches -= 1

	return num_mismatches == 0

	# approach by orion.
	# thanks to steve for pointing out error in backing up.
	# i think this is no longer O(Ns) because we back-up by O(Nt).
	def q_orion(s, t):
	# look thru s for the start of a possible anagram
	looking_good = 0

	# turn t into a map where the key is the letter and the value is the number of occurrences
	t_map = {}
	for c in t:
	if not c in t_map:
	t_map[c] = 0
	t_map[c] += 1

	working_map = copy.deepcopy(t_map)

	n = 0
	while n < len(s):
	c = s[n]
	if c in working_map:
	looking_good += 1
	working_map[c] -= 1
	if working_map[c] == 0:
	del working_map[c]

	if len(working_map) == 0:
	return True
	else:
	if looking_good > 0:
	working_map = copy.deepcopy(t_map)
	n -= looking_good
	looking_good = 0
	n += 1

	return len(working_map) == 0

	from collections import Counter

	# approach by steve
	def q_steve(s,t):
	# create frequency count (O(N))
	tCount = Counter(t)
	for i in range(len(s)-len(t)+1):
	if (Counter(s[i:i+len(t)]) == tCount):
	return True
	return False

	examples = [
	['udacity', 'ad' ],
	['udacity', 'adi' ],
	['udacity', 'ytic' ],
	['udacity', 'ytc' ],
	['udacity', 'udacity'],
	['udacity', 'cityuda'],
	['blep' , 'ee' ],
	['bleeper', 'ee' ],
	['bleeper', 'eee' ],
	['bleeper', 'eeep' ],
	['eep' , 'ep' ],
	['abcadef','bcad' ],
	]

	for ex in examples:
	s = ex[0]
	t = ex[1]
	print("o %20s in %20s ? %s" % (t, s, q_orion(s, t)))
	print("s %20s in %20s ? %s" % (t, s, q_steve(s, t)))
	print("m %20s in %20s ? %s" % (t, s, q_mike(s, t)))

	# output
	#########################################
	# ad in udacity ? True
	# adi in udacity ? False
	# ytic in udacity ? True
	# ytc in udacity ? False
	# udacity in udacity ? True
	# cityuda in udacity ? True
	# ee in blep ? False
	# ee in bleeper ? True
	# eee in bleeper ? False
	# eeep in bleeper ? True
	# ep in eep ? True