Skip to content

Instantly share code, notes, and snippets.

@elenzil
Forked from hyponymous/anagram_substring.py
Last active July 6, 2018 18:04
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save elenzil/18007a9825e435b8c3c47b4fe1ea81bb to your computer and use it in GitHub Desktop.
Save elenzil/18007a9825e435b8c3c47b4fe1ea81bb to your computer and use it in GitHub Desktop.
import copy
import time
# fun from Ernest!
#
# Given two strings s and t, determine whether some anagram of t is a substring of s.
# For example: if s = "udacity" and t = "ad", then the function returns True.
# Your function definition should look like: question1(s, t) and return a boolean True or False.
def q_ernest(s, t):
"""Ernest is learning python!"""
originalChecklist = {}
# Create checklist of characters
for c in t:
if c in originalChecklist:
originalChecklist[c] += 1
elif c not in s:
return False
else:
originalChecklist[c] = 1
# Variables for stuff
startChar = ''
checklist = originalChecklist.copy()
originalChecklistL = len(checklist)
checklistL = originalChecklistL
lenT = len(t)
remaining = lenT
iteration = 0
maxIterations = len(s) - lenT + 1
for c in s:
if c in checklist:
if(lenT == checklistL):
startChar = c
checklist[c] -= 1
if checklist[c] == 0:
del checklist[c]
checklistL -= 1
remaining -= 1
if remaining <= 0:
return True
else:
if(iteration >= maxIterations):
return False
if(c != startChar):
checklistL = originalChecklistL
checklist = originalChecklist.copy()
remaining = lenT
if c in checklist:
if(lenT == checklistL):
startChar = c
checklist[c] -= 1
if checklist[c] == 0:
del checklist[c]
checklistL -= 1
remaining -= 1
if remaining <= 0:
return True
iteration += 1
return False
def q_ernest2(s, t):
"""Abstract approach"""
# Create checklist of characters
# Basic check: if char in t not in s, return False
checklist = {}
for c in t:
if c not in s:
return False
if c not in checklist:
checklist[c] = 1
else:
checklist[c] += 1
startingCheckChar = t[0]
# print("Starting char: " + startingCheckChar)
checklist[startingCheckChar] -= 1
if(checklist[startingCheckChar] == 0):
del checklist[startingCheckChar]
i = 0
for c in s:
if c == startingCheckChar:
if(q_ernest2_check(i, i, 1, checklist.copy())):
return True
i += 1
return False
def q_ernest2_check(i, originalIndex, direction, modifiedChecklist):
if(direction == 1 and i + direction >= len(s)):
# print(" No more chars to the right")
return q_ernest2_check(originalIndex, originalIndex, -1, modifiedChecklist)
if(direction == -1 and i + direction < 0):
# print(" No more chars to the left. False!")
return False
# blah = 'right'
# if(direction == -1):
# blah = 'left'
# print(" checking " + blah + " (" + s[i + direction] + ") in " + str(modifiedChecklist))
charToCheck = s[i + direction]
if(charToCheck in modifiedChecklist):
modifiedChecklist[charToCheck] -= 1
if(modifiedChecklist[charToCheck] == 0):
del modifiedChecklist[charToCheck]
if(len(modifiedChecklist) <= 0):
# print(" Returning true!!!")
return True
# print(" checks out! New modifiedChecklist: " + str(modifiedChecklist))
return q_ernest2_check(i + direction, originalIndex, direction, modifiedChecklist)
else:
if(direction == 1):
return q_ernest2_check(originalIndex, originalIndex, -1, modifiedChecklist)
# print(" doesn't check out")
return False
def q_jonathan(s, t):
""" I tried to build this off of array.array with a width of 256 indexing by ord(c) but python is slow at making arrays :("""
counts = {}
for c in t:
if c in counts:
counts[c] += 1
else:
counts[c] = 1
current = counts.copy()
i = 0
jump = i
remaining = len(t)
while i < len(s):
c = s[i]
if current.get(c, 0) <= 0:
i = jump
jump += 1
if remaining != len(t):
current = counts.copy()
remaining = len(t)
continue
current[c] -= 1
remaining -= 1
i += 1
if remaining == 0:
return True
return False
def q_jonathan2(s, t):
counts = {}
for c in t:
if c in counts:
counts[c] += 1
else:
counts[c] = 1
current = counts.copy()
i = 0
remaining = len(t)
while i < len(s):
c = s[i]
if current.get(c, 0) <= 0:
if remaining != len(t):
if s[i - (len(t)-remaining)] != c:
current = counts.copy()
remaining = len(t)
i += 1
continue
current[c] -= 1
remaining -= 1
i += 1
if remaining == 0:
return True
return False
def q_jonathan3(s, t):
"""Kinda silly level of optimizations hear. Not easy to undestand."""
counts = {}
for c in t:
if c in counts:
counts[c] += 1
else:
counts[c] = 1
current = counts.copy()
i = 0
j = len(t) - 1
remaining = len(t)
reset = False
while j < len(s):
c = s[j]
if current.get(c, 0) <= 0:
i = j + 1
j += len(t)
if reset is True:
current = counts.copy()
remaining = len(t)
else:
reset = True
continue
current[c] -= 1
j -= 1
remaining -= 1
if remaining == 0:
return True
return False
def q_mike(s, t):
def add_to_hist(c, hist):
hist[c] = 1 if c not in hist else hist[c] + 1
def count_mismatches(a_hist, b_hist):
return len([1 for c in all_chars if a_hist[c] != b_hist[c]])
all_chars = {}
for c in t:
all_chars[c] = 0
for c in s:
all_chars[c] = 0
t_hist = copy.deepcopy(all_chars)
for c in t:
add_to_hist(c, t_hist)
window_hist = copy.deepcopy(all_chars)
num_mismatches = 0
for (i, c) in enumerate(s):
# first len(t) characters: build histogram
if i < len(t):
add_to_hist(c, window_hist)
if i == len(t) - 1:
# initialize num_mismatches
num_mismatches = count_mismatches(t_hist, window_hist)
# sliding window phase
else:
if num_mismatches == 0:
return True
# shift sliding window
left_c = s[i - len(t)]
pre_left_match = t_hist[left_c] == window_hist[left_c]
pre_right_match = t_hist[c] == window_hist[c]
window_hist[left_c] -= 1
window_hist[c] += 1
post_left_match = t_hist[left_c] == window_hist[left_c]
post_right_match = t_hist[c] == window_hist[c]
if pre_left_match and not post_left_match:
num_mismatches += 1
elif not pre_left_match and post_left_match:
num_mismatches -= 1
if pre_right_match and not post_right_match:
num_mismatches += 1
elif not pre_right_match and post_right_match:
num_mismatches -= 1
return num_mismatches == 0
# mike's approach coded up by orion.
# approach here is to slide a window from left to right along s.
# at the beginning we build a histogram of the window,
# and compare how many _KEYS_ in the histogram of s are correct for the histogram of t.
# we notice that each time we slide the window, one character leaves and one joins.
# based on that we:
# 1) update the histogram of the window
# 2) update the number of correct keys in the window
def q_mike_o(s, t):
def histogram_make(s):
# return a histogram of the characters of s
ret = {}
for c in s:
if not c in ret:
ret[c] = 0
ret[c] += 1
return ret
def histogram_correct_for_key(does_this, match_this, for_this_key):
return (for_this_key in match_this) and (for_this_key in does_this) and (does_this[for_this_key] == match_this[for_this_key])
# number of KEYS which have the same values
def histogram_compare(a, b):
ret = 0
for c in b:
if histogram_correct_for_key(a, b, c):
ret += 1
return ret
# build histogram of t. this will remain static after this.
# O(t)
histogram_t = histogram_make(t)
# build histogram of the initial window. this will update.
# O(t)
histogram_s = histogram_make(s[:len(t)])
# target number of correct KEYS we're looking for.
# note this is keys/buckets, not individual letters.
# so if t were say "aaaaaa", the 'correct' value we'd be looking for would be 1.
# O(1)
trg_correct = len(histogram_t)
# calculate number of correct keys in the window histogram.
# O(t)
num_correct = histogram_compare(histogram_s, histogram_t)
if num_correct == trg_correct:
return True
# slide the window
# O(s - t)
for s_start in range(1, len(s) - len(t) + 1):
s_start_old = s_start - 1
char_old = s[s_start_old]
char_new = s[s_start + len(t) - 1]
# compute change in correct count due to losing old character
if char_old in histogram_t:
if histogram_s[char_old] == histogram_t[char_old]:
# it was correct and now no longer is
num_correct -= 1
if histogram_s[char_old] == histogram_t[char_old] + 1:
# it was too large by one and now is equal
num_correct += 1
# remove old
histogram_s[char_old] -= 1
# add new
if not char_new in histogram_s:
histogram_s[char_new] = 0
histogram_s[char_new] += 1
# compute change in correct count due to gaining new character
if char_new in histogram_t:
if histogram_s[char_new] == histogram_t[char_new]:
# it is now correct, so therefore was not before
num_correct += 1
if histogram_s[char_new] == histogram_t[char_new] + 1:
# it is now too large by one, so it used to be correct
num_correct -= 1
if num_correct == trg_correct:
return True
return False
# approach by orion.
# thanks to steve for pointing out error in backing up.
# i think this is no longer O(Ns) because we back-up by O(Nt).
def q_orion(s, t):
# look thru s for the start of a possible anagram
looking_good = 0
# turn t into a map where the key is the letter and the value is the number of occurrences
t_map = {}
for c in t:
if not c in t_map:
t_map[c] = 0
t_map[c] += 1
working_map = t_map.copy()
n = 0
while n < len(s):
c = s[n]
if c in working_map:
looking_good += 1
working_map[c] -= 1
if working_map[c] == 0:
del working_map[c]
if len(working_map) == 0:
return True
else:
if looking_good > 0:
working_map = t_map.copy()
n -= looking_good
looking_good = 0
n += 1
return len(working_map) == 0
from collections import Counter
# approach by steve
def q_steve(s,t):
# create frequency count (O(N))
tCount = Counter(t)
for i in range(len(s)-len(t)+1):
if (Counter(s[i:i+len(t)]) == tCount):
return True
return False
examples = [
['udacity' , 'ad' ],
['udacity' , 'da' ],
['green glass door' , 'glass' ],
['udacity' , 'adi' ],
['udacity' , 'ytic' ],
['udacity' , 'ytc' ],
['udacity' , 'udacity' ],
['udacity' , 'cityuda' ],
['blep' , 'ee' ],
['bleeper' , 'ee' ],
['bleeper' , 'eee' ],
['bleeper' , 'eeep' ],
['eep' , 'ep' ],
['abcadef' , 'bcad' ],
['asdf asdfadfasd asd asd' , 'p' ],
['asdf asdpfadfasd asd asd', 'pp' ],
['asdf asdpfadfasd asd asd', 'pppppppppppp'],
['pppplplpl' , 'pl' ],
['qlwkjr q;lwerjkq;wlkjkqwjeqkwekqw oqwh oqiuyr evlnasefb aowhr pqwhr lahs lfash','ahfq ohq ow ofwh qwfhwqeofh'],
['abcdefghijklmnopqrstuvwxyzz_aabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvw','abcdefghijklmnopqrstuvwxyz_'],
['jbqatsopxhwniropahdyvgogjlmkjkqgcdxxcwvarvpiijklmnopfrsfbqbmdnweuzmtefeltulihz0sck0yun','ijklmnop'],
['123412344321432113542' , '12345' ],
]
# add a long example where t is not in s
# the values 100 and 38 are about as large as i can make them w/o a recursion overflow in q_ernest2.
alphabet = 'abcdefghijklmnopqrstuvwxyz'
long_s = ''
long_t = ''
long_s += '_.'
for _ in range(100):
long_s += alphabet
long_t += '_'
for _ in range(38):
long_t += alphabet
examples.append([long_s, long_t])
# add a long example where t is in s
# the values 100 and 38 are about as large as i can make them w/o a recursion overflow in q_ernest2.
alphabet = 'abcdefghijklmnopqrstuvwxyz'
long_s = ''
long_t = ''
for _ in range(100):
long_s += alphabet
long_s += '_'
for _ in range(38):
long_t += alphabet
long_t += '_'
examples.append([long_s, long_t])
def do_trial(num_iters, s, t, correct_answer, impl_name, implementation):
answer = None
t1 = time.time()
for _ in range(num_iters):
answer = implementation(s, t)
t2 = time.time()
dt = t2 - t1
error_callout = "" if (answer == correct_answer) else " <----------------- ?"
print("%3s %d x %d: %5s, %6dms%s" % (impl_name, len(s), len(t), str(answer), (int)(dt * 1000.0), error_callout))
print('----- TRIAL RUN IN PROGRESS -----')
for ex in examples:
s = ex[0]
t = ex[1]
correct_answer = q_steve(s, t)
ss = s # if len(s) < 80 else ("[ %d chars ]" % (len(s)))
tt = t # if len(t) < 80 else ("[ %d chars ]" % (len(t)))
num_iters = 10
print("len(s): %3d unique(s): %3d s: '%s'" % (len(s), len(Counter(s)), ss))
print("len(t): %3d unique(t): %3d t: '%s'" % (len(t), len(Counter(t)), tt))
print("num iterations: %d" % (num_iters))
do_trial(num_iters, s, t, correct_answer, "o" , q_orion )
do_trial(num_iters, s, t, correct_answer, "s" , q_steve )
do_trial(num_iters, s, t, correct_answer, "m" , q_mike )
do_trial(num_iters, s, t, correct_answer, "m_o", q_mike_o )
do_trial(num_iters, s, t, correct_answer, "j" , q_jonathan )
do_trial(num_iters, s, t, correct_answer, "j2" , q_jonathan2 )
do_trial(num_iters, s, t, correct_answer, "j3" , q_jonathan3 )
do_trial(num_iters, s, t, correct_answer, "e" , q_ernest )
do_trial(num_iters, s, t, correct_answer, "e2" , q_ernest2 )
print("")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment