-
-
Save elenzil/18007a9825e435b8c3c47b4fe1ea81bb to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import copy | |
import time | |
# fun from Ernest! | |
# | |
# Given two strings s and t, determine whether some anagram of t is a substring of s. | |
# For example: if s = "udacity" and t = "ad", then the function returns True. | |
# Your function definition should look like: question1(s, t) and return a boolean True or False. | |
def q_ernest(s, t): | |
"""Ernest is learning python!""" | |
originalChecklist = {} | |
# Create checklist of characters | |
for c in t: | |
if c in originalChecklist: | |
originalChecklist[c] += 1 | |
elif c not in s: | |
return False | |
else: | |
originalChecklist[c] = 1 | |
# Variables for stuff | |
startChar = '' | |
checklist = originalChecklist.copy() | |
originalChecklistL = len(checklist) | |
checklistL = originalChecklistL | |
lenT = len(t) | |
remaining = lenT | |
iteration = 0 | |
maxIterations = len(s) - lenT + 1 | |
for c in s: | |
if c in checklist: | |
if(lenT == checklistL): | |
startChar = c | |
checklist[c] -= 1 | |
if checklist[c] == 0: | |
del checklist[c] | |
checklistL -= 1 | |
remaining -= 1 | |
if remaining <= 0: | |
return True | |
else: | |
if(iteration >= maxIterations): | |
return False | |
if(c != startChar): | |
checklistL = originalChecklistL | |
checklist = originalChecklist.copy() | |
remaining = lenT | |
if c in checklist: | |
if(lenT == checklistL): | |
startChar = c | |
checklist[c] -= 1 | |
if checklist[c] == 0: | |
del checklist[c] | |
checklistL -= 1 | |
remaining -= 1 | |
if remaining <= 0: | |
return True | |
iteration += 1 | |
return False | |
def q_ernest2(s, t): | |
"""Abstract approach""" | |
# Create checklist of characters | |
# Basic check: if char in t not in s, return False | |
checklist = {} | |
for c in t: | |
if c not in s: | |
return False | |
if c not in checklist: | |
checklist[c] = 1 | |
else: | |
checklist[c] += 1 | |
startingCheckChar = t[0] | |
# print("Starting char: " + startingCheckChar) | |
checklist[startingCheckChar] -= 1 | |
if(checklist[startingCheckChar] == 0): | |
del checklist[startingCheckChar] | |
i = 0 | |
for c in s: | |
if c == startingCheckChar: | |
if(q_ernest2_check(i, i, 1, checklist.copy())): | |
return True | |
i += 1 | |
return False | |
def q_ernest2_check(i, originalIndex, direction, modifiedChecklist): | |
if(direction == 1 and i + direction >= len(s)): | |
# print(" No more chars to the right") | |
return q_ernest2_check(originalIndex, originalIndex, -1, modifiedChecklist) | |
if(direction == -1 and i + direction < 0): | |
# print(" No more chars to the left. False!") | |
return False | |
# blah = 'right' | |
# if(direction == -1): | |
# blah = 'left' | |
# print(" checking " + blah + " (" + s[i + direction] + ") in " + str(modifiedChecklist)) | |
charToCheck = s[i + direction] | |
if(charToCheck in modifiedChecklist): | |
modifiedChecklist[charToCheck] -= 1 | |
if(modifiedChecklist[charToCheck] == 0): | |
del modifiedChecklist[charToCheck] | |
if(len(modifiedChecklist) <= 0): | |
# print(" Returning true!!!") | |
return True | |
# print(" checks out! New modifiedChecklist: " + str(modifiedChecklist)) | |
return q_ernest2_check(i + direction, originalIndex, direction, modifiedChecklist) | |
else: | |
if(direction == 1): | |
return q_ernest2_check(originalIndex, originalIndex, -1, modifiedChecklist) | |
# print(" doesn't check out") | |
return False | |
def q_jonathan(s, t): | |
""" I tried to build this off of array.array with a width of 256 indexing by ord(c) but python is slow at making arrays :(""" | |
counts = {} | |
for c in t: | |
if c in counts: | |
counts[c] += 1 | |
else: | |
counts[c] = 1 | |
current = counts.copy() | |
i = 0 | |
jump = i | |
remaining = len(t) | |
while i < len(s): | |
c = s[i] | |
if current.get(c, 0) <= 0: | |
i = jump | |
jump += 1 | |
if remaining != len(t): | |
current = counts.copy() | |
remaining = len(t) | |
continue | |
current[c] -= 1 | |
remaining -= 1 | |
i += 1 | |
if remaining == 0: | |
return True | |
return False | |
def q_jonathan2(s, t): | |
counts = {} | |
for c in t: | |
if c in counts: | |
counts[c] += 1 | |
else: | |
counts[c] = 1 | |
current = counts.copy() | |
i = 0 | |
remaining = len(t) | |
while i < len(s): | |
c = s[i] | |
if current.get(c, 0) <= 0: | |
if remaining != len(t): | |
if s[i - (len(t)-remaining)] != c: | |
current = counts.copy() | |
remaining = len(t) | |
i += 1 | |
continue | |
current[c] -= 1 | |
remaining -= 1 | |
i += 1 | |
if remaining == 0: | |
return True | |
return False | |
def q_jonathan3(s, t): | |
"""Kinda silly level of optimizations hear. Not easy to undestand.""" | |
counts = {} | |
for c in t: | |
if c in counts: | |
counts[c] += 1 | |
else: | |
counts[c] = 1 | |
current = counts.copy() | |
i = 0 | |
j = len(t) - 1 | |
remaining = len(t) | |
reset = False | |
while j < len(s): | |
c = s[j] | |
if current.get(c, 0) <= 0: | |
i = j + 1 | |
j += len(t) | |
if reset is True: | |
current = counts.copy() | |
remaining = len(t) | |
else: | |
reset = True | |
continue | |
current[c] -= 1 | |
j -= 1 | |
remaining -= 1 | |
if remaining == 0: | |
return True | |
return False | |
def q_mike(s, t): | |
def add_to_hist(c, hist): | |
hist[c] = 1 if c not in hist else hist[c] + 1 | |
def count_mismatches(a_hist, b_hist): | |
return len([1 for c in all_chars if a_hist[c] != b_hist[c]]) | |
all_chars = {} | |
for c in t: | |
all_chars[c] = 0 | |
for c in s: | |
all_chars[c] = 0 | |
t_hist = copy.deepcopy(all_chars) | |
for c in t: | |
add_to_hist(c, t_hist) | |
window_hist = copy.deepcopy(all_chars) | |
num_mismatches = 0 | |
for (i, c) in enumerate(s): | |
# first len(t) characters: build histogram | |
if i < len(t): | |
add_to_hist(c, window_hist) | |
if i == len(t) - 1: | |
# initialize num_mismatches | |
num_mismatches = count_mismatches(t_hist, window_hist) | |
# sliding window phase | |
else: | |
if num_mismatches == 0: | |
return True | |
# shift sliding window | |
left_c = s[i - len(t)] | |
pre_left_match = t_hist[left_c] == window_hist[left_c] | |
pre_right_match = t_hist[c] == window_hist[c] | |
window_hist[left_c] -= 1 | |
window_hist[c] += 1 | |
post_left_match = t_hist[left_c] == window_hist[left_c] | |
post_right_match = t_hist[c] == window_hist[c] | |
if pre_left_match and not post_left_match: | |
num_mismatches += 1 | |
elif not pre_left_match and post_left_match: | |
num_mismatches -= 1 | |
if pre_right_match and not post_right_match: | |
num_mismatches += 1 | |
elif not pre_right_match and post_right_match: | |
num_mismatches -= 1 | |
return num_mismatches == 0 | |
# mike's approach coded up by orion. | |
# approach here is to slide a window from left to right along s. | |
# at the beginning we build a histogram of the window, | |
# and compare how many _KEYS_ in the histogram of s are correct for the histogram of t. | |
# we notice that each time we slide the window, one character leaves and one joins. | |
# based on that we: | |
# 1) update the histogram of the window | |
# 2) update the number of correct keys in the window | |
def q_mike_o(s, t): | |
def histogram_make(s): | |
# return a histogram of the characters of s | |
ret = {} | |
for c in s: | |
if not c in ret: | |
ret[c] = 0 | |
ret[c] += 1 | |
return ret | |
def histogram_correct_for_key(does_this, match_this, for_this_key): | |
return (for_this_key in match_this) and (for_this_key in does_this) and (does_this[for_this_key] == match_this[for_this_key]) | |
# number of KEYS which have the same values | |
def histogram_compare(a, b): | |
ret = 0 | |
for c in b: | |
if histogram_correct_for_key(a, b, c): | |
ret += 1 | |
return ret | |
# build histogram of t. this will remain static after this. | |
# O(t) | |
histogram_t = histogram_make(t) | |
# build histogram of the initial window. this will update. | |
# O(t) | |
histogram_s = histogram_make(s[:len(t)]) | |
# target number of correct KEYS we're looking for. | |
# note this is keys/buckets, not individual letters. | |
# so if t were say "aaaaaa", the 'correct' value we'd be looking for would be 1. | |
# O(1) | |
trg_correct = len(histogram_t) | |
# calculate number of correct keys in the window histogram. | |
# O(t) | |
num_correct = histogram_compare(histogram_s, histogram_t) | |
if num_correct == trg_correct: | |
return True | |
# slide the window | |
# O(s - t) | |
for s_start in range(1, len(s) - len(t) + 1): | |
s_start_old = s_start - 1 | |
char_old = s[s_start_old] | |
char_new = s[s_start + len(t) - 1] | |
# compute change in correct count due to losing old character | |
if char_old in histogram_t: | |
if histogram_s[char_old] == histogram_t[char_old]: | |
# it was correct and now no longer is | |
num_correct -= 1 | |
if histogram_s[char_old] == histogram_t[char_old] + 1: | |
# it was too large by one and now is equal | |
num_correct += 1 | |
# remove old | |
histogram_s[char_old] -= 1 | |
# add new | |
if not char_new in histogram_s: | |
histogram_s[char_new] = 0 | |
histogram_s[char_new] += 1 | |
# compute change in correct count due to gaining new character | |
if char_new in histogram_t: | |
if histogram_s[char_new] == histogram_t[char_new]: | |
# it is now correct, so therefore was not before | |
num_correct += 1 | |
if histogram_s[char_new] == histogram_t[char_new] + 1: | |
# it is now too large by one, so it used to be correct | |
num_correct -= 1 | |
if num_correct == trg_correct: | |
return True | |
return False | |
# approach by orion. | |
# thanks to steve for pointing out error in backing up. | |
# i think this is no longer O(Ns) because we back-up by O(Nt). | |
def q_orion(s, t): | |
# look thru s for the start of a possible anagram | |
looking_good = 0 | |
# turn t into a map where the key is the letter and the value is the number of occurrences | |
t_map = {} | |
for c in t: | |
if not c in t_map: | |
t_map[c] = 0 | |
t_map[c] += 1 | |
working_map = t_map.copy() | |
n = 0 | |
while n < len(s): | |
c = s[n] | |
if c in working_map: | |
looking_good += 1 | |
working_map[c] -= 1 | |
if working_map[c] == 0: | |
del working_map[c] | |
if len(working_map) == 0: | |
return True | |
else: | |
if looking_good > 0: | |
working_map = t_map.copy() | |
n -= looking_good | |
looking_good = 0 | |
n += 1 | |
return len(working_map) == 0 | |
from collections import Counter | |
# approach by steve | |
def q_steve(s,t): | |
# create frequency count (O(N)) | |
tCount = Counter(t) | |
for i in range(len(s)-len(t)+1): | |
if (Counter(s[i:i+len(t)]) == tCount): | |
return True | |
return False | |
examples = [ | |
['udacity' , 'ad' ], | |
['udacity' , 'da' ], | |
['green glass door' , 'glass' ], | |
['udacity' , 'adi' ], | |
['udacity' , 'ytic' ], | |
['udacity' , 'ytc' ], | |
['udacity' , 'udacity' ], | |
['udacity' , 'cityuda' ], | |
['blep' , 'ee' ], | |
['bleeper' , 'ee' ], | |
['bleeper' , 'eee' ], | |
['bleeper' , 'eeep' ], | |
['eep' , 'ep' ], | |
['abcadef' , 'bcad' ], | |
['asdf asdfadfasd asd asd' , 'p' ], | |
['asdf asdpfadfasd asd asd', 'pp' ], | |
['asdf asdpfadfasd asd asd', 'pppppppppppp'], | |
['pppplplpl' , 'pl' ], | |
['qlwkjr q;lwerjkq;wlkjkqwjeqkwekqw oqwh oqiuyr evlnasefb aowhr pqwhr lahs lfash','ahfq ohq ow ofwh qwfhwqeofh'], | |
['abcdefghijklmnopqrstuvwxyzz_aabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvw','abcdefghijklmnopqrstuvwxyz_'], | |
['jbqatsopxhwniropahdyvgogjlmkjkqgcdxxcwvarvpiijklmnopfrsfbqbmdnweuzmtefeltulihz0sck0yun','ijklmnop'], | |
['123412344321432113542' , '12345' ], | |
] | |
# add a long example where t is not in s | |
# the values 100 and 38 are about as large as i can make them w/o a recursion overflow in q_ernest2. | |
alphabet = 'abcdefghijklmnopqrstuvwxyz' | |
long_s = '' | |
long_t = '' | |
long_s += '_.' | |
for _ in range(100): | |
long_s += alphabet | |
long_t += '_' | |
for _ in range(38): | |
long_t += alphabet | |
examples.append([long_s, long_t]) | |
# add a long example where t is in s | |
# the values 100 and 38 are about as large as i can make them w/o a recursion overflow in q_ernest2. | |
alphabet = 'abcdefghijklmnopqrstuvwxyz' | |
long_s = '' | |
long_t = '' | |
for _ in range(100): | |
long_s += alphabet | |
long_s += '_' | |
for _ in range(38): | |
long_t += alphabet | |
long_t += '_' | |
examples.append([long_s, long_t]) | |
def do_trial(num_iters, s, t, correct_answer, impl_name, implementation): | |
answer = None | |
t1 = time.time() | |
for _ in range(num_iters): | |
answer = implementation(s, t) | |
t2 = time.time() | |
dt = t2 - t1 | |
error_callout = "" if (answer == correct_answer) else " <----------------- ?" | |
print("%3s %d x %d: %5s, %6dms%s" % (impl_name, len(s), len(t), str(answer), (int)(dt * 1000.0), error_callout)) | |
print('----- TRIAL RUN IN PROGRESS -----') | |
for ex in examples: | |
s = ex[0] | |
t = ex[1] | |
correct_answer = q_steve(s, t) | |
ss = s # if len(s) < 80 else ("[ %d chars ]" % (len(s))) | |
tt = t # if len(t) < 80 else ("[ %d chars ]" % (len(t))) | |
num_iters = 10 | |
print("len(s): %3d unique(s): %3d s: '%s'" % (len(s), len(Counter(s)), ss)) | |
print("len(t): %3d unique(t): %3d t: '%s'" % (len(t), len(Counter(t)), tt)) | |
print("num iterations: %d" % (num_iters)) | |
do_trial(num_iters, s, t, correct_answer, "o" , q_orion ) | |
do_trial(num_iters, s, t, correct_answer, "s" , q_steve ) | |
do_trial(num_iters, s, t, correct_answer, "m" , q_mike ) | |
do_trial(num_iters, s, t, correct_answer, "m_o", q_mike_o ) | |
do_trial(num_iters, s, t, correct_answer, "j" , q_jonathan ) | |
do_trial(num_iters, s, t, correct_answer, "j2" , q_jonathan2 ) | |
do_trial(num_iters, s, t, correct_answer, "j3" , q_jonathan3 ) | |
do_trial(num_iters, s, t, correct_answer, "e" , q_ernest ) | |
do_trial(num_iters, s, t, correct_answer, "e2" , q_ernest2 ) | |
print("") |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment