benhall-7/names.py

## names.py
import time

start_time = time.time()

f = open('names_1.txt', 'r')
names_1 = f.read().split("\n")  # List containing 10000 names
f.close()
# takes at most log(n) space, which is less than a second array
names_1.sort()

duplicates = []

def bsearch(arr, val):
    l = 0
    r = len(arr) - 1

    # check both endpoints before starting the loop
    if arr[l] > val or arr[r] < val:
        return False

    if arr[l] == val or arr[r] == val:
        return True

    # if we've made it this far, neither endpoint is equal to the value,
    # and the value may lie in between
    while r - l >= 2:
        mid = (l + r) // 2
        # by assigning only one endpoint at a time we avoid having to check
        # both each time we go through here. The else block is all we need
        if arr[mid] > val:
            r = mid
        elif arr[mid] < val:
            l = mid
        else:
            return True
    return False

with open('names_2.txt', 'r') as f:
    # iterate through the file instead of creating a second array
    for line in f:
        # don't allocate new strings. Use slice instead, and remove trailing newlines
        if line[-1] == "\n":
            name = line[0:-1]
        else:
            name = line[0:]
        # now that we have the name, binary search the sorted list, which is O(b*log(a))
        if bsearch(names_1, name):
            duplicates.append(name)

end_time = time.time()
print (f"{len(duplicates)} duplicates:\n\n{', '.join(duplicates)}\n\n")
print (f"runtime: {end_time - start_time} seconds")
	import time

	start_time = time.time()

	f = open('names_1.txt', 'r')
	names_1 = f.read().split("\n") # List containing 10000 names
	f.close()
	# takes at most log(n) space, which is less than a second array
	names_1.sort()

	duplicates = []

	def bsearch(arr, val):
	l = 0
	r = len(arr) - 1

	# check both endpoints before starting the loop
	if arr[l] > val or arr[r] < val:
	return False

	if arr[l] == val or arr[r] == val:
	return True

	# if we've made it this far, neither endpoint is equal to the value,
	# and the value may lie in between
	while r - l >= 2:
	mid = (l + r) // 2
	# by assigning only one endpoint at a time we avoid having to check
	# both each time we go through here. The else block is all we need
	if arr[mid] > val:
	r = mid
	elif arr[mid] < val:
	l = mid
	else:
	return True
	return False

	with open('names_2.txt', 'r') as f:
	# iterate through the file instead of creating a second array
	for line in f:
	# don't allocate new strings. Use slice instead, and remove trailing newlines
	if line[-1] == "\n":
	name = line[0:-1]
	else:
	name = line[0:]
	# now that we have the name, binary search the sorted list, which is O(b*log(a))
	if bsearch(names_1, name):
	duplicates.append(name)

	end_time = time.time()
	print (f"{len(duplicates)} duplicates:\n\n{', '.join(duplicates)}\n\n")
	print (f"runtime: {end_time - start_time} seconds")