gsinclair/week3.py

## week3.py
import random
import time

# 'timed' runs a function several times and prints the amount
# of time taken in seconds.
def timed(name, f, ntimes):
  start = time.time()
  for i in range(ntimes):
    f()
  elapsed = time.time() - start
  print("%s (%d): %f" % (name, ntimes, elapsed))

# N1 is a massive list. N2 is a massive set.
N1 = [n for n in range(1,10000001)]
N2 = set(N1)

# The code below is to demonstrate that looking for an item
# in a large list quite slow, but looking for an item in a
# large list is instant.

def find_in_list():
  x = random.randint(-5000000, 15000000)
  return x in N1

def find_in_set():
  x = random.randint(-5000000, 15000000)
  return x in N2

def time1():
  timed("List", find_in_list, 10)

def time2():
  timed("Set", find_in_set, 10)

# -----------------------------------------------------

# Looking for an item in a list is an O(n) operation,
# where n is the size of the list. This means that as the
# list grows longer, the lookup time gets longer as well.

# Looking for an item in a set is either:
#  * an O(1) operation [constant time]   OR
#  * an O(log n) operation [blindingly fast]
#
# It depends how the set is implemented.

# -----------------------------------------------------

# An example of a wonderful Python feature called a list
# comprehension.
xs = [n+1 for n in range(100) if n % 3 == 0]

# A long-form equivalent of the list comprehension above
# would be:
xs = []
for n in range(100):
  if n % 3 == 0:
    xs.append(n+1)

# Here is a definition of factors(n) that uses a list comprehension
# instead of the explicit loop we have used before.
def factors(n):
  max_factor = n // 2 + 1
  factors = [f for f in range(1,max_factor) if n % f == 0]
  factors.append(n)
  return factors

# -----------------------------------------------------

# And now we have the highest common factor of a and b using
# set intersection. This is faster than the code we wrote before,
# which used lists instead of sets.
def hcf_set(a,b):
  fa = set(factors(a))
  fb = set(factors(b))
  common = fa.intersection(fb)
  return max(common)

# And now the fastest hcf or gcd of all. This is Euclid's algorithm,
# which finds the GCD of two numbers by breaking it down into remainders,
# which are necessarily smaller.
#
# Here is a great example of the algorithm. Consider 305 and 1040.
#   1040 = 3(305) + 125
#    305 = 2(125) +  55
#    125 = 2( 55) +  15
#     55 = 3( 15) +  10
#     15 = 1( 10) +   5
#     10 = 2(  5) +   0     --> so the GCD is 5
#
# Here is the same one written recursively, which is clearer.
#     gcd(305, 1040)
#   = gcd(1040, 305)       (need the larger number first)
#   = gcd( 305, 125)
#   = gcd( 125,  55)
#   = gcd(  55,  15)
#   = gcd(  15,  10)
#   = gcd(  10,   5)
#   = gcd(   5,   0)       (now we know we have an answer)
#   = 5
#
# Compare this to our old way of finding HCF(305, 1040):
#  * the factors of 305 are {1, 5, 61, 305}
#  * the factors of 1040 are {1, 2, 4, 5, 8, 10, 13, 16, 20, 26, 40,
#                             52, 65, 80, 104, 130, 208, 260, 520, 1040}
#    - a lot of work goes into finding these factors
#  * the common factors are {1,5}
#  * the max is 5
#
# There is overhead in creating lists and sets. The Euclidean algorithm
# has none of that overhead: it simply does about 6 modulo calculations
# and that's it!
def gcd(a,b):
  if b == 0:
    # We can't break it down any more. We're done.
    return a
  elif a < b:
    # Need to put the numbers in the right order.
    return gcd(b,a)
  elif a == b:
    # gcd(14,14) == 14. We're done.
    return a
  else:
    return gcd(b, a % b)

# Question for the audience. There is a little bit of the code
# in the function above that is unnecessary. What is it?
	import random
	import time

	# 'timed' runs a function several times and prints the amount
	# of time taken in seconds.
	def timed(name, f, ntimes):
	start = time.time()
	for i in range(ntimes):
	f()
	elapsed = time.time() - start
	print("%s (%d): %f" % (name, ntimes, elapsed))

	# N1 is a massive list. N2 is a massive set.
	N1 = [n for n in range(1,10000001)]
	N2 = set(N1)

	# The code below is to demonstrate that looking for an item
	# in a large list quite slow, but looking for an item in a
	# large list is instant.

	def find_in_list():
	x = random.randint(-5000000, 15000000)
	return x in N1

	def find_in_set():
	x = random.randint(-5000000, 15000000)
	return x in N2

	def time1():
	timed("List", find_in_list, 10)

	def time2():
	timed("Set", find_in_set, 10)

	# -----------------------------------------------------

	# Looking for an item in a list is an O(n) operation,
	# where n is the size of the list. This means that as the
	# list grows longer, the lookup time gets longer as well.

	# Looking for an item in a set is either:
	# * an O(1) operation [constant time] OR
	# * an O(log n) operation [blindingly fast]
	#
	# It depends how the set is implemented.

	# -----------------------------------------------------

	# An example of a wonderful Python feature called a list
	# comprehension.
	xs = [n+1 for n in range(100) if n % 3 == 0]

	# A long-form equivalent of the list comprehension above
	# would be:
	xs = []
	for n in range(100):
	if n % 3 == 0:
	xs.append(n+1)

	# Here is a definition of factors(n) that uses a list comprehension
	# instead of the explicit loop we have used before.
	def factors(n):
	max_factor = n // 2 + 1
	factors = [f for f in range(1,max_factor) if n % f == 0]
	factors.append(n)
	return factors

	# -----------------------------------------------------

	# And now we have the highest common factor of a and b using
	# set intersection. This is faster than the code we wrote before,
	# which used lists instead of sets.
	def hcf_set(a,b):
	fa = set(factors(a))
	fb = set(factors(b))
	common = fa.intersection(fb)
	return max(common)

	# And now the fastest hcf or gcd of all. This is Euclid's algorithm,
	# which finds the GCD of two numbers by breaking it down into remainders,
	# which are necessarily smaller.
	#
	# Here is a great example of the algorithm. Consider 305 and 1040.
	# 1040 = 3(305) + 125
	# 305 = 2(125) + 55
	# 125 = 2( 55) + 15
	# 55 = 3( 15) + 10
	# 15 = 1( 10) + 5
	# 10 = 2( 5) + 0 --> so the GCD is 5
	#
	# Here is the same one written recursively, which is clearer.
	# gcd(305, 1040)
	# = gcd(1040, 305) (need the larger number first)
	# = gcd( 305, 125)
	# = gcd( 125, 55)
	# = gcd( 55, 15)
	# = gcd( 15, 10)
	# = gcd( 10, 5)
	# = gcd( 5, 0) (now we know we have an answer)
	# = 5
	#
	# Compare this to our old way of finding HCF(305, 1040):
	# * the factors of 305 are {1, 5, 61, 305}
	# * the factors of 1040 are {1, 2, 4, 5, 8, 10, 13, 16, 20, 26, 40,
	# 52, 65, 80, 104, 130, 208, 260, 520, 1040}
	# - a lot of work goes into finding these factors
	# * the common factors are {1,5}
	# * the max is 5
	#
	# There is overhead in creating lists and sets. The Euclidean algorithm
	# has none of that overhead: it simply does about 6 modulo calculations
	# and that's it!
	def gcd(a,b):
	if b == 0:
	# We can't break it down any more. We're done.
	return a
	elif a < b:
	# Need to put the numbers in the right order.
	return gcd(b,a)
	elif a == b:
	# gcd(14,14) == 14. We're done.
	return a
	else:
	return gcd(b, a % b)

	# Question for the audience. There is a little bit of the code
	# in the function above that is unnecessary. What is it?