Last active
March 29, 2019 04:32
-
-
Save gsinclair/693b05b5a7e2926e7daee695bb62f320 to your computer and use it in GitHub Desktop.
Week 3 of Informatics, covering the relative efficiency of lists and sets, an example of list comprehensions, and Euclid's GCD algorithm.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import random | |
import time | |
# 'timed' runs a function several times and prints the amount | |
# of time taken in seconds. | |
def timed(name, f, ntimes): | |
start = time.time() | |
for i in range(ntimes): | |
f() | |
elapsed = time.time() - start | |
print("%s (%d): %f" % (name, ntimes, elapsed)) | |
# N1 is a massive list. N2 is a massive set. | |
N1 = [n for n in range(1,10000001)] | |
N2 = set(N1) | |
# The code below is to demonstrate that looking for an item | |
# in a large list quite slow, but looking for an item in a | |
# large list is instant. | |
def find_in_list(): | |
x = random.randint(-5000000, 15000000) | |
return x in N1 | |
def find_in_set(): | |
x = random.randint(-5000000, 15000000) | |
return x in N2 | |
def time1(): | |
timed("List", find_in_list, 10) | |
def time2(): | |
timed("Set", find_in_set, 10) | |
# ----------------------------------------------------- | |
# Looking for an item in a list is an O(n) operation, | |
# where n is the size of the list. This means that as the | |
# list grows longer, the lookup time gets longer as well. | |
# Looking for an item in a set is either: | |
# * an O(1) operation [constant time] OR | |
# * an O(log n) operation [blindingly fast] | |
# | |
# It depends how the set is implemented. | |
# ----------------------------------------------------- | |
# An example of a wonderful Python feature called a list | |
# comprehension. | |
xs = [n+1 for n in range(100) if n % 3 == 0] | |
# A long-form equivalent of the list comprehension above | |
# would be: | |
xs = [] | |
for n in range(100): | |
if n % 3 == 0: | |
xs.append(n+1) | |
# Here is a definition of factors(n) that uses a list comprehension | |
# instead of the explicit loop we have used before. | |
def factors(n): | |
max_factor = n // 2 + 1 | |
factors = [f for f in range(1,max_factor) if n % f == 0] | |
factors.append(n) | |
return factors | |
# ----------------------------------------------------- | |
# And now we have the highest common factor of a and b using | |
# set intersection. This is faster than the code we wrote before, | |
# which used lists instead of sets. | |
def hcf_set(a,b): | |
fa = set(factors(a)) | |
fb = set(factors(b)) | |
common = fa.intersection(fb) | |
return max(common) | |
# And now the fastest hcf or gcd of all. This is Euclid's algorithm, | |
# which finds the GCD of two numbers by breaking it down into remainders, | |
# which are necessarily smaller. | |
# | |
# Here is a great example of the algorithm. Consider 305 and 1040. | |
# 1040 = 3(305) + 125 | |
# 305 = 2(125) + 55 | |
# 125 = 2( 55) + 15 | |
# 55 = 3( 15) + 10 | |
# 15 = 1( 10) + 5 | |
# 10 = 2( 5) + 0 --> so the GCD is 5 | |
# | |
# Here is the same one written recursively, which is clearer. | |
# gcd(305, 1040) | |
# = gcd(1040, 305) (need the larger number first) | |
# = gcd( 305, 125) | |
# = gcd( 125, 55) | |
# = gcd( 55, 15) | |
# = gcd( 15, 10) | |
# = gcd( 10, 5) | |
# = gcd( 5, 0) (now we know we have an answer) | |
# = 5 | |
# | |
# Compare this to our old way of finding HCF(305, 1040): | |
# * the factors of 305 are {1, 5, 61, 305} | |
# * the factors of 1040 are {1, 2, 4, 5, 8, 10, 13, 16, 20, 26, 40, | |
# 52, 65, 80, 104, 130, 208, 260, 520, 1040} | |
# - a lot of work goes into finding these factors | |
# * the common factors are {1,5} | |
# * the max is 5 | |
# | |
# There is overhead in creating lists and sets. The Euclidean algorithm | |
# has none of that overhead: it simply does about 6 modulo calculations | |
# and that's it! | |
def gcd(a,b): | |
if b == 0: | |
# We can't break it down any more. We're done. | |
return a | |
elif a < b: | |
# Need to put the numbers in the right order. | |
return gcd(b,a) | |
elif a == b: | |
# gcd(14,14) == 14. We're done. | |
return a | |
else: | |
return gcd(b, a % b) | |
# Question for the audience. There is a little bit of the code | |
# in the function above that is unnecessary. What is it? |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment