ForceBru/README.md

## README.md

      
    Raw
  

              README.md
            
          
    I've recently encountered an article that showed some ways to speed up one's Python code. Some of these methods seemed rather peculiar, so I decided to do some extra complex data analysis to try to understand whether these methods actually work.
Python version: 3.7.1
Here's a quick summary of the methods proposed in that article that I found odd.
Decreasing the use of for loop

The article said that, since for loops are "dynamic" (not sure what this means), they're slower than while loops. I compared the following two loops and found that, on average, the for loop was about 2.5 times faster than the corresponding while loop:
# Fast!
for _ in range(50):
    pass
    
# Slow...
i = 0
while i < 50:
    i += 1
Using tuple unpacking to assign to multiple variables

Indeed, in the following code, assignment via tuple unpacking was on average 1.47 times faster than consecutive assignment:
# Fast!
a, b, c, d = 2, 3, 5, 7

# Slow...
a = 2
b = 3
c = 5
d = 7
Using 1 for infinite loops

The article claimed that using while 1 instead of while True "will reduce some runtime". Testing an infinite loop doesn't seem feasible because doing so will take quite some time, so I tested if statements instead, because a while loop is basically an if statement with a jump. Turns out, if 1 is indeed faster, but the difference is small:
# About 5% faster
if 1:
    pass
    
if True:
    pass
If you run this test with tests.py and then perform the Welch Two Sample t-test on the data in R, you'll find that the 95% confidence interval is (-0.0005672652, -0.0005465111), which does not include zero (so the difference is statistically significant), but is ridiculously tiny and close to zero:
> one_true = read.csv("one_true.csv")
> t.test(one_true$one, one_true[["true"]])

	Welch Two Sample t-test

data:  one_true$one and one_true[["true"]]
t = -105.19, df = 14599, p-value < 2.2e-16
alternative hypothesis: true difference in means is not equal to 0
95 percent confidence interval:
 -0.0005672652 -0.0005465111
sample estimates:
 mean of x  mean of y 
0.01025956 0.01081645

  
## assignment.png

      
    Raw
  

              assignment.png
            
          
## for_while.png

      
    Raw
  

              for_while.png
            
          
## one_true.png

      
    Raw
  

              one_true.png
            
          
## tests.py
"""
Code that runs the tests.
Run with `python3 tests.py`, follow the instructions and WAIT.
"""

import timeit
import csv
import matplotlib.pyplot as plt


def mean(lst: list) -> float:
    """
    Return the arithmetic mean of the numbers in `lst`.

    An empty `lst` raises ZeroDivisionError because the sum is divided by
    a length of zero.
    """
    total = sum(lst)
    count = len(lst)
    return total / count

def write_csv(fname: str, fieldnames: list, times1: list, times2: list):
    """
    Write two equally long series of timings to `fname` as a two-column CSV.

    `fieldnames` holds the two column headers: the first labels `times1`,
    the second labels `times2`. Row i contains `times1[i]` and `times2[i]`.
    The file is opened in 'w' mode, so an existing file is overwritten.
    """
    assert len(fieldnames) == 2
    assert len(times1) == len(times2)

    first, second = fieldnames

    # newline='' lets the csv module write its own line terminators; without
    # it, platforms that translate '\n' end up with a blank line after each row.
    with open(fname, 'w', newline='') as f:
        writer = csv.DictWriter(f, fieldnames=fieldnames)

        writer.writeheader()
        writer.writerows(
            {first: t1, second: t2}
            for t1, t2 in zip(times1, times2)
        )

def print_stats(title: str, times: list):
    """
    Print a one-line summary of `times`: minimum, mean, maximum and total.

    `title` is left-aligned in a 15-character column. Every number is
    printed in fixed-point notation with three decimals.
    """
    # The total needs the explicit `f` type like the other fields: a bare
    # `6.03` spec keeps only three significant digits and falls back to
    # exponent notation for larger totals.
    print(f"{title:<15} {min(times):6.03f} {mean(times):6.03f} {max(times):6.03f} => {sum(times):6.03f} (min/avg/max => total)")

def plot_hist(title: str, bins: int, times1: list, code1: str, times2: list, code2: str):
    """
    Show two overlaid, half-transparent histograms in one titled figure.

    `times1` is labelled with `code1` and `times2` with `code2` in the
    legend; both histograms are drawn with `bins` bins. The figure is
    shown and then closed.
    """
    hist_kwargs = dict(bins=bins, alpha=0.5)
    for sample, snippet in ((times1, code1), (times2, code2)):
        plt.hist(sample, label=snippet, **hist_kwargs)

    plt.title(title)
    plt.legend()
    plt.show()
    plt.close()

def decrease_the_use_of_for_loop(fname: str, nrep: int):
    """
    Check if `for` loops are slower than `while` loops

    Times a 50-iteration `for` loop and the equivalent `while` loop with
    `timeit.repeat` (`nrep` repetitions each), prints summary statistics,
    writes the timings to the CSV file `fname` and shows both
    distributions as histograms.
    """

    Nloop = 50
    code1 = f"for _ in range({Nloop}):\n\tpass"
    code2 = f"i=0\nwhile i < {Nloop}:\n\ti += 1"

    times1 = timeit.repeat(code1, repeat=nrep)
    print_stats("For loop", times1)

    times2 = timeit.repeat(code2, repeat=nrep)
    print_stats("While loop", times2)

    write_csv(fname, ['For', 'While'], times1, times2)

    # nrep // 3 is zero for nrep < 3, and a histogram needs at least one bin.
    plot_hist(
        "Are `for` loops slower than `while` loops?", max(1, nrep // 3),
        times1, code1, times2, code2
    )

def assignment(fname: str, nrep: int):
    """
    Check if tuple unpacking is faster than consecutive assignment

    Times four consecutive assignments and the equivalent single tuple
    unpacking with `timeit.repeat` (`nrep` repetitions each), prints
    summary statistics, writes the timings to the CSV file `fname` and
    shows both distributions as histograms.
    """

    code1 = 'a = 2\nb = 3\nc = 5\nd = 7'
    code2 = 'a, b, c, d = 2, 3, 5, 7'

    times1 = timeit.repeat(code1, repeat=nrep)
    print_stats("Consecutive", times1)

    times2 = timeit.repeat(code2, repeat=nrep)
    print_stats("Unpacking", times2)

    write_csv(fname, ['consecutive', 'unpacking'], times1, times2)

    # nrep // 3 is zero for nrep < 3, and a histogram needs at least one bin.
    plot_hist(
        "Are consecutive assignments slower than tuple unpacking?", max(1, nrep // 3),
        times1, code1, times2, code2
    )

def one_for_inf(fname: str, nrep: int):
    """
    Check if using `1` instead of `True` for infinite loops is faster

    An `if` statement stands in for the loop condition: `if 1:` and
    `if True:` are each timed with `timeit.repeat` (`nrep` repetitions).
    Summary statistics are printed, the timings are written to the CSV
    file `fname` and both distributions are shown as histograms.
    """

    code1 = 'if 1:\n\tpass'
    code2 = 'if True:\n\tpass'

    times1 = timeit.repeat(code1, repeat=nrep)
    print_stats("One", times1)

    times2 = timeit.repeat(code2, repeat=nrep)
    print_stats("True", times2)

    write_csv(fname, ['one', 'true'], times1, times2)

    # nrep // 3 is zero for nrep < 3, and a histogram needs at least one bin.
    plot_hist(
        "Is `1` faster than `True`?", max(1, nrep // 3),
        times1, code1, times2, code2
    )


if __name__ == '__main__':
    # Maps the name typed by the user (also used as the CSV file stem)
    # to the function that runs that test.
    choices = {
        'for_while': decrease_the_use_of_for_loop,
        'assignment': assignment,
        'one_true': one_for_inf,
    }

    test_name = input(f"What to test? ({', '.join(choices)}): ").strip()
    if test_name not in choices:
        # Exit with a readable message instead of a bare KeyError traceback.
        raise SystemExit(
            f"Unknown test {test_name!r}; expected one of: {', '.join(choices)}"
        )
    test_fn = choices[test_name]

    try:
        nrep = int(input("Number of repetitions: "))
    except ValueError as err:
        raise SystemExit(f"Number of repetitions must be an integer ({err})")
    if nrep < 1:
        # With no repetitions the timing lists are empty and the
        # min/max statistics cannot be computed.
        raise SystemExit("Number of repetitions must be at least 1")

    csv_file = test_name + '.csv'
    print("\nData will be written to", csv_file)
    # Print the function's name rather than its `<function ... at 0x...>` repr.
    print(f"Running {test_fn.__name__}...")

    test_fn(csv_file, nrep)

    print("Done!")
	"""
	Code that runs the tests.
	Run with `python3 tests.py`, follow the instructions and WAIT.
	"""

	import timeit
	import csv
	import matplotlib.pyplot as plt


	def mean(lst: list) -> float:
	    """
	    Return the arithmetic mean of the numbers in `lst`.

	    An empty `lst` raises ZeroDivisionError because the sum is divided by
	    a length of zero.
	    """
	    # Body indentation restored; it had been flattened to the `def` level.
	    return sum(lst) / len(lst)

	def write_csv(fname: str, fieldnames: list, times1: list, times2: list):
	    """
	    Write two equally long series of timings to `fname` as a two-column CSV.

	    `fieldnames` holds the two column headers: the first labels `times1`,
	    the second labels `times2`. Row i contains `times1[i]` and `times2[i]`.
	    The file is opened in 'w' mode, so an existing file is overwritten.
	    """
	    assert len(fieldnames) == 2
	    assert len(times1) == len(times2)

	    # newline='' lets the csv module write its own line terminators; without
	    # it, platforms that translate '\n' end up with a blank line after each row.
	    with open(fname, 'w', newline='') as f:
	        writer = csv.DictWriter(f, fieldnames=fieldnames)

	        writer.writeheader()
	        writer.writerows(
	            {fieldnames[0]: t1, fieldnames[1]: t2}
	            for t1, t2 in zip(times1, times2)
	        )

	def print_stats(title: str, times: list):
	    """
	    Print a one-line summary of `times`: minimum, mean, maximum and total.

	    `title` is left-aligned in a 15-character column. Every number is
	    printed in fixed-point notation with three decimals.
	    """
	    # The total needs the explicit `f` type like the other fields: a bare
	    # `6.03` spec keeps only three significant digits and falls back to
	    # exponent notation for larger totals.
	    print(f"{title:<15} {min(times):6.03f} {mean(times):6.03f} {max(times):6.03f} => {sum(times):6.03f} (min/avg/max => total)")

	def plot_hist(title: str, bins: int, times1: list, code1: str, times2: list, code2: str):
	    """
	    Show two overlaid, half-transparent histograms in one titled figure.

	    `times1` is labelled with `code1` and `times2` with `code2` in the
	    legend; both histograms are drawn with `bins` bins. The figure is
	    shown and then closed.
	    """
	    # Body indentation restored; it had been flattened to the `def` level.
	    setup = {'bins': bins, 'alpha': 0.5}
	    plt.hist(times1, label=code1, **setup)
	    plt.hist(times2, label=code2, **setup)

	    plt.title(title)
	    plt.legend()
	    plt.show()
	    plt.close()

	def decrease_the_use_of_for_loop(fname: str, nrep: int):
	    """
	    Check if `for` loops are slower than `while` loops

	    Times a 50-iteration `for` loop and the equivalent `while` loop with
	    `timeit.repeat` (`nrep` repetitions each), prints summary statistics,
	    writes the timings to the CSV file `fname` and shows both
	    distributions as histograms.
	    """

	    Nloop = 50
	    code1 = f"for _ in range({Nloop}):\n\tpass"
	    code2 = f"i=0\nwhile i < {Nloop}:\n\ti += 1"

	    times1 = timeit.repeat(code1, repeat=nrep)
	    print_stats("For loop", times1)

	    times2 = timeit.repeat(code2, repeat=nrep)
	    print_stats("While loop", times2)

	    write_csv(fname, ['For', 'While'], times1, times2)

	    # nrep // 3 is zero for nrep < 3, and a histogram needs at least one bin.
	    plot_hist(
	        "Are `for` loops slower than `while` loops?", max(1, nrep // 3),
	        times1, code1, times2, code2
	    )

	def assignment(fname: str, nrep: int):
	    """
	    Check if tuple unpacking is faster than consecutive assignment

	    Times four consecutive assignments and the equivalent single tuple
	    unpacking with `timeit.repeat` (`nrep` repetitions each), prints
	    summary statistics, writes the timings to the CSV file `fname` and
	    shows both distributions as histograms.
	    """

	    code1 = 'a = 2\nb = 3\nc = 5\nd = 7'
	    code2 = 'a, b, c, d = 2, 3, 5, 7'

	    times1 = timeit.repeat(code1, repeat=nrep)
	    print_stats("Consecutive", times1)

	    times2 = timeit.repeat(code2, repeat=nrep)
	    print_stats("Unpacking", times2)

	    write_csv(fname, ['consecutive', 'unpacking'], times1, times2)

	    # nrep // 3 is zero for nrep < 3, and a histogram needs at least one bin.
	    plot_hist(
	        "Are consecutive assignments slower than tuple unpacking?", max(1, nrep // 3),
	        times1, code1, times2, code2
	    )

	def one_for_inf(fname: str, nrep: int):
	    """
	    Check if using `1` instead of `True` for infinite loops is faster

	    An `if` statement stands in for the loop condition: `if 1:` and
	    `if True:` are each timed with `timeit.repeat` (`nrep` repetitions).
	    Summary statistics are printed, the timings are written to the CSV
	    file `fname` and both distributions are shown as histograms.
	    """

	    code1 = 'if 1:\n\tpass'
	    code2 = 'if True:\n\tpass'

	    times1 = timeit.repeat(code1, repeat=nrep)
	    print_stats("One", times1)

	    times2 = timeit.repeat(code2, repeat=nrep)
	    print_stats("True", times2)

	    write_csv(fname, ['one', 'true'], times1, times2)

	    # nrep // 3 is zero for nrep < 3, and a histogram needs at least one bin.
	    plot_hist(
	        "Is `1` faster than `True`?", max(1, nrep // 3),
	        times1, code1, times2, code2
	    )


	if __name__ == '__main__':
	    # Maps the name typed by the user (also used as the CSV file stem)
	    # to the function that runs that test.
	    choices = {
	        'for_while': decrease_the_use_of_for_loop,
	        'assignment': assignment,
	        'one_true': one_for_inf,
	    }

	    test_name = input(f"What to test? ({', '.join(choices)}): ").strip()
	    if test_name not in choices:
	        # Exit with a readable message instead of a bare KeyError traceback.
	        raise SystemExit(
	            f"Unknown test {test_name!r}; expected one of: {', '.join(choices)}"
	        )
	    test_fn = choices[test_name]

	    try:
	        nrep = int(input("Number of repetitions: "))
	    except ValueError as err:
	        raise SystemExit(f"Number of repetitions must be an integer ({err})")
	    if nrep < 1:
	        # With no repetitions the timing lists are empty and the
	        # min/max statistics cannot be computed.
	        raise SystemExit("Number of repetitions must be at least 1")

	    csv_file = test_name + '.csv'
	    print("\nData will be written to", csv_file)
	    # Print the function's name rather than its `<function ... at 0x...>` repr.
	    print(f"Running {test_fn.__name__}...")

	    test_fn(csv_file, nrep)

	    print("Done!")