Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Save hughdbrown/ac86c0e0035930ca434fd594c1673888 to your computer and use it in GitHub Desktop.
Save hughdbrown/ac86c0e0035930ca434fd594c1673888 to your computer and use it in GitHub Desktop.
Count how many people are alive per year, based on their birthyear and deathyear, aims for performances.
#!/usr/bin/env python3
import collections
import itertools
import random
import sys
import timeit
pop = [
(b := random.randint(1900, 2000), random.randint(b + 1, b + 100))
for _ in range(10000)
]
# 0.162 s (with 10000 people [1,100] years old)
def count_living_per_year1(population: list[tuple[int, int]]) -> dict[int, int]:
living_per_year = {}
for birth, death in population:
if birth in living_per_year:
living_per_year[birth] += 1
else:
living_per_year[birth] = 1
if death in living_per_year:
living_per_year[death] -= 1
else:
living_per_year[death] = -1
for year in range(min(living_per_year), max(living_per_year)):
living_per_year[year] = living_per_year.get(year, 0) - living_per_year.get(year - 1, 0)
living_per_year.pop(max(living_per_year))
return living_per_year
# 0.118 s (with 10000 people [1,100] years old)
def count_living_per_year2(population: list[tuple[int, int]]) -> dict[int, int]:
living_per_year = collections.defaultdict(int)
for birth, death in population:
living_per_year[birth] += 1
living_per_year[death] -= 1
min_, max_ = min(living_per_year), max(living_per_year)
for year in range(min_, max_):
living_per_year[year] += living_per_year[year - 1]
del living_per_year[min_ - 1], living_per_year[max_]
return living_per_year
# 0.105 s (with 10000 people [1,100] years old)
def count_living_per_year3(population: list[tuple[int, int]]) -> dict[int, int]:
births = collections.Counter(birth for birth, _ in population)
deaths = collections.Counter(death for _, death in population)
living_per_year = births
for year in range(min(births), max(deaths)):
living_per_year[year] += living_per_year[year - 1] - deaths[year]
return living_per_year
# 0.108 s (with 10000 people [1,100] years old)
def count_living_per_year4(population: list[tuple[int, int]]) -> dict[int, int]:
births = collections.Counter(birth for birth, _ in population)
deaths = collections.Counter(death for _, death in population)
(delta_per_year := births).subtract(deaths)
living_per_year = collections.Counter()
for year in range(min(delta_per_year), max(delta_per_year)):
living_per_year[year] = living_per_year[year - 1] + delta_per_year[year]
return living_per_year
# 0.093 s (with 10000 people [1,100] years old)
def count_living_per_year5(population: list[tuple[int, int]]) -> dict[int, int]:
births, deaths = zip(*population)
births_counter = collections.Counter(births)
deaths_counter = collections.Counter(deaths)
living_per_year = collections.Counter()
for year in range(min(births_counter), max(deaths_counter)):
delta_living = births_counter[year] - deaths_counter[year]
living_per_year[year] = living_per_year[year - 1] + delta_living
return living_per_year
def count_living_per_year6(population: list[tuple[int, int]]) -> dict[int, int]:
"""
Version by hughdbrown
Write as few python loops as possible, drive execution into compiled code/builtins
1. Generate yearly deltas
2. Flatten into array
3. Calculate with cumulative sum
4. Return result in dict
"""
b, d = zip(*population)
delta_per_year = collections.Counter(b)
delta_per_year.subtract(collections.Counter(d))
first_birth, last_death = min(delta_per_year), max(delta_per_year)
delta_get = delta_per_year.get
deltas = [delta_get(year, 0) for year in range(first_birth, last_death)]
return dict(
enumerate(itertools.accumulate(deltas), start=first_birth)
)
def count_living_per_year7(population):
"""
Version by Stefan Schuerger
"""
born_by = collections.defaultdict(int)
dead_by = collections.defaultdict(int)
living_per_year = collections.defaultdict(int)
for birth, death in population:
born_by[birth] += 1
dead_by[death] += 1
alive = 0
for year in range(1900,2100):
living_per_year[year] = alive = alive + born_by[year] - dead_by[year]
return living_per_year
def run_benchmarks():
print(f"python version: {sys.version}")
for i in range(1, 8):
fn = f"count_living_per_year{i}(pop)"
timing = timeit.timeit(stmt=fn, globals=globals(), number=1000)
print(f"{fn}: {timing}")
if __name__ == '__main__':
run_benchmarks()
@hughdbrown
Copy link
Author

python version: 3.11.3 (main, Apr 12 2023, 18:27:18) [Clang 14.0.3 (clang-1403.0.22.14.1)]
count_living_per_year1(pop): 2.138002374966163
count_living_per_year2(pop): 1.818175038031768
count_living_per_year3(pop): 1.5936920850072056
count_living_per_year4(pop): 1.6447406660299748
count_living_per_year5(pop): 1.3068225800525397
count_living_per_year6(pop): 1.2849942080210894
count_living_per_year7(pop): 1.710214082035236

python version: 3.9.4 (default, Apr 20 2021, 22:14:53)
[Clang 12.0.0 (clang-1200.0.32.2)]
count_living_per_year1(pop): 2.671531382
count_living_per_year2(pop): 1.9882104379999999
count_living_per_year3(pop): 1.7378031969999999
count_living_per_year4(pop): 1.7831689280000003
count_living_per_year5(pop): 1.548357961999999
count_living_per_year6(pop): 1.5165221899999999
count_living_per_year7(pop): 1.823261629000001

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment