Skip to content

Instantly share code, notes, and snippets.

@collares
Last active August 25, 2022 12:33
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save collares/c3c0ecb944392b5fa78027510cf72fa0 to your computer and use it in GitHub Desktop.
Save collares/c3c0ecb944392b5fa78027510cf72fa0 to your computer and use it in GitHub Desktop.
An implementation of Bill Gosper's hashlife algorithm
class Store(object):
LOG_MAX_UNIVERSE = 60
# This is arbitrary, but it simplifies get_empty below.
DEAD_HASH = 0
ALIVE_HASH = LOG_MAX_UNIVERSE + 1
class MacrocellData(object):
def __init__(self, quadrants, result):
self.quadrants = quadrants
self.result = result
def __init__(self):
self.from_quadrants = {}
self.from_hash = {}
self.from_hash[Store.DEAD_HASH] = None
self.from_hash[Store.ALIVE_HASH] = None
# The Macrocell implementation allows for changing cells of an
# existing universe, but assumes a universe exists. This is a
# hack to bootstrap the process: create empty macrocells of
# various sizes.
for i in xrange(Store.LOG_MAX_UNIVERSE):
# Create empty macrocell of size 2^(i+1) x 2^(i+1) with
# hash i+1.
self.from_quadrants[(i, i, i, i)] = i+1
self.from_hash[i+1] = Store.MacrocellData((i, i, i, i), i)
@staticmethod
def single_cell(state):
return Store.ALIVE_HASH if state else Store.DEAD_HASH
def get_empty_box(self, n):
"Returns hash of a dead macrocell of size 2^n x 2^n"
assert n <= Store.LOG_MAX_UNIVERSE
return n
def query(self, hashcode):
return self.from_hash[hashcode]
def get_hash(self, quadrants):
quadrants = tuple(quadrants)
if quadrants in self.from_quadrants:
return self.from_quadrants[quadrants]
new_hash = len(self.from_hash)
self.from_quadrants[quadrants] = new_hash
self.from_hash[new_hash] = Store.MacrocellData(quadrants, None)
# Warn when hash tables get to size equal to a power of two
if new_hash & (new_hash-1) == 0:
print "WARNING: Now storing {} macrocells".format(new_hash)
return new_hash
3
0 1
1 2
2 0
2 1
2 2
36
0 24
1 22
1 24
2 12
2 13
2 20
2 21
2 34
2 35
3 11
3 15
3 20
3 21
3 34
3 35
4 0
4 1
4 10
4 16
4 20
4 21
5 0
5 1
5 10
5 14
5 16
5 17
5 22
5 24
6 10
6 16
6 24
7 11
7 15
8 12
8 13
#!/usr/bin/env python2
from cell_store import Store
from positioned_macrocell import PositionedMacrocell
import sys
USAGE = """Usage: hashlife.py INITIAL_STATE_FILE i0 j0 i1 j1 t OUTPUT_PREFIX
Takes a snapshot of the region [i0, i1) x [j0, j1) of the plane after
t iterations of Conway's Game of Life. The game is played on an
infinite grid, and each cell of the grid can be alive or dead. At each
step, the configuration evolves according to the following rules:
* If a cell has at most 1 or at least 4 live neighbors, it dies at
the next time step (due to isolation or overcrowding).
* If a cell has exactly three live neighbors, it becomes alive at the
next time step.
* Otherwise, if it has exactly two neighbors, it does not change.
The text file INITIAL_STATE_FILE lists the live cells, one per line,
at time t = 0. Its first line contains the bounding box size X; the
following lines must contain two numbers each in the range [0, X),
corresponding to locations of live cells.
The output is written to OUTPUT_PREFIX.pbm. Live cells are black,
dead cells are white. The number of iterations can be very large, but
(i1 - i0) * (j1 - j0) should not be too big.
For example, to simulate 4,000,000,000 iterations of a glider gun and
capture region [9999900, 10000000) x [9999900, 10000000), try:
hashlife.py glider_gun.txt 9999900 9999900 10000000 10000000 4000000000 out
"""
if __name__ == "__main__":
# Not using argument parsers felt appropriate.
if len(sys.argv) != 8:
print USAGE
sys.exit(1)
i0, j0, i1, j1, t = map(int, sys.argv[2:7])
with open(sys.argv[1], "r") as f:
size = int(f.readline().strip())
store = Store()
# Most of the documentation is in macrocell.py and
# positioned_macrocell.py. In particular, there is a detailed
# explanation of what is a macrocell.
anchored = PositionedMacrocell.empty(min(0, i0), min(0, j0),
max(size, i1), max(size, j1),
t, store)
for line in f:
i, j = map(int, line.split())
anchored = anchored.modify_cell(i, j, 1)
with open(sys.argv[7] + ".pbm", "w") as writer:
writer.write("P1\n")
writer.write("{} {}\n".format(i1 - i0, j1 - j0))
for i in xrange(i0, i1):
for j in xrange(j0, j1):
writer.write("{} ".format(anchored.query(i, j, t)))
writer.write("\n")
import itertools
from cell_store import Store
class Macrocell(object):
"""A macrocell represents a grid of 2^n x 2^n Game of Life cells.
The following is an implementation of Bill Gosper's hashlife
algorithm. It is based on Gosper, R. Wm. "Exploiting regularities
in large cellular spaces." Physica D: Nonlinear Phenomena 10.1-2
(1984): 75-80.
In theory, a macrocell is a 1x1 cell or it consists of four
macrocells of size 2^(n-1) x 2^(n-1) (its _quadrants_). Since a
macrocell can be quite big, we store it lazily: In practice, a
macrocell is just a number (a _hash_); given a hash corresponding
to a macrocell, a global dictionary allows us to get the hashes of
its quadrants. Moreover, cells with the same quadrants have the
same hash. Crucially, this allows for reuse of macrocells: For
example, a completely dead 2^n macrocell has four copies of the
same dead 2^(n-1) macrocell. In a best case scenario, this reuse
can lead to exponential simulation speedups.
A macrocell assumes its top-left corner is (0, 0) and that it
represents the state at time t = 0; therefore, we know the
configuration of every cell inside this region at time t = 0. At
time 1, the state of all cells but those labeled 1 in the diagram
below depends solely on the values of cells in this macrocell.
Similarly, the state at time 2 is known for all cells but those
labeled 1 or 2.
+---------------------------+
|1111111111111|1111111111111|
|1222222222222|2222222222221|
|12 | 21|
|12 +-------------+ 21|
|12 | | 21|
|12 | | 21|
|12 | | 21|
|------| |-------
|12 | | 21|
|12 | | 21|
|12 | | 21|
|12 +-------------+ 21|
|12 | 21|
|1222222222222|2222222222221|
|1111111111111|1111111111111|
+---------------------------+
Figure 1: The quadrants and the result of a macrocell.
Proceeding analogously, we see that the state at time t = 2^(n-2)
of the center square of side length 2^(n-1) shown in the figure is
completely determined. When n >= 2, we define a macrocell's
_result_ to be the state of this center square at time t =
2^(n-2). It is also a macrocell. A macrocell's result is computed
lazily whenever needed, and stored for future reuse.
But how to compute it? Since a macrocell is defined recursively as
four smaller macrocells, it is natural to wonder if its result can
be computed recursively. Fortunately, the answer is yes! I attempt
to describe how below.
We will not compute the result in a single step. Rether, the first
step is to compute a concentric square of side 3 * 2^(n-2) at time
2^(n-3). I claim that this can be done by 9 recursive calls, if we
consider quadrants of quadrants and rearrange them carefully, as
the following figure illustrates (a detailed explanation
follows). The outermost square is our current macrocell, viewed as
a 4x4 grid of quadrants-of-quadrants.
+-------------------------------------------------------+
| . . . |
| . . . |
| . . . |
| +-----------------------------------------+ |
| | | | | |
| | | | | |
| | . | . | . | |
|......| .0. | .1. | .2. |......|
| | . | . | . | |
| | | | | |
| | | | | |
| |-----------------------------------------| |
| | | | | |
| | | | | |
| | . | . | . | |
|......| .3. | .4. | .5. |......|
| | . | . | . | |
| | | | | |
| | | | | |
| |-----------------------------------------| |
| | | | | |
| | | | | |
| | . | . | . | |
|......| .6. | .7. | .8. |......|
| | . | . | . | |
| | | | | |
| | | | | |
| +-----------------------------------------+ |
| . . . |
| . . . |
| . . . |
+-------------------------------------------------------+
Figure 2: 9 subcells that we know how to advance to time 2^(n-3).
The main feature of the above picture is that each of the smaller
subcells (labeled with numbers from 0 to 8 and of side 2^(n-2)) is
concentric with a square formed by four of the sixteen
quadrants-of-quadrants (delimited by dots instead of dashes). In
this way, each square is the center square of a macrocell of side
2^(n-1); therefore, inductively we know how to advance them to
time 2^(n-3) by taking the result of the corresponding smaller
macrocell.
Recall that our main goal is to compute the result of the 2^n
macrocell, that is, to advance the center square of the big
macrocell to time 2^(n-2). Moreover, we already have a 3x3 grid of
squares of side 2^(n-2) advanced to time 2^(n-3). We can group
those squares to obtain macrocells of side 2^(n-1) in such a way
that their results form a 2x2 grid, as shown below.
+-----------------------------------------+
| B A |
| B A |
| B A |
| +-------------+-------------+ |
| | | | |
| | | | |
| | | | |
|CCCCCC| A | B |DDDDDD|
| | | | |
| | | | |
| | | | |
| +-------------+-------------+ |
| | | | |
| | | | |
| | | | |
|AAAAAA| C | D |BBBBBB|
| | | | |
| | | | |
| | | | |
| +-------------+-------------+ |
| D C |
| D C |
| D C |
+-----------------------------------------+
Figure 3: Centers of four overlapping macrocells of side 2^(n-1).
In the figure above, the square whose center is marked A is the
center square of a macrocell delimited by the A lines, and
similarly for B, C, D. Since they are centers of macrocells of
side 2^(n-3), we can advance them by time 2^(n-3) by four
recursive result calculations.
Since the 3x3 grid was already 2^(n-3) units of time ahead, the
second time advance gets the 2x2 grid to time 2^(n-3)+2^(n-3) =
2^(n-2). The latter grid forms a square of total side 2^(n-1),
concentric with the original macrocell, and is therefore the
desired result. Whew!
In the original paper, the above construction is described in
terms of "scoops of spacetime": the mentioned grids are overlaid
on top of each other, and the z-coordinate represents time. In
this way, our overlaid grids are actually pyramid sections in
(2+1)-dimensional space.
"""
def __init__(self, hashcode, store):
self.hashcode = hashcode
self.store = store
self.memoized_data = self.store.query(hashcode)
# Memoized wrapped versions of a part of self.memoized_data
# (see method wrap()). Not strictly necessary.
self.wrapped_quadrants = None
def value(self):
if self.hashcode == Store.ALIVE_HASH:
return 1
if self.hashcode == Store.DEAD_HASH:
return 0
return -1
def wrap(self, hashcode):
"""Given a hash from the same store as the current cell, returns a
Macrocell object wrapping this instance.
"""
# NB: This is one of those times where object-oriented
# programming is a bit annoying.
return Macrocell(hashcode, self.store)
def quadrants(self):
"Returns a 2x2 matrix of Macrocell instances of the four quadrants"
if not self.wrapped_quadrants:
# To improve readability in the rest, reshape the 4-list
# as a 2x2 matrix of macrocells.
h00, h01, h10, h11 = self.memoized_data.quadrants
self.wrapped_quadrants = [[self.wrap(h00), self.wrap(h01)],
[self.wrap(h10), self.wrap(h11)]]
return self.wrapped_quadrants
def subquadrants(self):
"""Returns a 4x4 matrix of Macrocell instances, corresponding to
quadrants of quadrants.
"""
return [[self.quadrants()[i/2][j/2].quadrants()[i % 2][j % 2]
for j in xrange(4)] for i in xrange(4)]
def advance_grid(self, grid):
"""Given a m x m grid of equally sized macrocells, each of side x,
compute a (m-1) x (m-1) grid of equally sized macrocells that
represents the original grid advanced by x time units, as
explained in the class docstring.
The new grid is smaller because of boundary effects: the cells
at distance less than x from the boundary depend on values
outside the input.
In particular, calling this on a 2 x 2 grid just calls
result() on the macrocell whose quadrants are given by grid.
"""
m = len(grid)
ans = [[None] * (m-1) for _ in range(m-1)]
for i in xrange(m-1):
for j in xrange(m-1):
quadrants = (grid[i][j].hashcode,
grid[i][j+1].hashcode,
grid[i+1][j].hashcode,
grid[i+1][j+1].hashcode)
macrocell_hash = self.store.get_hash(quadrants)
ans[i][j] = self.wrap(macrocell_hash).result()
return ans
def result(self):
if self.memoized_data.result:
return self.wrap(self.memoized_data.result)
subquadrants = self.subquadrants()
next_quadrants = []
# Check if the grid is 4x4.
if self.subquadrants()[0][0].value() >= 0:
# No cleverness required here: We are advancing a 2x2
# square by a single time unit. Just apply the evolution
# rule for Conway's Game of Life.
def number_of_neighbors(i, j):
acc = 0
for di in xrange(-1, 2):
for dj in xrange(-1, 2):
if di or dj:
acc += subquadrants[i+di][j+dj].value()
return acc
for i in range(2):
for j in range(2):
n = number_of_neighbors(i+1, j+1)
if n <= 1 or n >= 4:
next_state = self.store.single_cell(False)
elif n == 3:
next_state = self.store.single_cell(True)
else:
next_state = subquadrants[i+1][j+1].hashcode
next_quadrants.append(next_state)
else:
grid = self.advance_grid(self.advance_grid(subquadrants))
for i in range(2):
for j in range(2):
next_quadrants.append(grid[i][j].hashcode)
self.memoized_data.result = self.store.get_hash(next_quadrants)
return self.result()
from macrocell import Macrocell
from cell_store import Store
class PositionedMacrocell(object):
"""A macrocell together with a place where it should be drawn.
We cannot memoize the position and time along with the macrocell,
since this would defeat the purpose of hashlife, which is to reuse
computation which happens in different places at different times.
When drawing, however, we must know where to put each macrocell.
This class stores a macrocell and a location in spacetime, while
still reusing macrocells whenever possible.
"""
def __init__(self, macrocell, side, i, j, t):
self.macrocell = macrocell
self.side = side
self.i = i
self.j = j
self.t = t
@staticmethod
def empty(i0, j0, i1, j1, max_t, store):
"""Return an empty box big enough that we can query and change
anything in [i0, i1) x [j0, j1) up until time max_t.
"""
n = max(i1 - i0, j1 - j0) + 2 * max_t
log_window_side = (n-1).bit_length()
macrocell = Macrocell(store.get_empty_box(log_window_side),
store)
return PositionedMacrocell(macrocell, 2**log_window_side,
i0 - max_t, j0 - max_t, 0)
def can_compute(self, i, j, t):
"""Check if the point (i, j) can be computed at time t given the data
stored in the current positioned macrocell.
A macrocell's side shrinks by 2 each time it moves forward in
time. That is, a macrocell [i, i + side) x [j, j + side) at
time 0 determines the value of the square [i+t, i+side-t) x
[j+t, j+side-t) at time t.
"""
if t < self.t:
# We do not know how to move backwards in time.
return False
delta = t - self.t
return self.i + delta <= i < self.i + self.side - delta \
and self.j + delta <= j < self.j + self.side - delta
def modify_cell(self, i, j, state):
if self.side == 1:
cell = self.macrocell.wrap(Store.single_cell(state))
return PositionedMacrocell(cell, self.side,
self.i, self.j, self.t)
# We will assign compatible positions to each quadrant and see
# if the cell we want to change is there. We recursively
# proceed down the right quadrant; we then reassemble the current
# positioned macrocell.
quadrants = self.macrocell.quadrants()
flattened = []
step = self.side/2
for qi in xrange(2):
for qj in xrange(2):
anchored = PositionedMacrocell(quadrants[qi][qj],
self.side/2,
self.i + qi*step,
self.j + qj*step,
self.t)
if anchored.can_compute(i, j, self.t):
modified = anchored.modify_cell(i, j, state)
quadrants[qi][qj] = modified.macrocell
flattened.append(quadrants[qi][qj].hashcode)
modified_hash = self.macrocell.store.get_hash(flattened)
cell = self.macrocell.wrap(modified_hash)
return PositionedMacrocell(cell, self.side, self.i, self.j, self.t)
# Beware: The code for query() is a bit messy from a software
# engineering point of view, especially because it duplicates code
# from Macrocell's advance_grid. Unfortunately eliminating this
# duplication is tricky due to the need to calculate precise
# offsets for the subcalls.
def query(self, i, j, t):
"Returns the status of a cell at time t in O(log t) time."
assert self.side >= 2
assert self.can_compute(i, j, t)
if self.side == 2:
assert 0 <= i - self.i < 2 and 0 <= j - self.j < 2 and t == self.t
cell = self.macrocell.quadrants()[i - self.i][j - self.j]
return cell.value()
# A macrocell's result is side/4 units in the future.
step = self.side/4
# If advancing to cell's result is not too much, do it.
if t - self.t >= step:
return PositionedMacrocell(self.macrocell.result(),
self.side/2,
self.i + step,
self.j + step,
self.t + step).query(i, j, t)
# To understand the next part, familiarity with how the
# Macrocell computes its result is required.
#
# We know that the 4x4 grid of subquadrants can be advanced to
# time step/2 to obtain a 3x3 grid (which covers everything we
# can compute at that time), and that advancing this grid by
# time step/2 again leads to the Macrocell's result.
if t - self.t <= step/2:
# Therefore, if we are before time step/2, one of the 9
# subcases that were generated by the first call to
# advance_grid() must be able to compute our point.
grid = self.macrocell.subquadrants()
extra_offset = 0
else:
# Otherwise, one of the 4 subcases that were generated by
# the second call to advance_grid() works. Those are
# offset by step/2 both in time and in space.
grid = self.macrocell.advance_grid(self.macrocell.subquadrants())
extra_offset = step/2
for qi in xrange(len(grid) - 1):
for qj in xrange(len(grid) - 1):
quadrants = (grid[qi][qj].hashcode,
grid[qi][qj+1].hashcode,
grid[qi+1][qj].hashcode,
grid[qi+1][qj+1].hashcode)
macrocell_hash = self.macrocell.store.get_hash(quadrants)
cell = self.macrocell.wrap(macrocell_hash)
anchored = PositionedMacrocell(cell,
self.side/2,
self.i + qi*step + extra_offset,
self.j + qj*step + extra_offset,
self.t + extra_offset)
if anchored.can_compute(i, j, t):
return anchored.query(i, j, t)
# By the previous argument, this should not be reached.
assert False
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment