Skip to content

Instantly share code, notes, and snippets.

@zed

zed/.gitignore Secret

Created February 21, 2011 00:38
Show Gist options
  • Star 2 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save zed/73d83699c9a15db37f22 to your computer and use it in GitHub Desktop.
Save zed/73d83699c9a15db37f22 to your computer and use it in GitHub Desktop.
Sort at most 10 million 7-digit numbers. constraints: 1M RAM, high speed. several secs is good.
#
/sorted.bin
/input.bin
#!/usr/bin/env python3
# see http://neopythonic.blogspot.com/2008/10/sorting-million-32-bit-integers-in-2mb.html
"""Sort input integers (binary stream) in 2MB of RAM.
USAGE:
$ %prog < input.bin >sorted.bin
"""
import sys, array, tempfile, heapq
assert array.array('i').itemsize == 4
def intsfromfile(f):
while True:
a = array.array('i')
a.fromstring(f.read(4000))
if not a:
break
for x in a:
yield x
iters = []
while True:
a = array.array('i')
a.fromstring(sys.stdin.buffer.read(40000))
if not a:
break
f = tempfile.TemporaryFile()
array.array('i', sorted(a)).tofile(f)
f.seek(0)
iters.append(intsfromfile(f))
a = array.array('i')
for x in heapq.merge(*iters):
a.append(x)
if len(a) >= 1000:
a.tofile(sys.stdout.buffer)
del a[:]
if a:
a.tofile(sys.stdout.buffer)
"""Generate random 7-digit numbers.
USAGE:
$ %prog >input.txt
"""
import random
####L = random.sample(range(10**7), 10**2)
L = list(range(10**7))
random.shuffle(L)
def chunks(it, n):
"""Iterate in chunks `n` items at a time."""
return zip(*[it]*n) #NOTE: it drops tails
print('\n'.join(' '.join(map('{0:07d}'.format, chunk))
for chunk in chunks(iter(L), 10)))
.PHONY: timeit clean
timeit: input.bin __main__.py
/usr/bin/time python3 . < $< >sorted.bin
%.bin: %.txt write_as_bytes.py
python3 write_as_bytes.py < $< > $@
input.txt: generate_input.py
python3 generate_input.py > $@
clean:
-rm input.txt input.bin sorted.bin
#!/usr/bin/env python3
"""Write input numbers as bytes.
USAGE:
$ %prog <input.txt >input.bin
"""
import array
import fileinput
import sys
for line in fileinput.input():
array.array('i', map(int, line.split())).tofile(sys.stdout.buffer)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment