secret
Created

Sort at most 10 million 7-digit numbers. constraints: 1M RAM, high speed. several secs is good.

  • Download Gist
.gitignore
1 2 3
#
/sorted.bin
/input.bin
Makefile
Makefile
1 2 3 4 5 6 7 8 9 10 11 12 13
.PHONY: timeit clean
 
timeit: input.bin __main__.py
/usr/bin/time python3 . < $< >sorted.bin
 
%.bin: %.txt write_as_bytes.py
python3 write_as_bytes.py < $< > $@
 
input.txt: generate_input.py
python3 generate_input.py > $@
 
clean:
-rm input.txt input.bin sorted.bin
__main__.py
Python
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39
#!/usr/bin/env python3
# see http://neopythonic.blogspot.com/2008/10/sorting-million-32-bit-integers-in-2mb.html
"""Sort input integers (binary stream) in 2MB of RAM.
 
USAGE:
 
$ %prog < input.bin >sorted.bin
"""
import sys, array, tempfile, heapq
assert array.array('i').itemsize == 4
 
def intsfromfile(f):
while True:
a = array.array('i')
a.fromstring(f.read(4000))
if not a:
break
for x in a:
yield x
 
iters = []
while True:
a = array.array('i')
a.fromstring(sys.stdin.buffer.read(40000))
if not a:
break
f = tempfile.TemporaryFile()
array.array('i', sorted(a)).tofile(f)
f.seek(0)
iters.append(intsfromfile(f))
 
a = array.array('i')
for x in heapq.merge(*iters):
a.append(x)
if len(a) >= 1000:
a.tofile(sys.stdout.buffer)
del a[:]
if a:
a.tofile(sys.stdout.buffer)
generate_input.py
Python
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18
"""Generate random 7-digit numbers.
 
USAGE:
 
$ %prog >input.txt
"""
import random
 
####L = random.sample(range(10**7), 10**2)
L = list(range(10**7))
random.shuffle(L)
 
def chunks(it, n):
"""Iterate in chunks `n` items at a time."""
return zip(*[it]*n) #NOTE: it drops tails
 
print('\n'.join(' '.join(map('{0:07d}'.format, chunk))
for chunk in chunks(iter(L), 10)))
write_as_bytes.py
Python
1 2 3 4 5 6 7 8 9 10 11 12 13
#!/usr/bin/env python3
"""Write input numbers as bytes.
 
USAGE:
 
$ %prog <input.txt >input.bin
"""
import array
import fileinput
import sys
 
for line in fileinput.input():
array.array('i', map(int, line.split())).tofile(sys.stdout.buffer)

Please sign in to comment on this gist.

Something went wrong with that request. Please try again.