Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save jColeChanged/06cecc001d3004a3c45a to your computer and use it in GitHub Desktop.
Save jColeChanged/06cecc001d3004a3c45a to your computer and use it in GitHub Desktop.
This is my personal attempt at sorting a million 32-bit integers in 2MB of RAM.
from random import randint
from sys import maxint
import tempfile
import array
def write_a_million_ints():
f = file("ints.bin", "wb")
ints = array.array('i', [randint(-maxint + 1, maxint - 1) for i in range(1000000)])
ints.tofile(f)
def read_ints(f):
while True:
a = array.array('i')
a.fromstring(f.read(4000))
if not a:
break
for x in a:
yield x
def merge(file_one, file_two):
"""
I think the fact that I'm using generators made this really messy.
"""
merge_file = tempfile.TemporaryFile()
one_gen = read_ints(file_one)
two_gen = read_ints(file_two)
merge_array = array.array('i')
one_next = None
two_next = None
while True:
if one_next is None:
try:
one_next = one_gen.next()
except StopIteration:
pass
if two_next is None:
try:
two_next = two_gen.next()
except StopIteration:
pass
if one_next is None and two_next is None:
merge_array.tofile(merge_file.file)
merge_file.seek(0)
return merge_file
elif one_next is None:
merge_array.append(two_next)
two_next = None
elif two_next is None:
merge_array.append(one_next)
one_next = None
elif one_next >= two_next:
merge_array.append(two_next)
two_next = None
else:
merge_array.append(one_next)
one_next = None
if len(merge_array) >= 1000:
merge_array.tofile(merge_file.file)
del merge_array
merge_array = array.array('i')
def combine(files):
out_file = file("sorted.txt", "w")
# merge_file = reduce(merge, files)
merge_file = merge_sort_files(files)
for value in read_ints(merge_file):
out_file.write(str(value) + "\n")
def merge_sort_files(files):
if len(files) == 1:
return files[0]
if len(files) == 2:
return merge(files[0], files[1])
mid = len(files) / 2
return merge(merge_sort_files(files[:mid]), merge_sort_files(files[mid:]))
def sort_a_million_ints():
f = file("ints.bin", "rb")
temp_files = []
while True:
a = array.array('i')
a.fromstring(f.read(40000)) # read 10,000 ints
if not a: # if array is empty
break
temp_file = tempfile.TemporaryFile()
array.array('i', sorted(a)).tofile(temp_file.file)
temp_file.seek(0)
temp_files.append(temp_file)
combine(temp_files)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment