Skip to content

Instantly share code, notes, and snippets.

@mrvaldes
Last active November 2, 2017 01:07
Show Gist options
  • Save mrvaldes/8130f23da938cf596e7cda10fdabaeb5 to your computer and use it in GitHub Desktop.
Save mrvaldes/8130f23da938cf596e7cda10fdabaeb5 to your computer and use it in GitHub Desktop.
Example of inplace line editing (update line by line) on a big file
def iterate_rainbow(file):
    """Rewrite a ';'-delimited CSV file in place, encrypting it line by line.

    The file is memory-mapped for writing.  Each physical line is decoded as
    UTF-8, parsed as a single CSV record, handed to ``encrypt_row`` and then
    re-serialised and spliced back into the mapping at the position of the
    original line.  When the new line has a different length, the remainder
    of the file is shifted and the file is grown or shrunk to match, so no
    stale bytes are left behind.

    A progress line (``n_line <number>``) is printed for every line handled.

    Args:
        file: Path of the file to rewrite; it is opened in ``'r+b'`` mode.

    Notes:
        * Rewritten lines end with ``csv.writer``'s default line terminator
          (CRLF), whatever terminator the original line used.
        * Every physical line is parsed on its own, so a quoted field that
          spans several lines is not treated as one record.
        * An empty file is left untouched (it cannot be memory-mapped).
    """
    import csv
    import mmap
    import os
    from io import StringIO

    encoding = 'utf-8'
    csv_opts = {'delimiter': ';', 'quoting': csv.QUOTE_MINIMAL, 'quotechar': '"'}

    with open(file, 'r+b') as fd:
        # mmap refuses to map a zero-length file, and there is nothing to
        # rewrite in that case anyway.
        if os.fstat(fd.fileno()).st_size == 0:
            return

        with mmap.mmap(fd.fileno(), 0, access=mmap.ACCESS_WRITE) as mm:
            start = 0  # offset of the first byte of the current line
            for n_line, raw in enumerate(iter(mm.readline, b''), 1):
                print("n_line {0}".format(n_line))
                end = mm.tell()  # offset just past the current line

                # NOTE: rstrip() drops *all* trailing whitespace, not only
                # the line terminator.
                text = raw.decode(encoding).rstrip()
                row = next(csv.reader([text], **csv_opts))
                # The return value is ignored, so encrypt_row presumably
                # mutates ``row`` in place -- confirm against its definition.
                encrypt_row(row, encoding)

                out = StringIO()
                csv.writer(out, **csv_opts).writerow(row)
                new_line = out.getvalue().encode(encoding)
                new_end = start + len(new_line)

                if new_end != end:
                    old_size = len(mm)
                    tail = old_size - end  # bytes following the current line
                    if new_end > end:
                        # Grow first so the shifted tail has room to land.
                        mm.resize(old_size + (new_end - end))
                        mm.move(new_end, end, tail)
                    else:
                        # Pull the tail forward, then cut off the leftover
                        # bytes so they are not read back as extra lines.
                        mm.move(new_end, end, tail)
                        mm.resize(old_size - (end - new_end))

                mm[start:new_end] = new_line
                # Continue reading right after the line just written.
                mm.seek(new_end)
                start = new_end

            mm.flush()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment