Skip to content

Instantly share code, notes, and snippets.

@odedlaz
Created December 22, 2021 18:21
Show Gist options
  • Save odedlaz/fb36bc928f2928a54949d9b00a5a3ff0 to your computer and use it in GitHub Desktop.
Save odedlaz/fb36bc928f2928a54949d9b00a5a3ff0 to your computer and use it in GitHub Desktop.
A `readlines` replacement that yields lines lazily. Instead of reading a whole file into memory and splitting it into lines, use this function to read the file line by line.
def safe_readlines(f, block_size=32, max_file_size=1024 * 1024):
    """Lazily yield the lines of a binary file object, without line endings.

    The file is read ``block_size`` bytes at a time and decoded as UTF-8, so
    at most one block plus the line currently being assembled is held in
    memory. Line boundaries follow ``str.splitlines``; a terminator
    (including ``"\\r\\n"``) that straddles two blocks is handled correctly,
    and a final line without a trailing newline is still yielded.

    Args:
        f: Object whose ``read(n)`` returns ``bytes`` (e.g. a file opened
            in ``"rb"`` mode or an ``io.BytesIO``).
        block_size: Number of bytes requested per ``read`` call.
        max_file_size: Maximum total number of bytes that may be read.

    Yields:
        Each line as ``str``, with its line terminator removed.

    Raises:
        AssertionError: If more than ``max_file_size`` bytes are read. The
            exception type is kept for compatibility with existing callers,
            but it is raised explicitly so the check survives ``python -O``.
        UnicodeDecodeError: If the content is not valid UTF-8.

    Because this is a generator, errors surface while iterating, not at
    call time.
    """
    # Local import: the top-of-file import block is not guaranteed to
    # provide ``codecs``.
    import codecs

    # An incremental decoder buffers a multi-byte character that is split
    # across two blocks instead of failing on the truncated sequence.
    decoder = codecs.getincrementaldecoder("utf-8")()
    read = 0
    partial = []  # fragments of the line currently being assembled
    skip_lf = False  # previous text ended with "\r"; a leading "\n" pairs with it

    while True:
        block = f.read(block_size)
        if not block:
            break

        read += len(block)
        if read > max_file_size:
            raise AssertionError("manifest file is too big")

        text = decoder.decode(block)
        if skip_lf and text:
            skip_lf = False
            # The "\r" already terminated a line; this "\n" is the second
            # half of a "\r\n" pair and must not produce an empty line.
            if text.startswith("\n"):
                text = text[1:]
        if not text:
            # Only part of a multi-byte character (or a swallowed "\n").
            continue
        skip_lf = text.endswith("\r")

        # Pair each line with its terminator-preserving form so that we can
        # tell complete lines from a trailing partial one.
        for line, piece in zip(text.splitlines(), text.splitlines(True)):
            partial.append(line)
            if len(line) != len(piece):  # a terminator was stripped
                yield "".join(partial)
                partial = []

    # Flush the decoder: raises UnicodeDecodeError if the file ended in the
    # middle of a multi-byte character.
    decoder.decode(b"", final=True)

    # Last line had no trailing newline.
    if partial:
        yield "".join(partial)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment