Created
December 22, 2021 18:21
-
-
Save odedlaz/fb36bc928f2928a54949d9b00a5a3ff0 to your computer and use it in GitHub Desktop.
A `readlines` alternative that yields lines one at a time. Instead of reading the whole file into memory and splitting it into lines, use this function to read the file line by line.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def safe_readlines(f, block_size=32, max_file_size=1024 * 1024):
    """Lazily yield the lines of a binary file-like object, one at a time.

    The file is consumed in chunks of ``block_size`` bytes, so the whole
    content is never held in memory at once. Lines are yielded as ``str``
    without their line terminator; line boundaries follow
    ``str.splitlines`` (``\\n``, ``\\r\\n``, ``\\r``, ...).

    Args:
        f: Object whose ``read(n)`` returns ``bytes`` and ``b""`` at EOF
            (e.g. a file opened in ``"rb"`` mode). Content is decoded as
            UTF-8.
        block_size: Number of bytes requested per ``read`` call.
        max_file_size: Maximum total number of bytes that may be read.

    Yields:
        Each line of the file, in order. A final line that lacks a trailing
        newline is yielded as well.

    Raises:
        AssertionError: If more than ``max_file_size`` bytes are read.
        UnicodeDecodeError: If the content is not valid UTF-8.
    """
    # Function-scope import so the block does not depend on the file's
    # import section.
    import codecs

    # An incremental decoder keeps the undecoded tail of a multi-byte
    # character between blocks instead of failing at the block boundary.
    decoder = codecs.getincrementaldecoder("utf-8")()
    bytes_read = 0
    parts = []    # fragments of the line currently being assembled
    held_cr = ""  # a trailing "\r" held back: it may be half of "\r\n"

    for raw in iter(lambda: f.read(block_size), b""):
        bytes_read += len(raw)
        # Raised explicitly (not via ``assert``) so the limit is still
        # enforced under ``python -O``; the exception type is unchanged.
        if bytes_read > max_file_size:
            raise AssertionError("manifest file is too big")

        text = held_cr + decoder.decode(raw)
        if text.endswith("\r"):
            # Defer the decision until the next block shows whether a
            # "\n" follows.
            held_cr, text = "\r", text[:-1]
        else:
            held_cr = ""

        for piece in text.splitlines(keepends=True):
            content = piece.splitlines()[0]
            parts.append(content)
            # A piece longer than its content carried a line terminator,
            # so the line being assembled is now complete.
            if len(content) != len(piece):
                yield "".join(parts)
                parts = []

    # Flush whatever is left: an unterminated last line and/or a held "\r".
    tail = "".join(parts) + held_cr + decoder.decode(b"", final=True)
    yield from tail.splitlines()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment