Skip to content

Instantly share code, notes, and snippets.

@yunhan0
Last active July 13, 2017 06:07
Show Gist options
  • Save yunhan0/9e658cc3bab29dba7dab11966f001159 to your computer and use it in GitHub Desktop.
Save yunhan0/9e658cc3bab29dba7dab11966f001159 to your computer and use it in GitHub Desktop.
Example of processing a large data file without running out of memory.
#!/usr/bin/python
# Yunhan Li
# Example of processing a large data file without running out of memory.
import sys, os, time
def processFile(filename, output, bufferlimit=10000):
    """Write the length of every line of ``filename`` to ``output``.

    The input is streamed line by line, so the whole file is never held in
    memory. Results are collected in a small in-memory buffer and written
    out in batches to avoid one write call per input line.

    Args:
        filename: Path of the text file to read.
        output: Path of the file to write; it is created or truncated.
        bufferlimit: Number of result lines to accumulate before they are
            written out. Values below 1 cause a write after every line.

    Each output line holds the character count of the corresponding input
    line, including its trailing newline when it has one. The elapsed time
    is printed once processing has finished.
    """
    # time.clock() was removed in Python 3.8; use perf_counter() where it
    # exists and fall back to clock() on interpreters that predate it.
    timer = getattr(time, "perf_counter", None) or time.clock
    pending = []
    start_tic = timer()
    # The with statement closes both files even if the loop body raises.
    with open(filename) as f, open(output, "w") as w:
        # Iterating the file object yields one line at a time, so each line
        # can be processed and discarded without loading the whole file.
        for line in f:
            pending.append(str(len(line)) + "\n")
            # ">=" rather than "==" so a limit below 1 still flushes
            # instead of letting the buffer grow without bound.
            if len(pending) >= bufferlimit:
                w.writelines(pending)
                pending = []
        # Flush whatever is left over from the last, partially filled batch.
        w.writelines(pending)
    # A single parenthesised argument prints identically on Python 2 and 3.
    print("Done. Using " + str(timer() - start_tic) + "s")
if __name__ == "__main__":
    # sys.argv holds the script name, the input file path and the output
    # file path (the output file does not need to exist yet).
    if len(sys.argv) != 3:
        print("Number of arguments not match.\nThis program takes 3 arguments: the input file path, output file path.")
        # sys.exit is always available; the bare exit() helper is only
        # injected by the site module.
        sys.exit(1)
    your_file = str(sys.argv[1])
    output_file = str(sys.argv[2])
    # Stop after a failed check instead of falling through to processFile
    # with an input that was just reported as unacceptable.
    if not your_file.endswith(".txt"):
        print("I stupidly only accept txt file.")
        sys.exit(1)
    if not os.path.exists(your_file):
        print("file does not exist.")
        sys.exit(1)
    processFile(your_file, output_file)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment