@lancejpollard
Created June 18, 2012 07:29
Tinkering with reading a large file line by line and running an async function for each line in Node.js
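The approach: accumulate chunks from a read stream, split the buffer on newlines, carry the trailing partial line over to the next chunk, and pause the stream while async.forEachSeries works through each complete line.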
# http://stackoverflow.com/questions/11078141/how-to-run-an-async-function-for-each-line-of-a-very-large-1gb-file-in-node
fs    = require 'fs'
async = require 'async'

readVeryLargeFile = (path) ->
  i = 0          # running count of complete lines seen
  remaining = '' # partial line carried over between chunks

  # process one line: grab the third whitespace-delimited column
  iterate = (line, next) ->
    parts = line.split(/[\t\s]+/)
    throw new Error(line) unless parts[2]
    id = parts[2]
    console.log id
    setTimeout next, 10 # stand-in for real async work

  onData = (chunk) ->
    # append the new chunk to any fragment left over from the previous chunk
    remaining += chunk.toString()
    lines = remaining.split("\n")
    remaining = lines.pop() # can't assume the last line is complete
    i += lines.length
    # pause the stream until the async iterator drains this batch
    stream.pause()
    async.forEachSeries lines, iterate, ->
      stream.resume()

  # bufferSize applies to Node 0.x; newer Node uses highWaterMark instead
  stream = fs.createReadStream(path, flags: 'r', encoding: 'utf-8', bufferSize: 1024 / 4)
  stream.on 'data', onData
  stream.on 'end', ->
    # flush the final line if the file doesn't end with a newline
    iterate remaining, (->) if remaining.length

readVeryLargeFile('tmp/test.csv')
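
For comparison, a minimal sketch of the same idea on current Node using the built-in readline module, which handles the chunk-splitting bookkeeping itself. readWithReadline and the 10ms setTimeout are illustrative stand-ins, not part of the original gist:

readline = require 'readline'
fs       = require 'fs'

readWithReadline = (path) ->
  rl = readline.createInterface input: fs.createReadStream(path, encoding: 'utf-8')
  rl.on 'line', (line) ->
    # hold further 'line' events while the async work runs
    rl.pause()
    id = line.split(/[\t\s]+/)[2]
    console.log id
    setTimeout (-> rl.resume()), 10
  rl.on 'close', -> console.log 'done'

readWithReadline('tmp/test.csv')

One caveat: readline's pause() pauses the underlying input stream, so a few lines already buffered internally may still be emitted before the pause takes effect.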