Skip to content

Instantly share code, notes, and snippets.

@noogen
Last active July 15, 2020 15:29
Show Gist options
  • Save noogen/5f1b581894f3cbe84f7262a231a7712c to your computer and use it in GitHub Desktop.
Save noogen/5f1b581894f3cbe84f7262a231a7712c to your computer and use it in GitHub Desktop.
Process file sync with readline
const fs = require('fs')
const readline = require('readline')
const stream = require('stream')
const {promisify} = require('util')
const got = require('got')
const pipeline = promisify(stream.pipeline)
/**
* Processing file line-by-line synchronously
* node readline-sync-process.js csvfile.csv index
* - index allows you to resume processing a file (earlier lines are skipped)
*
* The example below uses 'got' to download images from a server,
* pausing intermittently to avoid hammering the server
* so you don't get banned by its DDoS protection
*/
// run with: node readline-sync-process.js csvfile.csv 0
// argv[2]: path of the CSV file to process
const file = process.argv[2]
// argv[3]: optional 1-based line number to resume from; a missing or
// non-numeric value falls back to 0 (process every line). The radix is
// explicit so the value is always read as decimal.
const start = Number.parseInt(process.argv[3] || '0', 10) || 0
/**
 * Pause for a given number of milliseconds.
 *
 * @param {number} ms - delay handed to setTimeout
 * @returns {Promise<undefined>} settles with no value once the timer fires
 */
function sleep(ms) {
  return new Promise(function schedule(wake) {
    setTimeout(() => wake(), ms)
  })
}
/**
 * Feed a text file to `callback` one line at a time, in file order.
 *
 * The next line is not delivered until the promise returned by the
 * previous `callback` call has settled, so processing is strictly serial.
 *
 * @param {string} file - path handed to fs.createReadStream
 * @param {(line: string) => (Promise<void>|void)} callback - invoked once per line
 * @returns {Promise<void>} resolves after the last line has been processed;
 *   rejects with whatever `callback` throws
 */
async function byLine(file, callback) {
  const rs = fs.createReadStream(file)
  const rl = readline.createInterface({
    input: rs,
    // always treat \r\n as a single line break
    crlfDelay: Infinity
  })
  try {
    for await (const line of rl) {
      await callback(line)
    }
  } finally {
    // Leaving the loop early (callback threw) closes the readline interface
    // but not its input, so release the file descriptor explicitly.
    // Harmless when the stream has already ended.
    rs.destroy()
  }
}
/**
 * Walk the CSV named on the command line and download one image per row.
 *
 * Each row is expected to look like `filename,https://example.com/file.jpg`.
 * Rows whose 1-based number is below `start` are skipped so an interrupted
 * run can be resumed. Rows are handled one at a time, with a short pause
 * after each download and a longer one after every tenth row.
 *
 * @returns {Promise<void>} resolves after the last row; rejects if the CSV
 *   path is missing or if reading the file or a download pipeline fails
 */
const main = async () => {
  if (!file) {
    throw new Error('usage: node readline-sync-process.js csvfile.csv [index]')
  }
  const now = new Date()
  console.log(now, file)
  console.log('skipping to: ' + start)
  let idx = 0
  await byLine(file, async (line) => {
    idx++
    if (idx < start) {
      return
    }
    // Split on the first comma only, so a URL that itself contains commas
    // is kept intact.
    const comma = line.indexOf(',')
    const filename = comma === -1 ? '' : line.slice(0, comma).trim()
    const url = comma === -1 ? '' : line.slice(comma + 1).trim()
    if (!filename || !url) {
      // Blank or malformed row: nothing to download, keep going.
      console.warn(idx, 'skipping malformed line')
      return
    }
    // pause 1 second after every 10th row, 1 ms otherwise
    const timeout = ((idx % 10) === 0) ? 1000 : 1
    console.log(idx, url, file)
    // download image, save it
    await pipeline(
      got.stream(url),
      fs.createWriteStream('./out/' + filename + '.jpg')
    )
    await sleep(timeout)
  })
}
// Start the run. Report any failure and exit non-zero instead of leaving
// the promise rejection unhandled.
main().catch((err) => {
  console.error(err)
  process.exitCode = 1
})
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment