Skip to content

Instantly share code, notes, and snippets.

@toanalien
Last active April 29, 2023 15:18
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save toanalien/b77a0de0064b735ddb652102ed79c6e0 to your computer and use it in GitHub Desktop.
Save toanalien/b77a0de0064b735ddb652102ed79c6e0 to your computer and use it in GitHub Desktop.
Sose.xyz crawl
var fs = require('fs');
var async = require('async');
var request = require('request');
var url = require('url');
var path = require('path');
// Text file that lists the URLs to download, one URL per line.
const filename = 'thichucolinkko.txt';

// Upper bound on the number of downloads running at the same time.
const concurrency = 10;
/**
 * Download the resource at `link` into the current working directory.
 *
 * The local file name is the last segment of the URL path, so a link whose
 * path is empty or ends in "/" cannot be saved and is reported as an error.
 *
 * @param {string} link - URL to fetch; surrounding whitespace is ignored.
 * @param {function(Error=)} cb - Called exactly once: with no argument when
 *   the file has been written and closed, or with an Error when the link is
 *   unusable, the request fails, or the file cannot be written.
 */
function downloadFile(link, cb) {
  console.log(link);

  // Several events can end one download (request error, write error, close);
  // make sure the queue slot is released once and only once.
  var finished = false;
  function done(err) {
    if (finished) {
      return;
    }
    finished = true;
    if (err) {
      console.error('download failed: ' + link + ' (' + err.message + ')');
    }
    cb(err);
  }

  // Lines read from a text file may carry a trailing "\r" or spaces.
  var target = typeof link === 'string' ? link.trim() : '';
  var pathname = target ? url.parse(target).pathname : null;
  var filename = pathname ? path.basename(pathname) : '';
  if (!filename) {
    // Report asynchronously so the callback timing is the same on every path.
    process.nextTick(done, new Error('cannot derive a file name from link: ' + link));
    return;
  }

  // The former HEAD request was dropped: its error and response were ignored,
  // so it only doubled the number of requests per link.
  var out = fs.createWriteStream(filename);
  var req = request(target);

  req.on('error', function (err) {
    done(err);
    // Nothing more will be piped; release the file descriptor.
    out.destroy();
  });
  out.on('error', function (err) {
    done(err);
    // The body has nowhere to go; stop the transfer.
    req.abort();
  });
  out.on('close', function () {
    done();
  });

  req.pipe(out);
}
/**
 * Queue task handler: announces the link on stdout and then delegates the
 * actual transfer to downloadFile, forwarding the completion callback as is.
 *
 * @param {string} link - URL taken from the queue.
 * @param {function} cb - Completion callback handed through to downloadFile.
 */
function worker(link, cb) {
  var announcement = 'download ' + link;
  console.log(announcement);

  downloadFile(link, cb);
}
// Task queue: every pushed link is handed to `worker`, with `concurrency`
// passed as the queue's concurrency setting.
var queue = async.queue(worker, concurrency);

// Handler stored on the queue's `drain` property.
// NOTE(review): newer major versions of the async library appear to expect
// `queue.drain(handler)` instead of assignment - confirm against the
// installed version.
queue.drain = function onQueueDrained() {
  console.log('Done All !');
};
// Load the link list and queue one download per non-empty line.
fs.readFile(filename, function (err, data) {
  // Without the list there is nothing to do; fail loudly.
  if (err) throw err;

  data
    .toString()
    // Accept both Unix (\n) and Windows (\r\n) line endings.
    .split(/\r?\n/)
    .map(function (line) {
      return line.trim();
    })
    // Skip blank lines (e.g. the one produced by a trailing newline), which
    // would otherwise be queued as empty links.
    .filter(function (line) {
      return line.length > 0;
    })
    // Wrap the call so forEach's extra (index, array) arguments are not
    // passed on to queue.push.
    .forEach(function (link) {
      queue.push(link);
    });
});
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment