Skip to content

Instantly share code, notes, and snippets.

@hawjeh
Last active June 2, 2022 10:26
Show Gist options
  • Save hawjeh/287fda7edf143db7aafac42e19598f40 to your computer and use it in GitHub Desktop.
Save hawjeh/287fda7edf143db7aafac42e19598f40 to your computer and use it in GitHub Desktop.
Download_media_files_from_links_txt
var https = require('https'),
fs = require('fs'),
readline = require('readline');
// Positional command-line arguments, in order:
//   baseUrl     - page domain URL, e.g. https://www.google.com
//   processName - prefix of the `<processName>_links.txt` file that lists the links
const [baseUrl, processName] = process.argv.slice(2);
/**
 * Download a single URL over HTTPS and write the response body to `dest`.
 *
 * The outcome is reported through `cb` (at most once) as a readable string:
 *   - `200 Downloaded - <dest>` once the file has been written and closed,
 *   - `404 Not Found - '<url>'` for a 404 response,
 *   - `<status> <reason> - '<url>'` for any other non-2xx response,
 *   - the error message when the request or the file write fails.
 *
 * @param {string} url - HTTPS URL to fetch.
 * @param {string} dir - Directory that has to exist before `dest` is opened; created recursively.
 * @param {string} dest - Path of the file to write.
 * @param {function(string): void} [cb] - Optional callback receiving the status message.
 */
function download(url, dir, dest, cb) {
  // Report at most once and tolerate a missing callback.
  let reported = false;
  const report = (msg) => {
    if (reported) return;
    reported = true;
    if (cb) cb(msg);
  };

  // Best-effort removal of a partial file. fs.unlink requires a callback;
  // a failure here (e.g. the file was never created) is deliberately ignored.
  const discardPartial = () => fs.unlink(dest, () => {});

  // With `recursive: true` this is a no-op when the directory already exists.
  fs.mkdirSync(dir, { recursive: true });

  https
    .get(url, (response) => {
      const { statusCode } = response;
      if (statusCode < 200 || statusCode >= 300) {
        // Drain the body so the socket is released, and never store an
        // error or redirect page as if it were the requested file.
        response.resume();
        const reason = statusCode === 404 ? 'Not Found' : response.statusMessage || 'Request Failed';
        report(`${statusCode} ${reason} - '${url}'`);
        return;
      }

      const file = fs.createWriteStream(dest);
      file.on('error', (err) => {
        // Stop feeding the broken stream but keep consuming the response.
        response.unpipe(file);
        response.resume();
        discardPartial();
        report(err.message);
      });
      file.on('finish', () => {
        // close() takes a callback; wrapping the report in a function makes
        // sure success is announced only after the file is really closed.
        file.close(() => report(`200 Downloaded - ${dest}`));
      });
      response.pipe(file);
    })
    .on('error', (err) => {
      discardPartial();
      console.log(`Error URL: ${url}`);
      report(err.message);
    });
}
// Read `<processName>_links.txt` line by line; every non-empty line is one URL to download.
const rd = readline.createInterface({
  input: fs.createReadStream(`./${processName}_links.txt`),
  // Recognise every \r\n as a single line break regardless of chunk timing.
  crlfDelay: Infinity,
});

// For each link, derive a local path that mirrors the URL path below
// `./<processName>_downloaded/` and hand the link to `download`.
rd.on('line', (line) => {
  const url = line.trim();
  if (url === '') {
    return; // blank lines are not links
  }

  // Remove the site prefix, then cut the query string BEFORE decoding so an
  // encoded "%3F" inside the path is not mistaken for the query separator.
  let encodedPath = url.replace(baseUrl, '');
  const queryStart = encodedPath.indexOf('?');
  if (queryStart !== -1) {
    encodedPath = encodedPath.substring(0, queryStart);
  }

  let relativePath;
  try {
    relativePath = decodeURIComponent(encodedPath);
  } catch (err) {
    // Malformed percent-escapes throw URIError; skip this link instead of
    // aborting the whole run.
    console.log(`Skipped (${err.message}) - '${url}'`);
    return;
  }

  // Refuse paths that would climb out of the download folder.
  if (relativePath.split(/[\\/]/).includes('..')) {
    console.log(`Skipped (path traversal) - '${url}'`);
    return;
  }

  // Everything before the LAST slash is the directory; slicing avoids removing
  // an earlier identical segment, which a first-match replace would do.
  const lastSlash = relativePath.lastIndexOf('/');
  const dir = lastSlash === -1 ? '' : relativePath.substring(0, lastSlash);

  const outputRoot = `./${processName}_downloaded`;
  download(url, `${outputRoot}/${dir}`, `${outputRoot}/${relativePath}`, (msg) => {
    console.log(msg);
  });
});
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment