Skip to content

Instantly share code, notes, and snippets.

@ishiduca
Last active May 21, 2017 09:24
Show Gist options
  • Save ishiduca/ae5be7b153b2413f6f5f336b00cdf4fc to your computer and use it in GitHub Desktop.
Save ishiduca/ae5be7b153b2413f6f5f336b00cdf4fc to your computer and use it in GitHub Desktop.
trombone-download
#!/usr/bin/env node
'use strict'
var url = require('url')
var path = require('path')
var miss = require('mississippi')
var mkdirp = require('mkdirp')
var notifier = require('node-notifier')
var config = require('./config')
var trombone = require('../trombone')
// Parse command-line options. Short flags are aliased to their long names and
// the defaults for --dir / --parallel are taken from the local config module.
var argv = require('minimist')(process.argv.slice(2), {
  alias: {
    d: 'dir',
    p: 'parallel',
    s: 'scraper',
    h: 'help'
  },
  'default': {
    dir: config.dir,
    parallel: config.parallel
  }
})

// --help: print usage and exit before any download work starts.
// The defaults shown are interpolated from config so the text cannot drift
// from the values actually passed to minimist above.
if (argv.help) {
  console.log(`usage: $ trombone [-d directory_path][-p number][-s scraper_path] uri1, uri2, ...
-d, --dir: specify a path of root directory. (default ${config.dir})
-p, --parallel: a number of work in parallel. (default ${config.parallel})
-s, --scraper: specify a path of scraper module.
-h, --help show help.
`)
  process.exit(0)
}
// Process every URI given on the command line, one after another:
// derive a target directory from the URI, load the scraper module for its
// host, create the directory, run trombone, then raise a desktop notification.
// The first error stops the run and is reported by the final callback.
miss.each(arrayStream(argv._), function (u, done) {
  var uri = String(u)
  var o = url.parse(uri)

  // path.basename / path.join throw on null input, and a throw here would
  // escape the stream callback instead of reaching done(). Reject arguments
  // that do not parse into a host and a path.
  if (!o.hostname || !o.pathname) {
    return done(new Error('invalid uri "' + uri + '"'))
  }

  // Directory name: last path segment of the URI, without its extension.
  var d = path.basename(o.pathname).split('.')[0]
  var dir = path.join(argv.dir, o.hostname, d)

  // Resolve against the real working directory. process.env.PWD is set by the
  // shell, so it can be absent or stale, whereas mkdirp(dir) below always
  // resolves a relative dir against process.cwd().
  var cwd = process.cwd()
  var fullPath = path.resolve(cwd, dir)

  // An explicit --scraper path wins; otherwise look up the per-host scraper
  // under the configured scraper directory.
  var scraperPath = argv.scraper
    ? path.resolve(cwd, argv.scraper)
    : path.join(config.scraperPath, o.hostname, 'scraper')

  var scraper
  try {
    scraper = require(scraperPath)
  } catch (err) {
    return done(err)
  }

  mkdirp(dir, function (err) {
    if (err) return done(err)
    console.log('ok mkdirp --> "%s"', dir)

    trombone(uri, scraper, {
      parentDir: dir,
      parallel: argv.parallel
    }, function onEnd (err) {
      if (err) return done(err)
      notifier.notify({
        title: o.hostname,
        message: `done ${fullPath}`,
        sound: true,
        open: `file://${fullPath}`
      })
      done()
    })
  })
}, function (err) {
  if (err) return console.error(err)
  console.log('ended')
})
/**
 * Wrap a value (or an array of values) in a readable stream that emits each
 * item once, in order, and then ends.
 *
 * @param {*|Array} as - a single item or an array of items; the array is
 *   copied, so the caller's array is not consumed.
 * @returns {stream.Readable} object-mode readable stream of the items.
 */
function arrayStream (as) {
  var arry = [].concat(as)
  // Object mode keeps exactly one item per chunk. In byte mode a consumer that
  // calls read() without a size gets everything currently buffered, so items
  // pushed back-to-back can arrive merged into one chunk; byte mode also
  // rejects non-string items such as numeric arguments.
  return miss.from.obj(function (size, done) {
    // End of input must be signalled with an explicit null chunk; per the
    // original author's note, a bare done() does not work here.
    if (arry.length <= 0) done(null, null)
    else done(null, arry.shift())
  })
}
var s = '.ently_text>div a[href^="http://file.example.net/uploads/img/"]'
module.exports = function scraper (tr, ws) {
var w = !1
var e = !1
tr.once('finish', () => !w && (e = !0) && ws.end())
tr.once('end', () => !e && (e = !0) && ws.end())
tr.selectAll(s, a => a.getAttribute('href', h => (w = !0) && ws.write(h)))
}
{
"name": "trombone",
"version": "0.1.1",
"main": "trombone.js",
"bin": {
"trombone": "./bin/trombone.js"
},
"files": [
"trombone.js",
"package.json",
"bin",
"scrapers"
],
"dependencies": {
"hyperquest": "^2.1.2",
"minimist": "^1.2.0",
"mississippi": "^1.3.0",
"mkdirp": "^0.5.1",
"node-notifier": "^5.1.2",
"parallel-transform": "^1.1.0",
"sprintf": "^0.1.5",
"trumpet": "^1.7.2",
"xtend": "^4.0.1"
}
}
'use strict'
var fs = require('fs')
var path = require('path')
var xtend = require('xtend')
var miss = require('mississippi')
var sprintf = require('sprintf').sprintf
var trumpet = require('trumpet')
var parallel = require('parallel-transform')
var hyperquest = require('hyperquest')
module.exports = function trombone (uri, scraper, opt, onEnd) {
onEnd || (onEnd = function _onEnd (err) { err && console.error(err) })
opt = xtend(opt)
var tr = trumpet()
var ws = miss.through()
try {
scraper(tr, ws)
} catch (err) {
return onEnd(err)
}
var parallels = 3
var c = -1
miss.pipe(
hyperquest(uri),
miss.duplex(tr, ws),
parallel(opt.pallale || parallels, function (_imgSrc, done) {
var imgSrc = String(_imgSrc)
var ext = path.extname(imgSrc)
var imgDst = path.join(opt.parentDir, sprintf('%03d%s', (c += 1), ext))
miss.pipe(
hyperquest(imgSrc),
fs.createWriteStream(imgDst),
function endOfDownload (err) {
done(err, {src: imgSrc, dst: imgDst})
}
)
}),
miss.through.obj(function (o, _, done) {
console.log('ok download "%s" --> "%s"', o.src, o.dst)
done()
}),
function endOfMultiDownloads (err) {
if (!err) console.log('!! success downlaod "%s" --> "%s"', uri, opt.parentDir)
onEnd(err)
}
)
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment