Skip to content

Instantly share code, notes, and snippets.

@andykais
Last active January 16, 2019 01:48
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save andykais/cdc12846203c524dc33ee4b0b749d994 to your computer and use it in GitHub Desktop.
Save andykais/cdc12846203c524dc33ee4b0b749d994 to your computer and use it in GitHub Desktop.
// current design
import ScrapePages from '../src'
import { normalizeConfig } from '../lib/normalize-config'
// Scraper definition; left empty in this sketch.
const config = { scrape: {} }
// Per-run options handed to run(): scraper input, per-scraper overrides, logger setup.
const options = {
  input: {
    username: 'bob'
  },
  optionsEach: {
    gallery: {
      downloadPriority: 1,
      logLevel: 'info'
    }
  },
  logger: {
    level: 'debug',
    useFile: 'downloads/record.log'
  }
}
// NOTE(review): fullConfig is computed but never used below — confirm whether the
// constructor is meant to receive it instead of the raw config.
const fullConfig = normalizeConfig(config)
// The scraper class is imported under the name `ScrapePages`; `Scraper` is not
// declared anywhere in this snippet and would throw a ReferenceError.
const siteScraper = new ScrapePages(config)
const emitter = siteScraper.run(options)
// On 'close' the listener receives a query function; its resolved value is logged.
emitter.on('close', queryFor => queryFor().then(console.log))
// `emit` is not a free function in this scope, so go through the emitter returned by run().
emitter.emit('stop', () => console.log('stopped.'))
// --=== API proposals ===-- //
/**
* First Design:
* least stateful
*/
import { scraper, querier } from 'scraper'
import * as util from 'scraper/util'
// utils
const scrapers = util.flatten(config)
const fullConfig = util.normalize(config)
util.verify({ config, options })
// main
// scraper() hands back the event hooks and a query function in one call.
const { on, emit, query } = scraper(config, options)
on('done', () => {
  const results = query({ scrapers: ['image'] })
})
emit('stop')
// instantiate another sqlite db instance
// Bound to its own name: `query` is already declared by the destructuring above,
// and redeclaring a const in the same scope is a SyntaxError.
const standaloneQuery = querier(config, options)
/**
* Second Design:
* class instance only contains config state, re-run with different options/input
*/
import * as scraper from 'scraper' // util is available here too
import * as util from 'scraper/util'
// utils
const scrapers = util.flatten(config)
const fullConfig = util.normalize(config)
util.verifyConfig(config)
util.verifyOptions(options)
// main
// The scraper instance is built from config only; options are supplied per run().
const siteScraper = scraper.createScraper(config)
const { on, emit, query } = siteScraper.run(options) // push all async tasks into observable (creating folders)
on('done', () => {
  const results = query({ scrapers: ['image'] })
})
emit('stop')
// instantiate another sqlite db instance
// Bound to its own name: `query` is already declared by the destructuring above,
// and redeclaring a const in the same scope is a SyntaxError.
const standaloneQuery = scraper.createQuerier(config, options)
/**
* Third Design:
* most stateful, but allows instantiating two classes pointing at db
* one instance for scraping and one for querying (possibly two different worker threads)
*/
import { Scraper } from 'scraper'
// utils
Scraper.verify(config)
const fullConfig = Scraper.normalize(config)
const scrapers = Scraper.flatten(config)
// NOTE(review): verify() is called with two different argument shapes (bare config
// above, { config, options } here) — confirm which signature is intended.
Scraper.verify({ config, options })
// main
const siteScraper = new Scraper(config, options)
await siteScraper.init() // create folders, instantiate sqlite db, sync or async?
// Subscribe before start() so a 'done' emitted while start() is running is not missed.
siteScraper.on('done', () => {
  const results = siteScraper.query({ scrapers: ['image'] })
})
siteScraper.start()
// Named separately from the callback's `results` to avoid shadowing.
const earlyResults = siteScraper.query({ scrapers: ['image'] }) // query can be called w/out calling start(), so long as init() has been called
siteScraper.emit('stop')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment