Skip to content

Instantly share code, notes, and snippets.

@benzen
Created September 6, 2015 22:27
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save benzen/10c7aa003f5cdd707a48 to your computer and use it in GitHub Desktop.
Save benzen/10c7aa003f5cdd707a48 to your computer and use it in GitHub Desktop.
Une impression de déjà-vu ici
cheerio = require "cheerio"
superagent = require "superagent"
async = require "async"
_ = require "lodash"
# Listing pages for boys' first names on mamanpourlavie.com, one URL per
# starting letter from "a" to "z", in alphabetical order.
BOY_URLS = "abcdefghijklmnopqrstuvwxyz".split("").map (letter) ->
  "http://www.mamanpourlavie.com/prenoms/lettre/#{letter}/garcon"
# Issues a GET request for `url` through superagent and reports the outcome.
#
# url - address of the page to fetch.
# cb  - Node-style callback: called with (err) when the request fails,
#       otherwise with (null, text) where `text` is the response's `text` field.
getPage = (url, cb) ->
  console.log "getting page #{url}"
  request = superagent.get(url)
  request.end (err, response) ->
    # Bail out early on failure; nothing else to forward.
    return cb err if err
    cb null, response.text
# Loads the markup in `page` with cheerio and passes the result on.
#
# page - markup text to load.
# cb   - Node-style callback, called with (null, $) where `$` is whatever
#        `cheerio.load` returned.
parsePage = (page, cb) ->
  console.log "parsing page"
  $ = cheerio.load page
  cb null, $
# Extracts the list of names from a loaded page.
#
# $  - selector function for the page (as produced by parsePage).
# cb - Node-style callback, called with (null, names) where `names` is an
#      array of non-empty strings.
#
# The combined text of every `li.col4` element is split on newlines; each
# line has its whitespace removed and lines that end up empty are dropped.
fetchNames = ($, cb) ->
  console.log "fetching names"
  lines = $('li.col4').text().split("\n")
  names = []
  for line in lines
    # With the global flag this pattern deletes every whitespace character,
    # so whitespace inside a name is removed as well as the padding around it.
    cleaned = line.replace(/\s?\t?/g, "")
    names.push cleaned unless cleaned is ""
  cb null, names
# Builds a pipeline step that wraps plain names into records.
#
# sex - value stored in the `sex` field of every record produced.
#
# Returns a function (names, cb) that maps each entry of `names` to
# {name, sex, tags: []} via async.map and forwards the result to `cb`.
unit = (sex) ->
  (names, cb) ->
    # Each record gets its own fresh, empty `tags` array.
    toRecord = (name, done) ->
      done null, {name, sex, tags: []}
    async.map names, toRecord, cb
# Builds the per-URL worker for a given sex.
#
# sex - value passed to `unit` for tagging every record.
#
# Returns a function (url, cb) that chains getPage, parsePage, fetchNames and
# unit(sex) with async.seq, runs the chain on `url`, and hands the outcome
# to `cb`.
processPage = (sex) ->
  (url, cb) ->
    pipeline = async.seq getPage, parsePage, fetchNames, unit(sex)
    pipeline url, cb
# Entry point: process every URL in BOY_URLS as "male", merge the per-page
# lists into one flat list, and print it.
async.map BOY_URLS, processPage("male"), (err, listOfListOfNames) ->
  # Without this check a failed page would be swallowed: the result list is
  # undefined on error, `_.flatten` turns that into [], and the script would
  # print an empty list followed by "DONE" as if everything had worked.
  if err
    console.error "failed to collect names:", err
    process.exitCode = 1
    return
  names = _.flatten(listOfListOfNames)
  console.log names
  console.log "DONE"
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment