Skip to content

Instantly share code, notes, and snippets.

@zarac
Created February 4, 2014 20:46
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save zarac/8811994 to your computer and use it in GitHub Desktop.
Save zarac/8811994 to your computer and use it in GitHub Desktop.
narpm.org-scraper
/*
* A scraper for narpm.org, as described at
* https://www.odesk.com/o/jobs/job/_~011b56c2f6e6ad6885/
*
* The job got cancelled and therefore this script.
*/
/* libz */
var request = require('request')
var $ = require('cheerio')
var fs = require('fs')
/* cfg */
// Search URL prefix. It ends with `xZipCode=` so that a zip code can be
// appended directly to form the full request URL.
// NOTE(review): xRadius=102 is presumably the search radius; unit unknown — confirm.
var baseUrl = 'http://www.narpm.org/search/search-managers.html?submitted=true&a=managers_by_zip&xRadius=102&xZipCode='
// Zip codes to scrape; one request is issued for each entry.
var zipCodes = [ 10001 ]
/* execute */
/*
 * For every configured zip code: fetch the search results page, log a short
 * preview (the name of the first two entries) and save the raw HTML body to
 * data/<zip>.
 *
 * Request errors, non-200 responses and failed writes are reported on stderr
 * instead of being silently ignored.
 */
zipCodes.forEach(function(zip) {
  console.log('request', zip)
  request(baseUrl + zip, function(err, res, body) {
    console.log('gotz', zip)
    if (err) {
      console.error('request failed', zip, err)
      return
    }
    if (res.statusCode !== 200) {
      console.error('unexpected status', zip, res.statusCode)
      return
    }

    var page = $.load(body)
    var entries = page('div.darkbg, div.lightbg')
    console.log('entries.length', entries.length)

    // each() rather than map(): the callback is run for its side effects only.
    entries.each(function(i, e) {
      // Only the first two entries are previewed; returning false stops the
      // iteration early.
      if (i >= 2) return false
      // Wrap the element with the loaded document instead of the bare module.
      var nameCell = page(e).find('table.datalist td[width=175]').first()
      // name
      console.log('name = ', nameCell.text())
    })

    // fs.writeFile does not create missing directories, so a missing data/
    // directory surfaces here as an error rather than as a bogus "wrote".
    fs.writeFile('data/' + zip, body, function(writeErr) {
      if (writeErr) {
        console.error('write failed', zip, writeErr)
        return
      }
      console.log('wrote', zip)
    })
  })
})
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment