Skip to content

Instantly share code, notes, and snippets.

@cirops
Created November 16, 2017 23:59
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save cirops/a389aa78f37d6ef6aa36ecc2986e8d13 to your computer and use it in GitHub Desktop.
Save cirops/a389aa78f37d6ef6aa36ecc2986e8d13 to your computer and use it in GitHub Desktop.
Simple example script to grab data from a paginated website using xmlhttprequest and cheerio
var cheerio = require('cheerio');
var XMLHttpRequest = require("xmlhttprequest").XMLHttpRequest;
/**
 * Fetch a URL with a blocking (synchronous) GET request and return its body.
 *
 * @param {string} theUrl - Address to request.
 * @returns {string} The response body as text.
 * @throws {Error} If the response status is outside the 2xx range.
 */
function httpGet(theUrl) {
  const xmlHttp = new XMLHttpRequest();
  xmlHttp.open("GET", theUrl, false); // false for synchronous request
  xmlHttp.send(null);
  // Fail loudly on a non-success status: otherwise the caller would parse
  // whatever body came back as if it were a valid results page.
  if (xmlHttp.status < 200 || xmlHttp.status >= 300) {
    throw new Error(`GET ${theUrl} failed with status ${xmlHttp.status}`);
  }
  return xmlHttp.responseText;
}
// Walk every result page of the listing and print one pipe-separated line per
// entry. All bindings are declared explicitly so the script also runs under
// strict mode / ES modules, where implicit globals throw a ReferenceError.
const LAST_PAGE = 7; // the listing is walked from page 1 through this page
for (let page = 1; page <= LAST_PAGE; page++) {
  const query = `http://cempre.org.br/servico/pesquisa/lista/v3/12/v4/R/pg/${page}`;
  const $ = cheerio.load(httpGet(query));

  // Run the container query once per page instead of once per field.
  const boxes = $('.box-marg-1');
  const titles = boxes.find('.tit');
  // Each field is picked by its position among the direct <div> children
  // of a `.txt` element.
  const addresses = boxes.find('.txt > div:nth-child(1)');
  const neighborhoods = boxes.find('.txt > div:nth-child(2)');
  const cities = boxes.find('.txt > div:nth-child(3)');
  const ceps = boxes.find('.txt > div:nth-child(4)');
  const materials = boxes.find('.txt > div:nth-child(5)');

  // The selections are treated as parallel lists: index i of each one is
  // assumed to belong to the same entry.
  // NOTE(review): if an entry is missing one of its <div>s the later rows
  // would shift — confirm against the page markup.
  const columns = [titles, addresses, neighborhoods, cities, ceps, materials];
  for (let i = 0; i < titles.length; i++) {
    const fields = columns.map((column) => $(column[i]).text());
    console.log(fields.join('|'));
  }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment