Last active
January 21, 2016 05:26
-
-
Save niraj-shah/39454eb66296ac737d6e to your computer and use it in GitHub Desktop.
NodeJS example of how to scrape a webpage and retrieve the details
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// include the libraries we need
var request = require('request');
var cheerio = require('cheerio');
// Build a pre-configured wrapper around `request`; every call made through
// `req` inherits these options. Declared with `var` so it is a module-local
// binding rather than an implicit global (which throws in strict mode).
var req = request.defaults({
  jar: true, // remember cookies between requests
  // NOTE(review): this turns off TLS certificate verification for HTTPS
  // requests. Tolerable for a throwaway scrape; confirm before reusing
  // this configuration anywhere that handles sensitive data.
  rejectUnauthorized: false,
  followAllRedirects: true // allow redirections
});
// Scrape the page: fetch it, parse the HTML with cheerio, and print the
// IP address, host name and user agent that the site reports back.
req.get({
  url: "http://www.whatsmyipaddress.net/",
  headers: {
    'User-Agent': 'Super Cool Browser' // optional headers
  }
}, function(err, resp, body) {
  // On a failed request `body` is not usable, so report the error and stop
  // instead of handing an undefined document to cheerio.
  if (err) {
    console.error('Request failed: ' + err.message);
    return;
  }

  // load the html into cheerio
  var $ = cheerio.load(body);

  // get the data and output to console
  // NOTE(review): '.inner_cntent' is presumably the class name used by the
  // target page's own markup (spelling included) — verify against the live
  // HTML before "correcting" it.
  console.log( 'IP: ' + $('.inner_cntent:nth-child(1) span').text() );
  console.log( 'Host: ' + $('.inner_cntent:nth-child(2) span').text() );
  console.log( 'UA: ' + $('.browser span').text() );
});
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment