Last active
January 7, 2018 20:01
-
-
Save kevinchisholm/5e1b27dde8546344be51d670618607be to your computer and use it in GitHub Desktop.
Code Examples for my Blog Post: Web Scraping with Node and Cheerio.js
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Node's file-system module, used to save scraped HTML to disk.
var fs = require('fs');
// HTTP client used to download the page.
var request = require('request');
// Parser used to query the downloaded HTML.
var cheerio = require('cheerio');
// Address of the page to be scraped.
var pageURL = 'http://output.jsbin.com/xavuga';
/**
 * Requests pageURL and saves the response body to HTML/entire-page.html
 * inside this script's directory.
 *
 * A failed request or a failed write is reported on stderr; the success
 * message is logged only when the file was actually written.
 */
function scrapePage() {
  //make an HTTP request for the page to be scraped
  request(pageURL, function (error, response, responseHtml) {
    // no body to save if the request itself failed
    if (error) {
      console.error('request for ' + pageURL + ' failed:', error);
      return;
    }

    //write the entire scraped page to the local file system
    fs.writeFile(__dirname + '/HTML/entire-page.html', responseHtml, function (err) {
      if (err) {
        console.error('entire-page.html could not be written to HTML folder:', err);
        return;
      }

      console.log('entire-page.html successfully written to HTML folder');
    });
  });
}

//scrape the page
scrapePage();
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/**
 * Requests pageURL, saves the whole response body to HTML/entire-page.html
 * and saves the inner HTML of the <header> element to HTML/header.html
 * (both inside this script's directory).
 *
 * A failed request or a failed write is reported on stderr; success
 * messages are logged only for files that were actually written.
 */
function scrapePage() {
  /**
   * Writes one HTML fragment into the HTML folder and logs the outcome.
   * @param {string} fileName - file name inside the HTML folder
   * @param {?string} html - markup to write; null/undefined means nothing was selected
   */
  function saveHtml(fileName, html) {
    // cheerio's .html() yields null when the selector matched nothing;
    // skip the write instead of handing null to fs.writeFile
    if (html === null || html === undefined) {
      console.error(fileName + ' not written: no matching content in the scraped page');
      return;
    }

    fs.writeFile(__dirname + '/HTML/' + fileName, html, function (err) {
      if (err) {
        console.error(fileName + ' could not be written to HTML folder:', err);
        return;
      }

      console.log(fileName + ' successfully written to HTML folder');
    });
  }

  //make an HTTP request for the page to be scraped
  request(pageURL, function (error, response, responseHtml) {
    // nothing to parse or save if the request itself failed
    if (error) {
      console.error('request for ' + pageURL + ' failed:', error);
      return;
    }

    //write the entire scraped page to the local file system
    saveHtml('entire-page.html', responseHtml);

    //write isolated sections of the entire scraped page to the local file system

    //create the cheerio object
    var $ = cheerio.load(responseHtml);

    //grab the inner HTML of the header element
    var headerHtml = $('header').html();

    //write the header to the local file system
    saveHtml('header.html', headerHtml);
  });
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/**
 * Requests pageURL, saves the whole response body to HTML/entire-page.html
 * and saves three sections of the page to their own files in the HTML
 * folder (inside this script's directory):
 *   - inner HTML of <header>      -> header.html
 *   - inner HTML of #mainContent  -> content.html
 *   - inner HTML of <footer>      -> footer.html
 *
 * A failed request or a failed write is reported on stderr; success
 * messages are logged only for files that were actually written.
 */
function scrapePage() {
  /**
   * Writes one HTML fragment into the HTML folder and logs the outcome.
   * @param {string} fileName - file name inside the HTML folder
   * @param {?string} html - markup to write; null/undefined means nothing was selected
   */
  function saveHtml(fileName, html) {
    // cheerio's .html() yields null when the selector matched nothing;
    // skip the write instead of handing null to fs.writeFile
    if (html === null || html === undefined) {
      console.error(fileName + ' not written: no matching content in the scraped page');
      return;
    }

    fs.writeFile(__dirname + '/HTML/' + fileName, html, function (err) {
      if (err) {
        console.error(fileName + ' could not be written to HTML folder:', err);
        return;
      }

      console.log(fileName + ' successfully written to HTML folder');
    });
  }

  //make an HTTP request for the page to be scraped
  request(pageURL, function (error, response, responseHtml) {
    // nothing to parse or save if the request itself failed
    if (error) {
      console.error('request for ' + pageURL + ' failed:', error);
      return;
    }

    //write the entire scraped page to the local file system
    saveHtml('entire-page.html', responseHtml);

    //write isolated sections of the entire scraped page to the local file system

    //create the cheerio object
    var $ = cheerio.load(responseHtml);

    //write the header to the local file system
    saveHtml('header.html', $('header').html());

    //write the content to the local file system
    saveHtml('content.html', $('#mainContent').html());

    //write the footer to the local file system
    saveHtml('footer.html', $('footer').html());
  });
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment