Skip to content

Instantly share code, notes, and snippets.

@brizandrew
Created December 6, 2016 21:12
Show Gist options
  • Save brizandrew/850ac0bb860bcac07f406cd6223dddc9 to your computer and use it in GitHub Desktop.
Save brizandrew/850ac0bb860bcac07f406cd6223dddc9 to your computer and use it in GitHub Desktop.
Basic web scraping example with node
/*
Basic web scraping example with node
Includes scrape and SQL insert
Variant of guide found here: https://scotch.io/tutorials/scraping-the-web-with-node-js
Scrapes the title of Anchorman 2 and places it into SQL table
*/
var express = require('express');
var fs = require('fs');
var request = require('request');
var cheerio = require('cheerio');
var mysql = require('mysql');
// Express application that serves the /scrape endpoint.
var app = express();

// Settings for the MySQL database the scraped title is written to.
// All four values are left blank here; supply your own before running.
var connectionSettings = {
    host: '',
    user: '',
    password: '',
    database: ''
};

// Single module-level MySQL connection used by saveToSQL().
var connection = mysql.createConnection(connectionSettings);
/**
 * Fetches the IMDb page for Anchorman 2, extracts the movie title and hands
 * it to saveToSQL().
 *
 * The title is the text of the first child of the last `.title_wrapper`
 * element on the page. Nothing is returned: the work happens asynchronously
 * in the request callback, and failures are reported on the console without
 * inserting anything.
 */
function scrape(){
    var url = 'http://www.imdb.com/title/tt1229340/';
    request(url, function(error, response, html){
        if(error){
            // A failed request used to be dropped without any trace.
            console.error('Request for ' + url + ' failed:', error);
            return;
        }

        var $ = cheerio.load(html);

        // The old code overwrote its result once per matching element, so the
        // last match won; .last() keeps that choice without abusing .filter().
        var wrapper = $('.title_wrapper').last();
        if(wrapper.length === 0){
            // Without this guard `undefined` was passed on and inserted.
            console.error('No .title_wrapper element found at ' + url + '; nothing saved.');
            return;
        }

        // Declared locally: the original assigned to an undeclared `title`,
        // which leaked an implicit global.
        var title = wrapper.children().first().text();
        saveToSQL(title);
    });
}
/**
 * Inserts a scraped title into the `text` column of the `test` table.
 *
 * The statement is parameterized, so the title is escaped by the driver.
 * The outcome is reported on the console: a success message once the insert
 * has completed, or the error if it failed.
 *
 * @param {string} title - Title text to store.
 */
function saveToSQL(title){
    // No connect()/end() here: the connection is a shared module-level object,
    // and ending it after the first insert made every later call fail because
    // an ended connection cannot be reconnected. query() establishes the
    // connection implicitly on first use and it is then reused.
    connection.query('INSERT INTO test(text) VALUES(?)', [title], function(err){
        if(err){
            // Previously the success message was printed even on failure.
            console.error('Failed to insert ' + title + ' into table:', err);
            return;
        }
        console.log('Inserted ' + title + ' into table.');
    });
}
// GET /scrape: start a scrape and answer right away. scrape() completes
// asynchronously, so its outcome is only visible on the server console.
function handleScrapeRequest(req, res){
    scrape();
    res.send('See console for results.');
}
app.get('/scrape', handleScrapeRequest);

// Start the HTTP server and announce where the endpoint lives.
app.listen('8000');
console.log('Listening on localhost:8000/scrape');

// Make the Express app available to modules that require this file.
module.exports = app;
exports = module.exports;
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment