Skip to content

Instantly share code, notes, and snippets.

@pchrysa
Last active February 10, 2017 10:27
Show Gist options
  • Save pchrysa/e284153a4f226e2c7468aae0db26cf5c to your computer and use it in GitHub Desktop.
Save pchrysa/e284153a4f226e2c7468aae0db26cf5c to your computer and use it in GitHub Desktop.
Scrape with Cheerio
var fs = require('fs');
var request = require('sync-request');
var cheerio = require('cheerio');
var _ = require('lodash');
// Italian month names in calendar order; each one is appended to `url` as the
// value of the `mese` query parameter.
const months = ['gennaio', 'febbraio', 'marzo', 'aprile', 'maggio', 'giugno', 'luglio', 'agosto', 'settembre', 'ottobre', 'novembre', 'dicembre'];
// Accumulates one `{ names, date }` record per day found on the scraped pages.
const dates = [];
const url = 'http://m.paginainizio.com/onomasticimob.php?mese=';

/**
 * Left-pads a day or month number with a single zero so that it is at least
 * two characters wide (9 -> "09", 10 -> "10").
 *
 * @param {number} value - Number to format.
 * @returns {string} The zero-padded decimal representation.
 */
function padTwoDigits(value) {
  const text = String(value);
  return text.length < 2 ? `0${text}` : text;
}

/**
 * Collects the names listed for a single day.
 *
 * Looks at the siblings that follow `dayElement` up to the next `div.daycal`,
 * keeps only the `<a>` elements and reads the text of the first `div.cateon`
 * child of each one.
 *
 * @param {Function} $ - Cheerio instance loaded with the month page.
 * @param {Object} dayElement - The `div.daycal` node that starts the day.
 * @returns {string[]} Names normalised with lodash `toLower` + `startCase`.
 */
function collectNames($, dayElement) {
  const siblings = $(dayElement).nextUntil('div.daycal');
  const names = [];
  // Index loop on purpose: `for...in` would also visit non-element keys of
  // the cheerio object (length, options, prototype methods, ...).
  for (let k = 0; k < siblings.length; k++) {
    if (siblings[k].name !== 'a') {
      continue;
    }
    const rawName = $(siblings[k]).children('div.cateon').first().text();
    names.push(_.startCase(_.toLower(rawName)));
  }
  return names;
}

/**
 * Downloads the page of one month and extracts a record for every
 * `div.daycal` element found on it.
 *
 * @param {number} monthIndex - Zero-based index into `months`.
 * @returns {{names: string[], date: string}[]} Records with `date` formatted
 *   as `DD/MM`.
 */
function scrapeMonth(monthIndex) {
  const response = request('GET', url + months[monthIndex]);
  const $ = cheerio.load(response.getBody('utf8'));
  const dayElements = $('div.daycal');
  const monthNumber = padTwoDigits(monthIndex + 1);
  const records = [];
  for (let i = 0; i < dayElements.length; i++) {
    // The text of the day element is expected to be the day number; it is
    // converted with Number() so surrounding whitespace is ignored.
    const dayNumber = padTwoDigits(Number($(dayElements[i]).first().text()));
    records.push({
      names: collectNames($, dayElements[i]),
      date: `${dayNumber}/${monthNumber}`
    });
  }
  return records;
}

// Months are scraped sequentially (the HTTP client is synchronous), so the
// records end up in calendar order.
months.forEach(function (month, monthIndex) {
  console.log(month, '!!month!!');
  dates.push(...scrapeMonth(monthIndex));
});

fs.writeFile('it_namedays.json', JSON.stringify({data: dates}, null, 4), function (err) {
  if (err) {
    // Report the failure instead of claiming success, and make the process
    // exit with a non-zero status.
    console.error('Could not write ./it_namedays.json file:', err);
    process.exitCode = 1;
    return;
  }
  console.log('File successfully written! - ./it_namedays.json file');
});
{
"name": "scrapper",
"version": "1.0.0",
"description": "Scrap html italian namedays site to json",
"main": "index.js",
"scripts": {
"start": "node index.js"
},
"keywords": [
"scraper",
"html-to-json"
],
"author": "Chrysa Papadopoulou",
"license": "MIT",
"dependencies": {
"cheerio": "^0.22.0",
"lodash": "^4.17.4",
"sync-request": "^4.0.1"
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment