Created
September 10, 2016 11:57
-
-
Save aloisdg/50a4f61e6dc3d7c5e56ba60be311f4b1 to your computer and use it in GitHub Desktop.
A tiny scraper made with TypeScript and Cheerio
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// https://www.codementor.io/nodejs/tutorial/how-to-write-a-web-scraper-in-nodejs
const cheerioReq = require("cheerio-req")
const fs = require('fs')
/**
 * A professional sector: a named group of job titles.
 *
 * Instances serialize with `JSON.stringify` as `{ "name": ..., "jobs": [...] }`.
 */
class Career {
  /** Name of the sector; set once in the constructor. */
  readonly name: string
  /** Job titles in insertion order; extend it through `add`. */
  readonly jobs: string[]

  /**
   * @param name Name of the sector.
   */
  constructor(name: string) {
    // Assign `name` before `jobs` so the serialized key order stays name, jobs.
    this.name = name
    this.jobs = []
  }

  /**
   * Appends a job title to this sector.
   *
   * @param job Job title to append.
   */
  add(job: string): void {
    this.jobs.push(job)
  }
}
// Fetch the Wikimini list-of-jobs page, build one Career per <h2> heading from
// the children of the element that follows it, and write the result to jobs.json.
cheerioReq("http://fr.wikimini.org/wiki/Liste_des_m%C3%A9tiers", (err, $) => {
  // Fail loudly when the request itself failed instead of crashing later on an
  // undefined `$`; same style as the writeFile handler below.
  if (err) throw err

  const careers: Career[] = []
  $('h2').each((_, heading) => {
    const career = new Career($(heading).text().trim())
    // Each child of the element right after the heading contributes one job,
    // using its trimmed text.
    $(heading).next().children().each((_, item) => {
      career.add($(item).text().trim())
    })
    careers.push(career)
  })

  // Drop the first and the last <h2>: presumably page headings that are not
  // job sectors (TODO confirm against the live page). Cheerio's slice would
  // work here too.
  careers.shift()
  careers.pop()

  fs.writeFile("jobs.json", JSON.stringify(careers), err => {
    if (err) throw err
    console.log('It\'s saved!')
  })
})
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment