Skip to content

Instantly share code, notes, and snippets.

View gahabeen's full-sized avatar

Gabin Desserprit gahabeen

View GitHub Profile
@gahabeen
gahabeen / index.html
Last active January 27, 2017 22:43
List of companies
<!DOCTYPE html>
<html lang="en">
<head>
</head>
<body>
<!-- Data we want to scrape starts here -->
<div class="list items">
<div class="item">
<div class="header">
@gahabeen
gahabeen / structureofdata.txt
Last active January 27, 2017 22:45
Companies Structure
company
|_ name
|_ description
|_ url
|_ contact
|_ telephone
|_ employee
|_ name
|_ jobTitle
|_ email
@gahabeen
gahabeen / selectorsofdata.txt
Last active January 27, 2017 22:45
Selectors Structure of Data
company : .list.items .item
|_ name : .header [itemprop=name]
|_ description : .header [rel=description]
|_ url : .header [itemprop=name] a
|_ contact : .contact
|_ telephone : [itemprop=telephone]
|_ employee
|_ name : [itemprop=employeeName]
|_ jobTitle : [itemprop=employeeJobTitle]
|_ email : [itemprop=email]
@gahabeen
gahabeen / barecheerioextractdata.js
Last active February 2, 2018 20:06
Bare Cheerio Code to Extract Data
let cheerio = require('cheerio')
let $ = cheerio.load('our html page url here')
var companiesList = [];
// For each .item, we add all the structure of a company to the companiesList array
// Don't try to understand what follows because we will do it differently.
$('.list.items .item').each(function(index, element){
companiesList[index] = {};
var header = $(element).find('.header');
@gahabeen
gahabeen / jsonframedataextraction.js
Last active January 28, 2017 10:41
jsonframe data extraction
let cheerio = require('cheerio');
let jsonframe = require('jsonframe-cheerio');
let $ = cheerio.load('our html page url here');
jsonframe($); // initializes the plugin
var frame = {
"companies": { // setting the parent item as "companies"
"selector": ".item", // defines the elements to search for
"data": [{ // "data": [{}] defines a list of items
@gahabeen
gahabeen / barecheerioextractdataoutput.js
Created January 27, 2017 22:47
Bare Cheerio Code to Extract Data Output
// Here is the output data:
// [
// {
// "name": " Tessera ",
// "description": " Proud of our wide range of product\n\t\t\t\twe developped many project in the past 4 years. You can find the company \n\t\t\t\tin 14 different countries ",
// "contact": {
// "telephone": " Phone: (841) 467-168 ",
// "employee": {
// "name": " Mike Layn ",
// "jobTitle": " Marketing Assistant",
@gahabeen
gahabeen / jsonframedataextractionoutput.js
Created January 27, 2017 22:50
jsonframe data extraction output
// Here is the output data:
// {
// "companies": [
// {
// "name": "Tessera",
// "description": "Proud of our wide range of product we developped many project in the past 4 years. You can find the company in 14 different countries in the world. Blablabla.",
// "url": "/comp/tessera",
// "contact": {
// "telephone": "841467168",
// "employee": {
let cheerio = require('cheerio');
let jsonframe = require('jsonframe-cheerio');
request('https://www.growthhacking.fr', function (error, response, html) {
if (!error && response.statusCode == 200) {
let $ = cheerio.load('https://www.growthhacking.fr');
jsonframe($); // initializes the plugin
var frame = {
"post": { // setting the parent item as "post"
product
|_ name
|_ description
|_ image
|_ upvotes
|_ comments
product : ul.postsList_3n2Ck li
|_ name : .content_3Qj0y .title_24w6f
|_ description : .content_3Qj0y .subtle_fyrho
|_ image : img (attr: src)
|_ upvotes : [data-test=vote-button] .buttonContainer_1ROJn
|_ comments : [data-test=vote-button] + a .buttonContainer_1ROJn