Skip to content

Instantly share code, notes, and snippets.

@nikiizvorski
Created July 25, 2019 06:48
Show Gist options
  • Save nikiizvorski/c6a90949f78ae4a9fe34884770c25a4d to your computer and use it in GitHub Desktop.
Save nikiizvorski/c6a90949f78ae4a9fe34884770c25a4d to your computer and use it in GitHub Desktop.
JS App using Cheerio and Request to Firebase Firestore Parser/Scraper
var admin = require("firebase-admin");
var serviceAccount = require("./recipebook-admin.json");
// Build the Admin SDK credential from the service-account key loaded above,
// then initialise the default Firebase app with it.
// NOTE(review): "dburl" looks like a redacted placeholder — confirm the real database URL.
const firebaseCredential = admin.credential.cert(serviceAccount);
admin.initializeApp({
  credential: firebaseCredential,
  databaseURL: "dburl",
});

// Firestore client for the initialised app.
const db = admin.firestore();

// Handle to the 'pork' collection; only referenced by the commented-out
// write inside getRecipeDetails at the moment.
let pork = db.collection('pork');
var request = require('request');
var cheerio = require('cheerio');
// Entry point: download the listing page, log its header text and hand the
// parsed document to getIds so every linked recipe gets scraped.
// NOTE(review): 'recipe' looks like a redacted placeholder URL — confirm.
request('recipe', function (err, resp, html) {
  if (err) {
    // Previously a failed request was ignored without any trace.
    console.error('Failed to fetch the recipe listing page:', err);
    return;
  }

  const $ = cheerio.load(html);
  const headerText = $('h3.recipeDetailHeader.hideOnTabletToDesktop').text();
  console.log(headerText);
  getIds($);
});
/**
 * Walks every anchor of a parsed page and requests the details of each
 * recipe whose id can be extracted from the anchor's href.
 *
 * The id is whatever getFromBetween.get finds between the "website recipe"
 * marker and the following "/" in the href.
 *
 * @param {Function} $ - Cheerio-style query function for the loaded page.
 */
function getIds($) {
  $('a').each(function (index, anchor) {
    // Declared locally: the original leaked `url`/`extension` as implicit
    // globals, which throws a ReferenceError in strict mode / ES modules.
    const url = $(anchor).attr('href');
    if (typeof url !== 'string') {
      return;
    }

    const matches = getFromBetween.get(url, "website recipe", "/");
    // Dispatch one request per non-empty match instead of relying on the
    // array being coerced to a (possibly comma-joined) string downstream.
    matches.forEach(function (recipeId) {
      if (recipeId !== "") {
        getRecipeDetails(recipeId);
      }
    });
  });
}
/**
 * Downloads a single recipe page and logs the fields scraped from it:
 * name, author, description, rating, image, ingredients, directions and
 * preparation time.
 *
 * NOTE(review): the selectors presumably match one specific recipe site's
 * markup, and "rep" looks like a redacted URL prefix — confirm both.
 *
 * @param {string} arrStr - Recipe id appended to the "rep" prefix to form
 *   the address that is requested.
 */
function getRecipeDetails(arrStr) {
  const recipeAddr = "rep" + arrStr;
  console.log(recipeAddr);

  request(recipeAddr, function (err, resp, html) {
    if (err) {
      // Previously a failed request was ignored without any trace.
      console.error('Failed to fetch recipe page ' + recipeAddr + ':', err);
      return;
    }

    const $ = cheerio.load(html);

    const recipeName = $('h3.recipeDetailHeader.showOnTabletToDesktop').text();
    console.log(recipeName);
    const recipeAuthor = $('span.submitterTitle.centerLineHeight').text();
    console.log(recipeAuthor);
    const recipeDesc = $("meta[property='og:description']").attr("content");
    console.log(recipeDesc);
    const recipeRating = $("meta[property='og:rating']").attr("content");
    console.log(recipeRating);
    const recipeImage = $('img.recipeDetailSummaryImageMain').attr('src');
    console.log(recipeImage);

    // Ingredients: title attribute of the first matching span.
    const ingredient = $('ul.multiColumn.listIngredients.clearfix span').attr('title');
    console.log(ingredient);

    // Ingredients: text of every checklist line.
    const result = [];
    $('ul.multiColumn.listIngredients.clearfix li.checkListListItem.checkListLine').each(function (index, el) {
      // Cheerio calls back with (index, element); the original named its
      // only parameter `el`, so it wrapped the index and got no text.
      const lineText = $(el).text();
      console.log(lineText);
      result.push({ cookieName: lineText });
    });
    console.log('result', result);

    // Directions: the concatenated text is split on '.' and each non-empty
    // piece is logged by directionHandle/itemHandle.
    const direction = $('span.recipeDirectionsListItem').text().split('.');
    directionHandle(direction);

    const recipeTime = $('span.prepTime__item--time').text();
    // The first two characters are dropped before logging.
    // NOTE(review): reason for skipping them is not visible here — confirm.
    console.log(recipeTime.substring(2));

    // TODO: persist the scraped recipe. Draft kept from the original;
    // note that `element` is not defined anywhere in this function.
    // pork.doc('' + arrStr).set({
    //   name: recipeName,
    //   author: recipeAuthor,
    //   description: recipeDesc,
    //   rating: recipeRating,
    //   image: recipeImage,
    //   directions: { direction },
    //   ingredients: { element },
    //   time: recipeTime
    // });
  });
}
// let setAda = docRef.set({
// first: 'Ada',
// last: 'Lovelace',
// born: 1815
// });
// console.log('write success')
// db.collection('users').get()
// .then((snapshot) => {
// snapshot.forEach((doc) => {
// console.log(doc.id, '=>', doc.data());
// });
// })
// .catch((err) => {
// console.log('Error getting documents', err);
// });
/**
 * Logs one ingredient with surrounding whitespace removed.
 *
 * @param {string} item - Raw ingredient text.
 * @param {*} arrStr - Not used by this function.
 */
function ingredientHandle(item, arrStr) {
  const trimmed = item.trim();
  console.log(trimmed);
}
/**
 * Passes every entry of a directions array to itemHandle, in order.
 *
 * @param {Array} item - Direction fragments to hand over one by one.
 */
function directionHandle(item) {
  item.forEach(function (step, position, steps) {
    itemHandle(step, position, steps);
  });
}
/**
 * Logs a single direction fragment as "<index> <trimmed text>".
 * Fragments that compare loosely equal to the empty string are skipped.
 *
 * @param {string} item - Direction fragment.
 * @param {number} index - Position of the fragment in its list.
 */
function itemHandle(item, index) {
  // Loose comparison kept on purpose so the skip condition is unchanged.
  if (item == "") {
    return;
  }
  const line = [index, item.trim()].join(" ");
  console.log(line);
}
/**
 * Helper that extracts every substring enclosed between two delimiters.
 *
 * Typical use: getFromBetween.get("a<b>c<d>e", "<", ">") -> ["b", "d"].
 * `string` holds the text still to be scanned and `results` the matches
 * collected so far; both are reset by get().
 */
const getFromBetween = {
  results: [],
  string: "",

  /**
   * Returns the text between the first `sub1` and the first `sub2` that
   * follows it in `this.string`.
   *
   * @param {string} sub1 - Opening delimiter.
   * @param {string} sub2 - Closing delimiter.
   * @returns {string|false} The enclosed text, or false when no such pair exists.
   */
  getFromBetween(sub1, sub2) {
    const openAt = this.string.indexOf(sub1);
    if (openAt < 0) {
      return false;
    }
    const contentStart = openAt + sub1.length;
    // Search for the closing delimiter only after the opening one. The
    // original accepted a `sub2` located anywhere in the string, which
    // produced a reversed range (and a bogus one-character result) when
    // `sub2` occurred solely before `sub1`.
    const contentEnd = this.string.indexOf(sub2, contentStart);
    if (contentEnd < 0) {
      return false;
    }
    return this.string.substring(contentStart, contentEnd);
  },

  /**
   * Removes the first `sub1 ... sub2` span (delimiters included) from
   * `this.string`.
   *
   * @param {string} sub1 - Opening delimiter.
   * @param {string} sub2 - Closing delimiter.
   * @returns {false|undefined} false when there is nothing to remove.
   */
  removeFromBetween(sub1, sub2) {
    const between = this.getFromBetween(sub1, sub2);
    if (between === false) {
      return false;
    }
    const spanStart = this.string.indexOf(sub1);
    const spanEnd = spanStart + sub1.length + between.length + sub2.length;
    this.string = this.string.slice(0, spanStart) + this.string.slice(spanEnd);
  },

  /**
   * Collects every enclosed substring into `this.results`, consuming each
   * matched span from `this.string` as it goes.
   *
   * @param {string} sub1 - Opening delimiter.
   * @param {string} sub2 - Closing delimiter.
   */
  getAllResults(sub1, sub2) {
    // Iterative on purpose: the recursive original overflowed the stack
    // whenever a removal left the string unchanged.
    for (;;) {
      const between = this.getFromBetween(sub1, sub2);
      if (between === false) {
        return;
      }
      this.results.push(between);

      const lengthBefore = this.string.length;
      this.removeFromBetween(sub1, sub2);
      if (this.string.length === lengthBefore) {
        // Nothing was consumed (both delimiters empty); stop instead of looping forever.
        return;
      }
    }
  },

  /**
   * Resets the helper's state and returns every substring of `string`
   * found between `sub1` and `sub2`.
   *
   * @param {string} string - Text to scan.
   * @param {string} sub1 - Opening delimiter.
   * @param {string} sub2 - Closing delimiter.
   * @returns {string[]} Matches in order of appearance (empty when none).
   */
  get(string, sub1, sub2) {
    this.results = [];
    this.string = string;
    this.getAllResults(sub1, sub2);
    return this.results;
  },
};
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment