Created
July 25, 2019 06:48
-
-
Save nikiizvorski/c6a90949f78ae4a9fe34884770c25a4d to your computer and use it in GitHub Desktop.
JS app that uses Request and Cheerio to scrape recipe pages and parse them for storage in Firebase Firestore
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Firebase Admin SDK, initialised from the service-account JSON that sits next to this script.
const admin = require("firebase-admin");
const serviceAccount = require("./recipebook-admin.json");

admin.initializeApp({
  credential: admin.credential.cert(serviceAccount),
  databaseURL: "dburl"
});

// Firestore handle plus the collection the scraper is meant to write into.
// NOTE(review): `pork` is only referenced by the commented-out write in
// getRecipeDetails, so nothing is persisted yet.
const db = admin.firestore();
const pork = db.collection('pork');

// HTTP client and HTML parser used by the scraping functions below.
const request = require('request');
const cheerio = require('cheerio');
// Entry point: fetch the listing page, log its header text, then follow the
// recipe links found on it. 'recipe' is a placeholder for the listing URL.
request('recipe', function (err, resp, html) {
  if (err) {
    // Report the failure instead of silently doing nothing.
    console.error('Failed to fetch the recipe listing:', err);
    return;
  }

  const $ = cheerio.load(html);
  const bot = $('h3.recipeDetailHeader.hideOnTabletToDesktop').text();
  console.log(bot);
  getIds($);
});
/**
 * Walks every anchor on the loaded page, extracts the recipe identifier(s)
 * embedded in its href, and requests the details of each one.
 *
 * The identifier is whatever sits between the "website recipe" prefix and the
 * next "/" in the link (both are placeholders for the real site's URL shape).
 *
 * @param {Function} $ - Cheerio instance returned by `cheerio.load`.
 */
function getIds($) {
  $('a').each(function () {
    // Declared locally: the previous implicit globals throw in strict mode.
    const url = $(this).attr('href');
    if (typeof url !== 'string') {
      return;
    }

    const ids = getFromBetween.get(url, "website recipe", "/");
    // Handle each match on its own rather than relying on array-to-string
    // coercion, which would glue several IDs together with commas.
    for (const id of ids) {
      if (id !== "") {
        getRecipeDetails(id);
      }
    }
  });
}
/**
 * Fetches a single recipe page and logs the fields scraped from it: name,
 * author, description, rating, image, ingredients, directions and prep time.
 *
 * Nothing is written to Firestore yet; the intended write is kept at the
 * bottom as commented-out code.
 *
 * @param {string|string[]} arrStr - Recipe identifier appended to the "rep"
 *   address prefix (a placeholder for the site's recipe base URL).
 */
function getRecipeDetails(arrStr) {
  const recipeAddr = "rep" + arrStr;
  console.log(recipeAddr);

  request(recipeAddr, function (err, resp, html) {
    if (err) {
      // Report the failure instead of silently skipping the recipe.
      console.error('Failed to fetch ' + recipeAddr + ':', err);
      return;
    }

    const $ = cheerio.load(html);

    const recipeName = $('h3.recipeDetailHeader.showOnTabletToDesktop').text();
    console.log(recipeName);

    const recipeAuthor = $('span.submitterTitle.centerLineHeight').text();
    console.log(recipeAuthor);

    const recipeDesc = $("meta[property='og:description']").attr("content");
    console.log(recipeDesc);

    const recipeRating = $("meta[property='og:rating']").attr("content");
    console.log(recipeRating);

    const recipeImage = $('img.recipeDetailSummaryImageMain').attr('src');
    console.log(recipeImage);

    // Ingredients
    // const ingredient = $('li.checkListListItem.checkListLine').text();
    const ingredient = $('ul.multiColumn.listIngredients.clearfix span').attr('title');
    console.log(ingredient);

    const result = [];
    // Cheerio calls back with (index, element); the element is the SECOND
    // argument, so reading the first one as the node yields empty text.
    $('ul.multiColumn.listIngredients.clearfix li.checkListListItem.checkListLine').each(function (index, el) {
      const text = $(el).text();
      console.log(text);
      result.push({ cookieName: text });
    });
    console.log('result', result);

    // const arrayIngredients = [];
    // arrayIngredients.push(ingredient);
    // arrayIngredients.forEach(ingredientHandle);

    // Directions: the combined step text is split into sentences on '.'.
    const direction = $('span.recipeDirectionsListItem').text().split('.');
    directionHandle(direction);

    // The first two characters of the scraped time text are dropped before logging.
    const recipeTime = $('span.prepTime__item--time').text();
    console.log(recipeTime.substring(2, recipeTime.length));

    // NOTE(review): `element` below is not defined anywhere; the ingredient
    // list collected above lives in `result`.
    // pork.doc('' + arrStr).set({
    //   name: recipeName,
    //   author: recipeAuthor,
    //   description: recipeDesc,
    //   rating: recipeRating,
    //   image: recipeImage,
    //   directions: { direction },
    //   ingredients: { element },
    //   time: recipeTime
    // });
  });
}
// let setAda = docRef.set({ | |
// first: 'Ada', | |
// last: 'Lovelace', | |
// born: 1815 | |
// }); | |
// console.log('write success') | |
// db.collection('users').get() | |
// .then((snapshot) => { | |
// snapshot.forEach((doc) => { | |
// console.log(doc.id, '=>', doc.data()); | |
// }); | |
// }) | |
// .catch((err) => { | |
// console.log('Error getting documents', err); | |
// }); | |
/**
 * Logs one ingredient with surrounding whitespace removed.
 *
 * @param {string} item - Raw ingredient text; non-string values are ignored.
 * @param {*} arrStr - Unused; kept so existing call sites keep working.
 */
function ingredientHandle(item, arrStr) {
  // Scraped attributes can be undefined; skip them rather than throw on .trim().
  if (typeof item !== 'string') {
    return;
  }
  console.log(item.trim());
}
/**
 * Logs every step of a directions list by handing each entry to itemHandle.
 *
 * @param {string[]} item - Direction fragments; anything that is not an array is ignored.
 */
function directionHandle(item) {
  if (!Array.isArray(item)) {
    return;
  }
  item.forEach(itemHandle);
}
/**
 * Logs a single direction step as "<index> <text>".
 *
 * @param {string} item - Step text; non-strings and blank/whitespace-only text are skipped.
 * @param {number} index - Position of the step in its list.
 */
function itemHandle(item, index) {
  if (typeof item !== 'string') {
    return;
  }
  // Trim before the emptiness check so whitespace-only fragments (common
  // after splitting on '.') do not produce an empty numbered line.
  const text = item.trim();
  if (text !== "") {
    console.log(index + " " + text);
  }
}
/**
 * Helper for pulling out the text found between two delimiter strings.
 *
 * `get(string, sub1, sub2)` is the main entry point. The other methods work
 * on the `string` / `results` state stored on the object and stay available
 * for callers that drive the extraction step by step.
 */
const getFromBetween = {
  results: [],
  string: "",

  /**
   * Locates the first `sub1 ... sub2` span in `this.string`.
   * The closing delimiter is searched for only AFTER the opening one, so a
   * `sub2` that appears solely before `sub1` does not count as a match.
   *
   * @param {string} sub1 - Opening delimiter.
   * @param {string} sub2 - Closing delimiter.
   * @returns {{open: number, start: number, end: number, close: number}|null}
   *   Offsets of the span (delimiters at [open, start) and [end, close)),
   *   or null when no complete span exists.
   */
  findSpan(sub1, sub2) {
    const open = this.string.indexOf(sub1);
    if (open < 0) {
      return null;
    }
    const start = open + sub1.length;
    const end = this.string.indexOf(sub2, start);
    if (end < 0) {
      return null;
    }
    return { open, start, end, close: end + sub2.length };
  },

  /**
   * Returns the text between the first `sub1` and the next `sub2`.
   *
   * @param {string} sub1 - Opening delimiter.
   * @param {string} sub2 - Closing delimiter.
   * @returns {string|false} The enclosed text, or false when there is no match.
   */
  getFromBetween(sub1, sub2) {
    const span = this.findSpan(sub1, sub2);
    if (span === null) {
      return false;
    }
    return this.string.substring(span.start, span.end);
  },

  /**
   * Removes the first `sub1 ... sub2` span, delimiters included, from `this.string`.
   *
   * @param {string} sub1 - Opening delimiter.
   * @param {string} sub2 - Closing delimiter.
   * @returns {false|undefined} false when there is nothing to remove, otherwise undefined.
   */
  removeFromBetween(sub1, sub2) {
    const span = this.findSpan(sub1, sub2);
    if (span === null) {
      return false;
    }
    this.string = this.string.slice(0, span.open) + this.string.slice(span.close);
    return undefined;
  },

  /**
   * Appends every enclosed text to `this.results`, consuming each matched
   * span from `this.string` as it goes.
   *
   * Iterative on purpose: it stops as soon as no complete span is left, so an
   * unmatched closing delimiter can no longer cause endless recursion.
   *
   * @param {string} sub1 - Opening delimiter.
   * @param {string} sub2 - Closing delimiter.
   */
  getAllResults(sub1, sub2) {
    let span = this.findSpan(sub1, sub2);
    while (span !== null) {
      this.results.push(this.string.substring(span.start, span.end));
      this.string = this.string.slice(0, span.open) + this.string.slice(span.close);
      // Two empty delimiters match without consuming anything; stop after one hit.
      if (span.close === span.open) {
        break;
      }
      span = this.findSpan(sub1, sub2);
    }
  },

  /**
   * Extracts every piece of text enclosed by `sub1` and `sub2` in `string`.
   *
   * @param {string} string - Text to search.
   * @param {string} sub1 - Opening delimiter.
   * @param {string} sub2 - Closing delimiter.
   * @returns {string[]} Enclosed texts in order of appearance (empty when none).
   */
  get(string, sub1, sub2) {
    this.results = [];
    this.string = string;
    this.getAllResults(sub1, sub2);
    return this.results;
  }
};
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment