Last active
May 7, 2021 08:56
-
-
Save ankitshekhawat/95cdcd7fc045bf7246d44858fe7f6bc5 to your computer and use it in GitHub Desktop.
A basic scraper for google sheets using CheerioGS #appscript #google #sheets
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Author: Ankit Shekhawat
// Setup:
//   1. Copy this code.
//   2. Import the CheerioGS library with script id: 1ReeQ6WO8kKNxoaA_O0XEQ589cIrRvEBA9qcWpNqdOP17i47u6N9M5Xh0
//   3. Set the library identifier to "Cheerio".
//   4. Set the library version to 12.
// Motivation: ImportXML has limitations.
/**
 * Parses a url with a CSS selector and returns the text inside the matching
 * element. Only the first element found is used.
 *
 * A missing argument (undefined, null or an empty string) produces a short
 * message instead of a fetch attempt, so a formula that points at a blank
 * cell shows the message rather than an error.
 *
 * @param {String} url The url to be parsed.
 * @param {String} cssSelector The CSS selector string.
 * @return {string} The text inside the element, or "Empty URL" /
 *     "Empty CSS Selector" when the corresponding argument is missing.
 * @customfunction
 */
function PARSEtext(url, cssSelector) {
  // `== null` matches both undefined and null; "" is checked separately.
  if (url == null || url === "") {
    return "Empty URL";
  }
  if (cssSelector == null || cssSelector === "") {
    return "Empty CSS Selector";
  }
  const obj = getObj_(url, cssSelector);
  return obj.text();
}
/**
 * Parses a url with a CSS selector and returns the `src` attribute of the
 * matching element. Intended for images. Only the first element found is used.
 *
 * A missing argument (undefined, null or an empty string) produces a short
 * message instead of a fetch attempt, so a formula that points at a blank
 * cell shows the message rather than an error.
 *
 * @param {String} url The url to be parsed.
 * @param {String} cssSelector The CSS selector string.
 * @return {string} The image url, or "Empty URL" / "Empty CSS Selector" when
 *     the corresponding argument is missing.
 * @customfunction
 */
function PARSEsrc(url, cssSelector) {
  // `== null` matches both undefined and null; "" is checked separately.
  if (url == null || url === "") {
    return "Empty URL";
  }
  if (cssSelector == null || cssSelector === "") {
    // Message spelling corrected ("Emptu" -> "Empty") to match the sibling functions.
    return "Empty CSS Selector";
  }
  const obj = getObj_(url, cssSelector);
  return obj.attr("src");
}
/**
 * Parses a url with a CSS selector and returns an attribute of the matching
 * element. Only the first element found is used.
 *
 * A missing argument (undefined, null or an empty string) produces a short
 * message instead of a fetch attempt, so a formula that points at a blank
 * cell shows the message rather than an error.
 *
 * @param {String} url The url to be parsed.
 * @param {String} cssSelector The CSS selector string.
 * @param {String} attr The attribute that you want to extract.
 * @return {string} The attribute string, or "Empty URL" /
 *     "Empty CSS Selector" / "Empty ATTR String" when the corresponding
 *     argument is missing.
 * @customfunction
 */
function PARSEattr(url, cssSelector, attr) {
  // `== null` matches both undefined and null; "" is checked separately.
  if (url == null || url === "") {
    return "Empty URL";
  }
  if (cssSelector == null || cssSelector === "") {
    return "Empty CSS Selector";
  }
  if (attr == null || attr === "") {
    return "Empty ATTR String";
  }
  const obj = getObj_(url, cssSelector);
  return obj.attr(attr);
}
// Internal functions: | |
/**
 * Downloads the page at `url`, loads its content with Cheerio and returns the
 * first element matching `cssSelector`.
 *
 * @param {String} url The url to fetch.
 * @param {String} cssSelector The CSS selector to look up in the fetched page.
 * @return {Object} The result of `.first()` on the Cheerio selection.
 */
function getObj_(url, cssSelector) {
  const response = UrlFetchApp.fetch(url);
  const query = Cheerio.load(response.getContentText());
  const matches = query(cssSelector);
  return matches.first();
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment