Skip to content

Instantly share code, notes, and snippets.

@andersonbosa
Last active May 31, 2022 12:06
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save andersonbosa/13684328cf2b86c9d2df4108e3de515b to your computer and use it in GitHub Desktop.
Save andersonbosa/13684328cf2b86c9d2df4108e3de515b to your computer and use it in GitHub Desktop.
/**
 * Parses an HTML string into a document using the browser's DOMParser.
 *
 * @param {string} html - Markup to parse as "text/html".
 * @returns {{parser: DOMParser, dom: Document}} The parser instance that was
 *   used together with the document it produced.
 */
function parseResponseToDOM (html) {
  const parser = new DOMParser()
  return {
    parser,
    dom: parser.parseFromString(html, "text/html")
  }
}
/**
 * Fetches `url` and hands the response body text to parseResponseToDOM.
 *
 * @param {string} url - Address of the page to download.
 * @returns {Promise<*>} Resolves to whatever parseResponseToDOM returns for
 *   the downloaded text.
 * @throws {Error} When the request fails, the server answers with a non-2xx
 *   status, or parsing throws. The underlying error is available on `cause`.
 */
async function getDomFromUrl (url) {
  try {
    const resp = await fetch(url)
    // fetch only rejects on network failures; an HTTP error page would
    // otherwise be parsed as if it were the requested document.
    if (!resp.ok) {
      throw new Error(`HTTP ${resp.status} ${resp.statusText}`)
    }
    return parseResponseToDOM(await resp.text())
  } catch (err) {
    // Propagate instead of logging and resolving to undefined: a swallowed
    // error only resurfaces later as an unrelated TypeError in the caller.
    throw new Error(`Failed to load DOM from ${url}`, { cause: err })
  }
}
// Download and parse the pensador.com "poemas_de_amor" listing at path /1/.
const pensadorDom = await getDomFromUrl('https://www.pensador.com/poemas_de_amor/1/')
// Collect every element whose id contains "phrase-block" into a real array.
const phraseBlocks = [...pensadorDom.dom.querySelectorAll('[id*=phrase-block]')]
/**
 * Reads trimmed `innerText` from an element or from one of its descendants.
 *
 * @param {Element} domItem - Element to read from or to search inside.
 * @param {string} [selector=''] - CSS selector of the descendant to read.
 *   When empty, the text of `domItem` itself is returned.
 * @returns {string|null} The trimmed text, or `null` when `selector` matches
 *   nothing inside `domItem`.
 */
function getInnerText (domItem, selector = '') {
  if (!selector) {
    return domItem.innerText.trim()
  }

  const item = domItem.querySelector(selector)
  // querySelector yields null when nothing matches; report that as null
  // rather than throwing on the missing element.
  return item ? item.innerText.trim() : null
}
/**
 * Extracts the author and phrase text from one phrase block element.
 *
 * @param {Element} domItem - Element containing `span.autor` and `p.frase`
 *   descendants.
 * @returns {{author: (string|null), phrase: (string|null)}} The author with
 *   newlines removed and the phrase text. `author` is `null` when the block
 *   has no `span.autor` element.
 */
function parsePhrases (domItem) {
  // getInnerText returns null when the selector matches nothing, so guard
  // the newline stripping instead of calling a string method on null.
  const author = getInnerText(domItem, 'span.autor')?.replaceAll('\n', '') ?? null
  const phrase = getInnerText(domItem, 'p.frase')
  return { author, phrase }
}
// Convert each phrase block element into the record produced by parsePhrases.
const parsedPhrases = phraseBlocks.map((block) => parsePhrases(block))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment