dev.to top posts scraping script
// Navigate to https://dev.to/top/infinity and scroll until more than
// `postsCount` posts are loaded, then run this script in the page's context
// (e.g. the browser console). The final expression evaluates to the scraped
// posts serialized as pretty-printed JSON.

/**
 * Returns the trimmed text of the first descendant of `post` matching `selector`.
 *
 * @param {{querySelector: Function}} post - Element (or element-like object) to search in.
 * @param {string} selector - CSS selector of the node whose text is wanted.
 * @returns {string} The node's `textContent`, trimmed.
 * @throws {Error} When no descendant matches `selector`.
 */
function readText(post, selector) {
  const node = post.querySelector(selector);
  if (node === null || node === undefined) {
    // Fail with a descriptive message instead of an opaque
    // "cannot read properties of null" TypeError.
    throw new Error(`Post is missing element "${selector}"`);
  }
  return node.textContent.trim();
}

/**
 * Extracts the data of a single post element.
 *
 * @param {{querySelector: Function}} post - One `.single-article` element.
 * @returns {{title: string, author: string, date: (string|undefined), tags: string[],
 *   reactionsCount: number, commentsCount: number}} The scraped fields. `date` is
 *   `undefined` when the `h4` text contains no '・' separator; the counts are `NaN`
 *   when their text does not start with a number.
 * @throws {Error} When one of the expected child elements is missing.
 */
function parsePost(post) {
  // The h4 text has the shape "<author>・<date>"; trim both halves so the
  // whitespace around the separator does not leak into the values.
  const [author, date] = readText(post, 'h4')
    .split('・')
    .map((part) => part.trim());

  return {
    title: readText(post, 'h3'),
    author,
    date,
    // filter(Boolean) turns an empty tag list into [] rather than [""].
    tags: readText(post, '.tags').split(/\s+/).filter(Boolean),
    reactionsCount: Number.parseInt(readText(post, '.reactions-count .engagement-count-number'), 10),
    commentsCount: Number.parseInt(readText(post, '.comments-count .engagement-count-number'), 10),
  };
}

/**
 * Scrapes at most the first `limit` post elements found under `root`.
 *
 * @param {{querySelectorAll: Function}} root - Document (or document-like object) to search.
 * @param {number} limit - Maximum number of post elements to inspect.
 * @returns {Array<Object>} Data of every inspected post that could be parsed, in page order.
 *   Posts that fail to parse are logged and skipped, so the result may be shorter than `limit`.
 */
function scrapePosts(root, limit) {
  const candidates = [...root.querySelectorAll('#substories .single-article')].slice(0, limit);
  const posts = [];
  for (const post of candidates) {
    try {
      posts.push(parsePost(post));
    } catch (e) {
      // A few posts are ill-formatted; log the reason and ignore them.
      console.log(e);
    }
  }
  return posts;
}

const postsCount = 500;
// Guarded so that loading this file outside a browser yields an empty result
// instead of a ReferenceError on `document`.
const postsData = typeof document === 'undefined' ? [] : scrapePosts(document, postsCount);
JSON.stringify(postsData, null, 2);
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment