Skip to content

Instantly share code, notes, and snippets.

@zcaceres
Created January 21, 2020 21:16
Show Gist options
  • Save zcaceres/fd2885d389f0d3aae065d3ff21c75a55 to your computer and use it in GitHub Desktop.
Save zcaceres/fd2885d389f0d3aae065d3ff21c75a55 to your computer and use it in GitHub Desktop.
A simple parser for pulling structured data from the HTML of an Instagram profile.
const jsdom = require('jsdom')
const {
JSDOM
} = jsdom
/**
 * Build a JSDOM instance from a raw HTML response body.
 *
 * @param {string} responseBody - HTML markup to hand to the JSDOM constructor.
 * @returns {JSDOM} The JSDOM instance constructed from the markup.
 */
function toJSDOM(responseBody) {
  const parsedPage = new JSDOM(responseBody)
  return parsedPage
}
/**
* Pull structured data from the HTML of an Instagram page (e.g. www.instagram.com/theshaderoom)
*/
module.exports = function parse(body) {
const rawDom = toJSDOM(body)
const dom = rawDom.window.document.body
function formatNumber(text, labelText) {
let num = text.replace(labelText, '').replace(/,/g, '')
// '17.4m'
if (text.includes('m')) {
// 17.4
// expand out millions
condensedNum = Number(condensedNum.replace('m', '')) * 1000000
}
return Number(num.trim())
}
const blocks = {
posts: null,
followers: null,
following: null
}
// Map description row chunks (mostly lacking good semantic CSS) into structured data
Array.from(dom.querySelectorAll('ul li'))
.map(node => node.textContent)
.forEach(textNode => {
// '128,123 posts'
if (textNode.includes(' posts')) {
blocks.posts = formatNumber(textNode, ' posts')
// '27.4m followers'
} else if (textNode.includes(' followers')) {
blocks.followers = formatNumber(textNode, ' followers', '')
// '128,123 following'
} else if (textNode.includes(' following')) {
blocks.following = formatNumber(textNode, ' following', '')
}
})
return {
name: dom.querySelector('h1').textContent,
isVerified: Boolean(dom.querySelector('.coreSpriteVerifiedBadge')),
...blocks
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment