Skip to content

Instantly share code, notes, and snippets.

@Maxim-Mazurok
Last active September 17, 2018 08:11
Show Gist options
  • Star 2 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save Maxim-Mazurok/404755f94091ab748f257309779c134b to your computer and use it in GitHub Desktop.
Save Maxim-Mazurok/404755f94091ab748f257309779c134b to your computer and use it in GitHub Desktop.
Scrape Instagram posts by hashtag in NodeJS (ES6)
/* Inspired by: https://github.com/AH72KING/Instagram-scraping/blob/master/instagram_hashtag_images.php */
const https = require('https');
/**
 * Download the body of a URL with Node's built-in `http`/`https` client.
 *
 * @param {string} url - Absolute URL; the client module is chosen from its scheme.
 * @returns {Promise<string>} Resolves with the response body decoded as UTF-8.
 *   Rejects with an Error when the status code is outside 200-299, or with the
 *   underlying error when the request or the response stream fails.
 */
const getContent = function (url) {
  return new Promise((resolve, reject) => {
    // Pick the client module matching the URL scheme.
    const lib = url.startsWith('https') ? require('https') : require('http');
    const request = lib.get(url, (response) => {
      if (response.statusCode < 200 || response.statusCode > 299) {
        // Discard the body so the socket is released, and stop here instead of
        // buffering a response nobody will read.
        response.resume();
        reject(new Error(`Failed to load page, status code: ${response.statusCode}`));
        return;
      }
      // Decode in the stream: joining raw Buffer chunks would decode each chunk
      // on its own and corrupt multi-byte characters split across two chunks.
      response.setEncoding('utf8');
      const body = [];
      response.on('data', (chunk) => body.push(chunk));
      response.on('end', () => resolve(body.join('')));
      response.on('error', (err) => reject(err));
    });
    request.on('error', (err) => reject(err));
  });
};
/**
 * Collect the image URLs embedded in Instagram's public page for a hashtag.
 *
 * Downloads the tag page via `getContent`, extracts the JSON assigned to
 * `window._sharedData` in the page's inline script, and returns the
 * `display_url` of every edge under `edge_hashtag_to_media`.
 *
 * @param {string} tag - Hashtag name (without the leading `#`).
 * @returns {Promise<string[]>} Resolves with the list of `display_url` values.
 *   Rejects when the download fails, the `window._sharedData` marker is not
 *   present in the page, or the embedded JSON cannot be parsed / lacks the
 *   expected structure.
 */
function scrape_insta_hash(tag) {
  const marker = 'window._sharedData = ';
  // Start from a resolved promise so that any synchronous throw (for example
  // from encodeURIComponent) becomes a rejection rather than an exception.
  return Promise.resolve()
    .then(() => {
      // Percent-encode the tag so spaces and non-ASCII characters form a valid URL path.
      const url = `https://www.instagram.com/explore/tags/${encodeURIComponent(tag)}/`;
      return getContent(url);
    })
    .then((insta_source) => {
      const shards = insta_source.split(marker);
      if (shards.length < 2) {
        throw new Error('Could not find window._sharedData in the Instagram page');
      }
      // The JSON literal runs from the marker up to the end of the inline script.
      const insta_json = shards[1].split(';</script>')[0];
      const insta_array = JSON.parse(insta_json);
      const edges = insta_array['entry_data']['TagPage'][0]['graphql']['hashtag']['edge_hashtag_to_media']['edges'];
      return edges.map((x) => x['node']['display_url']);
    });
}
// Example run: print the image URLs found for the #it_vocabulary hashtag.
scrape_insta_hash('it_vocabulary')
  .then(images => console.log(images))
  .catch(err => {
    // Report the failure explicitly instead of leaving an unhandled rejection.
    console.error(err);
    process.exitCode = 1;
  });
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment