Create a gist now

Instantly share code, notes, and snippets.

@oncletom /README.md
Last active Dec 20, 2016

What would you like to do?
Downloading daily archives of an Instagram Hashtag.

Instagram Hashtag Archiver

This script downloads the latest pictures related to a specific Instagram hashtag to your local machine. It fetches them and stores each one in a daily folder (e.g. 2013-8-16 for the 16th of August 2013).

No resume feature. No extra metadata. No OAuth pain.

Install

You need to clone this Gist and install at least CasperJS 1.1.

brew update
brew install casperjs --devel

git clone https://gist.github.com/6245811.git instagram-hashtag-gist

Usage

cd instagram-hashtag-gist

# Downloading the 200 latest pictures of Hack the Barbican 2013
# > hackthebarbican.org
casperjs instagram-hashtag.js htb2013 --limit=200

# Downloading the 100 latest pictures related to Sud Web
# > http://sudweb.fr
casperjs instagram-hashtag.js sudweb

Sample Output

.
├── 2012-7-14
│   ├── 20d3e07acd9611e1b31022000a1d03a8_7.jpg
│   └── 27d48126cd9711e1a98b22000a1e879e_7.jpg
├── 2012-7-8
│   └── 19ba6698c8ef11e1985822000a1d011d_7.jpg
├── 2013-4-22
│   └── 425da056ab3e11e28b3722000a1f99d9_7.jpg
├── 2013-6-11
│   └── b1365f08d2d111e2aea022000a9d0ee7_7.jpg
└── 2013-7-13
    ├── 607ac52aeb8811e2be0d22000a9f14df_7.jpg
    ├── 68742bfaebd711e2974122000a9e2969_7.jpg
    ├── cf24dfb8ebbc11e28cf922000a1fbaf5_7.jpg
    ├── d75cdb0aebbb11e2b39c22000a1f8adc_7.jpg
    └── ef0e89eceb9811e2b83c22000a9e184f_7.jpg
"use strict";
// Options for the CasperJS instance: images and plugins are not loaded by the
// page, waits time out after 10000 ms, and the viewport is 1024x1000.
var casperOptions = {
    waitTimeout: 10000,
    pageSettings: {
        loadImages: false,
        loadPlugins: false
    },
    viewportSize: {
        height: 1000,
        width: 1024
    }
};
var casper = require('casper').create(casperOptions);

// First positional CLI argument: the hashtag to query.
var instagramTag = casper.cli.get(0);

// The hashtag is mandatory; report the problem and ask CasperJS to exit.
if (!instagramTag) {
    casper.echo('Requiring at least a valid Instagram hashtag to query.');
    casper.exit();
}

// `--limit=N` option; falls back to 100 when absent (or falsy).
var limitOption = casper.cli.get('limit');
var threshold = limitOption ? limitOption : 100;

// Listing page for the hashtag.
var baseUrl = 'http://iconosquare.com/tag/' + instagramTag + '/';

// URLs already downloaded, and URLs still waiting to be downloaded.
var downloaded = [];
var queued = [];
/**
 * Adds a picture URL to the download queue.
 *
 * The first literal "_s.jpg" in the URL is rewritten to "_n.jpg" before
 * queuing (presumably swapping a thumbnail for a larger rendition; confirm
 * against the image host's naming scheme).
 *
 * @param {string} url - Picture URL as found in the page.
 */
function queue(url) {
    // The dot is escaped so only a literal ".jpg" matches; an unescaped dot
    // would also match sequences such as "_sXjpg" earlier in the URL.
    queued.push(url.replace(/_s\.jpg/, '_n.jpg'));
}
/**
 * Downloads every URL currently held in `queued`.
 *
 * Each URL is opened first so its "Last-Modified" response header can be
 * read; the picture is then saved under a "<year>-<month>-<day>/" folder
 * (UTC, no zero padding) using the last path segment of the response URL as
 * the file name. Handled URLs are appended to `downloaded` and removed from
 * `queued`. Does nothing when the queue is empty.
 */
function processQueue() {
    if (queued.length === 0) {
        return;
    }

    console.log("DOWNLOAD TIME!!!");
    var count = 0;

    casper.eachThen(queued, function (item) {
        // `item.data` is the queued URL for this step. It is kept under its
        // own name because the response URL seen below may differ from it
        // (e.g. after a redirect), which would make the queue lookup fail.
        var requestedUrl = item.data;

        this.thenOpen(requestedUrl, function (response) {
            var lastModified = response.headers.get("Last-Modified");
            var modified = lastModified ? new Date(lastModified) : new Date();

            // Fall back to the current date when the header is missing or
            // unparseable, rather than filing under 1970-1-1 or NaN-NaN-NaN.
            if (isNaN(modified.getTime())) {
                modified = new Date();
            }

            var folder = [
                modified.getUTCFullYear(),
                modified.getUTCMonth() + 1, // getUTCMonth() is zero-based
                modified.getUTCDate()
            ].join("-");
            var fileName = response.url.split("/").pop();

            casper.echo('Download #' + (++count) + ' ' + response.url, 'INFO');
            casper.download(response.url, folder + "/" + fileName);

            // Stacking in downloaded and removing the url from the queue.
            downloaded.push(response.url);
            var position = queued.indexOf(requestedUrl);
            if (position !== -1) {
                // Guarded: with position === -1 the slice/concat below would
                // duplicate the queue instead of shrinking it.
                queued = queued.slice(0, position).concat(queued.slice(position + 1));
            }
        });
    });
}
/**
 * Clicks the ".more" control to load further pictures, then counts the
 * thumbnails present in the page.
 *
 * While fewer than `threshold` pictures are found, it waits for ".more" to
 * show up again and repeats; if that wait times out, whatever was found is
 * queued and downloaded. Once `threshold` is reached, every thumbnail found
 * is queued and downloaded.
 */
function clickAndLoad() {
    // Queues every thumbnail source and starts the downloads.
    function queueAndDownload(sources) {
        sources.forEach(function (src) {
            queue(src);
        });
        processQueue();
    }

    // Clicking a selector that does not exist raises a CasperError, so only
    // click when the control is actually in the page.
    if (casper.exists('.more')) {
        casper.click('.more');
    }

    // Presumably '#conteneurLoaderEnCours' is the page's loading indicator;
    // later steps run once it is no longer visible.
    casper.waitWhileVisible('#conteneurLoaderEnCours', function () {});

    casper.then(function () {
        var elements = casper.getElementsAttribute('.photos-wrapper .lienPhotoGrid:only-child img', 'src');
        console.log("Found " + elements.length + " pictures…");

        if (elements.length < threshold) {
            // Not enough yet: load another page, or settle for what we have
            // if no ".more" control appears before the wait times out.
            casper.waitForSelector(".more", clickAndLoad, function () {
                queueAndDownload(elements);
            });
            return;
        }

        queueAndDownload(elements);
    });
}
// Open the hashtag listing, begin loading pictures once it is open,
// then execute the scheduled steps.
casper.start(baseUrl, clickAndLoad).run();

You don't need to install phantomjs beforehand; brew will install phantomjs as a dependency of casperjs.

Owner

oncletom commented Aug 17, 2013

Oh nice, I did not know that :-) Thanks for sharing that info, I'll update the README.

betzerra commented Dec 6, 2014

Excellent gist. Thanks for this :-)

vixCY commented Apr 24, 2015

May I know why the folder shows 0 bytes and no photos can be viewed?

Same here. I think it is due to a change or restriction on Instagram's side. Everything seems to work, but the .jpg files are zero bytes.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment