Skip to content

Instantly share code, notes, and snippets.

@256cats
Created May 8, 2015 10:17
Show Gist options
  • Star 2 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save 256cats/c84b602d3ea8253d81aa to your computer and use it in GitHub Desktop.
Save 256cats/c84b602d3ea8253d81aa to your computer and use it in GitHub Desktop.
Instagram popular-media scraper (via the API) and image downloader built on Redis and cURL. Description: http://256cats.com/how-to-scrape-instagram-and-quickly-download-images/
<?php
// Directory that downloaded photos are written to. It is created on demand so
// that later writes do not fail just because the folder is missing.
$dir = __DIR__.'/photos';
if (!is_dir($dir) && !mkdir($dir, 0777, true) && !is_dir($dir)) {
    // The second is_dir() covers another process creating the directory
    // between our check and our mkdir() call.
    throw new RuntimeException("Unable to create photo directory {$dir}");
}

// Connection to the local Redis server that holds the download queue.
$redis = new Redis();
if (!$redis->connect('127.0.0.1', 6379)) {
    throw new RuntimeException('Unable to connect to Redis at 127.0.0.1:6379');
}
/**
 * Download a URL with cURL and return the response body.
 *
 * The URL is echoed before the request is made. Redirects are followed
 * (at most 9) and both the connect and the total transfer are limited to
 * 10 seconds.
 *
 * @param string $url Address to fetch.
 * @return string|false The response body, or false when the handle cannot be
 *                      created, the transfer fails, or the server answers
 *                      with an HTTP status of 400 or above.
 */
function get($url) {
    echo $url."\n";

    $curl = curl_init();
    if ($curl === false) {
        return false;
    }

    $curlOptions = array(
        CURLOPT_ENCODING => 'gzip,deflate',
        CURLOPT_AUTOREFERER => 1,
        CURLOPT_CONNECTTIMEOUT => 10, // timeout on connect
        CURLOPT_TIMEOUT => 10, // timeout on response
        CURLOPT_URL => $url,
        // Certificate and host name are verified so that a man-in-the-middle
        // cannot substitute the downloaded content.
        CURLOPT_SSL_VERIFYPEER => true,
        CURLOPT_SSL_VERIFYHOST => 2,
        CURLOPT_FOLLOWLOCATION => true,
        CURLOPT_MAXREDIRS => 9,
        CURLOPT_RETURNTRANSFER => 1,
        CURLOPT_HEADER => 0,
        CURLOPT_USERAGENT => "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36",
        CURLOPT_VERBOSE => true,
        CURLINFO_HEADER_OUT => true,
    );
    curl_setopt_array($curl, $curlOptions);

    $data = curl_exec($curl);
    // Read the status before closing the handle; it is unavailable afterwards.
    $status = curl_getinfo($curl, CURLINFO_HTTP_CODE);
    curl_close($curl);

    // An HTTP error page is not the requested resource: report it as a
    // failure so the caller does not store it as if it were the payload.
    if ($data === false || $status >= 400) {
        return false;
    }

    return $data;
}
// Upper bound on download attempts per queue item, so that a single URL that
// never succeeds cannot stall the worker forever.
$maxAttempts = 5;

// Worker loop: block on the Redis list 'photo:queue', download each queued
// photo that is not on disk yet and store it under $dir.
while (true) {
    $item = $redis->brPop('photo:queue', 10); // wait up to 10s for a new item from Redis
    if (!$item) {
        echo "no items in Redis\n";
        continue;
    }

    // NOTE(review): unserialize() is only safe while every producer writing to
    // this queue is trusted; switch both ends to JSON if that ever changes.
    $item = unserialize($item[1]);
    if (!is_array($item) || !isset($item['filename'], $item['images']->standard_resolution->url)) {
        echo "skipping malformed queue item\n";
        continue;
    }

    // basename() keeps the target inside $dir even if the queued name
    // contains directory components.
    $filename = $dir.'/'.basename($item['filename']);
    if (file_exists($filename)) {
        continue; // already downloaded
    }

    $url = $item['images']->standard_resolution->url;
    $photo = get($url);
    for ($retry = 1; !$photo && $retry < $maxAttempts; $retry++) {
        echo "retrying download {$retry}\n";
        sleep(2);
        $photo = get($url);
    }
    if (!$photo) {
        echo "giving up on {$filename} after {$maxAttempts} attempts\n";
        continue;
    }

    if (file_put_contents($filename, $photo) === false) {
        echo "could not write {$filename}\n";
        continue;
    }
    echo "Loaded {$filename}\n";
}
<?php
require_once 'vendor/autoload.php';
use MetzWeb\Instagram\Instagram;
date_default_timezone_set('UTC');

// Connection to the local Redis server that receives the queued items.
$redis = new Redis();
if (!$redis->connect('127.0.0.1', 6379)) {
    throw new RuntimeException('Unable to connect to Redis at 127.0.0.1:6379');
}

// Instagram API client; replace the placeholders with real credentials.
$instagram = new Instagram(array(
    'apiKey' => 'YOUR_APP_KEY',
    'apiSecret' => 'YOUR_APP_SECRET',
));
$accessToken = 'YOUR_ACCESS_TOKEN';
$instagram->setAccessToken($accessToken);

// Ask the API for the currently popular media.
$search = $instagram->getPopularMedia();

// A failed request has no usable 'data' list; fall back to an empty list so
// the code that iterates over $data does not operate on null.
if (isset($search->data) && is_array($search->data)) {
    $data = $search->data;
} else {
    echo "Instagram returned no media data\n";
    $data = array();
}
// Push every image entry of $data onto the Redis list 'photo:queue' as a
// serialized array; entries of any other type are skipped.
foreach ($data as $d) {
    if ($d->type !== 'image') {
        continue;
    }

    $item = array(
        'images' => $d->images,
        'caption' => $d->caption,
        'created_time' => $d->created_time,
        'id' => $d->id,
        // The file name is derived from the media id. The previous code read
        // an undefined $id here, which gave every item the name ".jpg".
        'filename' => $d->id.'.jpg',
    );
    $redis->lPush('photo:queue', serialize($item));
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment