Skip to content

Instantly share code, notes, and snippets.

@hlfcoding
Last active December 11, 2015 18:28
Show Gist options
  • Save hlfcoding/4641715 to your computer and use it in GitHub Desktop.
Save hlfcoding/4641715 to your computer and use it in GitHub Desktop.
Flickr photoset downloader. Simple and stupid.
<?php
# Config.
/*
Requires:
- An array of photo ids assigned to `$ids` in the file named by `$file` below. You can collect the ids with the included JS: run it in the browser console on each page of the photo-set. Crude, but it works.
- A writable ('public') subdirectory named 'photos' next to this script.
- The url pattern for the original-size photo pages.
Sample urls:
http://www.flickr.com/photos/themuseumofmodernart/4479621448/sizes/o - Pages.
http://farm3.staticflickr.com/2779/4479621448_nnnnnnnnnn_o.jpg - Images.
http://farm5.staticflickr.com/4060/4479434490_nnnnnnnnnn_o.jpg
*/
# PHP file that is included by the script and must define the $ids array.
$file = 'flickr-photo-ids.php';
# sprintf() format of a photo's original-size page; %d receives the photo id.
$page_url_format = 'http://www.flickr.com/photos/themuseumofmodernart/%d/sizes/o';
# Regex whose first capture group is the image url. The group excludes double
# quotes so that it cannot run past the end of the src attribute when several
# <img> tags share one line.
$image_url_pattern = '/<img src="([^"]+\.jpg)">/';
# Directory intended to receive the downloaded images.
$image_dir = __DIR__.'/photos';
# Script.
# Print $str followed by a newline. When $is_section is truthy the message is
# additionally surrounded by one blank line before and one after.
# Returns nothing (null).
function d_log($str, $is_section=false){
    # Empty padding for plain messages, a newline for section headers.
    $padding = $is_section ? PHP_EOL : '';
    echo $padding;
    echo $str.PHP_EOL;
    echo $padding;
}
# Log like d_log() (same arguments), then hand the first argument back so the
# call can be wrapped around a value inside an expression.
function d_log_thru(){
    $args = func_get_args();
    call_user_func_array('d_log', $args);
    return func_get_arg(0);
}
# Create a cURL handle for $url, configured but not yet executed.
# Returns the handle produced by curl_init().
function make_handle($url){
    $options = array(
        CURLOPT_URL => $url,
        # Make curl_exec() return the response body instead of printing it.
        CURLOPT_RETURNTRANSFER => 1,
        # Seconds to wait while connecting.
        CURLOPT_CONNECTTIMEOUT => 10,
        # Identify as a desktop browser.
        CURLOPT_USERAGENT => 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_2) AppleWebKit/537.22 (KHTML, like Gecko) Chrome/25.0.1364.36 Safari/537.22',
    );
    $curl = curl_init();
    curl_setopt_array($curl, $options);
    return $curl;
}
# Fetch $url through a handle from make_handle().
# Returns whatever curl_exec() yields: the response body, or false on failure.
function get_html($url){
    $handle = make_handle($url);
    return curl_exec($handle);
}
# Extract the image url from a page's html.
#
# $html    - Page source, or false when the fetch failed.
# $pattern - PCRE pattern whose first capture group is the image url.
#
# Returns the captured url string, or null when there is nothing to download:
# the fetch failed, the pattern did not match, or the regex itself errored.
# Every failure yields null so that a caller's isset() guard rejects all of them.
function get_image($html, $pattern){
    if ($html === false) {
        d_log('No html!');
        return null;
    }
    $matches = array();
    # preg_match() gives 1 on a match, 0 on no match and false on error;
    # only a real match may read the capture group.
    if (preg_match($pattern, $html, $matches) !== 1) return null;
    return isset($matches[1]) ? $matches[1] : null;
}
# Download the image at $url and write it to "<dir>/<name>.jpg".
#
# $url  - Image url; anything that is not a non-empty string is skipped.
# $name - File name without extension.
# $dir  - Optional target directory. Defaults to the global $image_dir, which
#         is not visible inside a function unless fetched explicitly, falling
#         back to a 'photos' directory next to this script.
#
# Returns nothing. A failed download is logged and no file is written.
function save_image($url, $name, $dir=null){
    if (!is_string($url) || $url === '') return;
    if ($dir === null) {
        $dir = isset($GLOBALS['image_dir']) ? $GLOBALS['image_dir'] : __DIR__.'/photos';
    }
    # Log the destination first, then download.
    $path = d_log_thru("$dir/$name.jpg");
    $data = curl_exec(make_handle($url));
    if ($data === false) {
        d_log("Download failed: $url");
        return;
    }
    file_put_contents($path, $data);
}
# Load the photo ids; the included file is expected to define $ids.
include $file;
if (!isset($ids) || !is_array($ids)) {
    d_log("No photo ids; '$file' not found!", true);
    die;
}
d_log('Starting in '.__DIR__.'...', true);
# Photos are processed in chunks with a pause in between.
$chunk_size = 10;
$chunks = array_chunk($ids, $chunk_size);
$total_chunks = count($chunks);
# 1-based chunk window: $from_chunk is inclusive, $until_chunk is exclusive.
# Adjust these to resume an interrupted run.
$from_chunk = 1;
$until_chunk = $total_chunks + 1;
# Last chunk that will actually be processed; no pause is needed after it.
$last_chunk = min($until_chunk - 1, $total_chunks);
foreach ($chunks as $i => $chunk_ids) {
    $chunk_ordl = $i + 1;
    if ($chunk_ordl >= $until_chunk || $chunk_ordl < $from_chunk) continue;
    foreach ($chunk_ids as $j => $id) {
        # Ordinal is the photo's position in the full id list, so file names
        # stay stable when a run starts from a later chunk.
        $image_ordl = $i * $chunk_size + $j + 1;
        $html = get_html(sprintf($page_url_format, $id));
        $image_url = get_image($html, $image_url_pattern);
        save_image($image_url, "{$image_ordl}_{$id}");
    }
    if ($chunk_ordl < $last_chunk) {
        d_log('Taking a break...', true);
        sleep(5);
    }
}
d_log('Done!', true);
// Console helper for the photo-set page: evaluates to the photo ids found on
// the current page and schedules a click on the "next" control.
var d = document, A = Array.prototype;
// Click next for us after 10 seconds.
setTimeout(function () {
  var nextControl = d.querySelector('[data-track=next]');
  nextControl.click();
}, 10000);
// Copy the output from the console.
A.map.call(d.querySelectorAll('[data-photo-id]'), function (node) {
  return node.getAttribute('data-photo-id');
});
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment