Skip to content

Instantly share code, notes, and snippets.

@jongacnik
Created December 6, 2016 05:13
Show Gist options
  • Save jongacnik/1dae2c9852b12e2b9a029fba4caab6ad to your computer and use it in GitHub Desktop.
Save jongacnik/1dae2c9852b12e2b9a029fba4caab6ad to your computer and use it in GitHub Desktop.
Tumblr image scrape
<?php
// Paging state: $offset is the index of the first page to fetch and $count is
// the number of posts requested per page (sent to the API as `num`).
$offset = 0;
$count = 50;
// The blog name is mandatory; without it the URL below would point at
// "http://.tumblr.com", so stop early with a usage hint instead.
if (!isset($argv[1]) || $argv[1] === '') {
fwrite(STDERR, "usage: php scrape.php tumblr-name [tag]\n");
exit(1);
}
$domain = $argv[1];
// Optional second argument: only posts carrying this tag are requested.
$tagged = isset($argv[2]) ? $argv[2] : false;
// The tag is URL-encoded so that spaces, '&', '#', etc. cannot corrupt the query string.
$api = 'http://' . $domain . '.tumblr.com/api/read/json?debug=1&num=' . $count . ($tagged ? '&tagged=' . rawurlencode($tagged) : '');
/**
 * Download every photo listed in a paged JSON feed into the current working directory.
 *
 * Pages are requested one after another by appending `&start=<count * page>` to
 * $api. For each post that has a `photos` list, the `photo-url-1280` of every
 * photo is fetched and saved under the basename of its URL. Paging stops at the
 * first page that cannot be fetched, cannot be decoded, or contains no posts.
 *
 * @param string $api    Base endpoint URL; must already contain a query string,
 *                       because `&start=...` is appended to it verbatim.
 * @param int    $count  Number of posts per page (used to compute the start index).
 * @param int    $offset Index of the first page to fetch (0 for the beginning).
 * @return void
 */
function scrape ($api, $count, $offset) {
    // Loop instead of recursing: one stack frame per page is unnecessary and
    // would grow without bound on very long blogs.
    for ($page = $offset; ; $page++) {
        $endpoint = $api . '&start=' . ($count * $page);

        $body = file_get_contents($endpoint);
        if ($body === false) {
            // file_get_contents() has already emitted a warning describing the failure.
            return;
        }

        $json = json_decode($body);
        // A non-JSON body decodes to null; treat that, a missing `posts` key
        // and an empty page all as "nothing more to do".
        if (!is_object($json) || empty($json->posts) || !is_array($json->posts)) {
            return;
        }

        foreach ($json->posts as $post) {
            $post = (array) $post;
            if (!isset($post['photos'])) {
                continue; // not a photo post
            }

            foreach ((array) $post['photos'] as $image) {
                $image = (array) $image;
                if (empty($image['photo-url-1280'])) {
                    continue; // no URL for this size
                }

                $url = $image['photo-url-1280'];
                $filename = basename($url);

                $img = file_get_contents($url); // get the image from the url
                if ($img === false) {
                    // Skip failed downloads rather than writing an empty file.
                    continue;
                }
                file_put_contents($filename, $img); // create a file
            }
        }
    }
}
// Entry point: start downloading from the page index held in $offset.
scrape($api, $count, $offset);
# From the command line, scrape all images from tumblr-name.tumblr.com:
$ php scrape.php tumblr-name
# optionally pass a tag to filter by:
$ php scrape.php tumblr-name tag
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment