Last active
April 23, 2017 10:52
-
-
Save bohwaz/98335874874413392fab0c5de234a57a to your computer and use it in GitHub Desktop.
Download images from SLSA (State Library of South Australia) collections
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php
/**
 * Download large size images from SLSA Library
 * (State Library of South Australia)
 * Copyleft (C) 2015-2017 BohwaZ http://bohwaz.net/
 * GNU AGPL license
 *
 * Command-line usage:
 *   php <this script> http://collections.slsa.sa.gov.au/resource/B+61438
 *
 * Steps performed:
 *  1. derive a filesystem-safe ID from the resource URL;
 *  2. stop early if a file named SLSA_<ID>* already exists in the current directory;
 *  3. read the page <title> (best effort) to build the output filename;
 *  4. read <url>/tiles.json and pick the tiles of the level named "z0";
 *  5. download each tile to tile-yNN-xNN.jpg in the current directory;
 *  6. assemble the tiles with ImageMagick's `montage` and delete them.
 */

if (empty($argv[1]))
{
    die('Usage: ' . $argv[0] . ' http://collections.slsa.sa.gov.au/resource/B+61438' . PHP_EOL);
}

$url = $argv[1];

// Normalize URL: drop a trailing "/continue" segment and any trailing slash
$url = preg_replace('!/continue$!', '', $url);
$url = rtrim($url, '/');

// Get the unique ID: strip the resource prefix (http or https), then replace
// every run of non-word characters with an underscore
$id = preg_replace('!^https?://collections\.slsa\.sa\.gov\.au/resource/!i', '', $url);
$id = preg_replace('/[^\w_]+/i', '_', $id);

// Skip if file has already been fetched
// (glob() may return false on error, so do not feed it to count() directly)
$existing = glob('SLSA_' . $id . '*');

if (!empty($existing))
{
    die('Already found' . PHP_EOL);
}

// Fetch page title; this is best effort, the title stays empty when the
// page cannot be fetched or has no <title>
$page = file_get_contents($url);
$title = '';

if ($page !== false && preg_match('!<title>(.*?)(?:•.*?)?</title>!is', $page, $match))
{
    $title = trim($match[1]);
}

// Fetch tiles index
$index = file_get_contents($url . '/tiles.json');
$json = ($index === false) ? null : json_decode($index);

if (empty($json))
{
    die('Invalid json' . PHP_EOL);
}

$tiles = [];
$w = $h = null;

// Only iterate when the decoded document really has a list of levels
$levels = (is_object($json) && isset($json->levels) && is_array($json->levels)) ? $json->levels : [];

// Look up for zoom level 0
foreach ($levels as $level)
{
    if (isset($level->name) && $level->name == 'z0')
    {
        $tiles = $level->tiles;
        $w = $level->width;
        $h = $level->height;
        break;
    }
}

if (empty($tiles))
{
    die('No tiles found' . PHP_EOL);
}

// Download tiles, tracking the highest column/row index seen
$max_x = 0;
$max_y = 0;

foreach ($tiles as $tile)
{
    // Row-major, zero-padded names so the "tile*.jpg" shell glob used by
    // montage lists the tiles in assembly order
    $filename = sprintf('tile-y%02d-x%02d.jpg', $tile->y, $tile->x);
    echo '.';

    // Tiles already on disk are reused, which lets an interrupted run resume
    if (!file_exists($filename) && !copy($tile->url, $filename))
    {
        // Downloaded tiles are left in place so the same URL can be retried
        die(PHP_EOL . 'Unable to download tile: ' . $tile->url . PHP_EOL);
    }

    $max_x = max($max_x, $tile->x);
    $max_y = max($max_y, $tile->y);
}

echo " $w x $h / $max_x, $max_y" . PHP_EOL;

// Create filename
$name = preg_replace('/[^\w_]+/i', '_', $id . '_' . $title);
$name = 'SLSA_' . $name . '.jpg';

// Assemble tiles with imagemagick
// (tile indexes are zero-based, hence the +1 for the grid dimensions)
$command = sprintf('montage tile*.jpg -quality 95 -tile %dx%d -geometry %dx%d+0+0 -mode Concatenate %s', $max_x+1, $max_y+1, 512, 512, escapeshellarg($name));
echo $command;
shell_exec($command);

// Keep the tiles when montage did not produce the output file, so the
// download is not lost
if (!file_exists($name))
{
    die(PHP_EOL . 'Assembling failed, tiles have been kept' . PHP_EOL);
}

// Cleanup
$leftovers = glob('tile-*.jpg');

foreach ($leftovers ? $leftovers : [] as $leftover)
{
    unlink($leftover);
}

echo PHP_EOL;
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment