Skip to content

Instantly share code, notes, and snippets.

@yoya
Last active February 17, 2017 04:57
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save yoya/de609ca73d094fc609da6a8c853338f4 to your computer and use it in GitHub Desktop.
Save yoya/de609ca73d094fc609da6a8c853338f4 to your computer and use it in GitHub Desktop.
Metropolitan Museum of Art Public Images Downloader
<?php
/*
* Metropolitan Museum of Art Public Images Downloader
* (c) 2017/02/17(Fri) yoya@awm.jp
* Usage) php getMetImages.php MetObjects.csv
*/
// The CSV path is mandatory; fail with the usage line instead of an
// undefined-offset notice followed by an SplFileObject error.
if (!isset($argv[1])) {
    fwrite(STDERR, "Usage) php getMetImages.php MetObjects.csv".PHP_EOL);
    exit(1);
}

$file = new SplFileObject($argv[1]);
$file->setFlags(SplFileObject::READ_CSV | SplFileObject::READ_AHEAD | SplFileObject::SKIP_EMPTY | SplFileObject::DROP_NEW_LINE);

// Column names taken from the first non-empty CSV record.
$keys = [];
// Record counter: 0 is the header row, 1.. are data rows. It also names
// the saved "<idx>.html" files. Skipped (empty) records do not advance it.
$idx = 0;

foreach ($file as $record) {
    // Skip records with no usable first field (e.g. a blank line parsed as [null]).
    if (!is_array($record) || !isset($record[0])) {
        continue;
    }

    // Normalise every field: drop a leading UTF-8 byte order mark and
    // surrounding whitespace.
    foreach ($record as $i => $value) {
        if (substr($value, 0, 3) === "\xef\xbb\xbf") { // <U+FEFF>
            $value = substr($value, 3);
        }
        $record[$i] = trim($value);
    }

    if ($idx === 0) {
        $keys = $record;
    } else {
        echo "$idx".PHP_EOL;

        // Re-key the positional record by header name. Fields beyond the
        // header width have no name and are ignored rather than raising
        // an undefined-offset notice.
        $row = [];
        foreach ($record as $i => $value) {
            if (isset($keys[$i])) {
                $row[$keys[$i]] = $value;
            }
        }

        /*
         * main routine
         */
        $isPublicDomain = isset($row["Is Public Domain"]) ? $row["Is Public Domain"] : "";
        if ($isPublicDomain !== "False") {
            if ($isPublicDomain !== "True") {
                // Abort the whole run on an unrecognised flag. An uncaught
                // exception is still a fatal error, without relying on
                // trigger_error(E_USER_ERROR), which PHP 8.4 deprecates.
                $errMesg = "Unknown Is Public Domain flag:".$isPublicDomain;
                throw new UnexpectedValueException($errMesg);
            }

            $linkResource = isset($row["Link Resource"]) ? $row["Link Resource"] : "";
            echo $linkResource.PHP_EOL;

            $html = ($linkResource !== "") ? file_get_contents($linkResource) : false;
            if ($html === false) {
                // Report and move on instead of writing an empty "<idx>.html".
                fwrite(STDERR, "Failed to fetch: ".$linkResource.PHP_EOL);
            } else {
                file_put_contents("$idx.html", $html);

                // Look for the original-size image URL, preferring .jpg over
                // .png as before. The character class stops the match at
                // whitespace, quotes and angle brackets so it cannot run
                // across several URLs on one line of markup.
                $originalUrl = null;
                foreach (["jpg", "png"] as $ext) {
                    $pattern = '~https?://[^\s"\'<>]+/original/[^\s"\'<>]+\.'.$ext.'~';
                    if (preg_match($pattern, $html, $matches)) {
                        $originalUrl = $matches[0];
                        break;
                    }
                }

                if ($originalUrl !== null) {
                    echo $originalUrl.PHP_EOL;
                    // The URL comes from remote HTML: quote it before it
                    // reaches the shell. shell_exec() is what the backtick
                    // operator runs under the hood.
                    shell_exec("wget ".escapeshellarg($originalUrl));
                }
            }
        }
    }
    $idx++;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment