Created
October 7, 2014 23:20
-
-
Save DivinityArcane/ad2cc3c2e1fa146be3dc to your computer and use it in GitHub Desktop.
Simple scraper for MangaPanda.com
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php
// MangaPanda.com Scraper
// Downloads manga to be read locally, and organizes the files.
/// !!! It goes without saying, but if you don't own a physical copy of the manga, downloading it is illegal!
/// @ Justin Eittreim <eittreim.justin@live.com>

// Configuration: which manga and which inclusive chapter range to download.
$name = "ao-haru-ride"; // Name portion of the URL.
$prettyname = "Ao Haru Ride"; // Proper name, for the folder.
$first_chapter = 16; // First chapter to scrape.
$last_chapter = 44; // Last chapter to scrape.
/**
 * Get the total number of pages for a chapter.
 *
 * Downloads page 1 of the chapter and reads the count from the
 * "</select> of N</div>" text in the returned HTML.
 *
 * @param string $name Name portion of the manga's URL.
 * @param int    $chap Chapter number.
 * @return int|false The page count, or false if the download failed or the
 *                   page-count marker was not found in the HTML.
 */
function get_page_count($name, $chap) {
    $url = "http://www.mangapanda.com/{$name}/{$chap}/1";

    // file_get_contents() already returns false on failure.
    $html = file_get_contents($url);
    if ($html === false) {
        return false;
    }

    if (preg_match('%</select> of (\d+)</div>%', $html, $matches) !== 1) {
        return false;
    }

    return intval($matches[1]);
}
/**
 * Get the image URL for a single page of a chapter.
 *
 * Downloads the reader page and extracts the src attribute of the
 * <img id="img" ...> tag.
 *
 * @param string $name Name portion of the manga's URL.
 * @param int    $chap Chapter number.
 * @param int    $page Page number within the chapter.
 * @return string|false The image URL, or false if the download failed or the
 *                      image tag was not found in the HTML.
 */
function get_page($name, $chap, $page) {
    $url = "http://www.mangapanda.com/{$name}/{$chap}/{$page}";

    // Keep the fetched HTML in its own variable instead of overwriting $page.
    $html = file_get_contents($url);
    if ($html === false) {
        return false;
    }

    $pattern = '%<img id="img" width="\d+" height="\d+" src="([^"]+)" alt="[^"]+" name="img" />%';
    if (preg_match($pattern, $html, $matches) !== 1) {
        return false;
    }

    return $matches[1];
}
// Ensure the manga's output folder exists.
// mkdir()'s second argument is the permission mode; the recursive flag is the
// third, and is needed here because ./Manga itself may not exist yet.
$manga_dir = "./Manga/{$prettyname}";
if (!is_dir($manga_dir) && !mkdir($manga_dir, 0777, true) && !is_dir($manga_dir)) {
    die('FATAL: could not create directory '.$manga_dir);
}

// For every chapter from first_chapter to last_chapter, download each page
for ($chap = $first_chapter; $chap <= $last_chapter; $chap++) {
    // Ensure we have a folder for this chapter
    $chapter_dir = "{$manga_dir}/Chapter {$chap}";
    if (!is_dir($chapter_dir) && !mkdir($chapter_dir, 0777, true) && !is_dir($chapter_dir)) {
        die('FATAL: could not create directory '.$chapter_dir);
    }

    // Get the page count, so we can iterate from 1 to N
    $pages = get_page_count($name, $chap);
    if ($pages === FALSE) {
        die('FATAL: get_page_count returned FALSE for '.$name.' chapter '.$chap);
    }

    // Iterate over and download each page
    for ($page = 1; $page <= $pages; $page++) {
        $img = get_page($name, $chap, $page);
        if ($img === FALSE) {
            die('FATAL: get_page returned FALSE for '.$name.' chapter '.$chap.' page '.$page);
        }

        // Get the actual data for the page image
        $data = file_get_contents($img);
        if ($data === FALSE) {
            die('FATAL: page image data is FALSE for '.$name.' chapter '.$chap.' page '.$page);
        }

        // Write the data to an appropriately named file; stop on a failed
        // write rather than reporting the page as saved.
        $target = "{$chapter_dir}/Page {$page}.jpg";
        if (file_put_contents($target, $data) === FALSE) {
            die('FATAL: could not write '.$target);
        }
        echo("Saved chapter {$chap} page {$page}: {$img}\n");
    }
}
?>
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment