Skip to content

Instantly share code, notes, and snippets.

@DivinityArcane
Created October 7, 2014 23:20
Show Gist options
  • Save DivinityArcane/ad2cc3c2e1fa146be3dc to your computer and use it in GitHub Desktop.
Save DivinityArcane/ad2cc3c2e1fa146be3dc to your computer and use it in GitHub Desktop.
Simple scraper for MangaPanda.com
<?php
// MangaPanda.com Scraper
// Downloads manga to be read locally, and organizes the files.
/// !!! It goes without saying, but if you don't own a physical copy of the manga, downloading it is illegal!
/// @ Justin Eittreim <eittreim.justin@live.com>
// --- Configuration: edit these four values before running the script. ---
// Series slug: the path segment used in http://www.mangapanda.com/{name}/{chapter}/{page}.
$name = "ao-haru-ride";
// Human-readable title; used as the folder name under ./Manga/.
$prettyname = "Ao Haru Ride";
// First chapter number to scrape (inclusive).
$first_chapter = 16;
// Last chapter number to scrape (inclusive).
$last_chapter = 44;
/// Get the total number of pages for a chapter.
///
/// Fetches page 1 of the chapter and reads the "</select> of N</div>" marker
/// from the returned HTML.
///
/// $name - series slug used in the URL
/// $chap - chapter number
///
/// Returns the page count as an int, or FALSE if the download fails or the
/// marker is not found in the HTML.
function get_page_count($name, $chap) {
    $html = file_get_contents("http://www.mangapanda.com/{$name}/{$chap}/1");

    // Download failed: nothing to parse.
    if ($html === FALSE) {
        return FALSE;
    }

    // Marker missing (or regex error): treat as failure as well.
    if (!preg_match('%</select> of (\d+)</div>%', $html, $found)) {
        return FALSE;
    }

    return intval($found[1]);
}
/// Get the image URL for a single page of a chapter.
///
/// Fetches the reader page and extracts the src attribute of the
/// <img id="img" ...> tag.
///
/// $name - series slug used in the URL
/// $chap - chapter number
/// $page - page number within the chapter
///
/// Returns the image URL as a string, or FALSE if the download fails or the
/// image tag is not found in the HTML.
function get_page($name, $chap, $page) {
    $html = file_get_contents("http://www.mangapanda.com/{$name}/{$chap}/{$page}");
    if ($html === FALSE) {
        return FALSE;
    }

    // The first capture group is the image's src attribute.
    $img_tag = '%<img id="img" width="\d+" height="\d+" src="([^"]+)" alt="[^"]+" name="img" />%';

    return preg_match($img_tag, $html, $found) ? $found[1] : FALSE;
}
// Ensure we have our folders set up.
// mkdir() takes ($path, $mode, $recursive): the recursive flag is the THIRD
// argument, so the mode has to be passed explicitly. Without it, TRUE would be
// taken as the permission mode and the parent "./Manga/" would not be created.
if (!file_exists("./Manga/{$prettyname}/")) {
    // The is_dir() re-check tolerates the folder appearing between the two calls.
    if (!mkdir("./Manga/{$prettyname}/", 0777, true) && !is_dir("./Manga/{$prettyname}/")) {
        die('FATAL: could not create folder ./Manga/'.$prettyname.'/');
    }
}
// For every chapter from first_chapter to last_chapter, download each page.
// Any failure (folder creation, page lookup, download, write) stops the script
// with a FATAL message so a chapter is never silently left incomplete.
for ($chap = $first_chapter; $chap <= $last_chapter; $chap++) {
    // Ensure we have a folder for this chapter. Created recursively so this
    // step does not depend on the parent folders already existing.
    $chapter_dir = "./Manga/{$prettyname}/Chapter {$chap}/";
    if (!file_exists($chapter_dir) && !mkdir($chapter_dir, 0777, true) && !is_dir($chapter_dir)) {
        die('FATAL: could not create folder '.$chapter_dir);
    }

    // Get the page count, so we can iterate from 1 to N
    $pages = get_page_count($name, $chap);
    if ($pages === FALSE) {
        die('FATAL: get_page_count returned FALSE for '.$name.' chapter '.$chap);
    }

    // Iterate over and download each page
    for ($page = 1; $page <= $pages; $page++) {
        $img = get_page($name, $chap, $page);
        if ($img === FALSE) {
            die('FATAL: get_page returned FALSE for '.$name.' chapter '.$chap.' page '.$page);
        }

        // Get the actual data for the page image
        $data = file_get_contents($img);
        if ($data === FALSE) {
            die('FATAL: page image data is FALSE for '.$name.' chapter '.$chap.' page '.$page);
        }

        // Write the data to an appropriately named file. The ".jpg" extension
        // is used regardless of the actual type of the downloaded image.
        $file = "{$chapter_dir}Page {$page}.jpg";
        if (file_put_contents($file, $data) === FALSE) {
            // Do not report a page as saved when the write failed.
            die('FATAL: could not write '.$file);
        }
        echo("Saved chapter {$chap} page {$page}: {$img}\n");
    }
}
?>
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment