Skip to content

Instantly share code, notes, and snippets.

@phannam1412
Last active February 12, 2017 14:49
Show Gist options
  • Save phannam1412/841db22a2c846d21cd0eda831be0a69d to your computer and use it in GitHub Desktop.
Save phannam1412/841db22a2c846d21cd0eda831be0a69d to your computer and use it in GitHub Desktop.
download manga
<?php
include 'vendor/autoload.php';
use Dompdf\Dompdf;
// Site root; run() prepends it to the site-relative chapter hrefs scraped from the index page.
$DOMAIN = 'http://manga-scan.com';
/**
 * Bundle the downloaded chapter images of a manga into PDF files.
 *
 * Chapter folders are read from download/<manga_name>/0000, 0001, ... in order
 * until the first missing folder. Images are queued chapter by chapter; as soon
 * as more than 200 images are queued they are flushed to
 * download/<manga_name>/pdf/<manga_name>_chap_<first>-<last>.pdf.
 * Whatever is still queued after the last chapter goes into a final PDF.
 * A PDF file that already exists is never regenerated.
 *
 * @param string $manga_name Folder name of the manga below download/.
 * @return void
 * @throws Exception If the pdf directory cannot be created or a PDF cannot be written.
 */
function printPdf($manga_name) {
    date_default_timezone_set('Asia/Ho_Chi_Minh');

    // Each pdf file must not contain too many images, 200 pages are good enough.
    $max_images_per_pdf = 200;

    $manga_dir = 'download/' . $manga_name;
    $pdf_dir = $manga_dir . '/pdf';
    createDir($pdf_dir);

    $html = '';   // <img> tags queued for the next PDF
    $start = 0;   // first chapter index contained in the queued images
    $total = 0;   // number of images currently queued

    // Scan each folder for images. Each folder corresponds to a chapter.
    for ($a = 0; $a < 10000; $a++) {
        $dir = $manga_dir . '/' . sprintf("%04.0f", $a);

        // The folder of this chapter doesn't exist?
        // Then the previous chapter was the last one.
        if (!is_dir($dir)) {
            break;
        }

        $names = scandir($dir);
        if ($names === false) {
            // Unreadable chapter folder: treat it as empty instead of iterating over false.
            $names = array();
        }

        // All images of this chapter are added to the html for printing.
        foreach ($names as $name) {
            $file = $dir . '/' . $name;
            if (!is_file($file)) {
                continue;
            }
            $html .= '<img src="' . $file . '">';
            // Count only real files; scandir() also returns "." and "..".
            $total++;
        }

        if ($total > $max_images_per_pdf) {
            $pdf_file = $pdf_dir . '/' . $manga_name . '_chap_' . $start . '-' . $a . '.pdf';
            writePdfIfMissing($html, $pdf_file);
            // Start a fresh batch whether the file was written or skipped. Without
            // resetting $total after a skip every following chapter would exceed
            // the limit on its own.
            $html = '';
            $start = $a + 1;
            $total = 0;
        }
    }

    // Flush the remaining chapters; $a is one past the last chapter that was read.
    if ($html !== '') {
        $pdf_file = $pdf_dir . '/' . $manga_name . '_chap_' . $start . '-' . ($a - 1) . '.pdf';
        writePdfIfMissing($html, $pdf_file);
    }
}

/**
 * Render the given html with Dompdf and save it, unless the target file already exists.
 *
 * @param string $html     Markup to render.
 * @param string $pdf_file Destination path of the PDF.
 * @return void
 * @throws Exception If the rendered PDF cannot be written to $pdf_file.
 */
function writePdfIfMissing($html, $pdf_file) {
    // Does this pdf file already exist? Skip it.
    if (file_exists($pdf_file)) {
        return;
    }
    print 'Saving pdf ' . $pdf_file . PHP_EOL;
    // Instantiate the dompdf class, render the HTML as PDF and store it on disk.
    $dompdf = new Dompdf();
    $dompdf->loadHtml($html);
    $dompdf->render();
    if (file_put_contents($pdf_file, $dompdf->output()) === false) {
        throw new Exception("Cannot write '$pdf_file' file");
    }
}
/**
 * Make sure a directory exists, creating it (and any missing parents) if needed.
 *
 * A freshly created directory is chmod'ed to 0777. An already existing
 * directory is left untouched.
 *
 * @param string $dir Path of the directory.
 * @return void
 * @throws Exception If the directory cannot be created (for example because a
 *                   regular file occupies the path) or its mode cannot be set.
 */
function createDir($dir) {
    if (is_dir($dir)) {
        return;
    }
    // Re-check is_dir() after a failed mkdir(): another process may have created
    // the directory between the check above and this call.
    if (!mkdir($dir, 0777, true) && !is_dir($dir)) {
        throw new Exception("Cannot create '$dir' directory");
    }
    // mkdir() is subject to the umask, so set the mode explicitly.
    if (chmod($dir, 0777) === false) {
        throw new Exception("Cannot set mod 777 for '$dir' directory");
    }
}
/**
 * Command line entry point.
 *
 * Reads the manga index link from the first command line argument, collects
 * the chapter links from that page, downloads every chapter into
 * download/<manga name>/ and finally bundles the images into PDF files.
 * The manga name is the last path segment of the link without its extension.
 *
 * @return void
 * @throws Exception If a required directory cannot be created.
 */
function run() {
    global $argv;
    global $DOMAIN;

    if (count($argv) < 2) {
        print 'Please specify link for download' . PHP_EOL;
        return;
    }

    $DOWNLOAD_DIR = 'download';
    $link = $argv[1];
    $info = pathinfo($link);
    $manga_name = $info['filename'];

    // Without a usable name every file would land directly in (or above) the download folder.
    if ($manga_name === '' || $manga_name === '.' || $manga_name === '..') {
        print 'Cannot derive a manga name from link ' . $link . PHP_EOL;
        return;
    }

    createDir($DOWNLOAD_DIR);
    $save_to = $DOWNLOAD_DIR . '/' . $manga_name;
    createDir($save_to);

    print 'Retrieving all chapter links...' . PHP_EOL;
    $qp = html5qp($link);
    $chap_nodes = $qp->find('.divContenuCentre .floatLeft.cacheOverflow a');

    // Must start out as an array: appending with [] to a string is a fatal error since PHP 7.1.
    $chap_links = array();
    foreach ($chap_nodes as $chap_node) {
        // The hrefs are site-relative, so prefix them with the domain.
        $chap_links[] = $DOMAIN . $chap_node->attr('href');
    }

    // The position of a link in the list is used as the chapter index.
    foreach ($chap_links as $index => $chap_link) {
        downloadChap($index, $chap_link, $save_to);
    }

    printPdf($manga_name);
}
/**
 * Download all page images of one chapter.
 *
 * Images are stored as <save_to>/<4-digit chapter index>/<4-digit page index>.<ext>.
 * Pages whose image is already on disk are skipped, so an interrupted run can
 * simply be restarted. Pages that cannot be resolved or downloaded are reported
 * and skipped; no placeholder file is left behind for them.
 *
 * @param int    $chap_index Zero-based index of the chapter.
 * @param string $chap_link  Absolute URL of the chapter's reader page.
 * @param string $save_to    Directory of the manga; the chapter folder is created below it.
 * @return void
 * @throws Exception If the chapter directory cannot be created.
 */
function downloadChap($chap_index, $chap_link, $save_to) {
    global $DOMAIN;
    // Same site root that run() uses; fall back to the literal if the global is not set.
    $domain = isset($DOMAIN) ? $DOMAIN : 'http://manga-scan.com';

    $dir = $save_to . '/' . sprintf("%04.0f", $chap_index);
    createDir($dir);

    print 'Retrieving number of pages of chapter ' . ($chap_index + 1) . '...' . PHP_EOL;
    $qp = htmlqp($chap_link);
    $total_page = intval($qp->find('#divFormNbImages')->text());
    print "Ready to download $total_page pages" . PHP_EOL;

    for ($a = 0; $a < $total_page; $a++) {
        $saved_img = $dir . '/' . sprintf("%04.0f", $a);

        // Cheap check for the common extensions before the reader page is fetched at all.
        if (file_exists($saved_img . '.png') || file_exists($saved_img . '.jpg') || file_exists($saved_img . '.jpeg')) {
            continue;
        }

        // Reader pages are numbered from 1.
        $link = $chap_link . '?page=' . ($a + 1);
        print 'Accessing web page ' . $link . PHP_EOL;
        $qp = html5qp($link);
        $src = $qp->find('#divLectureContenu img')->attr('src');
        if (!is_string($src) || $src === '') {
            print 'No image found on ' . $link . PHP_EOL;
            continue;
        }

        // Validate the extension before it is used to build the file name.
        $info = pathinfo($src);
        if (empty($info['extension'])) {
            continue;
        }
        $file_path = $saved_img . '.' . $info['extension'];
        if (file_exists($file_path)) {
            continue;
        }

        // The src is site-relative.
        $src = $domain . $src;
        print 'Downloading image...' . PHP_EOL;
        $img = file_get_contents(str_replace(' ', '%20', $src));
        if ($img === false) {
            // Do not write an empty file: it would be mistaken for a finished download on the next run.
            print 'Failed to download ' . $src . PHP_EOL;
            continue;
        }
        if (file_put_contents($file_path, $img) === false) {
            print 'Failed to save ' . $file_path . PHP_EOL;
        }
    }
}
// Script entry point: run() is invoked unconditionally whenever this file is executed.
run();
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment