Created
May 15, 2012 19:46
-
-
Save oranj/2704524 to your computer and use it in GitHub Desktop.
Apache Open Directory Fetch
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/php
<?php
// Command-line entry checks.
//
// Usage: <script> <url> <output-file>
//   $argv[1]  URL of the directory listing to crawl
//   $argv[2]  name of the zip file to create ('.zip' is appended if missing)
//
// Every failed precondition prints a message to STDERR and exits with status 1.

if (! isset($argv[1])) {
    fputs(STDERR, "Please provide a URL to crawl\n");
    die(1);
}
if (! isset($argv[2])) {
    fputs(STDERR, "Please provide an output filename\n");
    die(1);
}

$url    = $argv[1];
$target = $argv[2];

// Make sure the output name carries a .zip extension (case-insensitive check).
if (! preg_match('/\.zip$/i', $target)) {
    $target .= '.zip';
}

// Refuse to touch an existing file. The message reports $target, the name
// that was actually tested.
if (file_exists($target)) {
    fputs(STDERR, "The file {$target} already exists\n");
    die(1);
}

// ZipArchive is provided by the zip extension; bail out early without it.
if (! extension_loaded('zip')) {
    fputs(STDERR, "Cannot access PHP zip library\n");
    die(1);
}
/**
 * Recursively copy the contents of a directory listing into a zip archive.
 *
 * Fetches the HTML at $base_url . $path, finds every
 * `<li><a href="...">...</a></li>` entry in it, and for each one either
 * recurses (href ends in '/') or downloads the target and stores it in the
 * archive under $path . href.
 *
 * @param string          $base_url    URL prefix that every fetched path is appended to.
 * @param string          $destination Zip file to open when no archive is passed in.
 * @param string          $path        Path below $base_url being processed; also used
 *                                     as the prefix of entry names inside the archive.
 * @param ZipArchive|null $zip         Archive to add to (by reference). When null, a new
 *                                     archive is opened at $destination and assigned here.
 *
 * On an unrecoverable error (archive cannot be opened, listing cannot be
 * fetched) a message is written to STDERR and the script exits with status 1.
 */
function apache_url_put_zip($base_url, $destination, $path = '', &$zip = null) {
    if (is_null($zip)) {
        $zip = new ZipArchive();
        // ZipArchive::open() returns true on success and a non-zero integer
        // error code on failure, so the result must be compared against true;
        // a plain truthiness test would treat every error code as success.
        if ($zip->open($destination, ZipArchive::CREATE) !== true) {
            fputs(STDERR, "Could not create zip\n");
            die(1);
        }
    }

    $html = file_get_contents($base_url . $path);
    if (! $html) {
        fputs(STDERR, "Could not fetch URL {$base_url}{$path}\n");
        die(1);
    }

    // Guards against a caller handing in a falsy, non-null value as $zip.
    if (! $zip) {
        die(1);
    }

    // Capture group 1 is the href without its trailing slash, group 2 is that
    // optional trailing slash (present for directories).
    if (preg_match_all('/<li><a href="(.*?)(\/?)">.*?<\/a><\/li>/i', $html, $matches)) {
        foreach ($matches[1] as $key => $name) {
            // Only follow links that are relative children of the current
            // listing. Empty and absolute hrefs (such as a parent-directory
            // link), query-string links, full URLs and anything containing
            // '..' would lead out of the tree or back into the same listing.
            if (
                $name === ''
                || $name[0] === '/'
                || $name[0] === '?'
                || strpos($name, '..') !== false
                || strpos($name, '://') !== false
            ) {
                continue;
            }

            if ($matches[2][$key] === '/') {
                $dir = $path . $name . '/';
                $zip->addEmptyDir($dir);
                apache_url_put_zip($base_url, $destination, $dir, $zip);
                continue;
            }

            $contents = file_get_contents($base_url . $path . $name);
            if ($contents === false) {
                // Report the failed download and move on instead of silently
                // storing an empty entry.
                fputs(STDERR, "Could not fetch URL {$base_url}{$path}{$name}\n");
                continue;
            }
            $zip->addFromString($path . $name, $contents);
        }
    }
}
// Crawl the listing at $url and store everything it links to in $target.
apache_url_put_zip($url, $target);
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment