Skip to content

Instantly share code, notes, and snippets.

@oranj
Created May 15, 2012 19:46
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save oranj/2704524 to your computer and use it in GitHub Desktop.
Save oranj/2704524 to your computer and use it in GitHub Desktop.
Apache Open Directory Fetch
#!/usr/bin/php
<?php
// --- Command-line validation -------------------------------------------------
// Usage: <script> <url> <output-file>
// Every failure below prints a message to STDERR and exits with status 1.

// The first argument is the URL of the directory listing to crawl.
if (! isset($argv[1])) {
    fputs(STDERR, "Please provide a URL to crawl\n");
    die(1);
}

// The second argument is the name of the archive to write.
if (! isset($argv[2])) {
    fputs(STDERR, "Please provide an output filename\n");
    die(1);
}

$url = $argv[1];
$target = $argv[2];

// Make sure the archive name carries a .zip extension (case-insensitive check).
if (! preg_match('/\.zip$/i', $target)) {
    $target .= '.zip';
}

// Refuse to touch an existing file; report the path that was actually checked.
if (file_exists($target)) {
    fputs(STDERR, "The file $target already exists\n");
    die(1);
}

// ZipArchive is provided by the optional "zip" extension.
if (! extension_loaded('zip')) {
    fputs(STDERR, "Cannot access PHP zip library\n");
    die(1);
}
/**
 * Recursively copy the files referenced by an HTML directory listing into a
 * zip archive.
 *
 * The listing at $base_url . $path is fetched and scanned for entries of the
 * form <li><a href="NAME">...</a></li>. An href ending in "/" is treated as a
 * sub-directory and crawled recursively; any other href is downloaded and
 * stored in the archive under its decoded relative path.
 *
 * Links that do not point below the current directory are ignored: absolute
 * paths (such as the "Parent Directory" link), full URLs, query-string and
 * fragment links, and anything containing "..".
 *
 * On an unrecoverable error (archive cannot be created or written, listing
 * cannot be fetched) a message is written to STDERR and the process exits
 * with status 1. A single file that cannot be downloaded is reported on
 * STDERR and skipped.
 *
 * @param string          $base_url    URL prefix of the listing root.
 * @param string          $destination Path of the zip file to create.
 * @param string          $path        Path below $base_url currently being
 *                                     crawled, in URL (percent-encoded) form.
 * @param ZipArchive|null $zip         Archive to append to. When NULL a new
 *                                     archive is opened at $destination.
 * @return void
 */
function apache_url_put_zip($base_url, $destination, $path = '', &$zip = NULL) {
    // When the caller did not supply $zip, nobody else can reach the archive,
    // so this call is responsible for closing (i.e. writing) it.
    $close_when_done = func_num_args() < 4;

    if (is_null($zip)) {
        $zip = new ZipArchive();
        // open() returns TRUE on success and a non-zero error code on failure,
        // so the result must be compared strictly against TRUE.
        if ($zip->open($destination, ZipArchive::CREATE) !== true) {
            fputs(STDERR, "Could not create zip\n");
            die(1);
        }
    }

    $html = file_get_contents($base_url . $path);
    if ($html === false) {
        fputs(STDERR, "Could not fetch URL {$base_url}{$path}\n");
        die(1);
    }

    if (preg_match_all('/<li><a href="(.*?)(\/?)">.*?<\/a><\/li>/i', $html, $matches)) {
        foreach ($matches[1] as $key => $href) {
            $is_dir = ($matches[2][$key] === '/');

            // The attribute value is HTML-escaped; undo that to get the URL.
            $link = html_entity_decode($href, ENT_QUOTES);

            // Skip everything that is not a relative link to a child entry:
            // empty or absolute paths (parent directory), column-sort query
            // links, fragments and fully qualified URLs.
            if ($link === ''
                || $link[0] === '/'
                || $link[0] === '?'
                || $link[0] === '#'
                || strpos($link, '://') !== false) {
                continue;
            }

            // Archive entries use the real (percent-decoded) name. The ".."
            // check runs on the decoded form so "%2e%2e" cannot slip through.
            $entry = rawurldecode($path . $link);
            if (strpos($entry, '..') !== false) {
                continue;
            }

            if ($is_dir) {
                $zip->addEmptyDir($entry . '/');
                apache_url_put_zip($base_url, $destination, $path . $link . '/', $zip);
                continue;
            }

            $contents = file_get_contents($base_url . $path . $link);
            if ($contents === false) {
                // Do not store a bogus empty entry for a failed download.
                fputs(STDERR, "Could not fetch URL {$base_url}{$path}{$link}\n");
                continue;
            }
            $zip->addFromString($entry, $contents);
        }
    }

    // The archive is only written to disk on close(); do it explicitly so a
    // write failure is reported instead of being lost.
    if ($close_when_done && ! $zip->close()) {
        fputs(STDERR, "Could not write zip\n");
        die(1);
    }
}
// Entry point: crawl the listing at $url and store everything it links to in
// the archive $target. No archive handle is passed, so the function opens one.
apache_url_put_zip($url, $target);
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment