Created
November 4, 2021 23:55
-
-
Save p182/1d722776620da229696804ea49f2ffdd to your computer and use it in GitHub Desktop.
Copy snippet
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php | |
/* Below is a code snippet for downloading a file from a web server to a local file.
It demonstrates useful customizations of the request (such as setting a User-Agent and Referer, often required by web sites), and how to download a file only if the copy on the web site is newer than the local copy.
It further demonstrates the processing of response headers (if set by the server) to determine the timestamp and file name. The file type is checked because some servers return a "200 OK" status with a textual "not found" page instead of a proper 404 status.
*/ | |
// Inputs expected to be defined before this point:
//   $fURI        - URL of a file located on a web server
//   $target_file - path to the local copy of that file
//
// Build a conditional-request header so the server can answer
// "304 Not Modified" when the local copy is still current.
// HTTP dates must be expressed in GMT using the fixed IMF-fixdate layout
// ("Sun, 06 Nov 1994 08:49:37 GMT"). date( "r" ) would emit local time with a
// numeric zone offset, which is not a valid HTTP-date and may be ignored or
// misread by the server.
$ifmodhdr = '';
if ( file_exists( $target_file ) ) {
    $localModTime = filemtime( $target_file );
    // filemtime() returns false when the timestamp cannot be read; in that
    // case fall back to an unconditional request.
    if ( $localModTime !== false ) {
        $ifmodhdr = 'If-Modified-Since: ' . gmdate( 'D, d M Y H:i:s', $localModTime ) . " GMT\r\n";
    }
}
// Assemble the request header lines: a User-Agent, a Referer (only when the
// caller supplied $source), and the optional If-Modified-Since line.
$requestHeaderLines = "User-Agent: Anamera-Feed/1.0\r\n";
if ( isset( $source ) && $source !== '' ) {
    $requestHeaderLines .= "Referer: $source\r\n";
}
$requestHeaderLines .= $ifmodhdr;

// Stream context for an HTTP/1.1 GET request that follows redirects.
$arrRequestHeaders = array(
    'http' => array(
        'method'           => 'GET',
        'protocol_version' => 1.1,
        'follow_location'  => 1,
        'header'           => $requestHeaderLines,
    ),
);

// copy() truncates its destination even when the server answers
// "304 Not Modified" (which has no body), which would replace a good local
// copy with an empty file. Download into a sibling temporary file instead and
// only move it over $target_file when the final response is not a 304.
$tmp_file = $target_file . '.part';
$rc = copy( $fURI, $tmp_file, stream_context_create( $arrRequestHeaders ) );

$notModified = false;
if ( $rc && !empty( $http_response_header ) ) {
    // Walk backwards: the last status line belongs to the final response
    // after any redirects.
    for ( $h = count( $http_response_header ) - 1; $h >= 0; $h-- ) {
        if ( preg_match( '@^http/\S+ (\S{3,})@i', $http_response_header[$h], $statusTokens ) > 0 ) {
            $notModified = ( $statusTokens[1] === '304' );
            break;
        }
    }
}

if ( $rc && !$notModified ) {
    // Fresh content arrived: put it in place. $rc now reflects the move.
    $rc = rename( $tmp_file, $target_file );
}
if ( file_exists( $tmp_file ) ) {
    // Discard the temporary file after a 304, a failed transfer or a failed move.
    unlink( $tmp_file );
}
// HTTP request completed; preserve the system error, if any.
// copy() returns a plain boolean rather than a stream handle, so there is
// nothing to close here. $err is set to null on success (instead of being
// unset) so that later checks can read it without an undefined-variable
// warning.
$err = $rc ? null : error_get_last();
// Parse the HTTP response headers for the status line as well as file name,
// type and date information.
// Redirects leave several header sets in $http_response_header, so walk from
// the rear and stop at the status line that opens the final response.
//
// Results:
//   $http_status             - regex match of the final status line
//                              ([1] = status code, [2] = reason phrase),
//                              or an empty array when none was found
//   $http_content_modtime    - timestamp parsed from Last-Modified
//   $http_content_image_type - image subtype taken from Content-Type
//   $http_content_filename   - base name taken from Content-Disposition
//
// NOTE(review): $arrValidTypes is not defined in this snippet; presumably it
// is a list of lower-case image subtypes supplied by the caller - confirm.
$http_status = array();
$invalidContentType = null;
if ( !empty( $http_response_header ) ) {
    for ( $i = count( $http_response_header ) - 1; $i >= 0; $i-- ) {
        if ( preg_match( '@^http/\S+ (\S{3,}) (.+)$@i', $http_response_header[$i], $http_status ) > 0 ) {
            // A status line means we have reached the beginning of the
            // response headers for the last request.
            break;
        }
        if ( preg_match( '@^(\S+):\s*(.+)\s*$@', $http_response_header[$i], $arrHeader ) < 1 ) {
            continue;
        }
        // HTTP field names are case-insensitive, so compare in lower case.
        switch ( strtolower( $arrHeader[1] ) ) {
            case 'last-modified':
                if ( !isset( $http_content_modtime ) ) {
                    $http_content_modtime = strtotime( $arrHeader[2] );
                }
                break;
            case 'content-type':
                // Only the first Content-Type met (the last one sent) counts.
                if ( !isset( $http_content_image_type ) && $invalidContentType === null ) {
                    if ( preg_match( '@^image/(\w+)@ims', $arrHeader[2], $arrTokens ) > 0
                        && in_array( strtolower( $arrTokens[1] ), $arrValidTypes, true ) ) {
                        $http_content_image_type = $arrTokens[1];
                    }
                    else {
                        // Remember the problem; whether it matters depends on
                        // the status line, which is only reached later.
                        $invalidContentType = $arrHeader[2];
                    }
                }
                break;
            case 'content-disposition':
                if ( !isset( $http_content_filename ) && preg_match( '@filename\\s*=\\s*(?|"([^"]+)"|([\\S]+));?@ims', $arrHeader[2], $arrTokens ) > 0 ) {
                    $http_content_filename = basename( $arrTokens[1] );
                }
                break;
        }
    }
}
// An unexpected content type is only an error in its own right when the
// response claims success (some servers send "200 OK" with a textual
// "not found" page) or when no status line was found at all. For any other
// status the HTTP status itself is the more accurate error to report.
if ( $invalidContentType !== null && ( empty( $http_status ) || $http_status[1] === '200' ) ) {
    throw new Exception( "Error accessing file $fURI; invalid content type: $invalidContentType", 2 );
}
// Turn the outcome of the request into an exception unless the final
// response was "200 OK".
//   - HTTP status available: throw Exception with the status as its code.
//   - No status but a system error: throw Exception with the error's
//     message and type.
//   - Neither: throw customException with code -1.
// empty() is used so that variables left undefined by earlier steps do not
// raise warnings.
if ( !empty( $http_status ) ) {
    // Exception codes must be integers; the status was captured as a string.
    $statusCode = (int) $http_status[1];
    switch ( $http_status[1] ) {
        case '200':
            // SUCCESS: HTTP Status is "200 OK"
            break;
        case '304':
            throw new Exception( "Remote file not newer: $fURI", $statusCode );
        case '404':
            throw new Exception( "Remote file not found: $fURI", $statusCode );
        default:
            throw new Exception( "HTTP Error, $http_status[2], accessing $fURI", $statusCode );
    }
}
elseif ( !empty( $err ) ) {
    // Protocol / Communication error
    throw new Exception( $err['message'], $err['type'] );
}
else {
    // No HTTP status and no error
    // NOTE(review): customException is not defined in this snippet;
    // presumably it is provided by the surrounding project - confirm.
    $firstHeader = isset( $http_response_header[0] ) ? $http_response_header[0] : '';
    throw new customException( "Unknown HTTP response accessing $fURI: $firstHeader", -1 );
}
/* | |
Notes: | |
1. Currently copy() does NOT handle the 304 response code appropriately. Instead of NOT performing a copy (and possibly signalling this through its return code), it overwrites the target file with a zero-length file.
2. There may be a problem accessing a list of remote files when HTTP 1.1 protocol is used. If you experience time-out errors, try the default 1.0 protocol version. |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment