Skip to content

Instantly share code, notes, and snippets.

@craiga
Last active November 18, 2015 23:30
Show Gist options
  • Save craiga/2723217 to your computer and use it in GitHub Desktop.
Save craiga/2723217 to your computer and use it in GitHub Desktop.
Get a URL.
<?php
require_once("GetUrlHttpErrorException.php");
include_once(dirname(__FILE__) . "/../removeOldFiles/removeOldFiles.php");
/**
 * Get a URL.
 *
 * Fetches a URL with cURL, optionally caching the raw response (headers and
 * body) on disk. Will remove old cached files if
 * {@link https://gist.github.com/craiga/3161529 removeOldFiles} is present.
 *
 * Only GET requests are cached, and only when $tempMaxAge is greater than zero.
 * The HTTP method is matched case-insensitively ("get" behaves like "GET").
 *
 * @param string $url The URL to get.
 * @param string $method The HTTP method to use (POST, GET or HEAD, any case). GET by default.
 * @param array $data Data to send as part of the request. Converted to a query string if the method is GET or HEAD and the URL does not already contain a "?".
 * @param array $additionalCurlOptions Additional options to be set in cURL. Will override anything set internally in this function.
 * @param int $tempMaxAge Maximum allowed age, in seconds, for temporary items. Zero by default, which disables the cache.
 * @param mixed $tempFreeSpace Amount of free space to ensure remains when writing temporary files. Passed through unchanged to removeOldFiles().
 * @param string|null $tempDir Where temporary cache and cookie files are written. Will attempt to create this directory if it doesn't exist. Inside sys_get_temp_dir() by default.
 * @param int $tempPermissions Permissions to create temporary directories and cache files with. 0700 (only available to the creating user) by default.
 * @param array|null $responseHeaders If an array is passed in, it is replaced with the response header lines.
 *
 * @return string The response body.
 *
 * @throws RuntimeException If the temporary directory or cache file can't be prepared, or cURL reports an error.
 * @throws InvalidArgumentException If the HTTP method is not POST, GET or HEAD.
 * @throws GetUrlHttpErrorException If the server responds with an HTTP status of 400 or above.
 *
 * @author Craig Anderson <craiga@craiga.id.au>
 * @link https://gist.github.com/craiga/2723217
 */
function getUrl($url, $method = "GET", $data = array(), $additionalCurlOptions = array(), $tempMaxAge = 0, $tempFreeSpace = 0, $tempDir = null, $tempPermissions = 0700, &$responseHeaders = null)
{
    // Normalise once so the query-string, cache and cURL logic all agree on
    // the method regardless of the case the caller used.
    $method = strtoupper($method);
    $rawResponse = null;
    $cacheFile = null;

    if (is_null($tempDir)) {
        $tempDir = realpath(sys_get_temp_dir()) . DIRECTORY_SEPARATOR . "getUrlCache";
    }
    if (!is_dir($tempDir)) {
        if (!mkdir($tempDir, $tempPermissions, true)) {
            throw new RuntimeException("Couldn't create temporary directory for cache and cookie files");
        }
        // Permissions set by mkdir are modified by umask; we need to explicitly set them with chmod to ensure they take effect.
        if (!chmod($tempDir, $tempPermissions)) {
            throw new RuntimeException("Couldn't change permissions on newly created temporary directory");
        }
    }
    if (!is_readable($tempDir)) {
        throw new RuntimeException("Temporary directory for cache and cookie files is not readable");
    }
    if (!is_writeable($tempDir)) {
        throw new RuntimeException("Temporary directory for cache and cookie files is not writeable");
    }

    // Requests without a body carry their data in the query string.
    if (($method == "GET" || $method == "HEAD") && count($data) > 0 && strpos($url, "?") === false) {
        $url = $url . "?" . http_build_query($data);
    }

    // Only GET requests can be cached.
    if ($method == "GET" && $tempMaxAge > 0) {
        $cacheFile = realpath($tempDir) . DIRECTORY_SEPARATOR . md5($method . $url);
        if (file_exists($cacheFile) && (time() - filemtime($cacheFile)) < $tempMaxAge) {
            $cached = file_get_contents($cacheFile);
            // An unreadable cache entry is treated as a miss rather than as a response.
            if ($cached !== false) {
                $rawResponse = $cached;
            }
        }
    }

    // removeOldFiles() is an optional helper defined outside this file.
    if (function_exists("removeOldFiles")) {
        removeOldFiles($tempDir, true, $tempMaxAge, $tempFreeSpace, true);
    }

    if (is_null($rawResponse)) {
        $curl = curl_init();
        if ($curl === false) {
            throw new RuntimeException("Couldn't initialise cURL");
        }
        try {
            curl_setopt($curl, CURLOPT_RETURNTRANSFER, true);
            curl_setopt($curl, CURLOPT_URL, $url);
            curl_setopt($curl, CURLOPT_FOLLOWLOCATION, true);
            $cookieJar = $tempDir . DIRECTORY_SEPARATOR . getmypid() . ".cookiejar"; // TODO: Test this changes between Apache requests.
            curl_setopt($curl, CURLOPT_COOKIEFILE, $cookieJar);
            curl_setopt($curl, CURLOPT_COOKIEJAR, $cookieJar);
            // Headers are included in the raw response so they can be cached and parsed below.
            curl_setopt($curl, CURLOPT_HEADER, true);

            $proxy = getenv("http_proxy");
            if ($proxy) {
                $proxyHost = parse_url($proxy, PHP_URL_HOST);
                $proxyPort = parse_url($proxy, PHP_URL_PORT);
                if ($proxyHost && $proxyPort && preg_match("/^\d+$/", $proxyPort)) {
                    curl_setopt($curl, CURLOPT_PROXY, sprintf("%s:%d", $proxyHost, $proxyPort));
                } else {
                    if (!trigger_error(sprintf("http_proxy invalid; \"%s\" should contain a host name and port number", $proxy), E_USER_WARNING)) {
                        throw new RuntimeException("Couldn't trigger warning");
                    }
                }
            }

            switch ($method) {
                case "POST":
                    curl_setopt($curl, CURLOPT_POST, true);
                    curl_setopt($curl, CURLOPT_POSTFIELDS, $data);
                    break;
                case "GET":
                    curl_setopt($curl, CURLOPT_HTTPGET, true);
                    break;
                case "HEAD":
                    curl_setopt($curl, CURLOPT_NOBODY, true);
                    break;
                default:
                    // TODO: Support methods other than POST, GET and HEAD.
                    throw new InvalidArgumentException("Unsupported HTTP method; only POST, GET and HEAD methods are supported.");
            }

            // Applied last so callers can override anything set above.
            curl_setopt_array($curl, $additionalCurlOptions);

            $rawResponse = curl_exec($curl);
            if ($rawResponse === false) {
                throw new RuntimeException(sprintf("cURL Error %d: %s", curl_errno($curl), curl_error($curl)));
            }

            // We do this instead of setting CURLOPT_FAILONERROR so you have an
            // opportunity to examine the response.
            $responseStatus = curl_getinfo($curl, CURLINFO_HTTP_CODE);
            if ($responseStatus >= 400) {
                throw new GetUrlHttpErrorException($responseStatus, $curl, $rawResponse);
            }
            curl_close($curl);
        } catch (Exception $e) {
            @curl_close($curl);
            throw $e;
        }

        if (!is_null($cacheFile)) {
            // file_put_contents() returns the byte count, so an empty response
            // legitimately yields 0; only a strict false is a failure.
            if (@file_put_contents($cacheFile, $rawResponse) === false) {
                throw new RuntimeException("Couldn't write cache entry.");
            }
            if (!chmod($cacheFile, $tempPermissions)) {
                throw new RuntimeException("Couldn't change permissions of cache entry");
            }
        }
    }

    // The raw response may hold several header blocks before the body. Every
    // leading chunk that starts with an HTTP status line is a header block;
    // everything from the first non-matching chunk onwards is the body.
    $rawHeaderParts = array();
    $responseParts = array();
    foreach (explode("\r\n\r\n", $rawResponse) as $rawResponsePart) {
        if (count($responseParts) == 0 && preg_match("/^HTTP\/\d/", $rawResponsePart)) {
            $rawHeaderParts[] = $rawResponsePart;
        } else {
            $responseParts[] = $rawResponsePart;
        }
    }
    $rawHeaders = implode("\r\n\r\n", $rawHeaderParts);
    $response = implode("\r\n\r\n", $responseParts);

    if (is_array($responseHeaders)) {
        $responseHeaders = explode("\r\n", $rawHeaders);
        // If response headers include an Expires header, set it to the time our cache expires.
        if (!is_null($cacheFile) && file_exists($cacheFile)) {
            foreach ($responseHeaders as $key => $responseHeader) {
                // Header names are case-insensitive.
                if (preg_match("/^Expires:\s*(.*)$/i", $responseHeader, $matches)) {
                    $theirExpires = strtotime($matches[1]);
                    $ourExpires = filemtime($cacheFile) + $tempMaxAge;
                    if ($ourExpires > $theirExpires) {
                        $responseHeaders[$key] = "Expires: " . date("r", $ourExpires);
                    }
                }
            }
        }
    }

    return $response;
}
<?php
/**
 * Exception describing an HTTP error response.
 *
 * Holds the HTTP status code, the raw response (headers and body) and, when a
 * cURL handle is supplied, the result of curl_getinfo() for that handle.
 *
 * Entries of the cURL info array are exposed through magic getters: a call to
 * getFooBar() returns the "foo_bar" entry (for example getContentType() reads
 * "content_type" and getUrl() reads "url"), or null when there is no such entry.
 */
class GetUrlHttpErrorException extends RuntimeException
{
    protected $_httpStatusCode = null;
    protected $_curlInfo = array();
    protected $_rawResponse = null;
    protected $_responseBody = null;
    protected $_responseHeaders = null;

    /**
     * @param int $httpStatusCode The HTTP status code of the response.
     * @param mixed $curl The cURL handle used for the request, if any. Its curl_getinfo() data is captured immediately.
     * @param string|null $rawResponse The raw response, including header blocks.
     * @param string $message Exception message. When empty, a message is built from the status code (and from the error body for Amazon S3 XML responses).
     * @param int $code Exception code.
     * @param Exception|null $previous Previous exception, if any.
     */
    public function __construct($httpStatusCode, $curl = null, $rawResponse = null, $message = "", $code = 0, Exception $previous = null)
    {
        $this->_httpStatusCode = $httpStatusCode;
        if (!is_null($curl)) {
            $this->_curlInfo = curl_getinfo($curl);
        }
        $this->_rawResponse = $rawResponse;
        if ($message == "") {
            $message = sprintf("Server responded with an HTTP %d error", $httpStatusCode);
            if ($this->getContentType() == "application/xml" && $this->getResponseHeader("Server") == "AmazonS3" && $this->getResponseBody() != "") {
                $s3Message = $this->_extractS3ErrorMessage($this->getResponseBody());
                if (!is_null($s3Message)) {
                    $message = sprintf("Amazon S3 responded with an HTTP %d error (%s)", $httpStatusCode, $s3Message);
                }
            }
        }
        parent::__construct($message, $code, $previous);
    }

    /**
     * Get the HTTP status code passed to the constructor.
     */
    public function getHttpStatusCode()
    {
        return $this->_httpStatusCode;
    }

    /**
     * Get the raw response (header blocks and body) passed to the constructor.
     */
    public function getRawResponse()
    {
        return $this->_rawResponse;
    }

    /**
     * Get the body portion of the raw response.
     */
    public function getResponseBody()
    {
        $this->_parseRawResponse();
        return $this->_responseBody;
    }

    /**
     * Get the header portion of the raw response as a single string.
     */
    public function getResponseHeaders()
    {
        $this->_parseRawResponse();
        return $this->_responseHeaders;
    }

    /**
     * Get the response headers as a name => value array.
     *
     * Lines without a colon (such as the status line) are skipped. When a
     * header name occurs more than once, the last occurrence wins.
     */
    public function getResponseHeadersAsArray()
    {
        $headerLines = explode("\r\n", $this->getResponseHeaders());
        $headersAsArray = array();
        foreach ($headerLines as $headerLine) {
            // Split at the FIRST colon: values such as dates contain colons too.
            if (preg_match("/^([^:]+):\s*(.*)$/", $headerLine, $matches)) {
                $headersAsArray[$matches[1]] = $matches[2];
            }
        }
        return $headersAsArray;
    }

    /**
     * Get a single response header, or null if it is not present.
     *
     * An exact-case match is preferred; otherwise the name is matched
     * case-insensitively, as HTTP header names are case-insensitive.
     *
     * @param string $key The header name.
     */
    public function getResponseHeader($key)
    {
        $headers = $this->getResponseHeadersAsArray();
        if (array_key_exists($key, $headers)) {
            return $headers[$key];
        }
        foreach ($headers as $name => $value) {
            if (strcasecmp($name, $key) == 0) {
                return $value;
            }
        }
        return null;
    }

    /**
     * Split the raw response into header blocks and body.
     *
     * Every leading chunk that starts with an HTTP status line is treated as a
     * header block; everything from the first other chunk onwards is the body.
     */
    protected function _parseRawResponse()
    {
        // Cast so a missing (null) raw response parses as an empty string.
        $rawResponseParts = explode("\r\n\r\n", (string) $this->_rawResponse);
        $rawHeaderParts = array();
        $responseParts = array();
        foreach ($rawResponseParts as $rawResponsePart) {
            if (count($responseParts) == 0 && preg_match("/^HTTP\/\d/", $rawResponsePart)) {
                $rawHeaderParts[] = $rawResponsePart;
            } else {
                $responseParts[] = $rawResponsePart;
            }
        }
        $this->_responseHeaders = implode("\r\n\r\n", $rawHeaderParts);
        $this->_responseBody = implode("\r\n\r\n", $responseParts);
    }

    /**
     * Extract the text of /Error/Message from an XML error body.
     *
     * @param string $xml The response body.
     * @return string|null The message, or null if the XML is malformed or does not contain exactly one such node.
     */
    private function _extractS3ErrorMessage($xml)
    {
        // Collect libxml errors internally so a malformed body can't raise
        // warnings while this exception is being constructed.
        $previousSetting = libxml_use_internal_errors(true);
        $dom = new DOMDocument();
        $loaded = $dom->loadXML($xml);
        libxml_clear_errors();
        libxml_use_internal_errors($previousSetting);
        if (!$loaded) {
            return null;
        }
        $xpath = new DOMXPath($dom);
        $nodes = $xpath->query("/Error/Message");
        if ($nodes->length == 1) {
            return $nodes->item(0)->nodeValue;
        }
        return null;
    }

    /**
     * Magic getter for cURL info entries.
     *
     * getFooBar() is translated to the key "foo_bar" and looked up in the
     * captured curl_getinfo() array; null is returned when it is not set.
     *
     * @throws BadMethodCallException If the method name does not start with "get".
     */
    public function __call($name, $args)
    {
        if (preg_match("/^get(.*)$/", $name, $matches)) {
            $camelCaseProperty = $matches[1];
            $property = strtolower(preg_replace("/(\w)([A-Z])/", "\\1_\\2", $camelCaseProperty));
            if (isset($this->_curlInfo[$property])) {
                return $this->_curlInfo[$property];
            }
            return null;
        }
        throw new BadMethodCallException();
    }
}
<?php
require("getUrl.php");
include("../removeOldFiles/removeOldFiles.php");
// Manual smoke test 1: a GET with verbose cURL output, a 60 second cache
// lifetime and 0.5 passed as the free-space argument.
$verboseCurlOptions = array(CURLOPT_VERBOSE => true);
$cacheLifetime = 60;
$freeSpace = 0.5;
getUrl("http://craiga.id.au/", "GET", array(), $verboseCurlOptions, $cacheLifetime, $freeSpace);

// Manual smoke test 2: if the request raises an HTTP error, dump what the
// exception exposes, then confirm that an unknown method is rejected.
try {
    getUrl("https://s3-ap-southeast-2.amazonaws.com/craigs-junk-bucket/file-for-getUrl-testing");
} catch (GetUrlHttpErrorException $httpError) {
    $accessors = array(
        "getMessage",
        "getContentType",
        "getUrl",
        "getRawResponse",
        "getResponseBody",
        "getResponseHeaders",
    );
    foreach ($accessors as $accessor) {
        var_dump($httpError->{$accessor}());
    }
    var_dump($httpError->getResponseHeader("Server"));

    try {
        $httpError->thing();
    } catch (BadMethodCallException $expected) {
        // expected
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment