Example PHP cURL function
<?php
// See https://lazycat.org/php-curl.html for license & known issues
// P.S. You'd better have a very good reason for using this instead of http://guzzlephp.org/
function httpGet($url, $ttl = 86400)
{
    /* Change this or make it an option as appropriate. If you're
     * fetching URLs that shouldn't be visible to the public, put the
     * cache folder somewhere it can't be accessed from the web.
     */
    $cache_path = dirname(__FILE__).'/cache';

    /* Check the cache first - setting $ttl to 0 overrides the check.
     * crc32() is used here to make URLs filename-safe; if you're
     * fetching millions of URLs, it might not be collision-resistant
     * enough. If you get collisions, use md5() or similar, and change
     * the sprintf() pattern to match.
     */
    $cache_file = sprintf('%s/%08X.dat', $cache_path, crc32($url));
    $cache_exists = is_readable($cache_file);

    /* If the cache is newer than the time-to-live, return it instead
     * of making a new request. The default TTL is one day (86400 s).
     */
    if ($ttl && $cache_exists && (filemtime($cache_file) > (time() - $ttl))) {
        return file_get_contents($cache_file);
    }

    /* We need to regenerate the cache. First, update the modification
     * time on the cache file so that no one else tries to update it
     * while we're working - but keep the original time for making a
     * conditional request later.
     */
    $modified_time = $cache_exists ? filemtime($cache_file) : 0;
    touch($cache_file);
    clearstatcache();

    /* Set up the cURL handle. It's important to set a User-Agent
     * that's unique to you and provides contact details, in case your
     * script misbehaves and a server owner needs to contact you. More
     * than that, it's just the polite thing to do.
     */
    $c = curl_init();
    curl_setopt($c, CURLOPT_URL, $url);
    curl_setopt($c, CURLOPT_TIMEOUT, 15);
    curl_setopt($c, CURLOPT_RETURNTRANSFER, true);
    curl_setopt($c, CURLOPT_USERAGENT,
        'ExampleFetcher/0.9 (http://example.com/; bob@example.com)');

    /* If we've got a cache, do the web a favour and make a
     * conditional HTTP request. If the server supports it, it will
     * tell us (with a 304) when nothing has changed, so we can keep
     * reusing the cache, and the response comes back faster.
     */
    if ($cache_exists) {
        curl_setopt($c, CURLOPT_TIMECONDITION, CURL_TIMECOND_IFMODSINCE);
        curl_setopt($c, CURLOPT_TIMEVALUE, $modified_time);
    }

    /* Make the request and check the result. */
    $content = curl_exec($c);
    $status = curl_getinfo($c, CURLINFO_HTTP_CODE);
    curl_close($c);

    // Document unmodified? Return the cache file.
    if ($cache_exists && ($status == 304)) {
        return file_get_contents($cache_file);
    }

    /* You could be more forgiving of errors here. I've chosen to fail
     * hard instead, because at least it'll be obvious when something
     * goes wrong.
     */
    if ($content === false || $status != 200) {
        throw new Exception(sprintf('Unexpected HTTP return code %d', $status));
    }

    /* If everything is fine, save the new cache file and make sure
     * it's world-readable and writeable by the server.
     */
    file_put_contents($cache_file, $content);
    chmod($cache_file, 0644);
    return $content;
}
?>
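A minimal usage sketch. The filename `httpget.php` and the feed URL are illustrative assumptions; it presumes the function above is saved alongside a writeable `cache/` directory:

```php
<?php
// Hypothetical usage of httpGet() above. Assumes the function lives
// in httpget.php (assumed filename) next to a writeable ./cache dir,
// and that the example URL is reachable.
require __DIR__.'/httpget.php';

try {
    // The first call populates the cache; repeat calls within the
    // TTL (here one hour) are served from disk with no HTTP request.
    $body = httpGet('https://example.com/feed.xml', 3600);
    echo strlen($body), " bytes fetched\n";
} catch (Exception $e) {
    // Any non-200 response (or transport failure) throws.
    error_log('Fetch failed: '.$e->getMessage());
}
```

Note that the cache key is just `sprintf('%08X.dat', crc32($url))`, so two different URLs with the same crc32 value would share a cache file, which is why the comments suggest md5() at higher volumes.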