Skip to content

Instantly share code, notes, and snippets.

@nickdunn
Created June 30, 2012 14:52
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save nickdunn/23df4e36932c3ef79ec6 to your computer and use it in GitHub Desktop.
Save nickdunn/23df4e36932c3ef79ec6 to your computer and use it in GitHub Desktop.
<?php
/**
 * Fetches remote feeds over HTTP, caches the responses, and parses them as XML.
 *
 * Configuration, the injected cache and the HTTP client are held in static
 * properties, so they are shared by every instance of this class.
 */
class Archers_GenericFeedModel {

    /** @var mixed Lazily populated by getHttpClient(). */
    protected static $_httpClient = null;

    /** @var mixed Configuration; read as an object exposing ->feed and ->cache. */
    protected static $_config = array();

    /** @var Zend_Cache_Core|null Cache handed to the constructor. */
    protected static $_cache = null;

    /**
     * @param Zend_Cache_Core $cache Stored statically and returned by getCache().
     */
    public function __construct(Zend_Cache_Core $cache) {
        self::$_cache = $cache;
    }

    /**
     * Replaces the shared configuration.
     *
     * @param mixed $config Read elsewhere in this class as
     *                      $config->feed->{name} and $config->cache->http_request.
     */
    public static function setConfig($config){
        self::$_config = $config;
    }

    /**
     * @return mixed The configuration last passed to setConfig().
     */
    public static function getConfig(){
        return self::$_config;
    }

    /**
     * Returns the cache passed to the constructor.
     *
     * Note: fetchCachedContent() does not use this cache; it obtains its cache
     * from BBC_Cache_Builder instead.
     *
     * @return Zend_Cache_Core|null
     */
    public function getCache(){
        return self::$_cache;
    }

    /**
     * Returns the stored HTTP client, asking BBC_Http_Client_Factory::getFactory()
     * for a value whenever the stored one is not a BBC_Http_Client instance.
     *
     * NOTE(review): whether getFactory() returns a BBC_Http_Client is not visible
     * from this file; if it does not, the value is re-fetched on every call.
     *
     * @return mixed
     */
    public static function getHttpClient() {
        if (!self::$_httpClient instanceof BBC_Http_Client) {
            self::$_httpClient = BBC_Http_Client_Factory::getFactory();
        }
        return self::$_httpClient;
    }

    /**
     * Expands ":name" placeholders in a URL using values from $_config->feed.
     *
     * Every placeholder whose name exists in the feed configuration is replaced.
     * Tokens with no matching configuration entry (for example a ":8080" port
     * number) are left untouched.
     *
     * The preview-URL switch (baseurl_cms_nuxeo -> baseurl_data_preview when
     * HTTP_SSLCLIENTCERTSTATUS is "OK") was already disabled and stays disabled.
     *
     * @param string $url URL that may contain ":name" placeholders.
     * @return string URL with all known placeholders substituted.
     */
    private function parseParametersInUri($url) {
        // isset() keeps this safe when no configuration (or no feed section) is set.
        $feed = isset(self::$_config->feed) ? self::$_config->feed : null;
        if ($feed === null) {
            return $url;
        }

        // A callback replaces each whole token exactly once, so ":id" can never
        // match inside ":identifier", and "$"/"\" in a configured value are not
        // interpreted as regex back-references.
        return preg_replace_callback(
            '/:([0-9A-Za-z_]+)/',
            function ($match) use ($feed) {
                $name = $match[1];
                return isset($feed->{$name}) ? (string)$feed->{$name} : $match[0];
            },
            $url
        );
    }

    /**
     * Returns the body for a URI, served from cache when possible.
     *
     * @param string        $uri         URI, optionally containing ":name" placeholders.
     * @param bool          $throw_error Not used by this method; kept so existing
     *                                   callers keep working.
     * @param int|bool|null $custom_ttl  Refresh interval in seconds. FALSE or NULL
     *                                   means "use $_config->cache->http_request".
     * @return mixed The response body (from cache or from a fresh 200 response),
     *               or NULL when the service answered with any status other than
     *               200 or 304.
     */
    public function fetchCachedContent($uri, $throw_error=TRUE, $custom_ttl=FALSE) {
        $cache = BBC_Cache_Builder::getInstance()->getContentCache();

        $ttl = (int)self::$_config->cache->http_request;
        // Both FALSE (this method's default) and NULL (getContents' default) mean
        // "no override". Previously FALSE was cast to a TTL of 0.
        if ($custom_ttl !== FALSE && !is_null($custom_ttl)) {
            $ttl = (int)$custom_ttl;
        }

        $uri = self::parseParametersInUri($uri);

        $key_prefix = 'pal_archers_';
        $key_suffix = '_' . sha1($uri);

        /*
        There are two cache keys for each object to cache. The "object" cache stores the
        actual "thing" to be cached, and it is given an unreasonably long TTL (same principle
        as setting a far-future Expires header in HTTP). The "semaphore" cache doesn't store
        any value, but exists with a TTL of the "actual" TTL of the object.

        When the semaphore cache becomes stale it is time to update the object cache. However
        under high load there may be a race condition whereby the HTTP request to refresh the
        object cache does not complete before the next request for the object arrives.
        Therefore when the semaphore cache has expired, even before the HTTP request is made,
        the semaphore cache is populated again. This allows the HTTP request to continue to
        update the object cache, while subsequent requests for the object will load the
        "stale" object.

        The object cache stores both the HTTP response body and the response's Last-Modified
        header. Subsequent attempts to refresh will send that value as If-Modified-Since so
        that, even if the semaphore has become stale, we only make a conditional request.
        If a 304 Not Modified is returned then we can use the "stale" object cache again.
        */

        // cache the object for an unreasonably long period of time
        $key_object = $key_prefix . 'object' . $key_suffix;
        $ttl_object = 60 * 60 * 24 * 30; // 30 days

        // cache the "semaphore" key for the amount of time after which the object should be refreshed
        $key_semaphore = $key_prefix . 'semaphore' . $key_suffix;
        $ttl_semaphore = $ttl; // seconds

        /*
        If the object cache is empty this is either the very first request for this key,
        or it has expired in the cache (unlikely). We want to populate it with the HTTP response.

        If the semaphore is empty this is either the very first request for this key, or
        it has expired in the cache (likely). Send If-Modified-Since first to see if we can
        use the object already in the cache for a bit longer.
        */
        $object_cache = $cache->load($key_object);
        $semaphore_cache = $cache->load($key_semaphore);

        if ($object_cache !== FALSE && $semaphore_cache !== FALSE) {
            BBC_Archers_Monitor::monitor(BBC_Archers_Monitor::PAL_Memcache_Hit);
            return $object_cache->object;
        }

        BBC_Archers_Monitor::monitor(BBC_Archers_Monitor::PAL_Memcache_Miss);

        // Set the semaphore before the HTTP request so that concurrent requests are
        // served the previously cached object while this one refreshes it.
        $cache->save(NULL, $key_semaphore, array(), $ttl_semaphore);

        $client = BBC_Http_Client_Factory::getFactory()->buildClient($uri);
        if (isset($object_cache->last_modified) && (string)$object_cache->last_modified != '') {
            $client->setHeaders('If-Modified-Since: ' . $object_cache->last_modified);
        }

        $response = $client->request('GET');
        $last_modified = (string)$response->getHeader('Last-Modified');
        $http_status = (int)$response->getStatus();

        switch ($http_status) {

            case 304:
                // Use the cached object. Guarded because the object cache may be
                // empty (FALSE) if the service answers 304 unexpectedly.
                return isset($object_cache->object) ? $object_cache->object : NULL;

            case 200:
                $response_body = $response->getBody();
                // fresh object for the cache
                $cache->save(
                    (object)array(
                        'last_modified' => $last_modified,
                        'object'=> $response_body
                    ),
                    $key_object, array(), $ttl_object
                );
                // reset semaphore
                $cache->save(NULL, $key_semaphore, array(), $ttl_semaphore);
                return $response_body;

            default:
                $log = array(
                    'message' => sprintf("Service layer HTTP %d. GET: %s", $http_status, $uri),
                    'debug' => $response->getHeaders()
                );
                switch ($http_status) {
                    case 404:
                        BBC_Archers_Monitor::monitor(BBC_Archers_Monitor::Service_404, $log);
                        break;
                    case 500:
                        BBC_Archers_Monitor::monitor(BBC_Archers_Monitor::Service_500, $log);
                        break;
                    default:
                        BBC_Archers_Monitor::monitor(BBC_Archers_Monitor::Service_Non_200, $log);
                        break;
                }
                // Do not cache responses that are not 200s
                // https://jira.dev.bbc.co.uk/browse/ARCHERS-587
                return NULL;
        }
    }

    /**
     * Placeholder for the deprecated getContents; forwards to fetchCachedContent().
     *
     * @param string   $url
     * @param bool     $throw_error      Forwarded unchanged.
     * @param int|null $override_timeout Forwarded as the custom TTL; NULL keeps
     *                                   the configured TTL.
     * @return mixed See fetchCachedContent().
     */
    public function getContents($url, $throw_error=true, $override_timeout=null) {
        return self::fetchCachedContent($url, $throw_error, $override_timeout);
    }

    /**
     * Fetches a URL and parses the body as XML.
     *
     * @param string      $url
     * @param string|null $xpath       Optional XPath expression to evaluate on the document.
     * @param bool        $throw_error Forwarded to getContents().
     * @return mixed FALSE when no content was returned; the parsed document when
     *               $xpath is NULL or the document evaluates as empty; otherwise
     *               the result of SimpleXMLElement::xpath().
     * @throws Zend_Controller_Action_Exception (code 500) when libxml reports any
     *         error while parsing the response.
     */
    public function getFeed($url, $xpath=NULL, $throw_error=true) {
        $response = self::getContents($url, $throw_error);
        if (!$response) return false;

        // handle XML errors ourselves (cannot use try/catch); the previous libxml
        // setting is restored afterwards so that global state is not leaked
        $previous_setting = libxml_use_internal_errors(true);
        $xml = simplexml_load_string($response);
        $errors = libxml_get_errors();
        libxml_clear_errors();
        libxml_use_internal_errors($previous_setting);

        if (is_array($errors) && !empty($errors)) {
            $log_errors = array();
            // only log the parts of the error that are useful for debugging
            foreach ($errors as $error) {
                $log_errors[] = array(
                    'level' => $error->level,
                    'code' => $error->code,
                    'line' => $error->line,
                    'message' => trim($error->message)
                );
            }

            $resolved_uri = self::parseParametersInUri($url);
            BBC_Archers_Monitor::monitor(
                BBC_Archers_Monitor::Service_Invalid_XML,
                array(
                    'message' => sprintf("XML parsing errors parsing response from: GET %s", $resolved_uri),
                    'debug' => $log_errors
                )
            );
            throw new Zend_Controller_Action_Exception(sprintf('XML parsing errors parsing response from: GET %s', $resolved_uri), 500);
        }

        if (is_null($xpath) || !$xml) {
            return $xml;
        }
        return $xml->xpath($xpath);
    }

    /**
     * Sorts an array of arrays by the lower-cased value found under $subkey.
     *
     * Used for sorting the items in the family tree, called from WhoswhoController.
     *
     * @param array  $a      Rows to sort; each row must contain $subkey.
     * @param string $subkey Key whose value determines the order.
     * @return array Rows in ascending order, re-indexed from 0. An empty input
     *               yields an empty array.
     */
    public function subval_sort($a,$subkey) {
        // Initialised up front so empty input does not reach asort() undefined.
        $sort_values = array();
        foreach ($a as $k => $v) {
            $sort_values[$k] = strtolower($v[$subkey]);
        }
        asort($sort_values);

        $sorted = array();
        foreach ($sort_values as $key => $val) {
            $sorted[] = $a[$key];
        }
        return $sorted;
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment