Created
January 2, 2013 10:06
-
-
Save dropmeaword/4433487 to your computer and use it in GitHub Desktop.
Locale string parsing and normalizing from the Lithium framework.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php | |
/** | |
* Lithium: the most rad php framework | |
* | |
* @copyright Copyright 2012, Union of RAD (http://union-of-rad.org) | |
* @license http://opensource.org/licenses/bsd-license.php The BSD License | |
*/ | |
namespace lithium\g11n; | |
use BadMethodCallException; | |
use InvalidArgumentException; | |
use lithium\action\Request as ActionRequest; | |
use lithium\console\Request as ConsoleRequest; | |
/** | |
* The `Locale` class provides methods to deal with locale identifiers. The locale | |
* (here: _locale identifier_) is used to distinguish among different sets of common | |
* preferences. | |
* | |
* In order to avoid unnecessary overhead all methods throughout the framework accepting | |
* a locale require it to be well-formed according to the structure laid out below. For | |
* assuring the correct format use `Locale::canonicalize()` once on the locale. | |
* | |
* However, the methods within this class will also work with not-so-well-formed locales.
* They accept both underscores and hyphens as separators between tags and don't care
* about the case of the individual tags.
* | |
* The identifier used by Lithium is based in its structure upon Unicode's | |
* language identifier and is compliant to BCP 47. | |
* | |
* `language[_Script][_TERRITORY][_VARIANT]` | |
* - `language` The spoken language, here represented by an ISO 639-1 code (where
* not available, ISO 639-3 and ISO 639-5 codes are allowed too).
* The tag should be lower-cased and is required.
* - `Script` The tag should have its first character capitalized, all others
* lower-cased. The tag is optional.
* - `TERRITORY` A geographical area, here represented by an ISO 3166-1 code. | |
* Should be all upper-cased and is optional. | |
* - `VARIANT` Should be all upper-cased and is optional. | |
* | |
* @link http://www.unicode.org/reports/tr35/tr35-12.html#Identifiers | |
* @link http://www.rfc-editor.org/rfc/bcp/bcp47.txt | |
* @link http://www.iana.org/assignments/language-subtag-registry | |
*/ | |
class Locale extends \lithium\core\StaticObject {

    /**
     * Properties for locale tags, keyed by tag name in the order in which the tags
     * appear inside a locale. The `'formatter'` of each tag is a callable name (or a
     * list of callable names applied one after another) which `Locale::canonicalize()`
     * uses to bring the tag into its canonical case.
     *
     * @var array
     */
    protected static $_tags = array(
        'language' => array('formatter' => 'strtolower'),
        'script' => array('formatter' => array('strtolower', 'ucfirst')),
        'territory' => array('formatter' => 'strtoupper'),
        'variant' => array('formatter' => 'strtoupper')
    );

    /**
     * Magic method enabling `language`, `script`, `territory` and `variant`
     * methods to parse and retrieve individual tags from a locale.
     *
     * {{{
     * Locale::language('en_US'); // returns 'en'
     * Locale::territory('en_US'); // returns 'US'
     * Locale::script('en_US'); // returns null, the locale has no script tag
     * }}}
     *
     * @see lithium\g11n\Locale::$_tags
     * @see lithium\g11n\Locale::decompose()
     * @param string $method Name of the tag to retrieve.
     * @param array $params Parameters passed on to `Locale::decompose()`.
     * @return string|null The requested tag or `null` if the locale doesn't contain it.
     * @throws BadMethodCallException If `$method` is not the name of a known tag.
     * @throws InvalidArgumentException If the locale could not be parsed.
     */
    public static function __callStatic($method, $params = array()) {
        // Validate the tag name before touching the locale, so that an unknown
        // method is always reported as such, even if the locale is malformed too.
        if (!isset(static::$_tags[$method])) {
            throw new BadMethodCallException("Invalid locale tag `{$method}`.");
        }
        $tags = static::invokeMethod('decompose', $params);

        return isset($tags[$method]) ? $tags[$method] : null;
    }

    /**
     * Composes a locale from locale tags. This is the pendant to `Locale::decompose()`.
     *
     * Only keys known to `Locale::$_tags` are used; they are always emitted in the
     * order language, script, territory, variant regardless of the order in `$tags`.
     *
     * @param array $tags An array as obtained from `Locale::decompose()`.
     * @return string|null A locale with tags separated by underscores or `null`
     *         if none of the passed tags could be used to compose a locale.
     */
    public static function compose($tags) {
        $result = array();

        foreach (static::$_tags as $name => $tag) {
            if (isset($tags[$name])) {
                $result[] = $tags[$name];
            }
        }
        return $result ? implode('_', $result) : null;
    }

    /**
     * Parses a locale into locale tags. This is the pendant to `Locale::compose()`.
     *
     * The tags are returned exactly as they were written in `$locale`; use
     * `Locale::canonicalize()` to normalize their case.
     *
     * @param string $locale A locale in an arbitrary form (i.e. `'en_US'` or `'EN-US'`).
     * @return array Parsed language, script, territory and variant tags. Tags which
     *         are not present in the locale are omitted from the result.
     * @throws InvalidArgumentException If `$locale` is not a string or cannot be parsed.
     */
    public static function decompose($locale) {
        $regex = '(?P<language>[a-z]{2,3})';
        $regex .= '(?:[_-](?P<script>[a-z]{4}))?';
        $regex .= '(?:[_-](?P<territory>[a-z]{2}))?';
        $regex .= '(?:[_-](?P<variant>[a-z]{5,}))?';

        // Guard against arrays, `null` and plain objects: they must end up as a
        // parse failure instead of a warning/TypeError raised by `preg_match()`.
        $stringable = is_scalar($locale);
        $stringable = $stringable || (is_object($locale) && method_exists($locale, '__toString'));
        $subject = $stringable ? (string) $locale : '';

        if (!$stringable || !preg_match('/^' . $regex . '$/i', $subject, $matches)) {
            $shown = $stringable ? $subject : gettype($locale);
            throw new InvalidArgumentException("Locale `{$shown}` could not be parsed.");
        }
        // Keep only the named groups; `array_filter()` drops optional groups which
        // did not participate in the match (they show up as empty strings).
        return array_filter(array_intersect_key($matches, static::$_tags));
    }

    /**
     * Returns a locale in its canonical form with tags formatted properly.
     *
     * @param string $locale A locale in an arbitrary form (i.e. `'ZH-HANS-HK_REVISED'`).
     * @return string A locale in its canonical form (i.e. `'zh_Hans_HK_REVISED'`).
     * @throws InvalidArgumentException If the locale could not be parsed.
     */
    public static function canonicalize($locale) {
        $tags = static::decompose($locale);

        // Written without a by-reference loop variable so that no dangling
        // reference to the last element of `$tags` is left behind.
        foreach ($tags as $name => $tag) {
            foreach ((array) static::$_tags[$name]['formatter'] as $formatter) {
                $tag = $formatter($tag);
            }
            $tags[$name] = $tag;
        }
        return static::compose($tags);
    }

    /**
     * Cascades a locale.
     *
     * Usage:
     * {{{
     * Locale::cascade('en_US');
     * // returns array('en_US', 'en', 'root')
     *
     * Locale::cascade('zh_Hans_HK_REVISED');
     * // returns array('zh_Hans_HK_REVISED', 'zh_Hans_HK', 'zh_Hans', 'zh', 'root')
     * }}}
     *
     * @link http://www.unicode.org/reports/tr35/tr35-13.html#Locale_Inheritance
     * @param string $locale A locale in an arbitrary form (i.e. `'en_US'` or `'EN-US'`).
     * @return array Indexed array of locales (starting with the most specific one).
     * @throws InvalidArgumentException If the locale is not `'root'` and could not be parsed.
     */
    public static function cascade($locale) {
        $locales = array($locale);

        if ($locale === 'root') {
            return $locales;
        }
        $tags = static::decompose($locale);

        // Strip the most specific tag one at a time until only the language is left.
        while (count($tags) > 1) {
            array_pop($tags);
            $locales[] = static::compose($tags);
        }
        $locales[] = 'root';

        return $locales;
    }

    /**
     * Searches an array of locales for the best match to a locale. The locale
     * is iteratively simplified until either it matches one of the locales
     * in the list or the locale can't be further simplified.
     *
     * Once only the language tag is left and there is no exact match for it, the
     * first of `$locales` starting with that language (i.e. `'en_US'` for `'en'`)
     * is used as a last resort.
     *
     * This method partially implements the lookup matching scheme as described
     * in RFC 4647, section 3.4 and thus does not strictly conform to the
     * specification.
     *
     * Differences to specification:
     * - No support for wildcards in the to-be-matched locales.
     * - No support for locales with private subtags.
     * - No support for a default return value.
     * - Passed locales are required to be in canonical form (i.e. `'ja_JP'`).
     *
     * @link http://www.ietf.org/rfc/rfc4647.txt
     * @param array $locales Locales to match against `$locale`.
     * @param string $locale A locale in its canonical form (i.e. `'zh_Hans_HK_REVISED'`).
     * @return string|null The matched locale or `null` if nothing matched.
     * @throws InvalidArgumentException If `$locale` could not be parsed.
     */
    public static function lookup($locales, $locale) {
        $locales = (array) $locales;
        $tags = static::decompose($locale);

        while ($tags) {
            // Strict search: locales are strings and must never match loosely
            // against stray non-string entries such as `0` or `true`.
            $key = array_search(static::compose($tags), $locales, true);

            if ($key !== false) {
                return $locales[$key];
            }
            if (count($tags) === 1) {
                $prefix = reset($tags) . '_';

                foreach ($locales as $currentLocale) {
                    if (strpos($currentLocale, $prefix) === 0) {
                        return $currentLocale;
                    }
                }
            }
            array_pop($tags);
        }
        return null;
    }

    /**
     * Determines the preferred locale from a request or array. Optionally negotiates
     * the preferred locale with available locales.
     *
     * @see lithium\g11n\Locale::_preferredAction()
     * @see lithium\g11n\Locale::_preferredConsole()
     * @see lithium\g11n\Locale::lookup()
     * @param object|array $request An action or console request object or an array of locales.
     * @param array $available A list of locales to negotiate the preferred locale with.
     * @return string|null The preferred locale in its canonical form (i.e. `'fr_CA'`) or
     *         `null` if `$request` is of an unsupported type, holds no locales or none
     *         of its locales could be negotiated with `$available`.
     * @todo Rewrite this to remove hard-coded class names.
     */
    public static function preferred($request, $available = null) {
        if (is_array($request)) {
            $result = $request;
        } elseif ($request instanceof ActionRequest) {
            $result = static::_preferredAction($request);
        } elseif ($request instanceof ConsoleRequest) {
            $result = static::_preferredConsole($request);
        } else {
            return null;
        }
        if (!$available) {
            // Nothing to negotiate with: the most preferred locale wins.
            return array_shift($result);
        }
        foreach ((array) $result as $locale) {
            if ($match = static::lookup($available, $locale)) {
                return $match;
            }
        }
        return null;
    }

    /**
     * Detects preferred locales from an action request by looking at the
     * `'Accept-Language'` header as described by RFC 2616, section 14.4.
     *
     * The header is client-supplied, so this method never throws on its contents:
     * - Ranges that cannot be parsed into a locale (wildcards, numeric regions,
     *   private subtags or plain garbage) are silently skipped.
     * - Ranges with a quality of `0` are skipped, as that marks them "not acceptable".
     * - Remaining locales are ordered by descending quality; locales with equal
     *   quality keep the order in which they appear in the header.
     *
     * @link http://www.ietf.org/rfc/rfc2616.txt
     * @param object $request An instance of `lithium\action\Request`.
     * @return array Preferred locales in their canonical form (i.e. `'fr_CA'`).
     */
    protected static function _preferredAction($request) {
        $regex = '/^\s*(?P<locale>[a-z]{2,3}(?:[-_][a-z0-9]+)*)\s*';
        $regex .= '(?:;\s*q=(?P<quality>0(?:\.\d*)?|1(?:\.0*)?))?\s*$/i';

        // The header may be absent, in which case there is nothing to parse.
        $header = (string) $request->env('HTTP_ACCEPT_LANGUAGE');
        $candidates = array();

        foreach (explode(',', $header) as $part) {
            if (!preg_match($regex, $part, $matches)) {
                continue;
            }
            $hasQuality = isset($matches['quality']) && $matches['quality'] !== '';
            $quality = $hasQuality ? (float) $matches['quality'] : 1.0;

            if ($quality <= 0) {
                continue;
            }
            try {
                $locale = static::canonicalize($matches['locale']);
            } catch (InvalidArgumentException $e) {
                continue;
            }
            // The running index serves as a tie-breaker, keeping the sort stable.
            $candidates[] = array($quality, count($candidates), $locale);
        }
        usort($candidates, function($a, $b) {
            if ($a[0] != $b[0]) {
                return $a[0] < $b[0] ? 1 : -1;
            }
            return $a[1] - $b[1];
        });

        $result = array();

        foreach ($candidates as $candidate) {
            $result[] = $candidate[2];
        }
        return $result;
    }

    /**
     * Detects preferred locales from a console request by looking at certain
     * environment variables. The environment variables may be present or not
     * depending on your system. If multiple variables are present the following
     * hierarchy is used: `'LANGUAGE'`, `'LC_ALL'`, `'LANG'`. The first variable
     * yielding at least one usable locale wins.
     *
     * `'LANGUAGE'` may hold a colon separated list of locales. The locales of
     * `'LC_ALL'` and `'LANG'` are formatted according to the posix standard:
     * `language(_territory)(.encoding)(@modifier)`. Locales having such a format
     * are automatically canonicalized and transformed into the `Locale` class' format.
     *
     * @link http://www.linux.com/archive/feature/53781
     * @param object $request An instance of `lithium\console\Request`.
     * @return array Preferred locales in their canonical form (i.e. `'fr_CA'`).
     */
    protected static function _preferredConsole($request) {
        if ($value = $request->env('LANGUAGE')) {
            if ($result = static::_posixLocales(explode(':', $value))) {
                return $result;
            }
        }
        foreach (array('LC_ALL', 'LANG') as $variable) {
            if ($result = static::_posixLocales(array($request->env($variable)))) {
                return $result;
            }
        }
        return array();
    }

    /**
     * Converts posix style locale values into canonical locales.
     *
     * Strips the `.encoding` and `@modifier` parts, then canonicalizes what is
     * left. Empty values, the special `'C'` and `'POSIX'` locales and values
     * which cannot be parsed are skipped.
     *
     * @param array $values Raw values as found in the environment (i.e. `'es_ES@euro'`).
     * @return array Locales in their canonical form (i.e. `'es_ES'`), in input order.
     */
    protected static function _posixLocales(array $values) {
        $result = array();

        foreach ($values as $value) {
            $parts = preg_split('/[.@]/', (string) $value, 2);
            $value = trim($parts[0]);

            if ($value === '' || $value === 'C' || $value === 'POSIX') {
                continue;
            }
            try {
                $result[] = static::canonicalize($value);
            } catch (InvalidArgumentException $e) {
                continue;
            }
        }
        return $result;
    }
}
?> |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment