Instantly share code, notes, and snippets.

Embed
What would you like to do?
<?php
/**
* @file
* This file was auto-generated by generate-includes.php and includes all of
* the core files required by HTML Purifier. Use this if performance is a
* primary concern and you are using an opcode cache. PLEASE DO NOT EDIT THIS
* FILE, changes will be overwritten the next time the script is run.
*
* @version 4.9.2
*
* @warning
* You must *not* include any other HTML Purifier files before this file,
* because 'require' not 'require_once' is used.
*
* @warning
* This file requires that the include path contains the HTML Purifier
* library directory; this is not auto-set.
*/
/*! @mainpage
*
* HTML Purifier is an HTML filter that will take an arbitrary snippet of
* HTML and rigorously test, validate and filter it into a version that
* is safe for output onto webpages. It achieves this by:
*
* -# Lexing (parsing into tokens) the document,
* -# Executing various strategies on the tokens:
* -# Removing all elements not in the whitelist,
* -# Making the tokens well-formed,
* -# Fixing the nesting of the nodes, and
* -# Validating attributes of the nodes; and
* -# Generating HTML from the purified tokens.
*
* However, most users will only need to interface with the HTMLPurifier
* and HTMLPurifier_Config.
*/
/*
HTML Purifier 4.9.2 - Standards Compliant HTML Filtering
Copyright (C) 2006-2008 Edward Z. Yang
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
/**
* Facade that coordinates HTML Purifier's subsystems in order to purify HTML.
*
* @note There are several points in which configuration can be specified
* for HTML Purifier. The precedence of these (from lowest to
* highest) is as follows:
* -# Instance: new HTMLPurifier($config)
* -# Invocation: purify($html, $config)
* These configurations are entirely independent of each other and
* are *not* merged (this behavior may change in the future).
*
* @todo We need an easier way to inject strategies using the configuration
* object.
*/
class HTMLPurifier
{
    /**
     * Version of HTML Purifier.
     * @type string
     */
    public $version = '4.9.2';

    /**
     * Constant with version of HTML Purifier.
     */
    const VERSION = '4.9.2';

    /**
     * Global configuration object, used whenever purify() is called
     * without its own configuration.
     * @type HTMLPurifier_Config
     */
    public $config;

    /**
     * Array of extra filter objects to run on HTML,
     * for backwards compatibility (populated by the deprecated addFilter()).
     * @type HTMLPurifier_Filter[]
     */
    private $filters = array();

    /**
     * Single shared instance of HTML Purifier, managed by instance().
     * @type HTMLPurifier
     */
    private static $instance;

    /**
     * Strategy that turns the lexed tokens into purified tokens.
     * @type HTMLPurifier_Strategy_Core
     */
    protected $strategy;

    /**
     * Generator used by the most recent purify() call.
     * @type HTMLPurifier_Generator
     */
    protected $generator;

    /**
     * Resultant context of last run purification.
     * Is an array of contexts if the last called method was purifyArray();
     * the array mirrors the (possibly nested) shape of the input array.
     * @type HTMLPurifier_Context|array
     */
    public $context;

    /**
     * Initializes the purifier.
     *
     * @param HTMLPurifier_Config|mixed $config Optional HTMLPurifier_Config object
     *          for all instances of the purifier, if omitted, a default
     *          configuration is supplied (which can be overridden on a
     *          per-use basis).
     *          The parameter can also be any type that
     *          HTMLPurifier_Config::create() supports.
     */
    public function __construct($config = null)
    {
        $this->config = HTMLPurifier_Config::create($config);
        $this->strategy = new HTMLPurifier_Strategy_Core();
    }

    /**
     * Adds a filter to process the output. First come first serve.
     *
     * Deprecated: always raises an E_USER_WARNING, then still records the
     * filter so that existing callers keep working.
     *
     * @param HTMLPurifier_Filter $filter HTMLPurifier_Filter object
     */
    public function addFilter($filter)
    {
        trigger_error(
            'HTMLPurifier->addFilter() is deprecated, use configuration directives' .
            ' in the Filter namespace or Filter.Custom',
            E_USER_WARNING
        );
        $this->filters[] = $filter;
    }

    /**
     * Filters an HTML snippet/document to be XSS-free and standards-compliant.
     *
     * @param string $html String of HTML to purify
     * @param HTMLPurifier_Config $config Config object for this operation,
     *          if omitted, defaults to the config object specified during this
     *          object's construction. The parameter can also be any type
     *          that HTMLPurifier_Config::create() supports.
     *
     * @return string Purified HTML
     */
    public function purify($html, $config = null)
    {
        // :TODO: make the config merge in, instead of replace
        $config = $config ? HTMLPurifier_Config::create($config) : $this->config;

        // implementation is partially environment dependent, partially
        // configuration dependent
        $lexer = HTMLPurifier_Lexer::create($config);

        $context = new HTMLPurifier_Context();

        // setup HTML generator
        $this->generator = new HTMLPurifier_Generator($config, $context);
        $context->register('Generator', $this->generator);

        // set up global context variables
        if ($config->get('Core.CollectErrors')) {
            // may get moved out if other facilities use it
            $language_factory = HTMLPurifier_LanguageFactory::instance();
            $language = $language_factory->create($config, $context);
            $context->register('Locale', $language);

            $error_collector = new HTMLPurifier_ErrorCollector($context);
            $context->register('ErrorCollector', $error_collector);
        }

        // setup id_accumulator context, necessary due to the fact that
        // AttrValidator can be called from many places
        $id_accumulator = HTMLPurifier_IDAccumulator::build($config, $context);
        $context->register('IDAccumulator', $id_accumulator);

        $html = HTMLPurifier_Encoder::convertToUTF8($html, $config, $context);

        // setup filters: built-in ones enabled by flag first, then the
        // Filter.Custom objects, then anything added through addFilter()
        $filter_flags = $config->getBatch('Filter');
        $custom_filters = $filter_flags['Custom'];
        unset($filter_flags['Custom']);
        $filters = array();
        foreach ($filter_flags as $filter => $flag) {
            if (!$flag) {
                continue;
            }
            // dotted names are sub-directives, not filter switches
            if (strpos($filter, '.') !== false) {
                continue;
            }
            $class = "HTMLPurifier_Filter_$filter";
            $filters[] = new $class;
        }
        foreach ($custom_filters as $filter) {
            // maybe "HTMLPurifier_Filter_$filter", but be consistent with AutoFormat
            $filters[] = $filter;
        }
        $filters = array_merge($filters, $this->filters);
        // maybe prepare(), but later

        // pre-filters run in registration order
        for ($i = 0, $filter_size = count($filters); $i < $filter_size; $i++) {
            $html = $filters[$i]->preFilter($html, $config, $context);
        }

        // purified HTML
        $html =
            $this->generator->generateFromTokens(
                // list of tokens
                $this->strategy->execute(
                    // list of un-purified tokens
                    $lexer->tokenizeHTML(
                        // un-purified HTML
                        $html,
                        $config,
                        $context
                    ),
                    $config,
                    $context
                )
            );

        // post-filters run in reverse registration order
        for ($i = $filter_size - 1; $i >= 0; $i--) {
            $html = $filters[$i]->postFilter($html, $config, $context);
        }

        $html = HTMLPurifier_Encoder::convertFromUTF8($html, $config, $context);
        $this->context =& $context;
        return $html;
    }

    /**
     * Filters an array of HTML snippets.
     *
     * Nested arrays are purified recursively, so the returned structure
     * (and the structure stored in $this->context) mirrors the input.
     * Keys are preserved.
     *
     * @param array $array_of_html Array of html snippets; values may
     *          themselves be arrays of snippets
     * @param HTMLPurifier_Config $config Optional config object for this operation.
     *                See HTMLPurifier::purify() for more details.
     *
     * @return array Array of purified HTML, same keys as the input
     */
    public function purifyArray($array_of_html, $config = null)
    {
        $context_array = array();
        foreach ($array_of_html as $key => $html) {
            if (is_array($html)) {
                // recurse instead of handing an array to purify(), which
                // only understands strings
                $array_of_html[$key] = $this->purifyArray($html, $config);
            } else {
                $array_of_html[$key] = $this->purify($html, $config);
            }
            // after either call $this->context describes the entry just processed
            $context_array[$key] = $this->context;
        }
        $this->context = $context_array;
        return $array_of_html;
    }

    /**
     * Singleton for enforcing just one HTML Purifier in your system
     *
     * @param HTMLPurifier|HTMLPurifier_Config $prototype Optional prototype
     *                   HTMLPurifier instance to overload singleton with,
     *                   or HTMLPurifier_Config instance to configure the
     *                   generated version with.
     *
     * @return HTMLPurifier
     */
    public static function instance($prototype = null)
    {
        // a truthy prototype always replaces the current singleton
        if (!self::$instance || $prototype) {
            if ($prototype instanceof HTMLPurifier) {
                self::$instance = $prototype;
            } elseif ($prototype) {
                self::$instance = new HTMLPurifier($prototype);
            } else {
                self::$instance = new HTMLPurifier();
            }
        }
        return self::$instance;
    }

    /**
     * Singleton for enforcing just one HTML Purifier in your system
     *
     * @param HTMLPurifier|HTMLPurifier_Config $prototype Optional prototype
     *                   HTMLPurifier instance to overload singleton with,
     *                   or HTMLPurifier_Config instance to configure the
     *                   generated version with.
     *
     * @return HTMLPurifier
     * @note Backwards compatibility, see instance()
     */
    public static function getInstance($prototype = null)
    {
        return self::instance($prototype);
    }
}
/**
* Converts a stream of HTMLPurifier_Token into an HTMLPurifier_Node,
* and back again.
*
* @note This transformation is not an equivalence. We mutate the input
* token stream to make it so; see all [MUT] markers in code.
*/
class HTMLPurifier_Arborize
{
    /**
     * Builds a node tree out of a flat token list.
     *
     * A synthetic root is created from the definition's info_parent; every
     * non-end token becomes a child of the innermost open element, and each
     * end token closes that element, copying its position/armor onto it.
     * Tokens are mutated along the way (see the [MUT] markers).
     *
     * @param array $tokens Token list to convert
     * @param HTMLPurifier_Config $config Supplies the HTML definition
     * @param HTMLPurifier_Context $context Unused here
     * @return mixed Root node of the resulting tree
     */
    public static function arborize($tokens, $config, $context)
    {
        $htmlDefinition = $config->getHTMLDefinition();
        $rootToken = new HTMLPurifier_Token_Start($htmlDefinition->info_parent);
        $openNodes = array($rootToken->toNode());

        foreach ($tokens as $token) {
            $token->skip = null; // [MUT]
            $token->carryover = null; // [MUT]

            if (!($token instanceof HTMLPurifier_Token_End)) {
                // attach to the innermost open element
                $node = $token->toNode();
                $openNodes[count($openNodes) - 1]->children[] = $node;
                if ($token instanceof HTMLPurifier_Token_Start) {
                    $openNodes[] = $node;
                }
                continue;
            }

            // end token: close the innermost element and record where it ended
            $token->start = null; // [MUT]
            $closed = array_pop($openNodes);
            $closed->endCol = $token->col;
            $closed->endLine = $token->line;
            $closed->endArmor = $token->armor;
        }

        return $openNodes[0];
    }

    /**
     * Converts a node tree back into a flat token list.
     *
     * One queue of nodes is kept per nesting level; end tokens are parked
     * per level and emitted once that level's queue has been drained. The
     * root node's own start token (level 0) is not emitted.
     *
     * @param mixed $node Root node of the tree
     * @param HTMLPurifier_Config $config Unused here
     * @param HTMLPurifier_Context $context Unused here
     * @return array Flat list of tokens
     */
    public static function flatten($node, $config, $context)
    {
        $depth = 0;
        $queues = array($depth => new HTMLPurifier_Queue(array($node)));
        $pendingEnds = array();
        $flat = array();

        do {
            while (!$queues[$depth]->isEmpty()) {
                $node = $queues[$depth]->shift(); // FIFO
                list($startToken, $endToken) = $node->toTokenPair();
                if ($depth > 0) {
                    $flat[] = $startToken;
                }
                if ($endToken !== null) {
                    $pendingEnds[$depth][] = $endToken;
                }
                if ($node instanceof HTMLPurifier_Node_Element) {
                    // descend: the children form the next level's queue
                    $depth++;
                    $queues[$depth] = new HTMLPurifier_Queue();
                    foreach ($node->children as $child) {
                        $queues[$depth]->push($child);
                    }
                }
            }

            $depth--;
            if ($depth && isset($pendingEnds[$depth])) {
                // emit parked end tokens, most recent first
                while ($endToken = array_pop($pendingEnds[$depth])) {
                    $flat[] = $endToken;
                }
            }
        } while ($depth > 0);

        return $flat;
    }
}
/**
* Defines common attribute collections that modules reference
*/
class HTMLPurifier_AttrCollections
{
    /**
     * Associative array of attribute collections, indexed by name.
     * @type array
     */
    public $info = array();

    /**
     * Builds the collections from the given modules; simply delegates
     * to doConstruct().
     *
     * @param HTMLPurifier_AttrTypes $attr_types HTMLPurifier_AttrTypes instance
     * @param HTMLPurifier_HTMLModule[] $modules Hash array of HTMLPurifier_HTMLModule members
     */
    public function __construct($attr_types, $modules)
    {
        $this->doConstruct($attr_types, $modules);
    }

    /**
     * Gathers every module's attr_collections into $this->info, then
     * resolves inclusions and string identifiers for each collection.
     *
     * @param HTMLPurifier_AttrTypes $attr_types HTMLPurifier_AttrTypes instance
     * @param HTMLPurifier_HTMLModule[] $modules Hash array of HTMLPurifier_HTMLModule members
     */
    public function doConstruct($attr_types, $modules)
    {
        // step 1: collect the raw collections contributed by the modules
        foreach ($modules as $module) {
            foreach ($module->attr_collections as $collectionName => $collection) {
                if (!isset($this->info[$collectionName])) {
                    $this->info[$collectionName] = array();
                }
                foreach ($collection as $attrName => $attrDef) {
                    $alreadyHasIncludes = ($attrName === 0)
                        && isset($this->info[$collectionName][$attrName]);
                    if ($alreadyHasIncludes) {
                        // index 0 is the include list: append, don't overwrite
                        $this->info[$collectionName][$attrName] = array_merge(
                            $this->info[$collectionName][$attrName],
                            $attrDef
                        );
                    } else {
                        $this->info[$collectionName][$attrName] = $attrDef;
                    }
                }
            }
        }

        // step 2: expand each collection in place
        foreach (array_keys($this->info) as $collectionName) {
            // pull in attributes of included collections
            $this->performInclusions($this->info[$collectionName]);
            // swap string identifiers for attribute definition objects
            $this->expandIdentifiers($this->info[$collectionName], $attr_types);
        }
    }

    /**
     * Takes a reference to an attribute associative array and copies in
     * the attributes of every collection named at index 0 (following
     * nested include lists), then removes index 0.
     *
     * @param array &$attr Reference to attribute array
     */
    public function performInclusions(&$attr)
    {
        if (!isset($attr[0])) {
            return;
        }

        $pending = $attr[0];
        $visited = array(); // recursion guard
        $cursor = 0;

        while (isset($pending[$cursor])) {
            $included = $pending[$cursor];
            $cursor++;

            if (isset($visited[$included])) {
                continue;
            }
            $visited[$included] = true;

            if (!isset($this->info[$included])) {
                continue;
            }

            // existing keys win; this also keeps our own index 0 untouched
            foreach ($this->info[$included] as $key => $value) {
                if (!isset($attr[$key])) {
                    $attr[$key] = $value;
                }
            }

            if (isset($this->info[$included][0])) {
                // queue up the included collection's own includes
                $pending = array_merge($pending, $this->info[$included][0]);
            }
        }

        unset($attr[0]);
    }

    /**
     * Expands all string identifiers in an attribute array by replacing
     * them with the values returned by $attr_types->get(). A '*' in an
     * attribute name marks it as required and is stripped from the name;
     * entries set to false, or whose type cannot be resolved, are removed.
     *
     * @param array &$attr Reference to attribute array
     * @param HTMLPurifier_AttrTypes $attr_types HTMLPurifier_AttrTypes instance
     */
    public function expandIdentifiers(&$attr, $attr_types)
    {
        // names already handled, so renamed entries are not processed twice
        $handled = array();

        foreach ($attr as $name => $definition) {
            // index 0 holds inclusions, not an attribute
            if ($name === 0 || isset($handled[$name])) {
                continue;
            }

            $required = (strpos($name, '*') !== false);
            if ($required) {
                // re-key the entry under its name without the marker
                unset($attr[$name]);
                $name = trim($name, '*');
                $attr[$name] = $definition;
            }
            $handled[$name] = true;

            if (is_object($definition)) {
                // literal object: only merge the required flag
                $attr[$name]->required = ($required || $attr[$name]->required);
                continue;
            }

            if ($definition === false) {
                unset($attr[$name]);
                continue;
            }

            $resolved = $attr_types->get($definition);
            if ($resolved) {
                $attr[$name] = $resolved;
                $attr[$name]->required = $required;
            } else {
                unset($attr[$name]);
            }
        }
    }
}
/**
* Base class for all validating attribute definitions.
*
* This family of classes forms the core for not only HTML attribute validation,
* but also any sort of string that needs to be validated or cleaned (which
* means CSS properties and composite definitions are defined here too).
* Besides defining (through code) what precisely makes the string valid,
* subclasses are also responsible for cleaning the code if possible.
*/
abstract class HTMLPurifier_AttrDef
{
    /**
     * Tells us whether or not an HTML attribute is minimized.
     * Has no meaning in other contexts.
     * @type bool
     */
    public $minimized = false;

    /**
     * Tells us whether or not an HTML attribute is required.
     * Has no meaning in other contexts
     * @type bool
     */
    public $required = false;

    /**
     * Validates and cleans passed string according to a definition.
     *
     * @param string $string String to be validated and cleaned.
     * @param HTMLPurifier_Config $config Mandatory HTMLPurifier_Config object.
     * @param HTMLPurifier_Context $context Mandatory HTMLPurifier_Context object.
     */
    abstract public function validate($string, $config, $context);

    /**
     * Convenience method that parses a string as if it were CDATA.
     *
     * Follows <http://www.w3.org/TR/html4/types.html#h-6.2> loosely:
     * the string is trimmed, then every remaining line feed, tab and
     * carriage return is replaced with a single space.
     *
     * @note Not entirely standards compliant: trim() strips more kinds of
     *       whitespace than the spec lists.
     *
     * @warning Inconsistent with XML's whitespace handling (section 3.3.3);
     *          newlines are assumed to have been normalized already.
     *
     * @param string $string Raw attribute/CSS value
     * @return string Normalized value
     */
    public function parseCDATA($string)
    {
        return str_replace(array("\n", "\t", "\r"), ' ', trim($string));
    }

    /**
     * Factory method for creating this class from a string.
     *
     * The base implementation ignores $string and returns this very
     * object (a flyweight). Subclasses whose behaviour depends on $string
     * should override this and return a fresh instance instead.
     *
     * @param string $string String construction info
     * @return HTMLPurifier_AttrDef Created AttrDef object corresponding to $string
     */
    public function make($string)
    {
        return $this;
    }

    /**
     * Removes spaces from rgb(0, 0, 0) so that shorthand CSS properties work
     * properly. THIS IS A HACK!
     *
     * If the string contains "rgba(" or "hsla(", only the four-argument
     * forms are compacted; otherwise the three-argument rgb()/hsl() forms are.
     *
     * @param string $string a CSS colour definition
     * @return string
     */
    protected function mungeRgb($string)
    {
        // one numeric component, optionally fractional and/or a percentage
        $component = '\s*(\d+(\.\d+)?([%]?))\s*';

        $hasAlphaForm = preg_match('/(rgba|hsla)\(/', $string);
        if ($hasAlphaForm) {
            $alphaPattern = '/(rgba|hsla)\('.$component.','.$component.','.$component.','.$component.'\)/';
            return preg_replace($alphaPattern, '\1(\2,\5,\8,\11)', $string);
        }

        $opaquePattern = '/(rgb|hsl)\('.$component.','.$component.','.$component.'\)/';
        return preg_replace($opaquePattern, '\1(\2,\5,\8)', $string);
    }

    /**
     * Parses a possibly escaped CSS string and returns the "pure"
     * version of it.
     *
     * @param string $string CSS string that may contain backslash escapes
     * @return string String with the escapes resolved
     */
    protected function expandCSSEscape($string)
    {
        $result = '';
        $length = strlen($string);

        for ($pos = 0; $pos < $length; $pos++) {
            if ($string[$pos] === '\\') {
                $pos++;
                if ($pos >= $length) {
                    // lone trailing backslash is kept as-is
                    $result .= '\\';
                    break;
                }
                if (ctype_xdigit($string[$pos])) {
                    // hexadecimal escape: gather up to six hex digits
                    $hex = $string[$pos];
                    for ($digits = 1, $pos++; $pos < $length && $digits < 6; $pos++, $digits++) {
                        if (!ctype_xdigit($string[$pos])) {
                            break;
                        }
                        $hex .= $string[$pos];
                    }
                    // Be careful when adding new characters so that the
                    // encoding is not broken: drop anything cleanUTF8 rejects.
                    $char = HTMLPurifier_Encoder::unichr(hexdec($hex));
                    if (HTMLPurifier_Encoder::cleanUTF8($char) === '') {
                        continue;
                    }
                    $result .= $char;
                    // a whitespace terminator is consumed; anything else
                    // must be revisited by the outer loop
                    if ($pos < $length && trim($string[$pos]) !== '') {
                        $pos--;
                    }
                    continue;
                }
                if ($string[$pos] === "\n") {
                    // escaped newline contributes nothing
                    continue;
                }
            }
            $result .= $string[$pos];
        }

        return $result;
    }
}
/**
* Processes an entire attribute array for corrections needing multiple values.
*
* Occasionally, a certain attribute will need to be removed and popped onto
* another value. Instead of creating a complex return syntax for
* HTMLPurifier_AttrDef, we just pass the whole attribute array to a
* specialized object and have that do the special work. That is the
* family of HTMLPurifier_AttrTransform.
*
* An attribute transformation can be assigned to run before or after
* HTMLPurifier_AttrDef validation. See HTMLPurifier_HTMLDefinition for
* more details.
*/
abstract class HTMLPurifier_AttrTransform
{
    /**
     * Abstract: makes changes to the attributes dependent on multiple values.
     *
     * @param array $attr Assoc array of attributes, usually from
     *              HTMLPurifier_Token_Tag::$attr
     * @param HTMLPurifier_Config $config Mandatory HTMLPurifier_Config object.
     * @param HTMLPurifier_Context $context Mandatory HTMLPurifier_Context object
     * @return array Processed attribute array.
     */
    abstract public function transform($attr, $config, $context);

    /**
     * Puts the given CSS in front of whatever the style attribute already
     * holds; the attribute is created when it is missing.
     *
     * @param array &$attr Attribute array to process (passed by reference)
     * @param string $css CSS to prepend
     */
    public function prependCSS(&$attr, $css)
    {
        $existing = '';
        if (isset($attr['style'])) {
            $existing = $attr['style'];
        }
        $attr['style'] = $css . $existing;
    }

    /**
     * Takes an attribute out of the array and hands back its value.
     *
     * @param array &$attr Attribute array to process (passed by reference)
     * @param mixed $key Key of attribute to confiscate
     * @return mixed The removed value, or null when the key is not set
     */
    public function confiscateAttr(&$attr, $key)
    {
        if (isset($attr[$key])) {
            $taken = $attr[$key];
            unset($attr[$key]);
            return $taken;
        }
        return null;
    }
}
/**
* Provides lookup array of attribute types to HTMLPurifier_AttrDef objects
*/
class HTMLPurifier_AttrTypes
{
    /**
     * Lookup array of attribute string identifiers to concrete implementations.
     * @type HTMLPurifier_AttrDef[]
     */
    protected $info = array();

    /**
     * Constructs the info array, supplying default implementations for attribute
     * types.
     */
    public function __construct()
    {
        // XXX This is kind of poor, since we don't actually /clone/
        // instances; instead, we use the supplied make() attribute. So,
        // the underlying class must know how to deal with arguments.
        // With the old implementation of Enum, that ignored its
        // arguments when handling a make dispatch, the IAlign
        // definition wouldn't work.
        $defaults = array(
            // pseudo-types, must be instantiated via shorthand
            'Enum' => new HTMLPurifier_AttrDef_Enum(),
            'Bool' => new HTMLPurifier_AttrDef_HTML_Bool(),

            'CDATA' => new HTMLPurifier_AttrDef_Text(),
            'ID' => new HTMLPurifier_AttrDef_HTML_ID(),
            'Length' => new HTMLPurifier_AttrDef_HTML_Length(),
            'MultiLength' => new HTMLPurifier_AttrDef_HTML_MultiLength(),
            'NMTOKENS' => new HTMLPurifier_AttrDef_HTML_Nmtokens(),
            'Pixels' => new HTMLPurifier_AttrDef_HTML_Pixels(),
            'Text' => new HTMLPurifier_AttrDef_Text(),
            'URI' => new HTMLPurifier_AttrDef_URI(),
            'LanguageCode' => new HTMLPurifier_AttrDef_Lang(),
            'Color' => new HTMLPurifier_AttrDef_HTML_Color(),
            'IAlign' => self::makeEnum('top,middle,bottom,left,right'),
            'LAlign' => self::makeEnum('top,bottom,left,right'),
            'FrameTarget' => new HTMLPurifier_AttrDef_HTML_FrameTarget(),

            // unimplemented aliases
            'ContentType' => new HTMLPurifier_AttrDef_Text(),
            'ContentTypes' => new HTMLPurifier_AttrDef_Text(),
            'Charsets' => new HTMLPurifier_AttrDef_Text(),
            'Character' => new HTMLPurifier_AttrDef_Text(),

            // "proprietary" types
            'Class' => new HTMLPurifier_AttrDef_HTML_Class(),

            // number is really a positive integer (one or more digits)
            // FIXME: ^^ not always, see start and value of list items
            'Number' => new HTMLPurifier_AttrDef_Integer(false, false, true),
        );

        foreach ($defaults as $typeName => $implementation) {
            $this->info[$typeName] = $implementation;
        }
    }

    /**
     * Wraps an Enum built from a comma-separated list of values in a
     * Clone definition.
     *
     * @param string $in Comma-separated allowed values
     * @return HTMLPurifier_AttrDef_Clone
     */
    private static function makeEnum($in)
    {
        $allowed = explode(',', $in);
        $enum = new HTMLPurifier_AttrDef_Enum($allowed);
        return new HTMLPurifier_AttrDef_Clone($enum);
    }

    /**
     * Retrieves a type.
     *
     * Anything after the first '#' in $type is handed to the registered
     * implementation's make() as construction info. For an unknown type an
     * E_USER_ERROR is triggered and nothing is returned.
     *
     * @param string $type String type name, optionally "Name#info"
     * @return HTMLPurifier_AttrDef Object AttrDef for type
     */
    public function get($type)
    {
        // split off any extra info tacked on after the type name
        $pieces = explode('#', $type, 2);
        $type = $pieces[0];
        $string = isset($pieces[1]) ? $pieces[1] : '';

        if (isset($this->info[$type])) {
            return $this->info[$type]->make($string);
        }

        trigger_error('Cannot retrieve undefined attribute type ' . $type, E_USER_ERROR);
        return;
    }

    /**
     * Sets a new implementation for a type
     * @param string $type String type name
     * @param HTMLPurifier_AttrDef $impl Object AttrDef for type
     */
    public function set($type, $impl)
    {
        $this->info[$type] = $impl;
    }
}
/**
* Validates the attributes of a token. Doesn't manage required attributes
* very well. The only reason we factored this out was because RemoveForeignElements
* also needed it besides ValidateAttributes.
*/
class HTMLPurifier_AttrValidator
{
    /**
     * Validates the attributes of a token, mutating it as necessary.
     *
     * Only start and empty tokens carry attributes; any other token is
     * left untouched. For those tokens the attributes go through the
     * global and element-local pre-transforms, per-attribute validation
     * (local definition first, then global, otherwise removal) and the
     * global and element-local post-transforms. The token's attr array is
     * replaced once, at the very end.
     *
     * Context side effects: registers 'IDAccumulator' if none is present;
     * temporarily registers 'CurrentAttr'; temporarily registers
     * 'CurrentToken' when the caller has not already done so.
     *
     * @param HTMLPurifier_Token $token Token to validate.
     * @param HTMLPurifier_Config $config Instance of HTMLPurifier_Config
     * @param HTMLPurifier_Context $context Instance of HTMLPurifier_Context
     */
    public function validateToken($token, $config, $context)
    {
        $definition = $config->getHTMLDefinition();
        $e =& $context->get('ErrorCollector', true);

        // initialize IDAccumulator if necessary
        $ok =& $context->get('IDAccumulator', true);
        if (!$ok) {
            $id_accumulator = HTMLPurifier_IDAccumulator::build($config, $context);
            $context->register('IDAccumulator', $id_accumulator);
        }

        // Bail out for attribute-less token types BEFORE touching
        // CurrentToken: registering it first and returning here would leave
        // a stale CurrentToken behind in the context.
        if (!$token instanceof HTMLPurifier_Token_Start &&
            !$token instanceof HTMLPurifier_Token_Empty
        ) {
            return;
        }

        // initialize CurrentToken if necessary
        $current_token =& $context->get('CurrentToken', true);
        if (!$current_token) {
            $context->register('CurrentToken', $token);
        }

        // create alias to global definition array, see also $defs
        // DEFINITION CALL
        $d_defs = $definition->info_global_attr;

        // don't update token until the very end, to ensure an atomic update
        $attr = $token->attr;

        // do global transformations (pre)
        // nothing currently utilizes this
        $attr = $this->applyTransforms(
            $definition->info_attr_transform_pre,
            $attr,
            $config,
            $context,
            $e
        );

        // do local transformations only applicable to this element (pre)
        // ex. <p align="right"> to <p style="text-align:right;">
        $attr = $this->applyTransforms(
            $definition->info[$token->name]->attr_transform_pre,
            $attr,
            $config,
            $context,
            $e
        );

        // create alias to this element's attribute definition array, see
        // also $d_defs (global attribute definition array)
        // DEFINITION CALL
        $defs = $definition->info[$token->name]->attr;

        // $attr_key is registered by reference, so the context always sees
        // the attribute currently being validated by the loop below
        $attr_key = false;
        $context->register('CurrentAttr', $attr_key);

        // iterate through all the attribute keypairs
        // Watch out for name collisions: $key has previously been used
        foreach ($attr as $attr_key => $value) {
            // call the definition
            if (isset($defs[$attr_key])) {
                // there is a local definition defined
                if ($defs[$attr_key] === false) {
                    // We've explicitly been told not to allow this attribute.
                    // This is usually when there's a global definition
                    // that must be overridden.
                    // Theoretically speaking, we could have a
                    // AttrDef_DenyAll, but this is faster!
                    $result = false;
                } else {
                    // validate according to the element's definition
                    $result = $defs[$attr_key]->validate(
                        $value,
                        $config,
                        $context
                    );
                }
            } elseif (isset($d_defs[$attr_key])) {
                // there is a global definition defined, validate according
                // to the global definition
                $result = $d_defs[$attr_key]->validate(
                    $value,
                    $config,
                    $context
                );
            } else {
                // system never heard of the attribute? DELETE!
                $result = false;
            }

            // put the results into effect
            if ($result === false || $result === null) {
                // this is a generic error message that should be replaced
                // with more specific ones when possible
                if ($e) {
                    $e->send(E_ERROR, 'AttrValidator: Attribute removed');
                }
                // remove the attribute
                unset($attr[$attr_key]);
            } elseif (is_string($result)) {
                // generally, if a substitution is happening, there
                // was some sort of implicit correction going on. We'll
                // delegate it to the attribute classes to say exactly what.
                // simple substitution
                $attr[$attr_key] = $result;
            }
            // any other result (e.g. true) leaves the attribute as it is

            // we'd also want slightly more complicated substitution
            // involving an array as the return value,
            // although we're not sure how colliding attributes would
            // resolve (certain ones would be completely overridden,
            // others would prepend themselves).
        }

        $context->destroy('CurrentAttr');

        // post transforms
        // global (error reporting untested)
        $attr = $this->applyTransforms(
            $definition->info_attr_transform_post,
            $attr,
            $config,
            $context,
            $e
        );

        // local (error reporting untested)
        $attr = $this->applyTransforms(
            $definition->info[$token->name]->attr_transform_post,
            $attr,
            $config,
            $context,
            $e
        );

        $token->attr = $attr;

        // destroy CurrentToken if we made it ourselves
        if (!$current_token) {
            $context->destroy('CurrentToken');
        }
    }

    /**
     * Runs a list of attribute transforms in order, reporting every
     * transform that changed the attributes to the error collector.
     *
     * @param array $transforms Objects exposing transform($attr, $config, $context)
     * @param array $attr Attribute array to start from
     * @param HTMLPurifier_Config $config Instance of HTMLPurifier_Config
     * @param HTMLPurifier_Context $context Instance of HTMLPurifier_Context
     * @param mixed $e Error collector, or a falsy value when none is active
     * @return array Attribute array after all transforms have run
     */
    private function applyTransforms($transforms, $attr, $config, $context, $e)
    {
        foreach ($transforms as $transform) {
            $before = $attr;
            $attr = $transform->transform($before, $config, $context);
            if ($e && $attr != $before) {
                $e->send(E_NOTICE, 'AttrValidator: Attributes transformed', $before, $attr);
            }
        }
        return $attr;
    }
}
// constants are slow, so we use as few as possible
if (!defined('HTMLPURIFIER_PREFIX')) {
    // library root is the "standalone" directory next to this file; it is
    // also put at the front of the include path
    define('HTMLPURIFIER_PREFIX', dirname(__FILE__) . '/standalone');
    set_include_path(HTMLPURIFIER_PREFIX . PATH_SEPARATOR . get_include_path());
}

// accommodations for versions earlier than 5.0.2
// borrowed from PHP_Compat, LGPL licensed, by Aidan Lister <aidan@php.net>
if (!defined('PHP_EOL')) {
    // choose the line ending from the first three letters of the OS name
    if (strtoupper(substr(PHP_OS, 0, 3)) === 'WIN') {
        define('PHP_EOL', "\r\n");
    } elseif (strtoupper(substr(PHP_OS, 0, 3)) === 'DAR') {
        define('PHP_EOL', "\r");
    } else {
        define('PHP_EOL', "\n");
    }
}
/**
* Bootstrap class that contains meta-functionality for HTML Purifier such as
* the autoload function.
*
* @note
* This class may be used without any other files from HTML Purifier.
*/
class HTMLPurifier_Bootstrap
{
    /**
     * Autoload function for HTML Purifier
     *
     * @param string $class Class to load
     * @return bool True when a file was found for the class and included,
     *              false otherwise
     */
    public static function autoload($class)
    {
        $file = self::getPath($class);
        if ($file) {
            // Technically speaking, it should be ok and more efficient to
            // just do 'require', but Antonio Parraga reports that with
            // Zend extensions such as Zend debugger and APC, this invariant
            // may be broken. Since we have efficient alternatives, pay
            // the cost here and avoid the bug.
            require_once HTMLPURIFIER_PREFIX . '/' . $file;
            return true;
        }
        return false;
    }

    /**
     * Returns the path for a specific class, relative to HTMLPURIFIER_PREFIX.
     *
     * @param string $class Class path to get
     * @return string|bool Relative file path, or false when the class name
     *                     does not start with "HTMLPurifier" or the file
     *                     does not exist
     */
    public static function getPath($class)
    {
        if (strncmp('HTMLPurifier', $class, 12) !== 0) {
            return false;
        }

        // Language classes live in a dedicated directory and use dashes
        // where the class name has underscores; everything else maps
        // underscores to directory separators.
        $isLanguageClass = (strncmp('HTMLPurifier_Language_', $class, 22) === 0);
        if ($isLanguageClass) {
            $languageCode = str_replace('_', '-', substr($class, 22));
            $relative = 'HTMLPurifier/Language/classes/' . $languageCode . '.php';
        } else {
            $relative = str_replace('_', '/', $class) . '.php';
        }

        return file_exists(HTMLPURIFIER_PREFIX . '/' . $relative) ? $relative : false;
    }

    /**
     * "Pre-registers" our autoloader on the SPL stack, i.e. puts it in
     * front of any autoloaders that are already registered.
     */
    public static function registerAutoload()
    {
        $autoload = array('HTMLPurifier_Bootstrap', 'autoload');
        $registered = spl_autoload_functions();

        if ($registered === false) {
            spl_autoload_register($autoload);
            return;
        }
        if (!function_exists('spl_autoload_unregister')) {
            return;
        }
        if (version_compare(PHP_VERSION, '5.3.0', '>=')) {
            // prepend flag exists, no need for shenanigans
            spl_autoload_register($autoload, true, true);
            return;
        }

        // Old PHP: unregister everything, register ours, then put the
        // previous autoloaders back behind it.
        $buggy = version_compare(PHP_VERSION, '5.2.11', '<');
        $compat = version_compare(PHP_VERSION, '5.1.2', '<=') &&
            version_compare(PHP_VERSION, '5.1.0', '>=');
        foreach ($registered as $callback) {
            if ($buggy && is_array($callback)) {
                // :TRICKY: There are some compatibility issues and some
                // places where we need to error out
                $reflector = new ReflectionMethod($callback[0], $callback[1]);
                if (!$reflector->isStatic()) {
                    throw new Exception(
                        'HTML Purifier autoloader registrar is not compatible
with non-static object methods due to PHP Bug #44144;
Please do not use HTMLPurifier.autoload.php (or any
file that includes this file); instead, place the code:
spl_autoload_register(array(\'HTMLPurifier_Bootstrap\', \'autoload\'))
after your own autoloaders.'
                    );
                }
                // Surprisingly, spl_autoload_register supports the
                // Class::staticMethod callback format, although call_user_func doesn't
                if ($compat) {
                    $callback = implode('::', $callback);
                }
            }
            spl_autoload_unregister($callback);
        }
        spl_autoload_register($autoload);
        foreach ($registered as $callback) {
            spl_autoload_register($callback);
        }
    }
}
/**
* Super-class for definition datatype objects, implements serialization
* functions for the class.
*/
abstract class HTMLPurifier_Definition
{
    /**
     * Has setup() been called yet?
     * @type bool
     */
    public $setup = false;

    /**
     * If true, write out the final definition object to the cache after
     * setup. This will be true only if all invocations to get a raw
     * definition object are also optimized. This does not cause file
     * system thrashing because on subsequent calls the cached object
     * is used and any writes to the raw definition object are short
     * circuited. See enduser-customize.html for the high-level
     * picture.
     * @type bool
     */
    public $optimized = null;

    /**
     * What type of definition is it?
     * @type string
     */
    public $type;

    /**
     * Sets up the definition object into the final form, something
     * not done by the constructor
     * @param HTMLPurifier_Config $config
     */
    abstract protected function doSetup($config);

    /**
     * Runs doSetup() the first time it is called and does nothing on any
     * later call. The flag is raised before doSetup() runs.
     *
     * @param HTMLPurifier_Config $config
     */
    public function setup($config)
    {
        if (!$this->setup) {
            $this->setup = true;
            $this->doSetup($config);
        }
    }
}
/**
* Defines allowed CSS attributes and what their values are.
* @see HTMLPurifier_HTMLDefinition
*/
class HTMLPurifier_CSSDefinition extends HTMLPurifier_Definition
{
public $type = 'CSS';
/**
* Assoc array of attribute name to definition object.
* @type HTMLPurifier_AttrDef[]
*/
public $info = array();
/**
 * Populates $this->info with one attribute definition per supported CSS
 * property, then applies the configuration-driven extras.
 *
 * Order of work: register the base properties; add the optional groups
 * when CSS.Proprietary, CSS.AllowTricky or CSS.Trusted are enabled; wrap
 * every registered definition in an ImportantDecorator; finally hand over
 * to setupConfigStuff().
 *
 * Statement order matters here: several shorthand definitions receive
 * $config and are documented below as depending on properties registered
 * earlier.
 *
 * @param HTMLPurifier_Config $config
 */
protected function doSetup($config)
{
// NOTE(review): the trailing `false` passed to many Enum constructors below
// is presumably its case-sensitivity flag -- confirm against
// HTMLPurifier_AttrDef_Enum.
$this->info['text-align'] = new HTMLPurifier_AttrDef_Enum(
array('left', 'right', 'center', 'justify'),
false
);
// A single Enum instance is shared by the four per-side style properties
// and kept in $border_style for the 'border-style' shorthand below.
$border_style =
$this->info['border-bottom-style'] =
$this->info['border-right-style'] =
$this->info['border-left-style'] =
$this->info['border-top-style'] = new HTMLPurifier_AttrDef_Enum(
array(
'none',
'hidden',
'dotted',
'dashed',
'solid',
'double',
'groove',
'ridge',
'inset',
'outset'
),
false
);
$this->info['border-style'] = new HTMLPurifier_AttrDef_CSS_Multiple($border_style);
$this->info['clear'] = new HTMLPurifier_AttrDef_Enum(
array('none', 'left', 'right', 'both'),
false
);
$this->info['float'] = new HTMLPurifier_AttrDef_Enum(
array('none', 'left', 'right'),
false
);
$this->info['font-style'] = new HTMLPurifier_AttrDef_Enum(
array('normal', 'italic', 'oblique'),
false
);
$this->info['font-variant'] = new HTMLPurifier_AttrDef_Enum(
array('normal', 'small-caps'),
false
);
// Either the keyword 'none' or a CSS URI; reused for both
// 'list-style-image' and 'background-image'.
$uri_or_none = new HTMLPurifier_AttrDef_CSS_Composite(
array(
new HTMLPurifier_AttrDef_Enum(array('none')),
new HTMLPurifier_AttrDef_CSS_URI()
)
);
$this->info['list-style-position'] = new HTMLPurifier_AttrDef_Enum(
array('inside', 'outside'),
false
);
$this->info['list-style-type'] = new HTMLPurifier_AttrDef_Enum(
array(
'disc',
'circle',
'square',
'decimal',
'lower-roman',
'upper-roman',
'lower-alpha',
'upper-alpha',
'none'
),
false
);
$this->info['list-style-image'] = $uri_or_none;
$this->info['list-style'] = new HTMLPurifier_AttrDef_CSS_ListStyle($config);
$this->info['text-transform'] = new HTMLPurifier_AttrDef_Enum(
array('capitalize', 'uppercase', 'lowercase', 'none'),
false
);
$this->info['color'] = new HTMLPurifier_AttrDef_CSS_Color();
$this->info['background-image'] = $uri_or_none;
$this->info['background-repeat'] = new HTMLPurifier_AttrDef_Enum(
array('repeat', 'repeat-x', 'repeat-y', 'no-repeat')
);
$this->info['background-attachment'] = new HTMLPurifier_AttrDef_Enum(
array('scroll', 'fixed')
);
$this->info['background-position'] = new HTMLPurifier_AttrDef_CSS_BackgroundPosition();
// 'transparent' or a colour; the same instance serves the four per-side
// border colours and 'background-color', and feeds the 'border-color'
// shorthand through $border_color.
$border_color =
$this->info['border-top-color'] =
$this->info['border-bottom-color'] =
$this->info['border-left-color'] =
$this->info['border-right-color'] =
$this->info['background-color'] = new HTMLPurifier_AttrDef_CSS_Composite(
array(
new HTMLPurifier_AttrDef_Enum(array('transparent')),
new HTMLPurifier_AttrDef_CSS_Color()
)
);
$this->info['background'] = new HTMLPurifier_AttrDef_CSS_Background($config);
$this->info['border-color'] = new HTMLPurifier_AttrDef_CSS_Multiple($border_color);
// A width keyword or a length; shared by the four per-side widths and
// reused for the 'border-width' shorthand.
$border_width =
$this->info['border-top-width'] =
$this->info['border-bottom-width'] =
$this->info['border-left-width'] =
$this->info['border-right-width'] = new HTMLPurifier_AttrDef_CSS_Composite(
array(
new HTMLPurifier_AttrDef_Enum(array('thin', 'medium', 'thick')),
new HTMLPurifier_AttrDef_CSS_Length('0') //disallow negative
)
);
$this->info['border-width'] = new HTMLPurifier_AttrDef_CSS_Multiple($border_width);
$this->info['letter-spacing'] = new HTMLPurifier_AttrDef_CSS_Composite(
array(
new HTMLPurifier_AttrDef_Enum(array('normal')),
new HTMLPurifier_AttrDef_CSS_Length()
)
);
$this->info['word-spacing'] = new HTMLPurifier_AttrDef_CSS_Composite(
array(
new HTMLPurifier_AttrDef_Enum(array('normal')),
new HTMLPurifier_AttrDef_CSS_Length()
)
);
// font-size: a size keyword, a percentage, or a length
$this->info['font-size'] = new HTMLPurifier_AttrDef_CSS_Composite(
array(
new HTMLPurifier_AttrDef_Enum(
array(
'xx-small',
'x-small',
'small',
'medium',
'large',
'x-large',
'xx-large',
'larger',
'smaller'
)
),
new HTMLPurifier_AttrDef_CSS_Percentage(),
new HTMLPurifier_AttrDef_CSS_Length()
)
);
// line-height: 'normal', a plain number, a length, or a percentage
$this->info['line-height'] = new HTMLPurifier_AttrDef_CSS_Composite(
array(
new HTMLPurifier_AttrDef_Enum(array('normal')),
new HTMLPurifier_AttrDef_CSS_Number(true), // no negatives
new HTMLPurifier_AttrDef_CSS_Length('0'),
new HTMLPurifier_AttrDef_CSS_Percentage(true)
)
);
// margins: length, percentage or 'auto'; $margin feeds the shorthand
$margin =
$this->info['margin-top'] =
$this->info['margin-bottom'] =
$this->info['margin-left'] =
$this->info['margin-right'] = new HTMLPurifier_AttrDef_CSS_Composite(
array(
new HTMLPurifier_AttrDef_CSS_Length(),
new HTMLPurifier_AttrDef_CSS_Percentage(),
new HTMLPurifier_AttrDef_Enum(array('auto'))
)
);
$this->info['margin'] = new HTMLPurifier_AttrDef_CSS_Multiple($margin);
// non-negative
$padding =
$this->info['padding-top'] =
$this->info['padding-bottom'] =
$this->info['padding-left'] =
$this->info['padding-right'] = new HTMLPurifier_AttrDef_CSS_Composite(
array(
new HTMLPurifier_AttrDef_CSS_Length('0'),
new HTMLPurifier_AttrDef_CSS_Percentage(true)
)
);
$this->info['padding'] = new HTMLPurifier_AttrDef_CSS_Multiple($padding);
$this->info['text-indent'] = new HTMLPurifier_AttrDef_CSS_Composite(
array(
new HTMLPurifier_AttrDef_CSS_Length(),
new HTMLPurifier_AttrDef_CSS_Percentage()
)
);
// Size definition without an upper bound: length, percentage or 'auto'.
$trusted_wh = new HTMLPurifier_AttrDef_CSS_Composite(
array(
new HTMLPurifier_AttrDef_CSS_Length('0'),
new HTMLPurifier_AttrDef_CSS_Percentage(true),
new HTMLPurifier_AttrDef_Enum(array('auto'))
)
);
// When CSS.MaxImgLength is null all six sizing properties use $trusted_wh
// directly. Otherwise they share one Switch keyed on 'img': its first
// definition caps lengths at $max (and offers no percentage option), its
// second is the unbounded $trusted_wh.
$max = $config->get('CSS.MaxImgLength');
$this->info['min-width'] =
$this->info['max-width'] =
$this->info['min-height'] =
$this->info['max-height'] =
$this->info['width'] =
$this->info['height'] =
$max === null ?
$trusted_wh :
new HTMLPurifier_AttrDef_Switch(
'img',
// For img tags:
new HTMLPurifier_AttrDef_CSS_Composite(
array(
new HTMLPurifier_AttrDef_CSS_Length('0', $max),
new HTMLPurifier_AttrDef_Enum(array('auto'))
)
),
// For everyone else:
$trusted_wh
);
$this->info['text-decoration'] = new HTMLPurifier_AttrDef_CSS_TextDecoration();
$this->info['font-family'] = new HTMLPurifier_AttrDef_CSS_FontFamily();
// this could use specialized code
$this->info['font-weight'] = new HTMLPurifier_AttrDef_Enum(
array(
'normal',
'bold',
'bolder',
'lighter',
'100',
'200',
'300',
'400',
'500',
'600',
'700',
'800',
'900'
),
false
);
// MUST be called after other font properties, as it references
// a CSSDefinition object
$this->info['font'] = new HTMLPurifier_AttrDef_CSS_Font($config);
// same here: the border shorthand must come after the border-* properties;
// one Border instance is shared by all five shorthand keys
$this->info['border'] =
$this->info['border-bottom'] =
$this->info['border-top'] =
$this->info['border-left'] =
$this->info['border-right'] = new HTMLPurifier_AttrDef_CSS_Border($config);
$this->info['border-collapse'] = new HTMLPurifier_AttrDef_Enum(
array('collapse', 'separate')
);
$this->info['caption-side'] = new HTMLPurifier_AttrDef_Enum(
array('top', 'bottom')
);
$this->info['table-layout'] = new HTMLPurifier_AttrDef_Enum(
array('auto', 'fixed')
);
// vertical-align: an alignment keyword, a length, or a percentage
$this->info['vertical-align'] = new HTMLPurifier_AttrDef_CSS_Composite(
array(
new HTMLPurifier_AttrDef_Enum(
array(
'baseline',
'sub',
'super',
'top',
'text-top',
'middle',
'bottom',
'text-bottom'
)
),
new HTMLPurifier_AttrDef_CSS_Length(),
new HTMLPurifier_AttrDef_CSS_Percentage()
)
);
// NOTE(review): the second argument (2) is presumably the maximum number of
// values Multiple accepts -- confirm against HTMLPurifier_AttrDef_CSS_Multiple.
$this->info['border-spacing'] = new HTMLPurifier_AttrDef_CSS_Multiple(new HTMLPurifier_AttrDef_CSS_Length(), 2);
// These CSS properties don't work on many browsers, but we live
// in THE FUTURE!
$this->info['white-space'] = new HTMLPurifier_AttrDef_Enum(
array('nowrap', 'normal', 'pre', 'pre-wrap', 'pre-line')
);
// Optional property groups, each gated by its own CSS.* directive.
if ($config->get('CSS.Proprietary')) {
$this->doSetupProprietary($config);
}
if ($config->get('CSS.AllowTricky')) {
$this->doSetupTricky($config);
}
if ($config->get('CSS.Trusted')) {
$this->doSetupTrusted($config);
}
$allow_important = $config->get('CSS.AllowImportant');
// wrap all attr-defs with decorator that handles !important; this runs
// after the optional groups so their entries are wrapped as well
foreach ($this->info as $k => $v) {
$this->info[$k] = new HTMLPurifier_AttrDef_CSS_ImportantDecorator($v, $allow_important);
}
// apply the CSS.AllowedProperties / CSS.ForbiddenProperties filters last
$this->setupConfigStuff($config);
}
/**
 * Registers the vendor-specific / CSS3 properties. doSetup() calls this
 * only when CSS.Proprietary is enabled.
 *
 * @param HTMLPurifier_Config $config Not read by this method.
 */
protected function doSetupProprietary($config)
{
    // Internet Explorer only scrollbar colors; each property gets its own
    // Color instance
    $scrollbar_properties = array(
        'scrollbar-arrow-color',
        'scrollbar-base-color',
        'scrollbar-darkshadow-color',
        'scrollbar-face-color',
        'scrollbar-highlight-color',
        'scrollbar-shadow-color',
    );
    foreach ($scrollbar_properties as $property) {
        $this->info[$property] = new HTMLPurifier_AttrDef_CSS_Color();
    }

    // vendor specific prefixes of opacity
    foreach (array('-moz-opacity', '-khtml-opacity') as $property) {
        $this->info[$property] = new HTMLPurifier_AttrDef_CSS_AlphaValue();
    }

    // only opacity, for now
    $this->info['filter'] = new HTMLPurifier_AttrDef_CSS_Filter();

    // more CSS3: both page-break-after and page-break-before share one Enum
    $page_break_outer = new HTMLPurifier_AttrDef_Enum(
        array('auto', 'always', 'avoid', 'left', 'right')
    );
    $this->info['page-break-after'] =
    $this->info['page-break-before'] = $page_break_outer;

    $page_break_inner = new HTMLPurifier_AttrDef_Enum(array('auto', 'avoid'));
    $this->info['page-break-inside'] = $page_break_inner;

    // a single radius component: non-negative percentage or length
    $radius_component = new HTMLPurifier_AttrDef_CSS_Composite(
        array(
            new HTMLPurifier_AttrDef_CSS_Percentage(true), // disallow negative
            new HTMLPurifier_AttrDef_CSS_Length('0'), // disallow negative
        )
    );

    // the four corner properties share one Multiple built with a limit of 2
    $corner_radius = new HTMLPurifier_AttrDef_CSS_Multiple($radius_component, 2);
    $this->info['border-top-left-radius'] =
    $this->info['border-top-right-radius'] =
    $this->info['border-bottom-right-radius'] =
    $this->info['border-bottom-left-radius'] = $corner_radius;

    // TODO: support SLASH syntax
    $this->info['border-radius'] = new HTMLPurifier_AttrDef_CSS_Multiple($radius_component, 4);
}
/**
 * Registers display, visibility, overflow and opacity. doSetup() calls
 * this only when CSS.AllowTricky is enabled.
 *
 * @param HTMLPurifier_Config $config Not read by this method.
 */
protected function doSetupTricky($config)
{
    $display_values = array(
        'inline',
        'block',
        'list-item',
        'run-in',
        'compact',
        'marker',
        'table',
        'inline-block',
        'inline-table',
        'table-row-group',
        'table-header-group',
        'table-footer-group',
        'table-row',
        'table-column-group',
        'table-column',
        'table-cell',
        'table-caption',
        'none',
    );
    $visibility_values = array('visible', 'hidden', 'collapse');
    $overflow_values = array('visible', 'hidden', 'auto', 'scroll');

    $this->info['display'] = new HTMLPurifier_AttrDef_Enum($display_values);
    $this->info['visibility'] = new HTMLPurifier_AttrDef_Enum($visibility_values);
    $this->info['overflow'] = new HTMLPurifier_AttrDef_Enum($overflow_values);
    $this->info['opacity'] = new HTMLPurifier_AttrDef_CSS_AlphaValue();
}
/**
 * Registers the positioning properties (position, top/left/right/bottom
 * and z-index). doSetup() calls this only when CSS.Trusted is enabled.
 *
 * @param HTMLPurifier_Config $config Not read by this method.
 */
protected function doSetupTrusted($config)
{
    $position_values = array('static', 'relative', 'absolute', 'fixed');
    $this->info['position'] = new HTMLPurifier_AttrDef_Enum($position_values);

    // an offset is a length, a percentage, or 'auto'; the four edge
    // properties all point at the same definition object
    $edge_offset = new HTMLPurifier_AttrDef_CSS_Composite(
        array(
            new HTMLPurifier_AttrDef_CSS_Length(),
            new HTMLPurifier_AttrDef_CSS_Percentage(),
            new HTMLPurifier_AttrDef_Enum(array('auto')),
        )
    );
    $this->info['top'] =
    $this->info['left'] =
    $this->info['right'] =
    $this->info['bottom'] = $edge_offset;

    // z-index is an integer or 'auto'
    $stack_order = new HTMLPurifier_AttrDef_CSS_Composite(
        array(
            new HTMLPurifier_AttrDef_Integer(),
            new HTMLPurifier_AttrDef_Enum(array('auto')),
        )
    );
    $this->info['z-index'] = $stack_order;
}
/**
 * Filters $this->info according to CSS.AllowedProperties and
 * CSS.ForbiddenProperties. Based off of HTMLPurifier_HTMLDefinition.
 *
 * With an allow-list, every registered property missing from it is
 * dropped, and every allow-list entry that matches no registered property
 * raises an E_USER_WARNING. With a forbid-list, listed properties are
 * dropped. Both directives are treated as lookups keyed by property name.
 *
 * @param HTMLPurifier_Config $config
 * @todo Refactor duplicate elements into common class (probably using
 *       composition, not inheritance).
 */
protected function setupConfigStuff($config)
{
    // suffix appended to the "not supported" warning
    $support = "(for information on implementing this, see the support forums) ";

    $allowed_properties = $config->get('CSS.AllowedProperties');
    if ($allowed_properties !== null) {
        foreach (array_keys($this->info) as $property) {
            if (!isset($allowed_properties[$property])) {
                unset($this->info[$property]);
            }
            // tick the entry off; whatever survives this loop is unknown
            unset($allowed_properties[$property]);
        }
        // emit errors
        foreach (array_keys($allowed_properties) as $unknown) {
            // :TODO: Is this htmlspecialchars() call really necessary?
            $name = htmlspecialchars($unknown);
            trigger_error("Style attribute '$name' is not supported $support", E_USER_WARNING);
        }
    }

    $forbidden_properties = $config->get('CSS.ForbiddenProperties');
    if ($forbidden_properties === null) {
        return;
    }
    foreach (array_keys($this->info) as $property) {
        if (isset($forbidden_properties[$property])) {
            unset($this->info[$property]);
        }
    }
}
}
/**
 * Describes which child nodes an element may contain and validates a list
 * of children against that description.
 */
abstract class HTMLPurifier_ChildDef
{
    /**
     * Type of child definition, usually right-most part of class name
     * lowercase. Used occasionally in terms of context.
     * @type string
     */
    public $type;

    /**
     * Indicates whether or not an empty array of children is okay.
     *
     * This is necessary for redundant checking when changes affecting
     * a child node may cause a parent node to now be disallowed.
     * @type bool
     */
    public $allow_empty;

    /**
     * Lookup array of all elements that this definition could possibly
     * allow.
     * @type array
     */
    public $elements = array();

    /**
     * Get lookup of tag names that should not close this element
     * automatically. All other elements will do so.
     *
     * This base implementation simply hands back $this->elements and does
     * not consult $config.
     *
     * @param HTMLPurifier_Config $config HTMLPurifier_Config object
     * @return array
     */
    public function getAllowedElements($config)
    {
        $lookup = $this->elements;
        return $lookup;
    }

    /**
     * Validates nodes according to definition and returns modification.
     *
     * @param HTMLPurifier_Node[] $children Array of HTMLPurifier_Node
     * @param HTMLPurifier_Config $config HTMLPurifier_Config object
     * @param HTMLPurifier_Context $context HTMLPurifier_Context object
     * @return bool|array true to leave nodes as is, false to remove parent
     *                    node, array of replacement children
     */
    abstract public function validateChildren($children, $config, $context);
}
/**
* Configuration object that triggers customizable behavior.
*
* @warning This class is strongly defined: that means that the class
* will fail if an undefined directive is retrieved or set.
*
* @note Many classes that could (although many times don't) use the
* configuration object make it a mandatory parameter. This is
* because a configuration object should always be forwarded,
* otherwise, you run the risk of missing a parameter and then
* being stumped when a configuration directive doesn't work.
*
* @todo Reconsider some of the public member variables
*/
class HTMLPurifier_Config
{
/**
* HTML Purifier's version
* @type string
*/
public $version = '4.9.2';
/**
* Whether or not to automatically finalize
* the object if a read operation is done.
* @type bool
*/
public $autoFinalize = true;
// protected member variables
/**
* Namespace indexed array of serials for specific namespaces.
* @see getSerial() for more info.
* @type string[]
*/
protected $serials = array();
/**
* Serial for entire configuration object.
* @type string
*/
protected $serial;
/**
* Parser for variables.
* @type HTMLPurifier_VarParser_Flexible
*/
protected $parser = null;
/**
* Reference HTMLPurifier_ConfigSchema for value checking.
* @type HTMLPurifier_ConfigSchema
* @note This is public for introspective purposes. Please don't
* abuse!
*/
public $def;
/**
* Indexed array of definitions.
* @type HTMLPurifier_Definition[]
*/
protected $definitions;
/**
* Whether or not config is finalized.
* @type bool
*/
protected $finalized = false;
/**
* Property list containing configuration directives.
* @type array
*/
protected $plist;
/**
* Whether or not a set is taking place due to an alias lookup.
* @type bool
*/
private $aliasMode;
/**
* Set to false if you do not want line and file numbers in errors.
* (useful when unit testing). This will also compress some errors
* and exceptions.
* @type bool
*/
public $chatty = true;
/**
* Current lock; only gets to this namespace are allowed.
* @type string
*/
private $lock;
/**
 * Builds a configuration bound to a schema.
 *
 * @param HTMLPurifier_ConfigSchema $definition ConfigSchema that defines
 *      what directives are allowed.
 * @param HTMLPurifier_PropertyList $parent Property list to inherit values
 *      from; when omitted (or otherwise falsy) the schema's defaultPlist
 *      is used.
 */
public function __construct($definition, $parent = null)
{
    if (!$parent) {
        $parent = $definition->defaultPlist;
    }
    $this->plist = new HTMLPurifier_PropertyList($parent);
    // the schema is retained so get()/set() can check directives against it
    $this->def = $definition;
    $this->parser = new HTMLPurifier_VarParser_Flexible();
}
/**
 * Convenience constructor that creates a config object based on a mixed var
 *
 * @param mixed $config Variable that defines the state of the config
 *      object. Can be: a HTMLPurifier_Config() object (returned as-is),
 *      an array of directives based on loadArray(),
 *      or a string filename of an ini file. Any other value yields an
 *      object with no directives loaded.
 * @param HTMLPurifier_ConfigSchema $schema Schema object; when falsy the
 *      object comes from createDefault().
 * @return HTMLPurifier_Config Configured object
 */
public static function create($config, $schema = null)
{
    // an existing config object is passed straight through
    if ($config instanceof HTMLPurifier_Config) {
        return $config;
    }

    if ($schema) {
        $ret = new HTMLPurifier_Config($schema);
    } else {
        $ret = HTMLPurifier_Config::createDefault();
    }

    if (is_string($config)) {
        $ret->loadIni($config);
    } elseif (is_array($config)) {
        $ret->loadArray($config);
    }

    return $ret;
}
/**
 * Creates a new config object that inherits from a previous one: it is
 * built from the given object's schema, with that object's property list
 * passed as the parent.
 *
 * @param HTMLPurifier_Config $config Configuration object to inherit from.
 * @return HTMLPurifier_Config object with $config as its parent.
 */
public static function inherit(HTMLPurifier_Config $config)
{
    $schema = $config->def;
    $parent_plist = $config->plist;
    return new HTMLPurifier_Config($schema, $parent_plist);
}
/**
 * Convenience constructor that creates a configuration object from the
 * schema returned by HTMLPurifier_ConfigSchema::instance().
 *
 * @return HTMLPurifier_Config default object.
 */
public static function createDefault()
{
    return new HTMLPurifier_Config(HTMLPurifier_ConfigSchema::instance());
}
/**
 * Retrieves a value from the configuration.
 *
 * Problems are reported through triggerError() and make the method return
 * null: an undefined directive (E_USER_WARNING), an aliased directive
 * (E_USER_ERROR), or a directive outside the currently locked namespace
 * (E_USER_ERROR).
 *
 * @param string $key String key, in "Namespace.Directive" form
 * @param mixed $a Deprecated: directive name when $key holds only the
 *      namespace. Using it raises an E_USER_WARNING.
 *
 * @return mixed
 */
public function get($key, $a = null)
{
    // legacy two-argument form: fold it into a dotted key
    if ($a !== null) {
        $this->triggerError(
            "Using deprecated API: use \$config->get('$key.$a') instead",
            E_USER_WARNING
        );
        $key = $key . '.' . $a;
    }

    if (!$this->finalized) {
        $this->autoFinalize();
    }

    if (!isset($this->def->info[$key])) {
        // can't add % due to SimpleTest bug
        $this->triggerError(
            'Cannot retrieve value of undefined directive ' . htmlspecialchars($key),
            E_USER_WARNING
        );
        return null;
    }

    $directive = $this->def->info[$key];
    if (isset($directive->isAlias)) {
        $this->triggerError(
            'Cannot get value from aliased directive, use real name ' . $directive->key,
            E_USER_ERROR
        );
        return null;
    }

    // while a lock is active only directives of that namespace may be read
    if ($this->lock) {
        $pieces = explode('.', $key);
        $ns = $pieces[0];
        if ($ns !== $this->lock) {
            $this->triggerError(
                'Cannot get value of namespace ' . $ns . ' when lock for ' .
                $this->lock .
                ' is active, this probably indicates a Definition setup method ' .
                'is accessing directives that are not within its namespace',
                E_USER_ERROR
            );
            return null;
        }
    }

    return $this->plist->get($key);
}
/**
 * Retrieves an array of directives to values from a given namespace
 *
 * @param string $namespace String namespace
 *
 * @return array|null Directive => value map, or null (after an
 *      E_USER_WARNING) when getAll() has no such namespace.
 */
public function getBatch($namespace)
{
    if (!$this->finalized) {
        $this->autoFinalize();
    }

    $everything = $this->getAll();
    if (isset($everything[$namespace])) {
        return $everything[$namespace];
    }

    $this->triggerError(
        'Cannot retrieve undefined namespace ' .
        htmlspecialchars($namespace),
        E_USER_WARNING
    );
    return null;
}
/**
 * Returns a SHA-1 signature of a segment of the configuration object
 * that uniquely identifies that particular configuration. The result is
 * memoised per namespace in $this->serials.
 *
 * @param string $namespace Namespace to get serial for
 *
 * @return string
 * @note Revision is handled specially and is removed from the batch
 *       before processing!
 */
public function getBatchSerial($namespace)
{
    if (!empty($this->serials[$namespace])) {
        return $this->serials[$namespace];
    }

    $batch = $this->getBatch($namespace);
    // the revision number must not influence the signature
    unset($batch['DefinitionRev']);
    $signature = sha1(serialize($batch));
    $this->serials[$namespace] = $signature;

    return $this->serials[$namespace];
}
/**
 * Returns a SHA-1 signature for the entire configuration object
 * that uniquely identifies that particular configuration. The hash is
 * computed from getAll() on first use and memoised in $this->serial.
 *
 * @return string
 */
public function getSerial()
{
    if (!empty($this->serial)) {
        return $this->serial;
    }
    $this->serial = sha1(serialize($this->getAll()));
    return $this->serial;
}
/**
 * Retrieves all directives, organized by namespace: the flat
 * "Namespace.Directive" keys of the squashed property list are split on
 * the first dot into a two-level array.
 *
 * @warning This is a pretty inefficient function, avoid if you can
 *
 * @return array Namespace => (directive => value)
 */
public function getAll()
{
    if (!$this->finalized) {
        $this->autoFinalize();
    }

    $grouped = array();
    foreach ($this->plist->squash() as $name => $value) {
        $pieces = explode('.', $name, 2);
        $grouped[$pieces[0]][$pieces[1]] = $value;
    }
    return $grouped;
}
/**
 * Sets a value to configuration.
 *
 * The value is parsed to the directive's declared type, value aliases are
 * resolved and, where the schema lists allowed values, membership is
 * checked. Every failure is reported through triggerError() and leaves the
 * configuration untouched. Setting a directive through an alias stores the
 * value under the real name and raises an E_USER_NOTICE.
 *
 * @param string $key key, in "Namespace.Directive" form
 * @param mixed $value value
 * @param mixed $a Deprecated: when $key contains no dot the call is read
 *      as set($namespace, $directive, $value) and this is the value.
 */
public function set($key, $value, $a = null)
{
    if (strpos($key, '.') === false) {
        // deprecated three-argument form: shift the arguments into place
        $namespace = $key;
        $directive = $value;
        $value = $a;
        $key = "$key.$directive";
        $this->triggerError("Using deprecated API: use \$config->set('$key', ...) instead", E_USER_NOTICE);
    } else {
        list($namespace) = explode('.', $key);
    }
    if ($this->isFinalized('Cannot set directive after finalization')) {
        return;
    }
    if (!isset($this->def->info[$key])) {
        $this->triggerError(
            'Cannot set undefined directive ' . htmlspecialchars($key) . ' to value',
            E_USER_WARNING
        );
        return;
    }
    $def = $this->def->info[$key];

    if (isset($def->isAlias)) {
        // aliasMode is raised around the recursive call below, so finding
        // it already raised means the alias target is itself an alias
        if ($this->aliasMode) {
            $this->triggerError(
                'Double-aliases not allowed, please fix ' .
                'ConfigSchema bug with ' . $key,
                E_USER_ERROR
            );
            return;
        }
        $this->aliasMode = true;
        $this->set($def->key, $value);
        $this->aliasMode = false;
        $this->triggerError("$key is an alias, preferred directive name is {$def->key}", E_USER_NOTICE);
        return;
    }

    // Raw type might be negative when using the fully optimized form
    // of stdclass, which indicates allow_null == true
    $rtype = is_int($def) ? $def : $def->type;
    if ($rtype < 0) {
        $type = -$rtype;
        $allow_null = true;
    } else {
        $type = $rtype;
        $allow_null = isset($def->allow_null);
    }

    try {
        $value = $this->parser->parse($value, $type, $allow_null);
    } catch (HTMLPurifier_VarParserException $e) {
        $this->triggerError(
            'Value for ' . $key . ' is of invalid type, should be ' .
            HTMLPurifier_VarParser::getTypeName($type),
            E_USER_WARNING
        );
        return;
    }
    if (is_string($value) && is_object($def)) {
        // resolve value alias if defined
        if (isset($def->aliases[$value])) {
            $value = $def->aliases[$value];
        }
        // check to see if the value is allowed
        if (isset($def->allowed) && !isset($def->allowed[$value])) {
            $this->triggerError(
                'Value not supported, valid values are: ' .
                $this->_listify($def->allowed),
                E_USER_WARNING
            );
            return;
        }
    }
    $this->plist->set($key, $value);

    // reset definitions if the directives they depend on changed
    // this is a very costly process, so it's discouraged
    // with finalization
    if ($namespace == 'HTML' || $namespace == 'CSS' || $namespace == 'URI') {
        $this->definitions[$namespace] = null;
    }

    // Drop both cached signatures: the per-namespace one read by
    // getBatchSerial() and the whole-config one memoised by getSerial(),
    // which would otherwise keep describing the previous values.
    $this->serials[$namespace] = false;
    $this->serial = null;
}
/**
 * Convenience function for error reporting: joins the keys of a lookup
 * array into a comma-separated string.
 *
 * @param array $lookup Lookup array; only its keys are used
 *
 * @return string
 */
private function _listify($lookup)
{
    return implode(', ', array_keys($lookup));
}
/**
 * Retrieves object reference to the HTML definition; a thin wrapper
 * around getDefinition('HTML', ...).
 *
 * @param bool $raw Return a copy that has not been setup yet. Must be
 *             called before it's been setup, otherwise won't work.
 * @param bool $optimized If true, this method may return null, to
 *             indicate that a cached version of the modified
 *             definition object is available and no further edits
 *             are necessary. Consider using
 *             maybeGetRawHTMLDefinition, which is more explicitly
 *             named, instead.
 *
 * @return HTMLPurifier_HTMLDefinition|null
 */
public function getHTMLDefinition($raw = false, $optimized = false)
{
    $definition = $this->getDefinition('HTML', $raw, $optimized);
    return $definition;
}
/**
 * Retrieves object reference to the CSS definition; a thin wrapper
 * around getDefinition('CSS', ...).
 *
 * @param bool $raw Return a copy that has not been setup yet. Must be
 *             called before it's been setup, otherwise won't work.
 * @param bool $optimized If true, this method may return null, to
 *             indicate that a cached version of the modified
 *             definition object is available and no further edits
 *             are necessary. Consider using
 *             maybeGetRawCSSDefinition, which is more explicitly
 *             named, instead.
 *
 * @return HTMLPurifier_CSSDefinition|null
 */
public function getCSSDefinition($raw = false, $optimized = false)
{
    $definition = $this->getDefinition('CSS', $raw, $optimized);
    return $definition;
}
/**
 * Retrieves object reference to the URI definition; a thin wrapper
 * around getDefinition('URI', ...).
 *
 * @param bool $raw Return a copy that has not been setup yet. Must be
 *             called before it's been setup, otherwise won't work.
 * @param bool $optimized If true, this method may return null, to
 *             indicate that a cached version of the modified
 *             definition object is available and no further edits
 *             are necessary. Consider using
 *             maybeGetRawURIDefinition, which is more explicitly
 *             named, instead.
 *
 * @return HTMLPurifier_URIDefinition|null
 */
public function getURIDefinition($raw = false, $optimized = false)
{
    $definition = $this->getDefinition('URI', $raw, $optimized);
    return $definition;
}
/**
 * Retrieves a definition, creating it on demand.
 *
 * With $raw false the fully set-up definition is returned, taken from
 * memory, then from the definition cache, and otherwise freshly built,
 * set up and added to the cache. With $raw true an un-setup definition is
 * returned for customisation; in combination with $optimized the result
 * is null when a finished definition is already available, meaning no
 * edits are needed.
 *
 * @param string $type Type of definition: HTML, CSS, etc
 * @param bool $raw Whether or not definition should be returned raw
 * @param bool $optimized Only has an effect when $raw is true. Whether
 *        or not to return null if the result is already present in
 *        the cache. This is off by default for backwards
 *        compatibility reasons, but you need to do things this
 *        way in order to ensure that caching is done properly.
 *        Check out enduser-customize.html for more details.
 *        We probably won't ever change this default, as much as the
 *        maybe semantics is the "right thing to do."
 *
 * @throws HTMLPurifier_Exception When $optimized is set without $raw, when
 *         an optimized raw definition is requested without
 *         %<type>.DefinitionID, when a raw definition is requested after
 *         setup, or when optimized and unoptimized raw retrievals are
 *         mixed.
 * @return HTMLPurifier_Definition|null
 */
public function getDefinition($type, $raw = false, $optimized = false)
{
    if ($optimized && !$raw) {
        throw new HTMLPurifier_Exception("Cannot set optimized = true when raw = false");
    }
    if (!$this->finalized) {
        $this->autoFinalize();
    }
    // temporarily suspend locks, so we can handle recursive definition calls
    $lock = $this->lock;
    $this->lock = null;
    $factory = HTMLPurifier_DefinitionCacheFactory::instance();
    $cache = $factory->create($type, $this);
    $this->lock = $lock;
    if (!$raw) {
        // full definition
        // ---------------
        // check if definition is in memory
        if (!empty($this->definitions[$type])) {
            $def = $this->definitions[$type];
            // check if the definition is setup
            if ($def->setup) {
                return $def;
            } else {
                // a raw definition was handed out earlier; finish it now
                $def->setup($this);
                if ($def->optimized) {
                    $cache->add($def, $this);
                }
                return $def;
            }
        }
        // check if definition is in cache
        $def = $cache->get($this);
        if ($def) {
            // definition in cache, save to memory and return it
            $this->definitions[$type] = $def;
            return $def;
        }
        // initialize it
        $def = $this->initDefinition($type);
        // set it up; while the lock is held, get() rejects directives
        // from any other namespace
        $this->lock = $type;
        $def->setup($this);
        $this->lock = null;
        // save in cache
        $cache->add($def, $this);
        // return it
        return $def;
    } else {
        // raw definition
        // --------------
        // check preconditions
        $def = null;
        if ($optimized) {
            if (is_null($this->get($type . '.DefinitionID'))) {
                // fatally error out if definition ID not set
                throw new HTMLPurifier_Exception(
                    "Cannot retrieve raw version without specifying %$type.DefinitionID"
                );
            }
        }
        if (!empty($this->definitions[$type])) {
            $def = $this->definitions[$type];
            if ($def->setup && !$optimized) {
                $extra = $this->chatty ?
                    " (try moving this code block earlier in your initialization)" :
                    "";
                throw new HTMLPurifier_Exception(
                    "Cannot retrieve raw definition after it has already been setup" .
                    $extra
                );
            }
            if ($def->optimized === null) {
                $extra = $this->chatty ? " (try flushing your cache)" : "";
                throw new HTMLPurifier_Exception(
                    "Optimization status of definition is unknown" . $extra
                );
            }
            if ($def->optimized !== $optimized) {
                $msg = $optimized ? "optimized" : "unoptimized";
                $extra = $this->chatty ?
                    " (this backtrace is for the first inconsistent call, which was for a $msg raw definition)"
                    : "";
                throw new HTMLPurifier_Exception(
                    "Inconsistent use of optimized and unoptimized raw definition retrievals" . $extra
                );
            }
        }
        // check if definition was in memory
        if ($def) {
            if ($def->setup) {
                // invariant: $optimized === true (checked above)
                return null;
            } else {
                return $def;
            }
        }
        // if optimized, check if definition was in cache
        // (because we do the memory check first, this formulation
        // is prone to cache slamming, but I think
        // guaranteeing that either /all/ of the raw
        // setup code or /none/ of it is run is more important.)
        if ($optimized) {
            // This code path only gets run once; once we put
            // something in $definitions (which is guaranteed by the
            // trailing code), we always short-circuit above.
            $def = $cache->get($this);
            if ($def) {
                // save the full definition for later, but don't
                // return it yet
                $this->definitions[$type] = $def;
                return null;
            }
        }
        // check invariants for creation
        if (!$optimized) {
            if (!is_null($this->get($type . '.DefinitionID'))) {
                if ($this->chatty) {
                    // The directive names are built in a double-quoted
                    // string so that $type is substituted; in single
                    // quotes the literal text "$type" would be shown.
                    $this->triggerError(
                        'Due to a documentation error in previous version of HTML Purifier, your ' .
                        'definitions are not being cached. If this is OK, you can remove the ' .
                        "%$type.DefinitionRev and %$type.DefinitionID declaration. Otherwise, " .
                        'modify your code to use maybeGetRawDefinition, and test if the returned ' .
                        'value is null before making any edits (if it is null, that means that a ' .
                        'cached version is available, and no raw operations are necessary). See ' .
                        '<a href="http://htmlpurifier.org/docs/enduser-customize.html#optimized">' .
                        'Customize</a> for more details',
                        E_USER_WARNING
                    );
                } else {
                    $this->triggerError(
                        "Useless DefinitionID declaration",
                        E_USER_WARNING
                    );
                }
            }
        }
        // initialize it
        $def = $this->initDefinition($type);
        $def->optimized = $optimized;
        return $def;
    }
}
/**
 * Initialise definition: instantiates the definition class that matches
 * $type and records it in $this->definitions under that type.
 *
 * @param string $type What type of definition to create
 *
 * @return HTMLPurifier_CSSDefinition|HTMLPurifier_HTMLDefinition|HTMLPurifier_URIDefinition
 * @throws HTMLPurifier_Exception When $type is none of HTML, CSS or URI.
 */
private function initDefinition($type)
{
    // quick checks failed, let's create the object
    switch ($type) {
        case 'HTML':
            $created = new HTMLPurifier_HTMLDefinition();
            break;
        case 'CSS':
            $created = new HTMLPurifier_CSSDefinition();
            break;
        case 'URI':
            $created = new HTMLPurifier_URIDefinition();
            break;
        default:
            throw new HTMLPurifier_Exception(
                "Definition of $type type not supported"
            );
    }
    $this->definitions[$type] = $created;
    return $created;
}
/**
 * Requests the raw definition of the named type in optimized mode, i.e.
 * getDefinition($name, true, true).
 *
 * @param string $name Type of definition: HTML, CSS, etc
 *
 * @return HTMLPurifier_Definition|null
 */
public function maybeGetRawDefinition($name)
{
    $raw = true;
    $optimized = true;
    return $this->getDefinition($name, $raw, $optimized);
}
/**
 * Requests the raw HTML definition in optimized mode, i.e.
 * getDefinition('HTML', true, true).
 *
 * @return HTMLPurifier_HTMLDefinition|null
 */
public function maybeGetRawHTMLDefinition()
{
    $raw = true;
    $optimized = true;
    return $this->getDefinition('HTML', $raw, $optimized);
}
/**
 * Requests the raw CSS definition in optimized mode, i.e.
 * getDefinition('CSS', true, true).
 *
 * @return HTMLPurifier_CSSDefinition|null
 */
public function maybeGetRawCSSDefinition()
{
    $raw = true;
    $optimized = true;
    return $this->getDefinition('CSS', $raw, $optimized);
}
/**
 * Requests the raw URI definition in optimized mode, i.e.
 * getDefinition('URI', true, true).
 *
 * @return HTMLPurifier_URIDefinition|null
 */
public function maybeGetRawURIDefinition()
{
    $raw = true;
    $optimized = true;
    return $this->getDefinition('URI', $raw, $optimized);
}
/**
 * Loads configuration values from an array. Two entry shapes are
 * accepted, and underscores in top-level keys are first turned into dots:
 *      Namespace.Directive => Value
 *      Namespace => array(Directive => Value, ...)
 *
 * Nothing is loaded once the configuration has been finalized.
 *
 * @param array $config_array Configuration associative array
 */
public function loadArray($config_array)
{
    if ($this->isFinalized('Cannot load directives after finalization')) {
        return;
    }
    foreach ($config_array as $key => $value) {
        $key = str_replace('_', '.', $key);
        if (strpos($key, '.') === false) {
            // bare namespace: $value maps directive names to values
            foreach ($value as $directive => $directive_value) {
                $this->set($key . '.' . $directive, $directive_value);
            }
            continue;
        }
        $this->set($key, $value);
    }
}
/**
 * Returns a list of array(namespace, directive) for all directives
 * that are allowed in a web-form context as per an allowed
 * namespaces/directives list.
 *
 * Aliased directives and the DefinitionID / DefinitionRev directives are
 * always left out.
 *
 * @param array|string|bool $allowed true for no restriction; otherwise a
 *      list (or single string) of entries. An entry without a dot allows
 *      a whole namespace, "Ns.Directive" allows one directive and
 *      "-Ns.Directive" excludes one directive.
 * @param HTMLPurifier_ConfigSchema $schema Schema to use, if not global copy
 *
 * @return array
 */
public static function getAllowedDirectivesForForm($allowed, $schema = null)
{
    if (!$schema) {
        $schema = HTMLPurifier_ConfigSchema::instance();
    }

    $allowed_ns = array();
    $allowed_directives = array();
    $blacklisted_directives = array();
    if ($allowed !== true) {
        if (is_string($allowed)) {
            $allowed = array($allowed);
        }
        foreach ($allowed as $entry) {
            if (strpos($entry, '.') === false) {
                // namespace
                $allowed_ns[$entry] = true;
                continue;
            }
            // directive; a leading dash marks an exclusion
            if ($entry[0] == '-') {
                $blacklisted_directives[substr($entry, 1)] = true;
            } else {
                $allowed_directives[$entry] = true;
            }
        }
    }

    $ret = array();
    foreach ($schema->info as $key => $def) {
        list($ns, $directive) = explode('.', $key, 2);
        $full_name = "$ns.$directive";
        if ($allowed !== true) {
            if (isset($blacklisted_directives[$full_name])) {
                continue;
            }
            $permitted = isset($allowed_directives[$full_name]) || isset($allowed_ns[$ns]);
            if (!$permitted) {
                continue;
            }
        }
        if (isset($def->isAlias)) {
            continue;
        }
        if ($directive == 'DefinitionID' || $directive == 'DefinitionRev') {
            continue;
        }
        $ret[] = array($ns, $directive);
    }
    return $ret;
}
/**
* Loads configuration values from $_GET/$_POST that were posted
* via ConfigForm
*
* @param array $array $_GET or $_POST array to import
* @param string|bool $index Index/name that the config variables are in
* @param array|bool $allowed List of allowed namespaces/directives
* @param bool $mq_fix Boolean whether or not to enable magic quotes fix
* @param HTMLPurifier_ConfigSchema $schema Schema to use, if not global copy
*
* @return mixed
*/
public static function loadArrayFromForm($array, $index = false, $allowed = true, $mq_fix = true, $schema = null)
{
    // Filter the submitted form data first, then build a configuration
    // object from the filtered array using the same schema.
    return HTMLPurifier_Config::create(
        HTMLPurifier_Config::prepareArrayFromForm($array, $index, $allowed, $mq_fix, $schema),
        $schema
    );
}
/**
* Merges in configuration values from $_GET/$_POST to object. NOT STATIC.
*
* @param array $array $_GET or $_POST array to import
* @param string|bool $index Index/name that the config variables are in
* @param array|bool $allowed List of allowed namespaces/directives
* @param bool $mq_fix Boolean whether or not to enable magic quotes fix
*/
public function mergeArrayFromForm($array, $index = false, $allowed = true, $mq_fix = true)
{
    // Filter the submitted form data, passing $this->def in the schema
    // position, and apply the result to this object through loadArray().
    $this->loadArray(
        HTMLPurifier_Config::prepareArrayFromForm($array, $index, $allowed, $mq_fix, $this->def)
    );
}
/**
* Prepares an array from a form into something usable for the more
* strict parts of HTMLPurifier_Config
*
* @param array $array $_GET or $_POST array to import
* @param string|bool $index Index/name that the config variables are in
* @param array|bool $allowed List of allowed namespaces/directives
* @param bool $mq_fix Boolean whether or not to enable magic quotes fix
* @param HTMLPurifier_ConfigSchema $schema Schema to use, if not global copy
*
* @return array
*/
public static function prepareArrayFromForm($array, $index = false, $allowed = true, $mq_fix = true, $schema = null)
{
    // When an index is given, the directives live in a sub-array of the
    // request data; anything missing or non-array is treated as empty.
    if ($index !== false) {
        $array = (isset($array[$index]) && is_array($array[$index])) ? $array[$index] : array();
    }
    // get_magic_quotes_gpc() always returns false as of PHP 5.4, raises
    // E_DEPRECATED on PHP 7.4 and is removed in PHP 8.0. Only consult it
    // on versions where magic quotes can actually be enabled, so newer
    // runtimes do not emit a deprecation notice for a no-op call.
    $mq = $mq_fix
        && version_compare(PHP_VERSION, '5.4.0', '<')
        && function_exists('get_magic_quotes_gpc')
        && get_magic_quotes_gpc();
    // Resolve the allow-list into concrete array(namespace, directive) pairs.
    $allowed = HTMLPurifier_Config::getAllowedDirectivesForForm($allowed, $schema);
    $ret = array();
    foreach ($allowed as $key) {
        list($ns, $directive) = $key;
        $skey = "$ns.$directive";
        // A non-empty "Null_<Namespace.Directive>" field forces the
        // directive to null and takes precedence over any posted value.
        if (!empty($array["Null_$skey"])) {
            $ret[$ns][$directive] = null;
            continue;
        }
        // Directives absent from the submission are left out of the result.
        if (!isset($array[$skey])) {
            continue;
        }
        $value = $mq ? stripslashes($array[$skey]) : $array[$skey];
        $ret[$ns][$directive] = $value;
    }
    return $ret;
}
/**
* Loads configuration values from an ini file
*
* @param string $filename Name of ini file
*/
public function loadIni($filename)
{
    // Refuse to load anything once finalized; isFinalized() raises the
    // supplied error message itself in that case.
    if ($this->isFinalized('Cannot load directives after finalization')) {
        return;
    }
    // Second argument true: sections are kept, giving a nested
    // section => (key => value) array for loadArray().
    $array = parse_ini_file($filename, true);
    // parse_ini_file() returns false on failure. Passing that on would
    // only make loadArray() iterate over a non-array and raise a second,
    // misleading warning, so stop here with nothing loaded.
    if ($array === false) {
        return;
    }
    $this->loadArray($array);
}
/**
* Checks whether or not the configuration object is finalized.
*
* @param string|bool $error String error message, or false for no error
*
* @return bool
*/
public function isFinalized($error = false)
{
    // Only complain when the object is finalized AND the caller supplied
    // a (truthy) error message to report.
    if ($this->finalized) {
        if ($error) {
            $this->triggerError($error, E_USER_ERROR);
        }
    }
    return $this->finalized;
}
/**
* Finalizes the configuration if auto finalize is on; otherwise
* calls squash(true) on the property list and leaves the
* configuration unfinalized.
*/
public function autoFinalize()
{
    // With auto finalize off, the object stays unfinalized and only the
    // property list is squashed.
    if (!$this->autoFinalize) {
        $this->plist->squash(true);
        return;
    }
    $this->finalize();
}
/**
* Finalizes a configuration object, prohibiting further change
*/
public function finalize()
{
    // Drop the parser reference and set the finalized flag, which
    // isFinalized() reports.
    $this->parser = null;
    $this->finalized = true;
}
/**
* Produces a nicely formatted error message by supplying the
* stack frame information OUTSIDE of HTMLPurifier_Config.
*
* @param string $msg An error message
* @param int $no An error number
*/
protected function triggerError($msg, $no)
{
    // Suffix describing the call site; stays empty when chatty mode is
    // off or no suitable frame is found.
    $extra = '';
    if ($this->chatty) {
        $trace = debug_backtrace();
        // Walk frames pairwise: frame $i holds the file/line of a call,
        // frame $i + 1 tells which class' method that call was made from.
        // zip(tail(trace), trace) -- but PHP is not Haskell har har
        for ($i = 0, $c = count($trace); $i < $c - 1; $i++) {
            // XXX this is not correct on some versions of HTML Purifier
            // Frames for plain functions and includes carry no 'class'
            // key, so check for it before comparing; otherwise the error
            // path itself raises an undefined-index notice.
            if (isset($trace[$i + 1]['class']) && $trace[$i + 1]['class'] === 'HTMLPurifier_Config') {
                // Call made from inside HTMLPurifier_Config: keep looking.
                continue;
            }
            // First call that originates outside HTMLPurifier_Config.
            $frame = $trace[$i];
            $extra = " invoked on line {$frame['line']} in file {$frame['file']}";
            break;
        }
    }
    trigger_error($msg . $extra, $no);
}
/**
* Returns a serialized form of the configuration object that can
* be reconstituted.
*
* @return string
*/
public function serialize()
{
    // Request the HTML, CSS and URI definitions (in that order) before
    // serializing -- presumably so they are built and stored on the
    // object first; confirm against getDefinition().
    foreach (array('HTML', 'CSS', 'URI') as $type) {
        $this->getDefinition($type);
    }
    return serialize($this);
}
}
/**
* Configuration definition, defines directives and their defaults.
*/
class HTMLPurifier_ConfigSchema
{
/**
* Defaults of the directives and namespaces.
* @type array
* @note This shares the exact same structure as HTMLPurifier_Config::$conf
*/
public $defaults = array();
/**
* The default property list. Do not edit this property list.
* @type array
*/
public $defaultPlist;
/**
* Definition of the directives.
* The structure of this is: