Skip to content

Instantly share code, notes, and snippets.

@carbonphyber
Last active March 31, 2021 05:36
Show Gist options
  • Save carbonphyber/5087186 to your computer and use it in GitHub Desktop.
Save carbonphyber/5087186 to your computer and use it in GitHub Desktop.
BBCode parser replacement candidate for GaiaOnline
<?php
/**
* \DJW\BBCode
* A BBCode translation class
* Uses OWASP cheatsheet to prevent XSS
* @see https://www.owasp.org/index.php/XSS_Filter_Evasion_Cheat_Sheet
*
* @author CarbonPhyber
*/
namespace DJW;
/**
 * Debug switch read by dump(): while it is truthy, dump() prints its argument.
 */
define('DEBUG', TRUE);

/**
 * dump
 * Troubleshooting helper. Echoes a print_r() rendering of $input, passed through
 * HTML::entities(), inside a monospace <div> that preserves whitespace.
 * Emits nothing when DEBUG is falsy.
 * @param (mixed) $input The value to display
 */
function dump($input) {
    if (DEBUG) {
        $escaped_dump = HTML::entities(print_r($input, TRUE));
        echo '<div style="white-space:pre;font-family:monospace;">' . $escaped_dump . '</div>' . "\n";
    }
}
/**
* BBCode
*
* Usage:
* &lt;pre&gt;
* &lt;code&gt;
* $user_input = 'And the killer is...[spoiler="my spoiler"]your alternate personality![/spoiler]';
* DJW\BBCode::toHTML($user_input);
* &lt;/code&gt;
* &lt;/pre&gt;
*/
class BBCode {
/**
* Parser constants.
* POLARITY_* mark a tag node as an open tag or a close tag.
* TYPE_* mark a tag-stack node as a span of raw input text (TYPE_TEXT), a BBCode tag (TYPE_TAG),
* or an already-rendered HTML fragment (TYPE_RENDERED).
* @todo: Change values to integers for production
*/
const POLARITY_OPEN = 'OPEN';
const POLARITY_CLOSE = 'CLOSED';
const TYPE_TEXT = 'TYPE_TEXT';
const TYPE_TAG = 'TYPE_TAG';
const TYPE_RENDERED = 'TYPE_RENDERED';
// Outbound redirector: translateTag() only routes [url] links through OUTBOUND_URI when BOTH of the next two constants are set
const OUTBOUND_URI = NULL;//'/outbound';
const OUTBOUND_QUERYPARAM = NULL;//'url';
// NOTE(review): not referenced by the methods visible in this part of the file; presumably an affiliate tag -- confirm
const AMAZON_ASSOCIATE_ID = NULL;//'daviwort-20';
// The static properties below are treated as constants. DO NOT MODIFY THESE AFTER self::init() RUNS.
/**
* $NOINNERRENDER_TAGS
* A list of the tags which should not render any other tags inside of themselves
* NOTE(review): the methods visible in this part of the file use their own hard-coded tag lists instead of this one -- confirm usage
* @access protected
* @static
*/
protected static $NOINNERRENDER_TAGS = array('code', 'img', 'imgleft', 'imgright');
/**
* $BLOCK_LEVEL_TAGS
* A list of all block-level tags we support (merged into $ALL_TAGS by init())
* @access protected
* @static
*/
protected static $BLOCK_LEVEL_TAGS = array('url', 'code', 'quote', 'list', 'center', 'right', 'left', 'justify', 'align', 'spoiler', 'indent');
/**
* $INLINE_LEVEL_TAGS
* A list of all inline-level tags we support (merged into $ALL_TAGS by init())
* @access protected
* @static
*/
protected static $INLINE_LEVEL_TAGS = array('i', 'b', 'strike', 's', 'u', 'codein', 'img', 'imgleft', 'imgright', '*', 'size', 'color', 'hilite');
/**
* $ALL_TAGS
* A list of all tags we support (populated by the first run of "init()").
* init() also uses "is this non-empty?" as its has-already-run check.
* @access protected
* @static
*/
protected static $ALL_TAGS = NULL;
/**
* $NESTED_CLOSE
* For tags which _must_ be inside of other tags. Keys are the child/descendant, values are the parent/ancestor.
* toHtml() ignores such a child tag unless an open ancestor tag is on its tag stack.
* @access protected
* @static
*/
protected static $NESTED_CLOSE = array('*' => 'list');
/**
* $UNNESTABLE_BLOCK_TAGS
* These tags cannot be nested within one another, despite being block level
* @todo: actually enforce this
* @access protected
* @static
*/
protected static $UNNESTABLE_BLOCK_TAGS = array('*', 'url');
/**
* $LONGEST_TAG_NAME_LEN
* Max-length memory so we can prevent looking too far ahead when sniffing the next tag name.
* init() raises this to the character length of the longest entry in $ALL_TAGS.
* @access protected
* @static
*/
protected static $LONGEST_TAG_NAME_LEN = 1;
/**
* $EMOTICONS
* A map of all emoticons: the text typed by the user => the emoticon name used in the rendered CSS class.
* Empty until init() fills it.
* @access protected
* @static
*/
protected static $EMOTICONS = array();
/**
* $EMOTICON_PREG_PATTERN
* A regex pattern that will match any key of $EMOTICONS.
* Declared with an array default, but init() replaces it with a pattern STRING.
* @access protected
* @static
*/
protected static $EMOTICON_PREG_PATTERN = array();
/**
* $LIST_MAP
* A map of all supported [list] parameters => the CSS list-style-type each one selects.
* Empty until init() fills it.
* @access protected
* @static
*/
protected static $LIST_MAP = array();
/**
* $LIST_PREG_PATTERN
* A regex pattern that will match any $LIST_MAP key (or the empty string).
* Declared with an array default, but init() replaces it with a pattern STRING.
* @access protected
* @static
*/
protected static $LIST_PREG_PATTERN = array();
/**
 * init
 * One-time setup of the static lookup tables used by the parser:
 *  - $ALL_TAGS              block-level + inline-level tag names
 *  - $LONGEST_TAG_NAME_LEN  character length of the longest tag name
 *  - $EMOTICONS             typed emoticon text => emoticon name
 *  - $EMOTICON_PREG_PATTERN pattern matching any emoticon key
 *  - $LIST_MAP              [list] parameter => CSS list-style-type
 *  - $LIST_PREG_PATTERN     anchored pattern matching any $LIST_MAP key or the empty string
 * Subsequent calls return immediately.
 * @access protected
 * @static
 */
protected static function init() {
    // only run this function once. it's a setup
    if(!empty(self::$ALL_TAGS)) return;

    self::$ALL_TAGS = array_merge(self::$BLOCK_LEVEL_TAGS, self::$INLINE_LEVEL_TAGS);

    // find out the string length of the longest tag name; used to prevent long lookaheads during parsing
    foreach(self::$ALL_TAGS as $known_tag) {
        self::$LONGEST_TAG_NAME_LEN = max(self::$LONGEST_TAG_NAME_LEN, mb_strlen($known_tag));
    }

    // ORDER MATTERS: the keys become a regex alternation, which takes the first alternative
    // that matches, so a key that is a prefix of another (":?" vs ":?:", ":x" vs ":xp:")
    // must be listed AFTER the longer one.
    self::$EMOTICONS = array(
        // CAT EMOTES
        ':cat_neutral:' => 'cat_neutral',
        ':cat_twisted:' => 'cat_twisted',
        ':cat_mrgreen:' => 'cat_mrgreen',
        ':cat_burning:' => 'cat_burning_eyes',
        ':cat_pirate:' => 'cat_pirate',
        ':cat_cheese:' => 'cat_cheese_whine',
        ':cat_scream:' => 'cat_scream',
        ':cat_heart:' => 'cat_heart',
        ':cat_sweat:' => 'cat_sweatdrop',
        ':cat_arrow:' => 'cat_arrow',
        ':cat_ninja:' => 'cat_ninja',
        ':cat_shock:' => 'cat_eek',
        ':cat_aie:' => 'cat_surprised',
        ':cat_smile:' => 'cat_smile',
        ':cat_stare:' => 'cat_stare',
        ':cat_oops:' => 'cat_redface',
        ':cat_rofl:' => 'cat_rofl',
        ':cat_cool:' => 'cat_cool',
        ':cat_roll:' => 'cat_rolleyes',
        ':cat_gonk:' => 'cat_gonk',
        ':cat_evil:' => 'cat_evil',
        ':cat_razz:' => 'cat_razz',
        ':cat_idea:' => 'cat_idea',
        ':cat_biggrin:' => 'cat_biggrin',
        ':cat_cute:' => 'cat_4laugh',
        ':cat_whee:' => 'cat_whee',
        ':cat_3nod:' => 'cat_3nodding',
        ':cat_vein:' => 'cat_stressed',
        ':cat_wink:' => 'cat_wink',
        ':cat_lol:' => 'cat_lol',
        ':cat_emo:' => 'cat_emo',
        ':cat_sad:' => 'cat_sad',
        ':cat_big:' => 'cat_blaugh',
        ':cat_crying:' => 'cat_crying',
        ':cat_cry:' => 'cat_cry',
        ':cat_XD:' => 'cat_xd',
        ':cat_mad:' => 'cat_mad',
        ':cat_???:' => 'cat_confused',
        ':cat_eek:' => 'cat_surprised',
        ':cat_xp:' => 'cat_xp',
        ':cat_!:' => 'cat_exclaim',
        ':cat_?:' => 'cat_question',
        ':cat_talk2hand:' => 'cat_talk2hand',
        // Yummies emotes
        ':burger:' => 'yum_burger',
        ':cupcake:' => 'yum_cupcake',
        ':donut:' => 'yum_donut',
        ':hotdog:' => 'yum_hotdog',
        ':onigiri:' => 'yum_onigiri',
        ':pie:' => 'yum_pie',
        ':pizza:' => 'yum_pizza',
        ':puddi:' => 'yum_puddi',
        ':strawberry:' => 'yum_strawberry',
        ':tea:' => 'yum_tea',
        ':bacon:' => 'yum_bacon',
        ':coldone:' => 'yum_coldone',
        ':icecreampie:' => 'yum_icecreampie',
        ':pumpkinpie:' => 'yum_pumpkinpie',
        ':salmon:' => 'yum_salmon',
        ':sausage:' => 'yum_sausage',
        ':shrimp:' => 'yum_shrimp',
        ':strawberrypie:' => 'yum_strawberrypie',
        ':tamago:' => 'yum_tamago',
        ':tuna:' => 'yum_tuna',
        ':wasabipie:' => 'yum_wasabipie',
        // Emotion emotes
        ':awesome:' => 'emotion_awesome',
        ':dealwithit:' => 'emotion_dealwithit',
        ':facepalm:' => 'emotion_facepalm',
        ':bigheart:' => 'emotion_bigheart',
        ':jawdrop:' => 'emotion_jawdrop',
        ':kirakira:' => 'emotion_kirakira',
        ':omnomnom:' => 'emotion_omnomnom',
        ':puke:' => 'emotion_puke',
        ':sweatdrop:' => 'emotion_sweatdrop',
        ':bigvein:' => 'emotion_bigvein',
        ':yatta:' => 'emotion_yatta',
        ':0a0:' => 'emotion_0A0',
        ':8c:' => 'emotion_8c',
        ':bandaid:' => 'emotion_bandaid',
        ':brofist:' => 'emotion_brofist',
        ':c8:' => 'emotion_c8',
        ':donotwant:' => 'emotion_donotwant',
        ':dowant:' => 'emotion_dowant',
        ':drool:' => 'emotion_drool',
        ':eyebrow:' => 'emotion_eyebrow',
        ':hug:' => 'emotion_hug',
        ':zombie:' => 'emotion_zombie',
        ':zzz:' => 'emotion_zzz',
        // Gaia emotes
        ':angelleft:' => 'gaia_angelleft',
        ':angelright:' => 'gaia_angelright',
        ':crown:' => 'gaia_crown',
        ':gaiagold:' => 'gaia_gaiagold',
        ':nitemareleft:' => 'gaia_nitemareleft',
        ':nitemareright:' => 'gaia_nitemareright',
        ':spoons:' => 'gaia_spoons',
        ':star:' => 'gaia_star',
        ':kittenstar:' => 'gaia_kittenstar',
        ':diamond:' => 'gaia_diamond',
        // Secret emotes
        ':diedrich:' => 'classified_diedrich',
        ':fu:' => 'classified_fu',
        ':jet:' => 'classified_jet',
        ':mcsourface:' => 'classified_mcsourface',
        ':northkittenstar:' => 'classified_northkittenstar',
        ':poo:' => 'classified_poo',
        ':reve:' => 'classified_reve',
        ':bapecow:' => 'classified_bapecow',
        ':fullmoon:' => 'classified_fullmoon',
        ':omnomcoco:' => 'classified_omnomcoco',
        ':omnomgrunny:' => 'classified_omnomgrunny',
        ':omnomkiki:' => 'classified_omnomkiki',
        ':starseed:' => 'classified_starseed',
        // Regular emotes
        ':wahmbulance:' => 'wahmbulance',
        ':dramallama:' => 'dramallama',
        ':talk2hand:' => 'talk2hand',
        ':neutral:' => 'neutral',
        ':twisted:' => 'twisted',
        ':mrgreen:' => 'mrgreen',
        ':burning:' => 'burning_eyes',
        ':pirate:' => 'pirate',
        ':cheese:' => 'cheese_whine',
        ':scream:' => 'scream',
        ':heart:' => 'heart',
        ':sweat:' => 'sweatdrop',
        ':arrow:' => 'arrow',
        ':ninja:' => 'ninja',
        ':shock:' => 'eek',
        ':smile:' => 'smile',
        ':stare:' => 'stare',
        ':oops:' => 'redface',
        ':rofl:' => 'rofl',
        ':cool:' => 'cool',
        ':roll:' => 'rolleyes',
        ':gonk:' => 'gonk',
        ':evil:' => 'evil',
        ':razz:' => 'razz',
        ':idea:' => 'idea',
        ':grin:' => 'biggrin',
        ':cute:' => '4laugh',
        ':whee:' => 'whee',
        ':3nod:' => '3nodding',
        ':domo:' => 'domokun',
        ':vein:' => 'stressed',
        ':wink:' => 'wink',
        ':lol:' => 'lol',
        ':emo:' => 'emo',
        ':sad:' => 'sad',
        ':big:' => 'blaugh',
        ':cry:' => 'cry',
        ':mad:' => 'mad',
        ':???:' => 'confused',
        ':eek:' => 'surprised',
        ':xp:' => 'xp',
        ':tab:' => 'tab',
        // classic ASCII smilies
        ':-)' => 'smile',
        ':?:' => 'question',
        ':-|' => 'neutral',
        ':-D' => 'biggrin',
        ':!:' => 'exclaim',
        'T_T' => 'crying',
        ':-x' => 'mad',
        ':-P' => 'razz',
        ':-(' => 'sad',
        ';-)' => 'wink',
        ':XD' => 'xd',
        ':-?' => 'confused',
        ':-o' => 'surprised',
        '8-)' => 'cool',
        ':D' => 'biggrin',
        ':x' => 'mad',
        ';)' => 'wink',
        ':(' => 'sad',
        ':P' => 'razz',
        ':|' => 'neutral',
        ':?' => 'confused',
        ':o' => 'surprised',
        ':)' => 'smile',
    );

    // Quote every key for use between "#" delimiters. Numeric-looking keys come back from
    // array_keys() as integers, hence the cast.
    $quote_for_pattern = function($key) {
        return preg_quote((string) $key, '#');
    };

    // The pattern used to be wrapped in BOTH "#" and "/" ("#/a|b|c/#"), which made the first
    // alternative require a leading "/" and the last one (":)") require a trailing "/".
    self::$EMOTICON_PREG_PATTERN = '#' . implode('|', array_map($quote_for_pattern, array_keys(self::$EMOTICONS))) . '#';

    self::$LIST_MAP = array(
        '01' => 'decimal-leading-zero',
        '1' => 'decimal', // this MUST be the second because the key is ctype_digit
        'a' => 'lower-alpha',
        'A' => 'upper-alpha',
        'i' => 'lower-roman',
        'I' => 'upper-roman',
        'alpha' => 'lower-greek',
        'α' => 'lower-greek',
        'א' => 'hebrew',
        'あ' => 'hiragana',
        'い' => 'hiragana-iroha',
        'ア' => 'katakana',
        'イ' => 'katakana-iroha',
        '一' => 'cjk-ideographic',
    );

    // Anchored match of one LIST_MAP key or the empty string. Only the "u" (UTF-8) modifier is
    // kept: "e" is meaningful to preg_replace() alone and is rejected as an unknown modifier
    // on PHP 7+, which made every match against this pattern fail.
    self::$LIST_PREG_PATTERN = '#^(|' . implode('|', array_map($quote_for_pattern, array_keys(self::$LIST_MAP))) . ')$#u';
}
/**
 * toHtml
 * Parse a BBCode string and convert it into HTML.
 *
 * The input is scanned left to right for "[...]" candidates. Accepted tags and the text
 * between them are pushed onto a stack of nodes:
 *  - TYPE_TEXT     a [begin, end) character range of $input, escaped when rendered
 *  - TYPE_TAG      an open tag waiting for its close tag
 *  - TYPE_RENDERED finished HTML
 * A close tag collapses the stack down to its nearest matching open tag into one
 * TYPE_RENDERED node, auto-closing any other tags opened in between. Close tags without
 * a matching open tag, unknown tags and tags with invalid parameters stay literal text.
 * Tags still open at the end of the input are closed around everything that follows them.
 *
 * All positions are character offsets (mb_* functions), not byte offsets.
 *
 * @param (string) $input The BBCoded input text we want to translate to HTML
 * @param (assoc array) $config Normalised to an array but not consulted by the parser
 * @return (string) The rendered HTML
 * @access public
 * @static
 */
public static function toHtml($input, $config=NULL) {
    self::init();
    if(empty($config) || !is_array($config)) $config = array();
    $input = self::unicodeSanitize($input);
    // standardize line endings ("\r\n" first so it does not turn into two newlines)
    $input = str_replace(array("\r\n", "\r"), "\n", $input);
    // inject URL tags around lines that are nothing but a URL. The closing newline is only
    // looked at, not consumed, so that it can also open the match for a URL on the next line.
    $input = preg_replace("@\n[ \t\r\n]*(https?://[^ \t\r\n]+)[ \t\r\n]*(?=\n)@i", "\n[url]$1[/url]", $input);
    $output_text = array();
    $cursor = 0;
    $prev_cursor = 0;
    $tag_stack = array();
    $input_len = mb_strlen($input);
    // find the next "[" ...
    while($cursor < $input_len && FALSE !== ($open_tag_open_pos = mb_strpos($input, '[', $cursor))) {
        // where scanning resumes when this bracket turns out not to be a usable tag
        $skip_to = $open_tag_open_pos + 1;
        // it's only a valid tag if it is possibly closed
        if(FALSE === ($open_tag_close_pos = mb_strpos($input, ']', $open_tag_open_pos))) {
            // no "]" after this "[" means no later "[" can be closed either
            break;
        }
        $is_open_tag = 0 !== strcmp('/', mb_substr($input, $open_tag_open_pos + 1, 1));
        // the name starts after "[" for open tags and after "[/" for close tags
        $name_begin = $open_tag_open_pos + ($is_open_tag ? 1 : 2);
        $param = NULL;
        $poss_tag_name = mb_substr($input, $name_begin, $open_tag_close_pos - $name_begin);
        // check to see if there is a parameter ("=" before the first "]")
        if(FALSE !== ($open_tag_param_separator_pos = mb_strpos($input, '=', $open_tag_open_pos)) && $open_tag_param_separator_pos < $open_tag_close_pos) {
            $poss_tag_name = mb_substr($input, $name_begin, $open_tag_param_separator_pos - $name_begin);
            $char_after_separator = mb_substr($input, $open_tag_param_separator_pos + 1, 1);
            $is_quoted_param = in_array($char_after_separator, array('"', "'"), TRUE);
            $param_delineator = $is_quoted_param ? $char_after_separator : ']';
            if(FALSE === ($param_ends_pos = mb_strpos($input, $param_delineator, $open_tag_param_separator_pos + 2))) {
                $cursor = $skip_to;
                continue;
            }
            // handle parameters delineated by single-quotes, double-quotes, or no delineation
            if($is_quoted_param) {
                $param = mb_substr($input, $open_tag_param_separator_pos + 2, $param_ends_pos - ($open_tag_param_separator_pos + 2));
                // the tag ends at the first "]" AFTER the closing quote; searching from the start
                // of the parameter would cut the tag short at a "]" inside the quotes
                if(FALSE === ($open_tag_close_pos = mb_strpos($input, ']', $param_ends_pos + 1))) {
                    $cursor = $skip_to;
                    continue;
                }
            } else {
                $param = mb_substr($input, $open_tag_param_separator_pos + 1, $param_ends_pos - ($open_tag_param_separator_pos + 1));
            }
        }
        // $poss_tag_name never contains "=" here: it was cut at the first "=" above
        $tag_name = mb_strtolower($poss_tag_name);
        // if it's not a recognized tag, skip this tag
        // if the tag is a close-tag and a param was found, skip this tag
        if(!in_array($tag_name, self::$ALL_TAGS, TRUE) || (!empty($param) && !$is_open_tag)) {
            $cursor = $skip_to;
            continue;
        }
        if($is_open_tag) {
            // [url] and [img*] without a parameter take it from their inner text; that is validated further down
            $param_comes_from_inner_text = empty($param) && in_array($tag_name, array('url', 'img', 'imgleft', 'imgright'), TRUE);
            // isValidParamForTag() receives $param by reference and may normalise it
            if(!$param_comes_from_inner_text && !self::isValidParamForTag($tag_name, $param)) {
                $cursor = $skip_to;
                continue;
            }
        }
        // if there is text before the found tag
        if($prev_cursor < $open_tag_open_pos) {
            $tagstack_last_index = count($tag_stack) - 1;
            if($tagstack_last_index >= 0 && self::TYPE_TEXT === $tag_stack[$tagstack_last_index]['type']) {
                // the top of the stack is a TYPE_TEXT: just extend the 'end' of its substring
                $tag_stack[$tagstack_last_index]['end'] = $open_tag_open_pos;
            } else {
                // create a new TYPE_TEXT node and push it on the $tag_stack
                array_push($tag_stack, array(
                    'type' => self::TYPE_TEXT,
                    'begin' => $prev_cursor,
                    'end' => $open_tag_open_pos,
                ));
            }
        }
        // deal with the complexities of close-tags
        if(!$is_open_tag) {
            $has_matching_tag = FALSE;
            for($i = count($tag_stack) - 1; $i >= 0; $i--) {
                $this_node = $tag_stack[$i];
                if(self::TYPE_TAG === $this_node['type'] && 0 === strcmp($this_node['tag'], $tag_name)) {
                    $has_matching_tag = TRUE;
                    break;
                }
            }
            if(!$has_matching_tag) {
                // no open tag to close: the close tag stays literal and becomes the start of the next text run
                $prev_cursor = $open_tag_open_pos;
                $cursor = $skip_to;
                continue;
            }
            // pop nodes until the matching open tag has been rendered
            $inner_text = array();
            $done = FALSE;
            while(!$done) {
                $last_node = array_pop($tag_stack);
                if(self::TYPE_TAG === $last_node['type']) {
                    // either the open tag we are looking for, or a different tag that was left
                    // open inside it; both are rendered as {open tag, collected inner text, close tag}
                    $done = ($tag_name === $last_node['tag']);
                    $closing_node = array(
                        'type' => self::TYPE_TAG,
                        'tag' => $last_node['tag'],
                        'polarity' => self::POLARITY_CLOSE,
                    );
                    array_push($tag_stack, array(
                        'type' => self::TYPE_RENDERED,
                        'text' => self::translate($input, $last_node) . implode('', $inner_text) . self::translate($input, $closing_node),
                    ));
                    // an auto-closed tag goes back on the stack as TYPE_RENDERED and is picked up
                    // again as inner text on the next pass
                    $inner_text = array();
                } else { // TYPE_TEXT or TYPE_RENDERED
                    array_unshift($inner_text, self::translate($input, $last_node));
                }
            }
            $prev_cursor = $cursor = $open_tag_close_pos + 1;
            continue;
        }
        // from here on the tag is an open tag
        $next_node = array(
            'type' => self::TYPE_TAG,
            'tag' => $tag_name,
            'param' => $param,
            'polarity' => self::POLARITY_OPEN,
        );
        if(in_array($tag_name, array('code', 'codein', 'url', 'img', 'imgleft', 'imgright'), TRUE)) {
            // these tags treat their inner text specially, which requires knowing where they close
            if(FALSE !== ($close_tag_open_pos = mb_stripos($input, '[/' . $tag_name . ']', $open_tag_close_pos))) {
                $inner_begin = $open_tag_close_pos + 1;
                $inner_raw = mb_substr($input, $inner_begin, $close_tag_open_pos - $inner_begin);
                switch($tag_name) {
                    case 'img':
                    case 'imgleft':
                    case 'imgright':
                        if(empty($next_node['param'])) {
                            // use the inner text as the parameter
                            $param = $inner_raw;
                            if(!self::isValidParamForTag($tag_name, $param)) {
                                // inner text is not a valid URL for IMG
                                $cursor = $skip_to;
                                continue 2;
                            }
                            $next_node['param'] = $param;
                            array_push($tag_stack, $next_node);
                            // resume at the close tag; the inner text is consumed and not rendered
                            $prev_cursor = $cursor = $close_tag_open_pos;
                            continue 2; // inside a switch a bare "continue" acts like "break"; "continue 2" targets the scanning loop
                        }
                        // IMG tag must have EITHER an open-tag parameter or an inner-text parameter, but not both
                        if($inner_begin < $close_tag_open_pos) {
                            $cursor = $skip_to;
                            continue 2;
                        }
                        break;
                    case 'url':
                        if(empty($param)) {
                            // the inner text is both the link target and the link label
                            $param = $inner_raw;
                            if(!self::isValidParamForTag($tag_name, $param)) {
                                // inner text is not a valid URL for URL tag
                                $cursor = $skip_to;
                                continue 2;
                            }
                            $next_node['param'] = $param;
                            array_push($tag_stack, $next_node);
                            array_push($tag_stack, array(
                                'type' => self::TYPE_TEXT,
                                'begin' => $inner_begin,
                                'end' => $close_tag_open_pos,
                            ));
                            $prev_cursor = $cursor = $close_tag_open_pos;
                            continue 2;
                        }
                        break;
                    case 'code':
                    case 'codein':
                        array_push($tag_stack, $next_node);
                        array_push($tag_stack, array(
                            'type' => self::TYPE_RENDERED,
                            // inner text of CODE tags should be raw text (no smilies or other tags)
                            'text' => self::translateText($inner_raw, FALSE),
                        ));
                        $prev_cursor = $cursor = $close_tag_open_pos;
                        continue 2;
                }
            }
        // if this is a tag that must be nested under another specific tag
        } else if(in_array($tag_name, array_keys(static::$NESTED_CLOSE))) {
            $ancestor_tag_required = static::$NESTED_CLOSE[$tag_name];
            $is_required_ancestor_found = FALSE;
            foreach($tag_stack as $this_node) {
                if(self::TYPE_TAG === $this_node['type'] && self::POLARITY_OPEN === $this_node['polarity'] && 0 === strcmp($ancestor_tag_required, $this_node['tag'])) {
                    $is_required_ancestor_found = TRUE;
                    break;
                }
            }
            if(!$is_required_ancestor_found) {
                $cursor = $skip_to;
                continue;
            }
        }
        array_push($tag_stack, $next_node);
        $prev_cursor = $cursor = $open_tag_close_pos + 1;
    }
    // text after the last accepted tag
    if($input_len > $prev_cursor) {
        $tagstack_last_index = count($tag_stack) - 1;
        if($tagstack_last_index >= 0 && self::TYPE_TEXT === $tag_stack[$tagstack_last_index]['type']) {
            // re-use the previous TYPE_TEXT node
            $tag_stack[$tagstack_last_index]['end'] = $input_len;
        } else {
            // create a new TYPE_TEXT node
            array_push($tag_stack, array(
                'type' => self::TYPE_TEXT,
                'begin' => $prev_cursor,
                'end' => $input_len,
            ));
        }
    }
    // unwind the stack from the end of the document towards its start
    while(NULL !== ($this_node = array_pop($tag_stack))) {
        if(self::TYPE_TAG === $this_node['type']) {
            // a tag that was never closed: wrap it around everything rendered after it
            $close_node = array(
                'type' => self::TYPE_TAG,
                'tag' => $this_node['tag'],
                'polarity' => self::POLARITY_CLOSE,
            );
            $output_text = array(
                self::translate($input, $this_node) . implode('', $output_text) . self::translate($input, $close_node),
            );
        } else if(self::TYPE_TEXT === $this_node['type']) {
            array_unshift($output_text, self::translate($input, $this_node));
        } else if(self::TYPE_RENDERED === $this_node['type']) {
            array_unshift($output_text, $this_node['text']);
        }
    }
    return implode('', $output_text);
}
/**
 * translate
 * Turn one tag-stack node into HTML.
 *  - TYPE_RENDERED nodes already hold HTML in 'text' and are returned as-is
 *  - TYPE_TAG nodes are handed to translateTag()
 *  - TYPE_TEXT nodes are cut out of $input ('begin' up to, not including, 'end';
 *    character offsets) and passed through translateText()
 * @param (string) $input The full input passed into toHtml (taken by reference)
 * @param (array) $node The node which we want to translate
 * @return (string|NULL) The translation of the $node; NULL for any other node type
 * @access public
 * @static
 */
public static function translate(&$input, $node) {
    $node_type = $node['type'];
    if(self::TYPE_RENDERED === $node_type) {
        return $node['text'];
    }
    if(self::TYPE_TAG === $node_type) {
        return self::translateTag($node);
    }
    if(self::TYPE_TEXT !== $node_type) {
        return NULL;
    }
    $span_length = $node['end'] - $node['begin'];
    return self::translateText(mb_substr($input, $node['begin'], $span_length));
}
/**
 * translateTag
 * Render one side (open or close) of a BBCode tag as HTML.
 * @param (array) $tag A tag node with the keys:
 *                     'tag'      (string) lower-case tag name
 *                     'polarity' (string) self::POLARITY_OPEN or self::POLARITY_CLOSE
 *                     'param'    (string, optional) the tag parameter
 * @return (string|NULL) The HTML for that side of the tag; NULL when the tag name is not handled
 * @access public
 * @static
 */
public static function translateTag($tag) {
    // this method is public: make sure self::$LIST_MAP is populated even when it is called directly
    self::init();
    $tag_name = $tag['tag'];
    $is_open = (self::POLARITY_OPEN === $tag['polarity']);
    $param = isset($tag['param']) ? $tag['param'] : NULL;
    $translated_content = NULL;
    switch($tag_name) {
        case 'url':
            $attributes = '';
            if(!empty($param)) {
                $the_href = self::cleanLinkUrl(trim($param));
                // outbound redirector URL... only used if these two constants are defined
                if(self::OUTBOUND_URI && self::OUTBOUND_QUERYPARAM) {
                    $the_href = self::OUTBOUND_URI . '?' . http_build_query(array(self::OUTBOUND_QUERYPARAM => $the_href));
                }
                $attributes = ' href="' . self::htmlentities($the_href) . '"';
            }
            $translated_content = $is_open ? '<a' . $attributes . ' class="bbcode-tag-url">' : '</a>';
            break;
        case 'code':
            $translated_content = $is_open ? '<pre class="bbcode-tag-code"><code>' : '</code></pre>';
            break;
        case 'codein':
            $translated_content = $is_open ? '<code class="bbcode-tag-codein">' : '</code>';
            break;
        case 'quote':
            // NOTE(review): the cite text is passed through cleanLinkUrl() before escaping -- confirm that is intended for a name
            $attributes = '<div class="bbcode-tag-quotecite">' . self::htmlentities(empty($param) ? 'Quote:' : self::cleanLinkUrl($param)) . '</div>';
            $translated_content = $is_open ? '<div class="bbcode-tag-quote">' . $attributes . '<div class="bbcode-tag-quotequoted">' : '</div></div>';
            break;
        case 'size':
            $attributes = empty($param) ? '' : 'style="font-size:' . intval(trim($param)) . 'px;"';
            $translated_content = $is_open ? '<span class="bbcode-tag-size" ' . $attributes . '>' : '</span>';
            break;
        case 'color':
            // escaped so that a caller skipping isValidParamForTag() cannot break out of the attribute;
            // validated colours (names, #rgb, #rrggbb) are expected to pass through unchanged
            $attributes = empty($param) ? '' : 'style="color:' . self::htmlentities(mb_strtolower(trim($param))) . ';"';
            $translated_content = $is_open ? '<span class="bbcode-tag-color" ' . $attributes . '>' : '</span>';
            break;
        case 'hilite':
            // yellow is the default highlight; escaped for the same reason as [color]
            $attributes = 'style="background-color:' . self::htmlentities(mb_strtolower(trim(empty($param) ? '#ff0' : $param))) . ';"';
            $translated_content = $is_open ? '<span class="bbcode-tag-hilite" ' . $attributes . '>' : '</span>';
            break;
        case 'align':
            // the parameter becomes part of a class name; escaped for the same reason as [color]
            $attributes = empty($param) ? 'bbcode-tag-left' : 'bbcode-tag-' . self::htmlentities(mb_strtolower(trim($param)));
            $translated_content = $is_open ? '<div class="' . $attributes . '">' : '</div>';
            break;
        case 'left':
        case 'center':
        case 'right':
        case 'justify':
            // the CSS class is named after the tag itself
            $translated_content = $is_open ? '<div class="bbcode-tag-' . $tag_name . '">' : '</div>';
            break;
        case 'indent':
            $direction = (!empty($param) && 0 === strcmp($param, 'right')) ? 'right' : 'left';
            $translated_content = $is_open ? '<div class="' . self::htmlentities('bbcode-tag-indent' . $direction) . '">' : '</div>';
            break;
        case 'spoiler':
            if(empty($param)) $param = 'Spoiler';
            $translated_content = $is_open ? '<div class="bbcode-tag-spoiler-wrapper bbcode-tag-spoiler-hidden"><div class="spoiler-title"><button type="button" class="bbcode-tag-spoiler-control bbcode-tag-spoiler-control-show cta-button-sm gray-button"><span>Show ' . self::htmlentities($param) . '</span></button><button type="button" class="bbcode-tag-spoiler-control bbcode-tag-spoiler-control-hide cta-button-sm gray-button"><span>Hide ' . self::htmlentities($param) . '</span></button></div><div class="bbcode-tag-spoiler">' : '</div></div>';
            break;
        case 'list':
            // a parameter that is not a known list type falls back to the default bullet style
            $attributes = '';
            if(!empty($param) && isset(self::$LIST_MAP[$param])) {
                $attributes = ' style="list-style-type:' . self::htmlentities(self::$LIST_MAP[$param]) . ';"';
            }
            $translated_content = $is_open ? '<ul' . $attributes . ' class="bbcode-tag-list">' : '</ul>';
            break;
        case 'img':
        case 'imgleft':
        case 'imgright':
            $img_classes = array(
                'img' => 'bbcode-tag-img',
                'imgleft' => 'bbcode-tag-img bbcode-tag-img-left',
                'imgright' => 'bbcode-tag-img bbcode-tag-img-right',
            );
            $attributes = empty($param) ? '' : ' src="' . self::htmlentities(self::cleanLinkUrl(trim($param))) . '"';
            // the open side leaves the <img element unterminated; the close side finishes it with " />"
            $translated_content = $is_open ? '<img' . $attributes . ' class="' . $img_classes[$tag_name] . '"' : ' />';
            break;
        case '*':
            $translated_content = $is_open ? '<li class="bbcode-tag-listitem">' : '</li>';
            break;
        case 'strike':
        case 's':
            $translated_content = $is_open ? '<del class="bbcode-tag-strike">' : '</del>';
            break;
        case 'u':
            $translated_content = $is_open ? '<span class="bbcode-tag-u">' : '</span>';
            break;
        case 'b':
            $translated_content = $is_open ? '<strong class="bbcode-tag-b">' : '</strong>';
            break;
        case 'i':
            $translated_content = $is_open ? '<em class="bbcode-tag-i">' : '</em>';
            break;
        default:
            break;
    }
    return $translated_content;
}
/**
 * translateText
 * Convert a run of plain text to HTML: the text goes through smiliesPass() (emoticons
 * rendered) or, when $with_smilies is empty, through self::htmlentities() alone;
 * afterwards every "\n" becomes "<br />".
 * @param (string) $input The plain text to convert
 * @param (bool) $with_smilies Whether emoticons should be rendered (default TRUE)
 * @return (string)
 * @access public
 * @static
 */
public static function translateText($input, $with_smilies=TRUE) {
    self::init();
    if(empty($with_smilies)) {
        // raw mode (e.g. code blocks)
        $escaped_text = self::htmlentities($input);
    } else {
        $escaped_text = self::smiliesPass($input);
    }
    // one HTML line break per input line ending
    return implode("<br />", explode("\n", $escaped_text));
}
/**
 * smiliesPass
 * Parse the string for any smilies/emoticons.
 * Translate any found emoticons and escape all other text.
 *
 * The input is split on self::$EMOTICON_PREG_PATTERN. Whatever lies between two consecutive
 * pieces is the text the pattern matched; it is rendered as an emoticon <span> when it is
 * a key of self::$EMOTICONS and escaped like ordinary text otherwise.
 *
 * @param (string) $input
 * @return (string) The HTML; an empty string for empty or non-string input
 * @access public
 * @static
 */
public static function smiliesPass($input) {
    self::init();
    // empty($input) would also discard the legitimate text "0"
    if(!is_string($input) || '' === $input) return '';
    $pieces = preg_split(self::$EMOTICON_PREG_PATTERN, $input, -1, PREG_SPLIT_OFFSET_CAPTURE);
    if(FALSE === $pieces) {
        // the pattern could not be applied; fall back to plain escaping
        return self::htmlentities($input);
    }
    $rendered_text = array();
    // PREG_SPLIT_OFFSET_CAPTURE reports BYTE offsets, so the cursor and every slice use the
    // byte-based strlen()/substr() rather than the character-based mb_* functions
    $cursor = 0;
    foreach($pieces as $piece) {
        $piece_text = $piece[0];
        $piece_offset = $piece[1];
        if($cursor < $piece_offset) {
            // the gap between the previous piece and this one is what the pattern matched
            $the_emoticon = substr($input, $cursor, $piece_offset - $cursor);
            if(isset(self::$EMOTICONS[$the_emoticon])) {
                $emoticon_name = self::$EMOTICONS[$the_emoticon];
                $rendered_text []= '<span class="emoticon emoticon-' . $emoticon_name . ' phark">' . $emoticon_name . '</span>';
            } else {
                // matched text that is not a known emoticon stays visible as ordinary text
                $rendered_text []= self::htmlentities($the_emoticon);
            }
            $cursor = $piece_offset;
        }
        $rendered_text []= self::htmlentities($piece_text);
        $cursor += strlen($piece_text);
    }
    return implode('', $rendered_text);
}
/**
 * isValidParamForTag
 * Decide whether a tag parameter is acceptable for the given BBCode tag.
 * Tags without a dedicated rule accept any parameter.
 * @param (string) $tag_name The tag the parameter belongs to
 * @param (string) $param The parameter value. Taken by reference: it is
 *        overwritten with its sanitized/decoded form (and, for the URL-type
 *        tags, trimmed) so the caller sees the normalized value
 * @return (bool) TRUE when the parameter is acceptable for the tag
 * @access public
 * @static
 */
public static function isValidParamForTag(&$tag_name, &$param) {
    $return_value = TRUE;
    $param = self::unicodeSanitize($param);
    // Decode numeric character references ("&#106;" / "&#x6A;") so obfuscated
    // values are validated in their plain form. preg_replace_callback() is
    // used because the /e modifier no longer exists in PHP 7+.
    // NOTE(review): chr() yields a single byte (the code modulo 256), so codes
    // >= 128 become a lone non-UTF-8 byte -- confirm whether full code-point
    // decoding is wanted here.
    $param = preg_replace_callback('/&#(\d{2,7});?/', function ($m) {
        return chr((int) $m[1]);
    }, $param);
    $param = preg_replace_callback('/&#[xX]([0-9a-fA-F]{2,2});?/', function ($m) {
        return chr(intval($m[1], 16));
    }, $param);
    switch($tag_name) {
        case 'size':
            // only natural numbers [1..128] are valid
            $return_value = ctype_digit( (string) $param) && intval($param) >= 1 && intval($param) <= 128;
            break;
        case 'indent':
            $return_value = preg_match('/^(|left|right)$/', $param);
            break;
        case 'color':
        case 'hilite':
            // Both tags take a #rgb / #rrggbb value or a named colour;
            // "hilite" additionally accepts an empty parameter.
            $empty_alternative = ($tag_name === 'hilite') ? '|' : '';
            $return_value = preg_match('/^(' . $empty_alternative . '#[0-9A-Fa-f]{3,3}|#[0-9A-Fa-f]{6,6}|(transparent|black|silver|gray|white|maroon|red|purple|fuchsia|green|lime|olive|yellow|navy|blue|teal|aqua|aliceblue|antiquewhite|aqua|aquamarine|azure|beige|bisque|black|blanchedalmond|blue|blueviolet|brown|burlywood|cadetblue|chartreuse|chocolate|coral|cornflowerblue|cornsilk|crimson|cyan|darkblue|darkcyan|darkgoldenrod|darkgray|darkgreen|darkgrey|darkkhaki|darkmagenta|darkolivegreen|darkorange|darkorchid|darkred|darksalmon|darkseagreen|darkslateblue|darkslategray|darkslategrey|darkturquoise|darkviolet|deeppink|deepskyblue|dimgray|dimgrey|dodgerblue|firebrick|floralwhite|forestgreen|fuchsia|gainsboro|ghostwhite|gold|goldenrod|gray|green|greenyellow|grey|honeydew|hotpink|indianred|indigo|ivory|khaki|lavender|lavenderblush|lawngreen|lemonchiffon|lightblue|lightcoral|lightcyan|lightgoldenrodyellow|lightgray|lightgreen|lightgrey|lightpink|lightsalmon|lightseagreen|lightskyblue|lightslategray|lightslategrey|lightsteelblue|lightyellow|lime|limegreen|linen|magenta|maroon|mediumaquamarine|mediumblue|mediumorchid|mediumpurple|mediumseagreen|mediumslateblue|mediumspringgreen|mediumturquoise|mediumvioletred|midnightblue|mintcream|mistyrose|moccasin|navajowhite|navy|oldlace|olive|olivedrab|orange|orangered|orchid|palegoldenrod|palegreen|paleturquoise|palevioletred|papayawhip|peachpuff|peru|pink|plum|powderblue|purple|red|rosybrown|royalblue|saddlebrown|salmon|sandybrown|seagreen|seashell|sienna|silver|skyblue|slateblue|slategray|slategrey|snow|springgreen|steelblue|tan|teal|thistle|tomato|turquoise|violet|wheat|white|whitesmoke|yellow|yellowgreen))$/i', mb_strtolower(trim($param)));
            break;
        case 'align':
            $return_value = preg_match('/^\s*(left|center|right|justify)\s*$/i', $param);
            break;
        case 'list':
            $return_value = preg_match(self::$LIST_PREG_PATTERN, $param);
            break;
        case 'spoiler':
            $return_value = preg_match('/^(|[- _0-9a-zA-Z]+)$/i', $param) && mb_strlen($param) <= 64;
            break;
        case 'url':
        case 'img':
        case 'imgleft':
        case 'imgright':
            $param = trim($param);
            // this is a little hacky. We don't actually want empty parameters in the final output, but we don't want to short circuit as soon as this function returns FALSE
            // (compare against '' rather than empty(): "0" is not an empty parameter and must go through the URL checks below)
            if($param === '') return TRUE;
            // these are obvious exploits -- any string that begins with "javascript:" or "data:" (or with whitespace/backslash shims or HTMLentities/UTF8 substitutes)
            // html_entity_decode converts substrings of the form "&STUFF;" to the raw characters they represent;
            // every character other than ASCII letters, digits and ":" is then dropped so shims inside the scheme name cannot hide it
            $scheme_probe = preg_replace('/[^a-zA-Z0-9:]/', '', html_entity_decode($param, ENT_QUOTES, 'UTF-8'));
            if(preg_match('/^(javascript|vbscript|livescript|data):/i', $scheme_probe)) return FALSE;
            // accept URLs of the form:
            // fully qualified ( "http://foobar.org..." and "https://foobar.org..." )
            // protocol-relative ( "//foobar.org..." )
            // root-relative / URI only ( "/path/to/file.html" )
            // query-relative ( "?foo=bar" )
            // anchor-relative ( "#to_the_top" )
            // The parentheses matter: without them && binds tighter than ||, so the length limit would only apply to the first form.
            $return_value = mb_strlen($param) <= 256 && (
                preg_match('@^https?://[^ \t\r\n]+$@i', $param)
                || preg_match('@^//[^ \t\r\n]+$@', $param)
                || preg_match('@^/[^ \t\r\n]*$@', $param)
                || preg_match('@^\?[^ \t\r\n]+$@', $param)
                || preg_match('@^#[^ \t\r\n]+$@', $param)
            );
            break;
    }
    // preg_match() yields 1/0/FALSE; hand callers a real boolean in every case
    return (bool) $return_value;
}
/**
 * cleanLinkUrl
 * Rewrite an outgoing link URL. When self::AMAZON_ASSOCIATE_ID is configured
 * and the URL points at an Amazon site, the "tag" query parameter is set to
 * that ID (replacing any existing "tag"). Every other URL is returned
 * unchanged; no further validation or stripping happens here.
 * @param (string) $url_href The URL to link to
 * @return (string) The URL, possibly with the associate tag applied
 * @access public
 * @static
 */
public static function cleanLinkUrl($url_href) {
    // "www." is optional so that bare amazon.com / amazon.co.xx links are tagged too
    $amazon_pattern = '@^https?://(?:www\.)?(?:amazon\.com|amazon\.co\.[a-z]{2,2})/@i';
    if(!self::AMAZON_ASSOCIATE_ID || !preg_match($amazon_pattern, $url_href)) {
        return $url_href;
    }
    // Detach the fragment first so it can be re-attached after the query string
    $anchor = '';
    $anchor_pos = mb_strpos($url_href, '#');
    if(FALSE !== $anchor_pos) {
        $anchor = mb_substr($url_href, $anchor_pos);
        $url_href = mb_substr($url_href, 0, $anchor_pos);
    }
    // Split off any existing query string and parse it into an array
    $params_array = array();
    $query_string_pos = mb_strpos($url_href, '?');
    if(FALSE !== $query_string_pos) {
        parse_str(mb_substr($url_href, $query_string_pos + 1), $params_array);
        // keep everything before the "?" (a length of $query_string_pos - 1
        // would also cut off the last character of the path)
        $url_href = mb_substr($url_href, 0, $query_string_pos);
    }
    $params_array['tag'] = self::AMAZON_ASSOCIATE_ID;
    return $url_href . '?' . http_build_query($params_array) . $anchor;
}
/**
 * unicodeSanitize
 * Take unicode escape sequences and convert them into single characters.
 * Take unicode control sequences and neutralize them.
 * @todo: Not entirely sure if this is necessary. - carbonphyber 2013-03-06
 * @see http://www.w3.org/TR/unicode-xml/#Suitable
 * @param (string) $input
 * @return (string) The sanitized text
 * @access public
 * @static
 */
public static function unicodeSanitize($input) {
    /**
     * @see http://stackoverflow.com/questions/2728070/how-do-i-replace-characters-not-in-range-0x5e10-0x7f35-with-in-php/2728372#2728372
     */
    $input = preg_replace('/[\x{202a}-\x{202e}\x{feff}]/u', '', $input); // strip BOM and BiDi characters (LRE, RLE, PDF, LRO, RLO)
    $input = preg_replace('/[\x{2028}-\x{2029}]/u', "\n", $input); // unicode line and paragraph separators
    /**
     * Replace textual escape sequences with the byte they name.
     * preg_replace_callback() is used because the /e (eval) modifier was
     * removed in PHP 7; with it preg_replace() fails and returns NULL.
     * NOTE(review): the digits after "\u" are read as DECIMAL and chr() keeps
     * only the value modulo 256 -- this mirrors the previous behaviour;
     * confirm whether hexadecimal code points were intended.
     */
    $input = preg_replace_callback('/\\\\[uU](\d{4,7})/', function ($m) {
        return chr((int) $m[1]);
    }, $input);
    $input = preg_replace_callback('/\\\\[xX]([0-9a-fA-F]{2,2})/', function ($m) {
        return chr(intval($m[1], 16));
    }, $input);
    return $input;
}
/**
 * htmlentities
 * Thin wrapper around PHP's built-in htmlentities() that fixes the flags
 * and character set used throughout this class.
 * @param (string) $input_text The text to escape
 * @return (string) $input_text with all applicable characters, including
 *         single and double quotes, converted to HTML entities
 * @access public
 * @static
 */
public static function htmlentities($input_text) {
    // escape both quote styles so the result is safe inside attribute values
    $quote_flags = ENT_QUOTES;
    $charset = 'UTF-8';
    return htmlentities($input_text, $quote_flags, $charset);
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment