Last active
March 31, 2021 05:36
-
-
Save carbonphyber/5087186 to your computer and use it in GitHub Desktop.
BBCode parser replacement candidate for GaiaOnline
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php | |
/** | |
* \DJW\BBCode | |
* A BBCode translation class | |
* Uses OWASP cheatsheet to prevent XSS | |
* @see https://www.owasp.org/index.php/XSS_Filter_Evasion_Cheat_Sheet | |
* | |
* @author CarbonPhyber | |
*/ | |
namespace DJW; | |
/** | |
* used for troubleshooting | |
*/ | |
define('DEBUG', TRUE);

/**
 * Troubleshooting helper: echo a print_r() rendering of $input, escaped via
 * HTML::entities(), inside a monospace, whitespace-preserving <div>.
 * Emits nothing when DEBUG is falsy.
 *
 * @param (mixed) $input The value to display
 */
function dump($input) {
    if (DEBUG) {
        $rendered = HTML::entities(print_r($input, TRUE));
        echo '<div style="white-space:pre;font-family:monospace;">' . $rendered . '</div>' . "\n";
    }
}
/** | |
* BBCode | |
* | |
* Usage: | |
* <pre> | |
* <code> | |
* $user_input = 'And the killer is...[spoiler="my spoiler"]your alternate personality![/spoiler]'; | |
* DJW\BBCode::toHTML($user_input); | |
* </code> | |
* </pre> | |
*/ | |
class BBCode { | |
/** | |
* constants. | |
* @todo: Change values to integers for production | |
*/ | |
const POLARITY_OPEN = 'OPEN'; | |
const POLARITY_CLOSE = 'CLOSED'; | |
const TYPE_TEXT = 'TYPE_TEXT'; | |
const TYPE_TAG = 'TYPE_TAG'; | |
const TYPE_RENDERED = 'TYPE_RENDERED'; | |
const OUTBOUND_URI = NULL;//'/outbound'; | |
const OUTBOUND_QUERYPARAM = NULL;//'url'; | |
const AMAZON_ASSOCIATE_ID = NULL;//'daviwort-20'; | |
// constants. DO NOT MODIFY THESE AFTER self::init() RUNS. | |
/** | |
* $NOINNERRENDER_TAGS | |
* A list of the tags which should not render any other tags inside of themselves | |
* @access protected
* @static | |
*/ | |
protected static $NOINNERRENDER_TAGS = array('code', 'img', 'imgleft', 'imgright'); | |
/** | |
* $BLOCK_LEVEL_TAGS | |
* A list of all block-level tags we support | |
* @access protected
* @static | |
*/ | |
protected static $BLOCK_LEVEL_TAGS = array('url', 'code', 'quote', 'list', 'center', 'right', 'left', 'justify', 'align', 'spoiler', 'indent'); | |
/** | |
* $INLINE_LEVEL_TAGS | |
* A list of all inline-level tags we support | |
* @access protected
* @static | |
*/ | |
protected static $INLINE_LEVEL_TAGS = array('i', 'b', 'strike', 's', 'u', 'codein', 'img', 'imgleft', 'imgright', '*', 'size', 'color', 'hilite'); | |
/** | |
* $ALL_TAGS | |
* A list of all tags we support (populated by the first run of "init()") | |
* @access protected
* @static | |
*/ | |
protected static $ALL_TAGS = NULL; | |
/** | |
* $NESTED_CLOSE | |
* For tags which _must_ be inside of other tags. Keys are the child/descendent, values are the parent/ancestor | |
* @access protected | |
* @static | |
*/ | |
protected static $NESTED_CLOSE = array('*' => 'list'); | |
/** | |
* $UNNESTABLE_BLOCK_TAGS | |
* These tags cannot be nested within one another, despite being block level | |
* @todo: actually enforce this | |
* @access protected
* @static | |
*/ | |
protected static $UNNESTABLE_BLOCK_TAGS = array('*', 'url'); | |
/** | |
* $LONGEST_TAG_NAME_LEN | |
* Max-length memory so we can prevent looking too far ahead when sniffing the next tag name | |
* @access protected
* @static | |
*/ | |
protected static $LONGEST_TAG_NAME_LEN = 1; | |
/** | |
* $EMOTICONS | |
* A map of all emoticons | |
* @access protected
* @static | |
*/ | |
protected static $EMOTICONS = array(); | |
/** | |
* $EMOTICON_PREG_PATTERN | |
* A regex pattern that will match any key of $EMOTICONS (a string once init() has run)
* @access protected
* @static | |
*/ | |
protected static $EMOTICON_PREG_PATTERN = array(); | |
/** | |
* $LIST_MAP | |
* A map of all supported [list] parameters | |
* @access protected
* @static | |
*/ | |
protected static $LIST_MAP = array(); | |
/** | |
* $LIST_PREG_PATTERN | |
* A regex pattern that will match any $LIST_MAP keys | |
* @access protected
* @static | |
*/ | |
protected static $LIST_PREG_PATTERN = array(); | |
/** | |
* init | |
* Set up static variables | |
* @access protected | |
* @static | |
*/ | |
protected static function init() {
    // One-time set-up: a populated $ALL_TAGS doubles as the "already initialised" flag.
    if (!empty(self::$ALL_TAGS)) {
        return;
    }
    self::$ALL_TAGS = array_merge(self::$BLOCK_LEVEL_TAGS, self::$INLINE_LEVEL_TAGS);
    // Remember the length of the longest tag name; used to prevent long lookaheads during parsing.
    foreach (self::$ALL_TAGS as $tag_name) {
        self::$LONGEST_TAG_NAME_LEN = max(self::$LONGEST_TAG_NAME_LEN, mb_strlen($tag_name));
    }
    // Emoticon text => emoticon name. Order matters: the regex built below tries the
    // alternatives left to right, so longer spellings must precede their prefixes
    // (e.g. ':-)' before ':)').
    self::$EMOTICONS = array(
        // CAT EMOTES
        ':cat_neutral:' => 'cat_neutral',
        ':cat_twisted:' => 'cat_twisted',
        ':cat_mrgreen:' => 'cat_mrgreen',
        ':cat_burning:' => 'cat_burning_eyes',
        ':cat_pirate:' => 'cat_pirate',
        ':cat_cheese:' => 'cat_cheese_whine',
        ':cat_scream:' => 'cat_scream',
        ':cat_heart:' => 'cat_heart',
        ':cat_sweat:' => 'cat_sweatdrop',
        ':cat_arrow:' => 'cat_arrow',
        ':cat_ninja:' => 'cat_ninja',
        ':cat_shock:' => 'cat_eek',
        ':cat_aie:' => 'cat_surprised',
        ':cat_smile:' => 'cat_smile',
        ':cat_stare:' => 'cat_stare',
        ':cat_oops:' => 'cat_redface',
        ':cat_rofl:' => 'cat_rofl',
        ':cat_cool:' => 'cat_cool',
        ':cat_roll:' => 'cat_rolleyes',
        ':cat_gonk:' => 'cat_gonk',
        ':cat_evil:' => 'cat_evil',
        ':cat_razz:' => 'cat_razz',
        ':cat_idea:' => 'cat_idea',
        ':cat_biggrin:' => 'cat_biggrin',
        ':cat_cute:' => 'cat_4laugh',
        ':cat_whee:' => 'cat_whee',
        ':cat_3nod:' => 'cat_3nodding',
        ':cat_vein:' => 'cat_stressed',
        ':cat_wink:' => 'cat_wink',
        ':cat_lol:' => 'cat_lol',
        ':cat_emo:' => 'cat_emo',
        ':cat_sad:' => 'cat_sad',
        ':cat_big:' => 'cat_blaugh',
        ':cat_crying:' => 'cat_crying',
        ':cat_cry:' => 'cat_cry',
        ':cat_XD:' => 'cat_xd',
        ':cat_mad:' => 'cat_mad',
        ':cat_???:' => 'cat_confused',
        ':cat_eek:' => 'cat_surprised',
        ':cat_xp:' => 'cat_xp',
        ':cat_!:' => 'cat_exclaim',
        ':cat_?:' => 'cat_question',
        ':cat_talk2hand:' => 'cat_talk2hand',
        // Yummies emotes
        ':burger:' => 'yum_burger',
        ':cupcake:' => 'yum_cupcake',
        ':donut:' => 'yum_donut',
        ':hotdog:' => 'yum_hotdog',
        ':onigiri:' => 'yum_onigiri',
        ':pie:' => 'yum_pie',
        ':pizza:' => 'yum_pizza',
        ':puddi:' => 'yum_puddi',
        ':strawberry:' => 'yum_strawberry',
        ':tea:' => 'yum_tea',
        ':bacon:' => 'yum_bacon',
        ':coldone:' => 'yum_coldone',
        ':icecreampie:' => 'yum_icecreampie',
        ':pumpkinpie:' => 'yum_pumpkinpie',
        ':salmon:' => 'yum_salmon',
        ':sausage:' => 'yum_sausage',
        ':shrimp:' => 'yum_shrimp',
        ':strawberrypie:' => 'yum_strawberrypie',
        ':tamago:' => 'yum_tamago',
        ':tuna:' => 'yum_tuna',
        ':wasabipie:' => 'yum_wasabipie',
        // Emotion emotes
        ':awesome:' => 'emotion_awesome',
        ':dealwithit:' => 'emotion_dealwithit',
        ':facepalm:' => 'emotion_facepalm',
        ':bigheart:' => 'emotion_bigheart',
        ':jawdrop:' => 'emotion_jawdrop',
        ':kirakira:' => 'emotion_kirakira',
        ':omnomnom:' => 'emotion_omnomnom',
        ':puke:' => 'emotion_puke',
        ':sweatdrop:' => 'emotion_sweatdrop',
        ':bigvein:' => 'emotion_bigvein',
        ':yatta:' => 'emotion_yatta',
        ':0a0:' => 'emotion_0A0',
        ':8c:' => 'emotion_8c',
        ':bandaid:' => 'emotion_bandaid',
        ':brofist:' => 'emotion_brofist',
        ':c8:' => 'emotion_c8',
        ':donotwant:' => 'emotion_donotwant',
        ':dowant:' => 'emotion_dowant',
        ':drool:' => 'emotion_drool',
        ':eyebrow:' => 'emotion_eyebrow',
        ':hug:' => 'emotion_hug',
        ':zombie:' => 'emotion_zombie',
        ':zzz:' => 'emotion_zzz',
        // Gaia emotes
        ':angelleft:' => 'gaia_angelleft',
        ':angelright:' => 'gaia_angelright',
        ':crown:' => 'gaia_crown',
        ':gaiagold:' => 'gaia_gaiagold',
        ':nitemareleft:' => 'gaia_nitemareleft',
        ':nitemareright:' => 'gaia_nitemareright',
        ':spoons:' => 'gaia_spoons',
        ':star:' => 'gaia_star',
        ':kittenstar:' => 'gaia_kittenstar',
        ':diamond:' => 'gaia_diamond',
        // Secret emotes
        ':diedrich:' => 'classified_diedrich',
        ':fu:' => 'classified_fu',
        ':jet:' => 'classified_jet',
        ':mcsourface:' => 'classified_mcsourface',
        ':northkittenstar:' => 'classified_northkittenstar',
        ':poo:' => 'classified_poo',
        ':reve:' => 'classified_reve',
        ':bapecow:' => 'classified_bapecow',
        ':fullmoon:' => 'classified_fullmoon',
        ':omnomcoco:' => 'classified_omnomcoco',
        ':omnomgrunny:' => 'classified_omnomgrunny',
        ':omnomkiki:' => 'classified_omnomkiki',
        ':starseed:' => 'classified_starseed',
        // Regular emotes
        ':wahmbulance:' => 'wahmbulance',
        ':dramallama:' => 'dramallama',
        ':talk2hand:' => 'talk2hand',
        ':neutral:' => 'neutral',
        ':twisted:' => 'twisted',
        ':mrgreen:' => 'mrgreen',
        ':burning:' => 'burning_eyes',
        ':pirate:' => 'pirate',
        ':cheese:' => 'cheese_whine',
        ':scream:' => 'scream',
        ':heart:' => 'heart',
        ':sweat:' => 'sweatdrop',
        ':arrow:' => 'arrow',
        ':ninja:' => 'ninja',
        ':shock:' => 'eek',
        ':smile:' => 'smile',
        ':stare:' => 'stare',
        ':oops:' => 'redface',
        ':rofl:' => 'rofl',
        ':cool:' => 'cool',
        ':roll:' => 'rolleyes',
        ':gonk:' => 'gonk',
        ':evil:' => 'evil',
        ':razz:' => 'razz',
        ':idea:' => 'idea',
        ':grin:' => 'biggrin',
        ':cute:' => '4laugh',
        ':whee:' => 'whee',
        ':3nod:' => '3nodding',
        ':domo:' => 'domokun',
        ':vein:' => 'stressed',
        ':wink:' => 'wink',
        ':lol:' => 'lol',
        ':emo:' => 'emo',
        ':sad:' => 'sad',
        ':big:' => 'blaugh',
        ':cry:' => 'cry',
        ':mad:' => 'mad',
        ':???:' => 'confused',
        ':eek:' => 'surprised',
        ':xp:' => 'xp',
        ':tab:' => 'tab',
        //
        ':-)' => 'smile',
        ':?:' => 'question',
        ':-|' => 'neutral',
        ':-D' => 'biggrin',
        ':!:' => 'exclaim',
        'T_T' => 'crying',
        ':-x' => 'mad',
        ':-P' => 'razz',
        ':-(' => 'sad',
        ';-)' => 'wink',
        ':XD' => 'xd',
        ':-?' => 'confused',
        ':-o' => 'surprised',
        '8-)' => 'cool',
        ':D' => 'biggrin',
        ':x' => 'mad',
        ';)' => 'wink',
        ':(' => 'sad',
        ':P' => 'razz',
        ':|' => 'neutral',
        ':?' => 'confused',
        ':o' => 'surprised',
        ':)' => 'smile',
    );
    // Both patterns below use '#' as the delimiter, so quote the keys for that delimiter.
    // The (string) cast matters because PHP turns the '1' list key into the integer 1.
    $quote_for_pattern = function ($key) {
        return preg_quote((string) $key, '#');
    };
    // '#' is the only delimiter: the previous "#/ ... /#" form made the first and last
    // alternatives require a literal '/' next to the emoticon, so ':)' never matched on its own.
    self::$EMOTICON_PREG_PATTERN = '#' . implode('|', array_map($quote_for_pattern, array_keys(self::$EMOTICONS))) . '#';
    self::$LIST_MAP = array(
        '01' => 'decimal-leading-zero',
        '1' => 'decimal', // this MUST be the second because the key is ctype_digit
        'a' => 'lower-alpha',
        'A' => 'upper-alpha',
        'i' => 'lower-roman',
        'I' => 'upper-roman',
        'alpha' => 'lower-greek',
        'α' => 'lower-greek',
        'א' => 'hebrew',
        'あ' => 'hiragana',
        'い' => 'hiragana-iroha',
        'ア' => 'katakana',
        'イ' => 'katakana-iroha',
        '一' => 'cjk-ideographic',
    );
    // Anchored match of an empty parameter or exactly one $LIST_MAP key. Only the 'u' (UTF-8)
    // modifier is used: 'e' was a preg_replace()-only modifier and was removed in PHP 7,
    // where it makes every match against this pattern fail.
    self::$LIST_PREG_PATTERN = '#^(|' . implode('|', array_map($quote_for_pattern, array_keys(self::$LIST_MAP))) . ')$#u';
}
/** | |
* toHtml | |
* Parse a BBCode string and convert it into HTML | |
* @param (string) $input The BBCoded input text we want to translate to HTML | |
* @param (assoc array) $config Optional settings; normalized to an array but not otherwise read by this method
* @access public | |
* @static | |
*/ | |
public static function toHtml($input, $config=NULL) {
    // Overview: scan $input for "[...]" candidates and keep a stack of nodes:
    //   TYPE_TEXT     - a [begin, end) slice of $input that is rendered later
    //   TYPE_TAG      - an open tag still waiting for its close tag
    //   TYPE_RENDERED - finished HTML
    // A close tag collapses everything above its open tag into one TYPE_RENDERED node.
    // Whatever is left on the stack at the end is rendered with implicit close tags.
    self::init();
    // $config is normalized for callers' convenience; nothing below reads it yet.
    if(empty($config) || !is_array($config)) $config = array();
    $input = self::unicodeSanitize($input);
    // standardize line endings ("\r\n" first, so it does not turn into two newlines)
    $input = preg_replace("/\r/", "\n", preg_replace("/\r\n/", "\n", $input));
    // inject URL tags around lines that are nothing but a URL
    $input = preg_replace("@\n[ \t\r\n]*(https?://[^ \t\r\n]+)[ \t\r\n]*\n@i", "\n[url]$1[/url]\n", $input);
    $output_text = array();
    $cursor = 0;      // where the next search for '[' starts
    $prev_cursor = 0; // start of the input that is not yet represented on the stack
    $tag_stack = array();
    $input_len = mb_strlen($input);
    while($cursor < $input_len && FALSE !== ($open_tag_open_pos = mb_strpos($input, '[', $cursor))) {
        // A bracket that turns out not to be a tag is skipped by resuming right after it.
        // (Advancing one character at a time re-examined the same '[' for every character
        // in front of it, which is quadratic on user-supplied input.)
        $skip_to = $open_tag_open_pos + 1;
        // it's only a valid tag if it is possibly closed
        $open_tag_close_pos = mb_strpos($input, ']', $open_tag_open_pos);
        if(FALSE === $open_tag_close_pos) {
            // no ']' after this '[' means none after any later '[' either: nothing left to parse
            break;
        }
        $is_open_tag = '/' !== mb_substr($input, $open_tag_open_pos + 1, 1);
        $name_start = $open_tag_open_pos + ($is_open_tag ? 1 : 2);
        $param = NULL;
        // check to see if there is a parameter ("[tag=param]", "[tag='param']" or "[tag="param"]")
        $open_tag_param_separator_pos = mb_strpos($input, '=', $open_tag_open_pos);
        if(FALSE !== $open_tag_param_separator_pos && $open_tag_param_separator_pos < $open_tag_close_pos) {
            $poss_tag_name = mb_substr($input, $name_start, $open_tag_param_separator_pos - $name_start);
            $char_after_separator = mb_substr($input, $open_tag_param_separator_pos + 1, 1);
            if('"' === $char_after_separator || "'" === $char_after_separator) {
                // quoted parameter: it runs up to the matching quote
                $param_begins_pos = $open_tag_param_separator_pos + 2;
                $param_ends_pos = mb_strpos($input, $char_after_separator, $param_begins_pos);
                if(FALSE === $param_ends_pos) {
                    $cursor = $skip_to;
                    continue;
                }
                $param = mb_substr($input, $param_begins_pos, $param_ends_pos - $param_begins_pos);
                // The tag ends at the first ']' AFTER the closing quote, so a ']' inside the
                // quotes belongs to the parameter instead of terminating the tag early.
                $open_tag_close_pos = mb_strpos($input, ']', $param_ends_pos);
                if(FALSE === $open_tag_close_pos) {
                    $cursor = $skip_to;
                    continue;
                }
            } else {
                // unquoted parameter: everything between '=' and the closing ']' (may be empty)
                $param = mb_substr($input, $open_tag_param_separator_pos + 1, $open_tag_close_pos - ($open_tag_param_separator_pos + 1));
            }
        } else {
            $poss_tag_name = mb_substr($input, $name_start, $open_tag_close_pos - $name_start);
        }
        $tag_name = mb_strtolower($poss_tag_name);
        // if it's not a recognized tag, skip this tag
        // if the tag is a close-tag and a param was found, skip this tag
        if(!in_array($tag_name, self::$ALL_TAGS, TRUE) || (!empty($param) && !$is_open_tag)) {
            $cursor = $skip_to;
            continue;
        }
        if($is_open_tag) {
            // url/img tags without a parameter take it from their inner text; that is validated further down
            $param_comes_from_inner_text = empty($param) && in_array($tag_name, array('url', 'img', 'imgleft', 'imgright'), TRUE);
            // isValidParamForTag() receives $param by reference and may sanitize it
            if(!$param_comes_from_inner_text && !self::isValidParamForTag($tag_name, $param)) {
                $cursor = $skip_to;
                continue;
            }
        }
        // if there is text before the found tag, put it on the stack
        if($prev_cursor < $open_tag_open_pos) {
            $top = count($tag_stack) - 1;
            if($top >= 0 && self::TYPE_TEXT === $tag_stack[$top]['type']) {
                // the top of the stack is a TYPE_TEXT: just extend the 'end' of its substring
                $tag_stack[$top]['end'] = $open_tag_open_pos;
            } else {
                array_push($tag_stack, array(
                    'type' => self::TYPE_TEXT,
                    'begin' => $prev_cursor,
                    'end' => $open_tag_open_pos,
                ));
            }
        }
        // deal with the complexities of close-tags
        if(!$is_open_tag) {
            $has_matching_tag = FALSE;
            for($i = count($tag_stack) - 1; $i >= 0; $i--) {
                if(self::TYPE_TAG === $tag_stack[$i]['type'] && $tag_stack[$i]['tag'] === $tag_name) {
                    $has_matching_tag = TRUE;
                    break;
                }
            }
            if(!$has_matching_tag) {
                // stray close tag: leave it in the unconsumed input so it is emitted as plain text
                $prev_cursor = $open_tag_open_pos;
                $cursor = $skip_to;
                continue;
            }
            // Pop until the matching open tag is reached. Other open tags found on the way are
            // closed implicitly; each collapsed tag is pushed back as a TYPE_RENDERED node so
            // that it becomes inner text of the next tag further down the stack.
            $inner_text = array();
            $done = FALSE;
            while(!$done) {
                $last_node = array_pop($tag_stack);
                if(self::TYPE_TAG !== $last_node['type']) { // TYPE_TEXT or TYPE_RENDERED
                    array_unshift($inner_text, self::translate($input, $last_node));
                    continue;
                }
                $done = ($tag_name === $last_node['tag']);
                $closing_node = array(
                    'type' => self::TYPE_TAG,
                    'tag' => $last_node['tag'],
                    'polarity' => self::POLARITY_CLOSE,
                );
                array_push($tag_stack, array(
                    'type' => self::TYPE_RENDERED,
                    'text' => self::translate($input, $last_node) . implode('', $inner_text) . self::translate($input, $closing_node),
                ));
                $inner_text = array();
            }
            $prev_cursor = $cursor = $open_tag_close_pos + 1;
            continue;
        }
        // open-tag from here on
        $next_node = array(
            'type' => self::TYPE_TAG,
            'tag' => $tag_name,
            'param' => $param,
            'polarity' => self::POLARITY_OPEN,
        );
        if(in_array($tag_name, array('code', 'codein', 'url', 'img', 'imgleft', 'imgright'), TRUE)) {
            // these tags look ahead to their own close tag because their inner text is special
            $close_tag_open_pos = mb_stripos($input, '[/' . $tag_name . ']', $open_tag_close_pos);
            if(FALSE !== $close_tag_open_pos) {
                $inner_begin = $open_tag_close_pos + 1;
                $inner_raw = mb_substr($input, $inner_begin, $close_tag_open_pos - $inner_begin);
                if('code' === $tag_name || 'codein' === $tag_name) {
                    array_push($tag_stack, $next_node);
                    array_push($tag_stack, array(
                        'type' => self::TYPE_RENDERED,
                        // innerHTML of CODE tags should be raw text (no smilies or other tags)
                        'text' => self::translateText($inner_raw, FALSE),
                    ));
                    $prev_cursor = $cursor = $close_tag_open_pos;
                    continue;
                }
                if(empty($param)) {
                    // use the innerHTML as the parameter
                    $param = $inner_raw;
                    if(!self::isValidParamForTag($tag_name, $param)) {
                        // innerHTML is not a valid URL for this tag
                        $cursor = $skip_to;
                        continue;
                    }
                    $next_node['param'] = $param;
                    array_push($tag_stack, $next_node);
                    if('url' === $tag_name) {
                        // the URL is also the visible link text
                        array_push($tag_stack, array(
                            'type' => self::TYPE_TEXT,
                            'begin' => $inner_begin,
                            'end' => $close_tag_open_pos,
                        ));
                    }
                    // resume at the close tag, which then collapses this node
                    $prev_cursor = $cursor = $close_tag_open_pos;
                    continue;
                }
                // IMG tag must have EITHER an open-tag parameter or an inner-HTML parameter, but not both
                if('url' !== $tag_name && $inner_begin < $close_tag_open_pos) {
                    $cursor = $skip_to;
                    continue;
                }
            }
        // if this is a tag that must be nested under another specific tag
        } else if(isset(static::$NESTED_CLOSE[$tag_name])) {
            $ancestor_tag_required = static::$NESTED_CLOSE[$tag_name];
            $is_required_ancestor_found = FALSE;
            foreach($tag_stack as $this_node) {
                if(self::TYPE_TAG === $this_node['type'] && self::POLARITY_OPEN === $this_node['polarity'] && $ancestor_tag_required === $this_node['tag']) {
                    $is_required_ancestor_found = TRUE;
                    break;
                }
            }
            if(!$is_required_ancestor_found) {
                $cursor = $skip_to;
                continue;
            }
        }
        array_push($tag_stack, $next_node);
        $prev_cursor = $cursor = $open_tag_close_pos + 1;
    }
    // trailing text after the last consumed tag
    if($input_len > $prev_cursor) {
        $top = count($tag_stack) - 1;
        if($top >= 0 && self::TYPE_TEXT === $tag_stack[$top]['type']) {
            // re-use the previous TYPE_TEXT node
            $tag_stack[$top]['end'] = $input_len;
        } else {
            array_push($tag_stack, array(
                'type' => self::TYPE_TEXT,
                'begin' => $prev_cursor,
                'end' => $input_len,
            ));
        }
    }
    // Unwind the stack from the top. A TYPE_TAG still present here was never closed, so it
    // wraps everything rendered so far in its open tag and an implicit close tag.
    while($this_node = array_pop($tag_stack)) {
        $text = '';
        if(self::TYPE_TEXT === $this_node['type']) {
            $text = self::translate($input, $this_node);
        } else if(self::TYPE_RENDERED === $this_node['type']) {
            $text = $this_node['text'];
        } else if(self::TYPE_TAG === $this_node['type']) {
            $close_node = array(
                'type' => self::TYPE_TAG,
                'tag' => $this_node['tag'],
                'polarity' => self::POLARITY_CLOSE,
            );
            // TYPE_TAGs should happen as {open, inner, close} tuples
            $output_text = array(
                self::translate($input, $this_node) . implode('', $output_text) . self::translate($input, $close_node),
            );
        }
        array_unshift($output_text, $text);
    }
    return implode('', $output_text);
}
/** | |
* translate | |
* Given any $node, return the translated content | |
* @param (string) $input A reference to the full input passed into toHTML | |
* @param (array) $node The node which we want to translate | |
* @return (string) The translation of the $node | |
* @access public | |
* @static | |
*/ | |
public static function translate(&$input, $node) {
    // Dispatch on the node type; an unrecognised type yields NULL.
    $node_type = $node['type'];
    if (self::TYPE_RENDERED === $node_type) {
        // already HTML, pass it through untouched
        return $node['text'];
    }
    if (self::TYPE_TEXT === $node_type) {
        // text nodes only store a [begin, end) range into $input
        $length = $node['end'] - $node['begin'];
        return self::translateText(mb_substr($input, $node['begin'], $length));
    }
    if (self::TYPE_TAG === $node_type) {
        return self::translateTag($node);
    }
    return NULL;
}
/** | |
* translateTag | |
* Render one tag node as its opening or closing HTML fragment.
* @param (assoc array) $tag Node with the keys 'tag' and 'polarity', and optionally 'param'
* @return (string|NULL) NULL when the tag name is not one of the handled tags
* @access public | |
* @static | |
*/ | |
public static function translateTag($tag) {
    // $tag is a node array: 'tag' (lower-case tag name), 'polarity' (POLARITY_OPEN or
    // POLARITY_CLOSE) and optionally 'param'. Returns the HTML fragment for that side of
    // the tag, or NULL for a tag name that is not handled below.
    //
    // This method is public, so it cannot rely on the parameter having been validated by
    // the parser: every parameter that ends up inside an HTML attribute is escaped here.
    // (Assumes self::htmlentities() leaves plain colour / alignment names unchanged - confirm.)
    $tag_name = $tag['tag'];
    $is_open = (self::POLARITY_OPEN === $tag['polarity']);
    $param = isset($tag['param']) ? $tag['param'] : NULL;
    $translated_content = NULL;
    switch($tag_name) {
        case 'url':
            $attributes = '';
            if(!empty($param)) {
                // outbound redirector URL... only used if both constants are defined
                if(self::OUTBOUND_URI && self::OUTBOUND_QUERYPARAM) {
                    $the_href = self::OUTBOUND_URI . '?' . http_build_query(array(self::OUTBOUND_QUERYPARAM => self::cleanLinkUrl(trim($param))));
                } else {
                    $the_href = self::cleanLinkUrl(trim($param));
                }
                $attributes = ' href="' . self::htmlentities($the_href) . '"';
            }
            $translated_content = $is_open ? '<a' . $attributes . ' class="bbcode-tag-url">' : '</a>';
            break;
        case 'code':
            $translated_content = $is_open ? '<pre class="bbcode-tag-code"><code>' : '</code></pre>';
            break;
        case 'codein':
            $translated_content = $is_open ? '<code class="bbcode-tag-codein">' : '</code>';
            break;
        case 'quote':
            $attributes = '<div class="bbcode-tag-quotecite">' . self::htmlentities(empty($param) ? 'Quote:' : self::cleanLinkUrl($param)) . '</div>';
            $translated_content = $is_open ? '<div class="bbcode-tag-quote">' . $attributes . '<div class="bbcode-tag-quotequoted">' : '</div></div>';
            break;
        case 'size':
            // intval() guarantees that only digits reach the style attribute
            $attributes = empty($param) ? '' : 'style="font-size:' . intval(trim($param)) . 'px;"';
            $translated_content = $is_open ? '<span class="bbcode-tag-size" ' . $attributes . '>' : '</span>';
            break;
        case 'color':
            $attributes = empty($param) ? '' : 'style="color:' . self::htmlentities(mb_strtolower(trim($param))) . ';"';
            $translated_content = $is_open ? '<span class="bbcode-tag-color" ' . $attributes . '>' : '</span>';
            break;
        case 'hilite':
            $attributes = 'style="background-color:' . self::htmlentities(mb_strtolower(trim(empty($param) ? '#ff0' : $param))) . ';"';
            $translated_content = $is_open ? '<span class="bbcode-tag-hilite" ' . $attributes . '>' : '</span>';
            break;
        case 'align':
            $attributes = empty($param) ? 'bbcode-tag-left' : 'bbcode-tag-' . self::htmlentities(mb_strtolower(trim($param)));
            $translated_content = $is_open ? '<div class="' . $attributes . '">' : '</div>';
            break;
        case 'left':
        case 'center':
        case 'right':
        case 'justify':
            // the CSS class is simply the tag name with the common prefix
            $translated_content = $is_open ? '<div class="bbcode-tag-' . $tag_name . '">' : '</div>';
            break;
        case 'indent':
            $direction = (!empty($param) && 0 === strcmp($param, 'right')) ? 'right' : 'left';
            $translated_content = $is_open ? '<div class="' . self::htmlentities('bbcode-tag-indent' . $direction) . '">' : '</div>';
            break;
        case 'spoiler':
            if(empty($param)) $param = 'Spoiler';
            $translated_content = $is_open ? '<div class="bbcode-tag-spoiler-wrapper bbcode-tag-spoiler-hidden"><div class="spoiler-title"><button type="button" class="bbcode-tag-spoiler-control bbcode-tag-spoiler-control-show cta-button-sm gray-button"><span>Show ' . self::htmlentities($param) . '</span></button><button type="button" class="bbcode-tag-spoiler-control bbcode-tag-spoiler-control-hide cta-button-sm gray-button"><span>Hide ' . self::htmlentities($param) . '</span></button></div><div class="bbcode-tag-spoiler">' : '</div></div>';
            break;
        case 'list':
            // $LIST_MAP is filled by init(); an unknown list type simply gets no style
            self::init();
            $attributes = '';
            if(!empty($param) && isset(self::$LIST_MAP[$param])) {
                $attributes = ' style="list-style-type:' . self::htmlentities(self::$LIST_MAP[$param]) . ';"';
            }
            $translated_content = $is_open ? '<ul' . $attributes . ' class="bbcode-tag-list">' : '</ul>';
            break;
        case 'img':
        case 'imgleft':
        case 'imgright':
            // The open side leaves the element unterminated ('<img ... class="..."');
            // the close side supplies the ' />'.
            $css_classes = array(
                'img' => 'bbcode-tag-img',
                'imgleft' => 'bbcode-tag-img bbcode-tag-img-left',
                'imgright' => 'bbcode-tag-img bbcode-tag-img-right',
            );
            $attributes = empty($param) ? '' : ' src="' . self::htmlentities(self::cleanLinkUrl(trim($param))) . '"';
            $translated_content = $is_open ? '<img' . $attributes . ' class="' . $css_classes[$tag_name] . '"' : ' />';
            break;
        case '*':
            $translated_content = $is_open ? '<li class="bbcode-tag-listitem">' : '</li>';
            break;
        case 'strike':
        case 's':
            $translated_content = $is_open ? '<del class="bbcode-tag-strike">' : '</del>';
            break;
        case 'u':
            $translated_content = $is_open ? '<span class="bbcode-tag-u">' : '</span>';
            break;
        case 'b':
            $translated_content = $is_open ? '<strong class="bbcode-tag-b">' : '</strong>';
            break;
        case 'i':
            $translated_content = $is_open ? '<em class="bbcode-tag-i">' : '</em>';
            break;
        default:
            break;
    }
    return $translated_content;
}
/** | |
* translateText | |
* @param (string) $input | |
* @return (string) | |
* @access public | |
* @static | |
*/ | |
public static function translateText($input, $with_smilies=TRUE) {
    // Escapes $input for HTML (rendering emoticons too, unless $with_smilies is falsy)
    // and turns every "\n" into a "<br />".
    self::init();
    if (empty($with_smilies)) {
        $escaped = self::htmlentities($input);
    } else {
        // smiliesPass() escapes the text between the emoticons it renders
        $escaped = self::smiliesPass($input);
    }
    $lines = explode("\n", $escaped);
    return implode("<br />", $lines);
}
/** | |
* smiliesPass | |
* Parse the string for any smilies/emoticons. | |
* Translate any found emoticons and escape all other text. | |
* @param (string) $input | |
* @return (string) | |
* @access public | |
* @static | |
*/ | |
public static function smiliesPass($input) {
    // Renders every emoticon in $input as a <span> and HTML-escapes all text in between.
    // Returns '' for anything that is not a non-empty string.
    self::init();
    // Deliberately not empty(): the text "0" is falsy but still has to be rendered.
    if(!is_string($input) || '' === $input) {
        return '';
    }
    $found = array();
    $match_count = preg_match_all(self::$EMOTICON_PREG_PATTERN, $input, $found, PREG_OFFSET_CAPTURE);
    if(empty($match_count)) {
        // no emoticon at all (0), or the pattern could not be applied (FALSE): plain escaped text
        return self::htmlentities($input);
    }
    $rendered_text = array();
    // PREG_OFFSET_CAPTURE reports BYTE offsets, so all slicing below is byte-based
    // (substr/strlen); mixing in mb_* character offsets breaks on multibyte text.
    $cursor = 0;
    foreach($found[0] as $this_match) {
        $the_emoticon = $this_match[0];
        $match_offset = $this_match[1];
        if($cursor < $match_offset) {
            // plain text between the previous emoticon and this one
            $rendered_text []= self::htmlentities(substr($input, $cursor, $match_offset - $cursor));
        }
        if(isset(self::$EMOTICONS[$the_emoticon])) {
            $emoticon_name = self::$EMOTICONS[$the_emoticon];
            $rendered_text []= '<span class="emoticon emoticon-' . $emoticon_name . ' phark">' . $emoticon_name . '</span>';
        } else {
            // matched text that is not a known key: keep it as escaped text rather than drop it
            $rendered_text []= self::htmlentities($the_emoticon);
        }
        $cursor = $match_offset + strlen($the_emoticon);
    }
    if($cursor < strlen($input)) {
        // text after the last emoticon
        $rendered_text []= self::htmlentities(substr($input, $cursor));
    }
    return implode('', $rendered_text);
}
/** | |
* isValidParamForTag | |
* @param (string) $tag_name | |
* @param (string) $param The tag parameter; passed by reference and sanitized in place
* @access public | |
* @static | |
*/ | |
public static function isValidParamForTag(&$tag_name, &$param) { | |
$return_value = TRUE; | |
$param = self::unicodeSanitize($param); | |
$param = preg_replace("/&#(\d{2,7});?/e", "chr('$1')", $param); | |
$param = preg_replace("/&#[xX]([0-9a-fA-F]{2,2});?/e", "chr(intval('$1', 16))", $param); | |
switch($tag_name) { | |
case 'size': | |
// only natural numbers [1..128] are valid | |
$return_value = ctype_digit( (string) $param) && intval($param) >= 1 && intval($param) <= 128; | |
break; | |
case 'indent': | |
$return_value = preg_match('/^(|left|right)$/', $param); | |
break; | |
case 'color': | |
$return_value = preg_match('/^(#[0-9A-Fa-f]{3,3}|#[0-9A-Fa-f]{6,6}|(transparent|black|silver|gray|white|maroon|red|purple|fuchsia|green|lime|olive|yellow|navy|blue|teal|aqua|aliceblue|antiquewhite|aqua|aquamarine|azure|beige|bisque|black|blanchedalmond|blue|blueviolet|brown|burlywood|cadetblue|chartreuse|chocolate|coral|cornflowerblue|cornsilk|crimson|cyan|darkblue|darkcyan|darkgoldenrod|darkgray|darkgreen|darkgrey|darkkhaki|darkmagenta|darkolivegreen|darkorange|darkorchid|darkred|darksalmon|darkseagreen|darkslateblue|darkslategray|darkslategrey|darkturquoise|darkviolet|deeppink|deepskyblue|dimgray|dimgrey|dodgerblue|firebrick|floralwhite|forestgreen|fuchsia|gainsboro|ghostwhite|gold|goldenrod|gray|green|greenyellow|grey|honeydew|hotpink|indianred|indigo|ivory|khaki|lavender|lavenderblush|lawngreen|lemonchiffon|lightblue|lightcoral|lightcyan|lightgoldenrodyellow|lightgray|lightgreen|lightgrey|lightpink|lightsalmon|lightseagreen|lightskyblue|lightslategray|lightslategrey|lightsteelblue|lightyellow|lime|limegreen|linen|magenta|maroon|mediumaquamarine|mediumblue|mediumorchid|mediumpurple|mediumseagreen|mediumslateblue|mediumspringgreen|mediumturquoise|mediumvioletred|midnightblue|mintcream|mistyrose|moccasin|navajowhite|navy|oldlace|olive|olivedrab|orange|orangered|orchid|palegoldenrod|palegreen|paleturquoise|palevioletred|papayawhip|peachpuff|peru|pink|plum|powderblue|purple|red|rosybrown|royalblue|saddlebrown|salmon|sandybrown|seagreen|seashell|sienna|silver|skyblue|slateblue|slategray|slategrey|snow|springgreen|steelblue|tan|teal|thistle|tomato|turquoise|violet|wheat|white|whitesmoke|yellow|yellowgreen))$/i', mb_strtolower(trim($param))); | |
break; | |
case 'hilite': | |
$return_value = preg_match('/^(|#[0-9A-Fa-f]{3,3}|#[0-9A-Fa-f]{6,6}|(transparent|black|silver|gray|white|maroon|red|purple|fuchsia|green|lime|olive|yellow|navy|blue|teal|aqua|aliceblue|antiquewhite|aqua|aquamarine|azure|beige|bisque|black|blanchedalmond|blue|blueviolet|brown|burlywood|cadetblue|chartreuse|chocolate|coral|cornflowerblue|cornsilk|crimson|cyan|darkblue|darkcyan|darkgoldenrod|darkgray|darkgreen|darkgrey|darkkhaki|darkmagenta|darkolivegreen|darkorange|darkorchid|darkred|darksalmon|darkseagreen|darkslateblue|darkslategray|darkslategrey|darkturquoise|darkviolet|deeppink|deepskyblue|dimgray|dimgrey|dodgerblue|firebrick|floralwhite|forestgreen|fuchsia|gainsboro|ghostwhite|gold|goldenrod|gray|green|greenyellow|grey|honeydew|hotpink|indianred|indigo|ivory|khaki|lavender|lavenderblush|lawngreen|lemonchiffon|lightblue|lightcoral|lightcyan|lightgoldenrodyellow|lightgray|lightgreen|lightgrey|lightpink|lightsalmon|lightseagreen|lightskyblue|lightslategray|lightslategrey|lightsteelblue|lightyellow|lime|limegreen|linen|magenta|maroon|mediumaquamarine|mediumblue|mediumorchid|mediumpurple|mediumseagreen|mediumslateblue|mediumspringgreen|mediumturquoise|mediumvioletred|midnightblue|mintcream|mistyrose|moccasin|navajowhite|navy|oldlace|olive|olivedrab|orange|orangered|orchid|palegoldenrod|palegreen|paleturquoise|palevioletred|papayawhip|peachpuff|peru|pink|plum|powderblue|purple|red|rosybrown|royalblue|saddlebrown|salmon|sandybrown|seagreen|seashell|sienna|silver|skyblue|slateblue|slategray|slategrey|snow|springgreen|steelblue|tan|teal|thistle|tomato|turquoise|violet|wheat|white|whitesmoke|yellow|yellowgreen))$/i', mb_strtolower(trim($param))); | |
break; | |
case 'align': | |
$return_value = preg_match('/^\s*(left|center|right|justify)\s*$/i', $param); | |
break; | |
case 'list': | |
$return_value = preg_match(self::$LIST_PREG_PATTERN, $param); | |
break; | |
case 'spoiler': | |
$return_value = preg_match('/^(|[- _0-9a-zA-Z]+)$/i', $param) && mb_strlen($param) <= 64; | |
break; | |
case 'url': | |
case 'img': | |
case 'imgleft': | |
case 'imgright': | |
$param = trim($param); | |
// this is a little hacky. We don't actually want empty parameters in the final output, but we don't want to short circuit as soon as this function returns FALSE | |
if(empty($param)) return TRUE; | |
// these are obvious exploits -- any string that begins with "javascript:" or "data:" (or with whitespace/backslash shims or HTMLentities/UTF8 substitutes) | |
// html_entity_decode to convert substrings of the form "&STUFF;" to the raw characters they represent | |
// preg_replace to convert substrings of the form "&#STUFF;" to the raw characters they represent (including 2-char HEX, and 2-7 character Unicode) | |
// preg_replace to convert unicode substrings of the form "\u0123" with the raw character it represents | |
if(preg_match('/^(javascript|vbscript|livescript|data):/i', preg_replace('/[^a-zA-Z0-9:]/', '', html_entity_decode($param, ENT_QUOTES)))) return FALSE; | |
// accept URLs of the form: | |
// fully qualified ( "http://foobar.org..." and "https://foobar.org..." ) | |
// protocol-relative ( "//foobar.org..." ) | |
// root-relative / URI only ( "/path/to/file.html" ) | |
// query-relative ( "/path/to/file.html" ) | |
// anchor-relative ( "#to_the_top" ) | |
$return_value = mb_strlen($param) <= 256 && preg_match('@^https?://[^ \t\r\n]+$@i', $param) || preg_match('@^//[^ \t\r\n]+$@', $param) || preg_match('@^/[^ \t\r\n]*$@', $param) || preg_match('@^\?[^ \t\r\n]+$@', $param) || preg_match('@^#[^ \t\r\n]+$@', $param); | |
break; | |
} | |
return $return_value; | |
} | |
/**
 * cleanLinkUrl
 * When an Amazon associate id is available and the URL points at
 * www.amazon.com or www.amazon.co.XX, set the "tag" query parameter to that
 * id (replacing any existing "tag"). Every other URL is returned unchanged;
 * no protocol filtering or escaping happens here.
 * @param (string) $url_href
 * @param (string) $associate_id  Associate id to apply. Defaults to
 *                                self::AMAZON_ASSOCIATE_ID when omitted/NULL.
 * @return (string) The URL, with the associate tag applied where relevant
 * @access public
 * @static
 */
public static function cleanLinkUrl($url_href, $associate_id = NULL) {
    if(NULL === $associate_id) {
        $associate_id = self::AMAZON_ASSOCIATE_ID;
    }
    // nothing to do without an associate id, or for non-Amazon URLs
    if(!$associate_id || !preg_match('@^https?://((?:www\.)(?:amazon\.com|amazon\.co\.[a-z]{2,2}))/@i', $url_href)) {
        return $url_href;
    }
    // split off the "#fragment" so it can be re-attached after the query string
    $anchor = '';
    $anchor_pos = mb_strpos($url_href, '#');
    if(FALSE !== $anchor_pos) {
        $anchor = mb_substr($url_href, $anchor_pos);
        $url_href = mb_substr($url_href, 0, $anchor_pos);
    }
    // split off and parse the existing query string, if there is one
    $params_array = array();
    $query_string_pos = mb_strpos($url_href, '?');
    if(FALSE !== $query_string_pos) {
        parse_str(mb_substr($url_href, $query_string_pos + 1), $params_array);
        // keep everything before the "?"; a length of ($query_string_pos - 1) would
        // also cut off the last character of the path
        $url_href = mb_substr($url_href, 0, $query_string_pos);
    }
    $params_array['tag'] = $associate_id;
    return $url_href . '?' . http_build_query($params_array) . $anchor;
}
/**
 * unicodeSanitize
 * Strip BiDi control characters and the BOM, turn Unicode line/paragraph
 * separators into "\n", and replace literal backslash escape sequences
 * ("\u" + 4-7 digits, "\x" + 2 hex digits) with a single byte.
 * @todo: Not entirely sure if this is necessary. - carbonphyber 2013-03-06
 * @see http://www.w3.org/TR/unicode-xml/#Suitable
 * @param (string) $input
 * @return (string)
 * @access public
 * @static
 */
public static function unicodeSanitize($input) {
    /**
     * @see http://stackoverflow.com/questions/2728070/how-do-i-replace-characters-not-in-range-0x5e10-0x7f35-with-in-php/2728372#2728372
     */
    $input = preg_replace('/[\x{202a}-\x{202e}\x{feff}]/u', '', $input); // strip BOM and BiDi characters (LRE, RLE, PDF, LRO, RLO)
    $input = preg_replace('/[\x{2028}-\x{2029}]/u', "\n", $input); // unicode line and paragraph separators
    /**
     * Decode literal escape sequences. Callbacks replace the /e (eval) modifier,
     * which PHP 7 removed: with /e preg_replace() returns NULL and the whole
     * input is lost.
     * NOTE(review): the digits after "\u" are read as a DECIMAL number and chr()
     * keeps only its low 8 bits, exactly as the /e version did. "\uXXXX" escapes
     * are conventionally hexadecimal -- confirm which reading is intended.
     */
    $input = preg_replace_callback('/\\\\[uU](\d{4,7})/', function ($m) {
        return chr((int) $m[1]);
    }, $input);
    $input = preg_replace_callback('/\\\\[xX]([0-9a-fA-F]{2,2})/', function ($m) {
        return chr(intval($m[1], 16));
    }, $input);
    return $input;
}
/**
 * htmlentities
 * Escape text for HTML output: all characters with named entities, including
 * both single and double quotes, are converted.
 * @param (string) $input_text The text to escape (treated as UTF-8)
 * @return (string) HTMLEntitized input
 * @access public
 * @static
 */
public static function htmlentities($input_text) {
    // ENT_SUBSTITUTE: replace invalid UTF-8 sequences with U+FFFD. Without it a
    // single bad byte makes htmlentities() return an empty string, silently
    // blanking the whole text.
    return htmlentities($input_text, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8');
}
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment