/72135.diff Secret
Created
May 16, 2016 06:28
Star
You must be signed in to star a gist
Patch for 72135
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
commit 0da8b8b801f9276359262f1ef8274c7812d3dfda | |
Author: Stanislav Malyshev <stas@php.net> | |
Date: Sun May 15 23:26:51 2016 -0700 | |
Fix bug #72135 - don't create strings with lengths outside int range | |
diff --git a/ext/standard/html.c b/ext/standard/html.c | |
index 72423b5..81d8aff 100644 | |
--- a/ext/standard/html.c | |
+++ b/ext/standard/html.c | |
@@ -163,7 +163,7 @@ static inline unsigned int get_next_char( | |
else | |
MB_FAILURE(pos, 4); | |
} | |
- | |
+ | |
this_char = ((c & 0x07) << 18) | ((str[pos + 1] & 0x3f) << 12) | ((str[pos + 2] & 0x3f) << 6) | (str[pos + 3] & 0x3f); | |
if (this_char < 0x10000 || this_char > 0x10FFFF) { /* non-shortest form or outside range */ | |
MB_FAILURE(pos, 4); | |
@@ -437,7 +437,7 @@ det_charset: | |
if (charset_hint) { | |
int found = 0; | |
- | |
+ | |
/* now walk the charset map and look for the codeset */ | |
for (i = 0; charset_map[i].codeset; i++) { | |
if (len == strlen(charset_map[i].codeset) && strncasecmp(charset_hint, charset_map[i].codeset, len) == 0) { | |
@@ -545,7 +545,7 @@ static inline unsigned char unimap_bsearch(const uni_to_enc *table, unsigned cod | |
return 0; | |
code_key = (unsigned short) code_key_a; | |
- | |
+ | |
while (l <= h) { | |
m = l + (h - l) / 2; | |
if (code_key < m->un_code_point) | |
@@ -571,7 +571,7 @@ static inline int map_from_unicode(unsigned code, enum entity_charset charset, u | |
/* identity mapping of code points to unicode */ | |
if (code > 0xFF) { | |
return FAILURE; | |
- } | |
+ } | |
*res = code; | |
break; | |
@@ -590,7 +590,7 @@ static inline int map_from_unicode(unsigned code, enum entity_charset charset, u | |
return FAILURE; | |
} | |
break; | |
- | |
+ | |
case cs_8859_15: | |
if (code < 0xA4 || (code > 0xBE && code <= 0xFF)) { | |
*res = code; | |
@@ -634,7 +634,7 @@ static inline int map_from_unicode(unsigned code, enum entity_charset charset, u | |
case cs_cp866: | |
table = unimap_cp866; | |
table_size = sizeof(unimap_cp866) / sizeof(*unimap_cp866); | |
- | |
+ | |
table_over_7F: | |
if (code <= 0x7F) { | |
*res = code; | |
@@ -710,7 +710,7 @@ static inline int unicode_cp_is_allowed(unsigned uni_cp, int document_type) | |
* Not sure this is the relevant part for HTML 5, though. I opted to | |
* disallow the characters that would result in a parse error when | |
* preprocessing of the input stream. See also section 8.1.3. | |
- * | |
+ * | |
* It's unclear if XHTML 1.0 allows C1 characters. I'll opt to apply to | |
* XHTML 1.0 the same rules as for XML 1.0. | |
* See <http://cmsmcq.com/2007/C1.xml>. | |
@@ -774,7 +774,7 @@ static inline int numeric_entity_is_allowed(unsigned uni_cp, int document_type) | |
/* {{{ process_numeric_entity | |
* Auxiliary function to traverse_for_entities. | |
* On input, *buf should point to the first character after # and on output, it's the last | |
- * byte read, no matter if there was success or insuccess. | |
+ * byte read, regardless of success or failure. | |
*/ | |
static inline int process_numeric_entity(const char **buf, unsigned *code_point) | |
{ | |
@@ -784,7 +784,7 @@ static inline int process_numeric_entity(const char **buf, unsigned *code_point) | |
if (hexadecimal && (**buf != '\0')) | |
(*buf)++; | |
- | |
+ | |
/* strtol allows whitespace and other stuff in the beginning | |
* we're not interested */ | |
if ((hexadecimal && !isxdigit(**buf)) || | |
@@ -969,7 +969,7 @@ static void traverse_for_entities( | |
goto invalid_code; | |
/* are we allowed to decode this entity in this document type? | |
- * HTML 5 is the only that has a character that cannot be used in | |
+ * HTML 5 is the only one that has a character that cannot be used in | |
* a numeric entity but is allowed literally (U+000D). The | |
* unoptimized version would be ... || !numeric_entity_is_allowed(code) */ | |
if (!unicode_cp_is_allowed(code, doctype) || | |
@@ -996,9 +996,9 @@ static void traverse_for_entities( | |
} | |
} | |
} | |
- | |
+ | |
assert(*next == ';'); | |
- | |
+ | |
if (((code == '\'' && !(flags & ENT_HTML_QUOTE_SINGLE)) || | |
(code == '"' && !(flags & ENT_HTML_QUOTE_DOUBLE))) | |
/* && code2 == '\0' always true for current maps */) | |
@@ -1026,7 +1026,7 @@ invalid_code: | |
*(q++) = *p; | |
} | |
} | |
- | |
+ | |
*q = '\0'; | |
*retlen = (size_t)(q - ret); | |
} | |
@@ -1066,7 +1066,7 @@ static entity_table_opt determine_entity_table(int all, int doctype) | |
entity_table_opt retval = {NULL}; | |
assert(!(doctype == ENT_HTML_DOC_XML1 && all)); | |
- | |
+ | |
if (all) { | |
retval.ms_table = (doctype == ENT_HTML_DOC_HTML5) ? | |
entity_ms_table_html5 : entity_ms_table_html4; | |
@@ -1111,13 +1111,13 @@ PHPAPI char *php_unescape_html_entities(unsigned char *old, size_t oldlen, size_ | |
if (retlen == 0) { | |
goto empty_source; | |
} | |
- | |
+ | |
inverse_map = unescape_inverse_map(all, flags); | |
- | |
+ | |
/* replace numeric entities */ | |
traverse_for_entities(old, oldlen, ret, &retlen, all, flags, inverse_map, charset); | |
-empty_source: | |
+empty_source: | |
*newlen = retlen; | |
return ret; | |
} | |
@@ -1141,7 +1141,7 @@ static inline void find_entity_for_char( | |
{ | |
unsigned stage1_idx = ENT_STAGE1_INDEX(k); | |
const entity_stage3_row *c; | |
- | |
+ | |
if (stage1_idx > 0x1D) { | |
*entity = NULL; | |
*entity_len = 0; | |
@@ -1162,7 +1162,7 @@ static inline void find_entity_for_char( | |
if (!(*cursor < oldlen)) | |
goto no_suitable_2nd; | |
- next_char = get_next_char(charset, old, oldlen, cursor, &status); | |
+ next_char = get_next_char(charset, old, oldlen, cursor, &status); | |
if (status == FAILURE) | |
goto no_suitable_2nd; | |
@@ -1187,7 +1187,7 @@ no_suitable_2nd: | |
*entity = (const unsigned char *) | |
c->data.multicodepoint_table[0].leading_entry.default_entity; | |
*entity_len = c->data.multicodepoint_table[0].leading_entry.default_entity_len; | |
- } | |
+ } | |
} | |
/* }}} */ | |
@@ -1255,7 +1255,7 @@ PHPAPI char *php_escape_html_entities_ex(unsigned char *old, size_t oldlen, size | |
/* initial estimate */ | |
if (oldlen < 64) { | |
- maxlen = 128; | |
+ maxlen = 128; | |
} else { | |
maxlen = 2 * oldlen; | |
if (maxlen < oldlen) { | |
@@ -1444,6 +1444,10 @@ static void php_html_entities(INTERNAL_FUNCTION_PARAMETERS, int all) | |
} | |
replaced = php_escape_html_entities_ex(str, str_len, &new_len, all, (int) flags, hint_charset, double_encode TSRMLS_CC); | |
+ if (new_len > INT_MAX) { | |
+ efree(replaced); | |
+ RETURN_FALSE; | |
+ } | |
RETVAL_STRINGL(replaced, (int)new_len, 0); | |
} | |
/* }}} */ | |
@@ -1577,7 +1581,7 @@ static inline void write_s3row_data( | |
} else { | |
spe_cp = uni_cp; | |
} | |
- | |
+ | |
written_k2 = write_octet_sequence(&key[written_k1], charset, spe_cp); | |
memcpy(&entity[1], mcpr[i].normal_entry.entity, l); | |
entity[l + 1] = ';'; | |
@@ -1615,7 +1619,7 @@ PHP_FUNCTION(get_html_translation_table) | |
LIMIT_ALL(all, doctype, charset); | |
array_init(return_value); | |
- | |
+ | |
entity_table = determine_entity_table(all, doctype); | |
if (all && !CHARSET_UNICODE_COMPAT(charset)) { | |
to_uni_table = enc_to_uni_index[charset]; |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment