Created
December 5, 2011 01:56
-
-
Save do-aki/1431980 to your computer and use it in GitHub Desktop.
Difference between 5.3.8 and 5.4.0RC2
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
--- php-5.3.8/Zend/zend_language_scanner.l Mon Jan 03 23:39:48 2011 | |
+++ php-5.4.0RC2/Zend/zend_language_scanner.l Tue Sep 13 22:29:35 2011 | |
@@ -21,7 +21,7 @@ | |
+----------------------------------------------------------------------+ | |
*/ | |
-/* $Id: zend_language_scanner.l 307028 2011-01-03 14:39:48Z iliaa $ */ | |
+/* $Id: zend_language_scanner.l 316627 2011-09-13 13:29:35Z dmitry $ */ | |
#if 0 | |
# define YYDEBUG(s, c) printf("state: %d char: %c\n", s, c) | |
@@ -120,6 +120,33 @@ | |
BEGIN_EXTERN_C() | |
+static size_t encoding_filter_script_to_internal(unsigned char **to, size_t *to_length, const unsigned char *from, size_t from_length TSRMLS_DC) | |
+{ | |
+ const zend_encoding *internal_encoding = zend_multibyte_get_internal_encoding(TSRMLS_C); | |
+ assert(internal_encoding && zend_multibyte_check_lexer_compatibility(internal_encoding)); | |
+ return zend_multibyte_encoding_converter(to, to_length, from, from_length, internal_encoding, LANG_SCNG(script_encoding) TSRMLS_CC); | |
+} | |
+ | |
+static size_t encoding_filter_script_to_intermediate(unsigned char **to, size_t *to_length, const unsigned char *from, size_t from_length TSRMLS_DC) | |
+{ | |
+ return zend_multibyte_encoding_converter(to, to_length, from, from_length, zend_multibyte_encoding_utf8, LANG_SCNG(script_encoding) TSRMLS_CC); | |
+} | |
+ | |
+static size_t encoding_filter_intermediate_to_script(unsigned char **to, size_t *to_length, const unsigned char *from, size_t from_length TSRMLS_DC) | |
+{ | |
+ return zend_multibyte_encoding_converter(to, to_length, from, from_length, | |
+LANG_SCNG(script_encoding), zend_multibyte_encoding_utf8 TSRMLS_CC); | |
+} | |
+ | |
+static size_t encoding_filter_intermediate_to_internal(unsigned char **to, size_t *to_length, const unsigned char *from, size_t from_length TSRMLS_DC) | |
+{ | |
+ const zend_encoding *internal_encoding = zend_multibyte_get_internal_encoding(TSRMLS_C); | |
+ assert(internal_encoding && zend_multibyte_check_lexer_compatibility(internal_encoding)); | |
+ return zend_multibyte_encoding_converter(to, to_length, from, from_length, | |
+internal_encoding, zend_multibyte_encoding_utf8 TSRMLS_CC); | |
+} | |
+ | |
+ | |
static void _yy_push_state(int new_state TSRMLS_DC) | |
{ | |
zend_stack_push(&SCNG(state_stack), (void *) &YYGETCONDITION(), sizeof(int)); | |
@@ -147,6 +174,7 @@ | |
void startup_scanner(TSRMLS_D) | |
{ | |
+ CG(parse_error) = 0; | |
CG(heredoc) = NULL; | |
CG(heredoc_len) = 0; | |
CG(doc_comment) = NULL; | |
@@ -160,6 +188,7 @@ | |
efree(CG(heredoc)); | |
CG(heredoc_len)=0; | |
} | |
+ CG(parse_error) = 0; | |
zend_stack_destroy(&SCNG(state_stack)); | |
RESET_DOC_COMMENT(); | |
} | |
@@ -181,7 +210,6 @@ | |
lex_state->filename = zend_get_compiled_filename(TSRMLS_C); | |
lex_state->lineno = CG(zend_lineno); | |
-#ifdef ZEND_MULTIBYTE | |
lex_state->script_org = SCNG(script_org); | |
lex_state->script_org_size = SCNG(script_org_size); | |
lex_state->script_filtered = SCNG(script_filtered); | |
@@ -189,8 +217,6 @@ | |
lex_state->input_filter = SCNG(input_filter); | |
lex_state->output_filter = SCNG(output_filter); | |
lex_state->script_encoding = SCNG(script_encoding); | |
- lex_state->internal_encoding = SCNG(internal_encoding); | |
-#endif /* ZEND_MULTIBYTE */ | |
} | |
ZEND_API void zend_restore_lexical_state(zend_lex_state *lex_state TSRMLS_DC) | |
@@ -209,11 +235,7 @@ | |
YYSETCONDITION(lex_state->yy_state); | |
CG(zend_lineno) = lex_state->lineno; | |
zend_restore_compiled_filename(lex_state->filename TSRMLS_CC); | |
-#ifdef ZEND_MULTIBYTE | |
- if (SCNG(script_org)) { | |
- efree(SCNG(script_org)); | |
- SCNG(script_org) = NULL; | |
- } | |
+ | |
if (SCNG(script_filtered)) { | |
efree(SCNG(script_filtered)); | |
SCNG(script_filtered) = NULL; | |
@@ -225,8 +247,6 @@ | |
SCNG(input_filter) = lex_state->input_filter; | |
SCNG(output_filter) = lex_state->output_filter; | |
SCNG(script_encoding) = lex_state->script_encoding; | |
- SCNG(internal_encoding) = lex_state->internal_encoding; | |
-#endif /* ZEND_MULTIBYTE */ | |
if (CG(heredoc)) { | |
efree(CG(heredoc)); | |
@@ -245,10 +265,212 @@ | |
} | |
} | |
+#define BOM_UTF32_BE "\x00\x00\xfe\xff" | |
+#define BOM_UTF32_LE "\xff\xfe\x00\x00" | |
+#define BOM_UTF16_BE "\xfe\xff" | |
+#define BOM_UTF16_LE "\xff\xfe" | |
+#define BOM_UTF8 "\xef\xbb\xbf" | |
+ | |
+static const zend_encoding *zend_multibyte_detect_utf_encoding(const unsigned char *script, size_t script_size TSRMLS_DC) | |
+{ | |
+ const unsigned char *p; | |
+ int wchar_size = 2; | |
+ int le = 0; | |
+ | |
+ /* utf-16 or utf-32? */ | |
+ p = script; | |
+ while ((p-script) < script_size) { | |
+ p = memchr(p, 0, script_size-(p-script)-2); | |
+ if (!p) { | |
+ break; | |
+ } | |
+ if (*(p+1) == '\0' && *(p+2) == '\0') { | |
+ wchar_size = 4; | |
+ break; | |
+ } | |
+ | |
+ /* searching for UTF-32 specific byte orders, so this will do */ | |
+ p += 4; | |
+ } | |
+ | |
+ /* BE or LE? */ | |
+ p = script; | |
+ while ((p-script) < script_size) { | |
+ if (*p == '\0' && *(p+wchar_size-1) != '\0') { | |
+ /* BE */ | |
+ le = 0; | |
+ break; | |
+ } else if (*p != '\0' && *(p+wchar_size-1) == '\0') { | |
+ /* LE */ | |
+ le = 1; | |
+ break; | |
+ } | |
+ p += wchar_size; | |
+ } | |
+ | |
+ if (wchar_size == 2) { | |
+ return le ? zend_multibyte_encoding_utf16le : zend_multibyte_encoding_utf16be; | |
+ } else { | |
+ return le ? zend_multibyte_encoding_utf32le : zend_multibyte_encoding_utf32be; | |
+ } | |
+ | |
+ return NULL; | |
+} | |
+ | |
+static const zend_encoding* zend_multibyte_detect_unicode(TSRMLS_D) | |
+{ | |
+ const zend_encoding *script_encoding = NULL; | |
+ int bom_size; | |
+ unsigned char *pos1, *pos2; | |
+ | |
+ if (LANG_SCNG(script_org_size) < sizeof(BOM_UTF32_LE)-1) { | |
+ return NULL; | |
+ } | |
+ | |
+ /* check out BOM */ | |
+ if (!memcmp(LANG_SCNG(script_org), BOM_UTF32_BE, sizeof(BOM_UTF32_BE)-1)) { | |
+ script_encoding = zend_multibyte_encoding_utf32be; | |
+ bom_size = sizeof(BOM_UTF32_BE)-1; | |
+ } else if (!memcmp(LANG_SCNG(script_org), BOM_UTF32_LE, sizeof(BOM_UTF32_LE)-1)) { | |
+ script_encoding = zend_multibyte_encoding_utf32le; | |
+ bom_size = sizeof(BOM_UTF32_LE)-1; | |
+ } else if (!memcmp(LANG_SCNG(script_org), BOM_UTF16_BE, sizeof(BOM_UTF16_BE)-1)) { | |
+ script_encoding = zend_multibyte_encoding_utf16be; | |
+ bom_size = sizeof(BOM_UTF16_BE)-1; | |
+ } else if (!memcmp(LANG_SCNG(script_org), BOM_UTF16_LE, sizeof(BOM_UTF16_LE)-1)) { | |
+ script_encoding = zend_multibyte_encoding_utf16le; | |
+ bom_size = sizeof(BOM_UTF16_LE)-1; | |
+ } else if (!memcmp(LANG_SCNG(script_org), BOM_UTF8, sizeof(BOM_UTF8)-1)) { | |
+ script_encoding = zend_multibyte_encoding_utf8; | |
+ bom_size = sizeof(BOM_UTF8)-1; | |
+ } | |
+ | |
+ if (script_encoding) { | |
+ /* remove BOM */ | |
+ LANG_SCNG(script_org) += bom_size; | |
+ LANG_SCNG(script_org_size) -= bom_size; | |
+ | |
+ return script_encoding; | |
+ } | |
+ | |
+ /* script contains NUL bytes -> auto-detection */ | |
+ if ((pos1 = memchr(LANG_SCNG(script_org), 0, LANG_SCNG(script_org_size)))) { | |
+ /* check if the NUL byte is after the __HALT_COMPILER(); */ | |
+ pos2 = LANG_SCNG(script_org); | |
+ | |
+ while (pos1 - pos2 >= sizeof("__HALT_COMPILER();")-1) { | |
+ pos2 = memchr(pos2, '_', pos1 - pos2); | |
+ if (!pos2) break; | |
+ pos2++; | |
+ if (strncasecmp((char*)pos2, "_HALT_COMPILER", sizeof("_HALT_COMPILER")-1) == 0) { | |
+ pos2 += sizeof("_HALT_COMPILER")-1; | |
+ while (*pos2 == ' ' || | |
+ *pos2 == '\t' || | |
+ *pos2 == '\r' || | |
+ *pos2 == '\n') { | |
+ pos2++; | |
+ } | |
+ if (*pos2 == '(') { | |
+ pos2++; | |
+ while (*pos2 == ' ' || | |
+ *pos2 == '\t' || | |
+ *pos2 == '\r' || | |
+ *pos2 == '\n') { | |
+ pos2++; | |
+ } | |
+ if (*pos2 == ')') { | |
+ pos2++; | |
+ while (*pos2 == ' ' || | |
+ *pos2 == '\t' || | |
+ *pos2 == '\r' || | |
+ *pos2 == '\n') { | |
+ pos2++; | |
+ } | |
+ if (*pos2 == ';') { | |
+ return NULL; | |
+ } | |
+ } | |
+ } | |
+ } | |
+ } | |
+ /* make best effort if BOM is missing */ | |
+ return zend_multibyte_detect_utf_encoding(LANG_SCNG(script_org), LANG_SCNG(script_org_size) TSRMLS_CC); | |
+ } | |
+ | |
+ return NULL; | |
+} | |
+ | |
+static const zend_encoding* zend_multibyte_find_script_encoding(TSRMLS_D) | |
+{ | |
+ const zend_encoding *script_encoding; | |
+ | |
+ if (CG(detect_unicode)) { | |
+ /* check for a BOM (byte order mark) and see whether the script contains wchars */ | |
+ script_encoding = zend_multibyte_detect_unicode(TSRMLS_C); | |
+ if (script_encoding != NULL) { | |
+ /* BOM or wchar detection takes priority over the 'script_encoding' option */ | |
+ return script_encoding; | |
+ } | |
+ } | |
+ | |
+ /* if no script_encoding specified, just leave alone */ | |
+ if (!CG(script_encoding_list) || !CG(script_encoding_list_size)) { | |
+ return NULL; | |
+ } | |
+ | |
+ /* if multiple encodings specified, detect automagically */ | |
+ if (CG(script_encoding_list_size) > 1) { | |
+ return zend_multibyte_encoding_detector(LANG_SCNG(script_org), LANG_SCNG(script_org_size), CG(script_encoding_list), CG(script_encoding_list_size) TSRMLS_CC); | |
+ } | |
+ | |
+ return CG(script_encoding_list)[0]; | |
+} | |
+ | |
+ZEND_API int zend_multibyte_set_filter(const zend_encoding *onetime_encoding TSRMLS_DC) | |
+{ | |
+ const zend_encoding *internal_encoding = zend_multibyte_get_internal_encoding(TSRMLS_C); | |
+ const zend_encoding *script_encoding = onetime_encoding ? onetime_encoding: zend_multibyte_find_script_encoding(TSRMLS_C); | |
+ | |
+ if (!script_encoding) { | |
+ return FAILURE; | |
+ } | |
+ | |
+ /* judge input/output filter */ | |
+ LANG_SCNG(script_encoding) = script_encoding; | |
+ LANG_SCNG(input_filter) = NULL; | |
+ LANG_SCNG(output_filter) = NULL; | |
+ | |
+ if (!internal_encoding || LANG_SCNG(script_encoding) == internal_encoding) { | |
+ if (!zend_multibyte_check_lexer_compatibility(LANG_SCNG(script_encoding))) { | |
+ /* script encoding is not lexer-compatible: work around w/ script_encoding -> utf-8 -> script_encoding conversion */ | |
+ LANG_SCNG(input_filter) = encoding_filter_script_to_intermediate; | |
+ LANG_SCNG(output_filter) = encoding_filter_intermediate_to_script; | |
+ } else { | |
+ LANG_SCNG(input_filter) = NULL; | |
+ LANG_SCNG(output_filter) = NULL; | |
+ } | |
+ return SUCCESS; | |
+ } | |
+ | |
+ if (zend_multibyte_check_lexer_compatibility(internal_encoding)) { | |
+ LANG_SCNG(input_filter) = encoding_filter_script_to_internal; | |
+ LANG_SCNG(output_filter) = NULL; | |
+ } else if (zend_multibyte_check_lexer_compatibility(LANG_SCNG(script_encoding))) { | |
+ LANG_SCNG(input_filter) = NULL; | |
+ LANG_SCNG(output_filter) = encoding_filter_script_to_internal; | |
+ } else { | |
+ /* both script and internal encodings are incompatible w/ flex */ | |
+ LANG_SCNG(input_filter) = encoding_filter_script_to_intermediate; | |
+ LANG_SCNG(output_filter) = encoding_filter_intermediate_to_internal; | |
+ } | |
+ | |
+ return 0; | |
+} | |
ZEND_API int open_file_for_scanning(zend_file_handle *file_handle TSRMLS_DC) | |
{ | |
- char *file_path = NULL, *buf; | |
+ const char *file_path = NULL; | |
+ char *buf; | |
size_t size, offset = 0; | |
/* The shebang line was read, get the current position to obtain the buffer start */ | |
@@ -275,32 +497,24 @@ | |
SCNG(yy_start) = NULL; | |
if (size != -1) { | |
-#ifdef ZEND_MULTIBYTE | |
- if (zend_multibyte_read_script((unsigned char *)buf, size TSRMLS_CC) != 0) { | |
- return FAILURE; | |
- } | |
- | |
- SCNG(yy_in) = NULL; | |
+ if (CG(multibyte)) { | |
+ SCNG(script_org) = (unsigned char*)buf; | |
+ SCNG(script_org_size) = size; | |
+ SCNG(script_filtered) = NULL; | |
zend_multibyte_set_filter(NULL TSRMLS_CC); | |
- if (!SCNG(input_filter)) { | |
- SCNG(script_filtered) = (unsigned char*)emalloc(SCNG(script_org_size)+1); | |
- memcpy(SCNG(script_filtered), SCNG(script_org), SCNG(script_org_size)+1); | |
- SCNG(script_filtered_size) = SCNG(script_org_size); | |
- } else { | |
- SCNG(input_filter)(&SCNG(script_filtered), &SCNG(script_filtered_size), SCNG(script_org), SCNG(script_org_size) TSRMLS_CC); | |
- if (SCNG(script_filtered) == NULL) { | |
+ if (SCNG(input_filter)) { | |
+ if ((size_t)-1 == SCNG(input_filter)(&SCNG(script_filtered), &SCNG(script_filtered_size), SCNG(script_org), SCNG(script_org_size) TSRMLS_CC)) { | |
zend_error_noreturn(E_COMPILE_ERROR, "Could not convert the script from the detected " | |
- "encoding \"%s\" to a compatible encoding", LANG_SCNG(script_encoding)->name); | |
+ "encoding \"%s\" to a compatible encoding", zend_multibyte_get_encoding_name(LANG_SCNG(script_encoding))); | |
} | |
+ buf = (char*)SCNG(script_filtered); | |
+ size = SCNG(script_filtered_size); | |
} | |
- SCNG(yy_start) = SCNG(script_filtered) - offset; | |
- yy_scan_buffer((char *)SCNG(script_filtered), SCNG(script_filtered_size) TSRMLS_CC); | |
-#else /* !ZEND_MULTIBYTE */ | |
- SCNG(yy_start) = buf - offset; | |
+ } | |
+ SCNG(yy_start) = (unsigned char *)buf - offset; | |
yy_scan_buffer(buf, size TSRMLS_CC); | |
-#endif /* ZEND_MULTIBYTE */ | |
} else { | |
zend_error_noreturn(E_COMPILE_ERROR, "zend_stream_mmap() failed"); | |
} | |
@@ -361,6 +575,7 @@ | |
init_op_array(op_array, ZEND_USER_FUNCTION, INITIAL_OP_ARRAY_SIZE TSRMLS_CC); | |
CG(in_compilation) = 1; | |
CG(active_op_array) = op_array; | |
+ zend_init_compiler_context(TSRMLS_C); | |
compiler_result = zendparse(TSRMLS_C); | |
zend_do_return(&retval_znode, 0 TSRMLS_CC); | |
CG(in_compilation) = original_in_compilation; | |
@@ -428,32 +643,44 @@ | |
ZEND_API int zend_prepare_string_for_scanning(zval *str, char *filename TSRMLS_DC) | |
{ | |
+ char *buf; | |
+ size_t size; | |
+ | |
/* enforce two trailing NULLs for flex... */ | |
+ if (IS_INTERNED(str->value.str.val)) { | |
+ char *tmp = safe_emalloc(1, str->value.str.len, ZEND_MMAP_AHEAD); | |
+ memcpy(tmp, str->value.str.val, str->value.str.len + ZEND_MMAP_AHEAD); | |
+ str->value.str.val = tmp; | |
+ } else { | |
str->value.str.val = safe_erealloc(str->value.str.val, 1, str->value.str.len, ZEND_MMAP_AHEAD); | |
+ } | |
memset(str->value.str.val + str->value.str.len, 0, ZEND_MMAP_AHEAD); | |
- SCNG(yy_in)=NULL; | |
+ SCNG(yy_in) = NULL; | |
SCNG(yy_start) = NULL; | |
-#ifdef ZEND_MULTIBYTE | |
- SCNG(script_org) = (unsigned char *)estrdup(str->value.str.val); | |
- SCNG(script_org_size) = str->value.str.len; | |
+ buf = str->value.str.val; | |
+ size = str->value.str.len; | |
+ | |
+ if (CG(multibyte)) { | |
+ SCNG(script_org) = (unsigned char*)buf; | |
+ SCNG(script_org_size) = size; | |
+ SCNG(script_filtered) = NULL; | |
- zend_multibyte_set_filter(CG(internal_encoding) TSRMLS_CC); | |
+ zend_multibyte_set_filter(zend_multibyte_get_internal_encoding(TSRMLS_C) TSRMLS_CC); | |
- if (!SCNG(input_filter)) { | |
- SCNG(script_filtered) = (unsigned char*)emalloc(SCNG(script_org_size)+1); | |
- memcpy(SCNG(script_filtered), SCNG(script_org), SCNG(script_org_size)+1); | |
- SCNG(script_filtered_size) = SCNG(script_org_size); | |
- } else { | |
- SCNG(input_filter)(&SCNG(script_filtered), &SCNG(script_filtered_size), SCNG(script_org), SCNG(script_org_size) TSRMLS_CC); | |
+ if (SCNG(input_filter)) { | |
+ if ((size_t)-1 == SCNG(input_filter)(&SCNG(script_filtered), &SCNG(script_filtered_size), SCNG(script_org), SCNG(script_org_size) TSRMLS_CC)) { | |
+ zend_error_noreturn(E_COMPILE_ERROR, "Could not convert the script from the detected " | |
+ "encoding \"%s\" to a compatible encoding", zend_multibyte_get_encoding_name(LANG_SCNG(script_encoding))); | |
+ } | |
+ buf = (char*)SCNG(script_filtered); | |
+ size = SCNG(script_filtered_size); | |
+ } | |
} | |
- yy_scan_buffer((char *)SCNG(script_filtered), SCNG(script_filtered_size) TSRMLS_CC); | |
-#else /* !ZEND_MULTIBYTE */ | |
- yy_scan_buffer(str->value.str.val, str->value.str.len TSRMLS_CC); | |
-#endif /* ZEND_MULTIBYTE */ | |
+ yy_scan_buffer(buf, size TSRMLS_CC); | |
zend_set_compiled_filename(filename TSRMLS_CC); | |
CG(zend_lineno) = 1; | |
@@ -465,13 +692,12 @@ | |
ZEND_API size_t zend_get_scanned_file_offset(TSRMLS_D) | |
{ | |
size_t offset = SCNG(yy_cursor) - SCNG(yy_start); | |
-#ifdef ZEND_MULTIBYTE | |
if (SCNG(input_filter)) { | |
- size_t original_offset = offset, length = 0; do { | |
+ size_t original_offset = offset, length = 0; | |
+ do { | |
unsigned char *p = NULL; | |
- SCNG(input_filter)(&p, &length, SCNG(script_org), offset TSRMLS_CC); | |
- if (!p) { | |
- break; | |
+ if ((size_t)-1 == SCNG(input_filter)(&p, &length, SCNG(script_org), offset TSRMLS_CC)) { | |
+ return (size_t)-1; | |
} | |
efree(p); | |
if (length > original_offset) { | |
@@ -481,7 +707,6 @@ | |
} | |
} while (original_offset != length); | |
} | |
-#endif | |
return offset; | |
} | |
@@ -519,19 +744,14 @@ | |
init_op_array(op_array, ZEND_EVAL_CODE, INITIAL_OP_ARRAY_SIZE TSRMLS_CC); | |
CG(interactive) = orig_interactive; | |
CG(active_op_array) = op_array; | |
+ zend_init_compiler_context(TSRMLS_C); | |
BEGIN(ST_IN_SCRIPTING); | |
compiler_result = zendparse(TSRMLS_C); | |
-#ifdef ZEND_MULTIBYTE | |
- if (SCNG(script_org)) { | |
- efree(SCNG(script_org)); | |
- SCNG(script_org) = NULL; | |
- } | |
if (SCNG(script_filtered)) { | |
efree(SCNG(script_filtered)); | |
SCNG(script_filtered) = NULL; | |
} | |
-#endif /* ZEND_MULTIBYTE */ | |
if (compiler_result==1) { | |
CG(active_op_array) = original_active_op_array; | |
@@ -569,16 +789,10 @@ | |
return FAILURE; | |
} | |
zend_highlight(syntax_highlighter_ini TSRMLS_CC); | |
-#ifdef ZEND_MULTIBYTE | |
- if (SCNG(script_org)) { | |
- efree(SCNG(script_org)); | |
- SCNG(script_org) = NULL; | |
- } | |
if (SCNG(script_filtered)) { | |
efree(SCNG(script_filtered)); | |
SCNG(script_filtered) = NULL; | |
} | |
-#endif /* ZEND_MULTIBYTE */ | |
zend_destroy_file_handle(&file_handle TSRMLS_CC); | |
zend_restore_lexical_state(&original_lex_state TSRMLS_CC); | |
return SUCCESS; | |
@@ -598,122 +812,44 @@ | |
} | |
BEGIN(INITIAL); | |
zend_highlight(syntax_highlighter_ini TSRMLS_CC); | |
-#ifdef ZEND_MULTIBYTE | |
- if (SCNG(script_org)) { | |
- efree(SCNG(script_org)); | |
- SCNG(script_org) = NULL; | |
- } | |
if (SCNG(script_filtered)) { | |
efree(SCNG(script_filtered)); | |
SCNG(script_filtered) = NULL; | |
} | |
-#endif /* ZEND_MULTIBYTE */ | |
zend_restore_lexical_state(&original_lex_state TSRMLS_CC); | |
zval_dtor(str); | |
return SUCCESS; | |
} | |
-END_EXTERN_C() | |
-#ifdef ZEND_MULTIBYTE | |
- | |
-BEGIN_EXTERN_C() | |
-ZEND_API void zend_multibyte_yyinput_again(zend_encoding_filter old_input_filter, zend_encoding *old_encoding TSRMLS_DC) | |
+ZEND_API void zend_multibyte_yyinput_again(zend_encoding_filter old_input_filter, const zend_encoding *old_encoding TSRMLS_DC) | |
{ | |
- size_t original_offset, offset, free_flag, new_len, length; | |
- unsigned char *p; | |
- | |
- /* calculate current position */ | |
- offset = original_offset = YYCURSOR - SCNG(yy_start); | |
- if (old_input_filter && offset > 0) { | |
- zend_encoding *new_encoding = SCNG(script_encoding); | |
- zend_encoding_filter new_filter = SCNG(input_filter); | |
- SCNG(script_encoding) = old_encoding; | |
- SCNG(input_filter) = old_input_filter; | |
- offset = zend_get_scanned_file_offset(TSRMLS_C); | |
- SCNG(script_encoding) = new_encoding; | |
- SCNG(input_filter) = new_filter; | |
- } | |
+ size_t length; | |
+ unsigned char *new_yy_start; | |
/* convert and set */ | |
if (!SCNG(input_filter)) { | |
- length = SCNG(script_org_size) - offset; | |
- p = SCNG(script_org) + offset; | |
- free_flag = 0; | |
- } else { | |
- SCNG(input_filter)(&p, &length, SCNG(script_org) + offset, SCNG(script_org_size) - offset TSRMLS_CC); | |
- free_flag = 1; | |
- } | |
- | |
- new_len = original_offset + length; | |
- | |
- if (new_len > YYLIMIT - SCNG(yy_start)) { | |
- unsigned char *new_yy_start = erealloc(SCNG(yy_start), new_len); | |
- SCNG(yy_cursor) = new_yy_start + (SCNG(yy_cursor) - SCNG(yy_start)); | |
- SCNG(yy_marker) = new_yy_start + (SCNG(yy_marker) - SCNG(yy_start)); | |
- SCNG(yy_text) = new_yy_start + (SCNG(yy_text) - SCNG(yy_start)); | |
- SCNG(yy_start) = new_yy_start; | |
- SCNG(script_filtered) = new_yy_start; | |
- SCNG(script_filtered_size) = new_len; | |
- } | |
- | |
- SCNG(yy_limit) = SCNG(yy_start) + new_len; | |
- memmove(SCNG(yy_start) + original_offset, p, length); | |
- | |
- if (free_flag) { | |
- efree(p); | |
- } | |
-} | |
- | |
- | |
-ZEND_API int zend_multibyte_yyinput(zend_file_handle *file_handle, char *buf, size_t len TSRMLS_DC) | |
-{ | |
- size_t n; | |
- | |
- if (CG(interactive) == 0) { | |
- if (zend_stream_fixup(file_handle, &buf, &len TSRMLS_CC) == FAILURE) { | |
- return FAILURE; | |
- } | |
- n = len; | |
- return n; | |
- } | |
- | |
- /* interactive */ | |
- if (SCNG(script_org)) { | |
- efree(SCNG(script_org)); | |
- } | |
if (SCNG(script_filtered)) { | |
efree(SCNG(script_filtered)); | |
+ SCNG(script_filtered) = NULL; | |
} | |
- SCNG(script_org) = NULL; | |
- SCNG(script_org_size) = 0; | |
- | |
- /* TODO: support widechars */ | |
- if (zend_stream_fixup(file_handle, &buf, &len TSRMLS_CC) == FAILURE) { | |
- return FAILURE; | |
+ SCNG(script_filtered_size) = 0; | |
+ length = SCNG(script_org_size); | |
+ new_yy_start = SCNG(script_org); | |
+ } else { | |
+ if ((size_t)-1 == SCNG(input_filter)(&new_yy_start, &length, SCNG(script_org), SCNG(script_org_size) TSRMLS_CC)) { | |
+ zend_error_noreturn(E_COMPILE_ERROR, "Could not convert the script from the detected " | |
+ "encoding \"%s\" to a compatible encoding", zend_multibyte_get_encoding_name(LANG_SCNG(script_encoding))); | |
} | |
- n = len; | |
- | |
- SCNG(script_org_size) = n; | |
- SCNG(script_org) = (unsigned char*)emalloc(SCNG(script_org_size) + 1); | |
- memcpy(SCNG(script_org), buf, n); | |
- | |
- return n; | |
-} | |
- | |
- | |
-ZEND_API int zend_multibyte_read_script(unsigned char *buf, size_t n TSRMLS_DC) | |
-{ | |
- if (SCNG(script_org)) { | |
- efree(SCNG(script_org)); | |
- SCNG(script_org) = NULL; | |
+ SCNG(script_filtered) = new_yy_start; | |
+ SCNG(script_filtered_size) = length; | |
} | |
- SCNG(script_org_size) = n; | |
- SCNG(script_org) = (unsigned char*)emalloc(SCNG(script_org_size) + 1); | |
- memcpy(SCNG(script_org), buf, n); | |
- *(SCNG(script_org)+SCNG(script_org_size)) = '\0'; | |
+ SCNG(yy_cursor) = new_yy_start + (SCNG(yy_cursor) - SCNG(yy_start)); | |
+ SCNG(yy_marker) = new_yy_start + (SCNG(yy_marker) - SCNG(yy_start)); | |
+ SCNG(yy_text) = new_yy_start + (SCNG(yy_text) - SCNG(yy_start)); | |
+ SCNG(yy_limit) = new_yy_start + (SCNG(yy_limit) - SCNG(yy_start)); | |
- return 0; | |
+ SCNG(yy_start) = new_yy_start; | |
} | |
@@ -726,11 +862,6 @@ | |
zendlval->value.str.val = (char *) estrndup(yytext, yyleng); \ | |
zendlval->value.str.len = yyleng; \ | |
} | |
-#else /* ZEND_MULTIBYTE */ | |
-# define zend_copy_value(zendlval, yytext, yyleng) \ | |
- zendlval->value.str.val = (char *)estrndup(yytext, yyleng); \ | |
- zendlval->value.str.len = yyleng; | |
-#endif /* ZEND_MULTIBYTE */ | |
static void zend_scan_escape_string(zval *zendlval, char *str, int len, char quote_type TSRMLS_DC) | |
{ | |
@@ -834,7 +965,6 @@ | |
s++; | |
} | |
*t = 0; | |
-#ifdef ZEND_MULTIBYTE | |
if (SCNG(output_filter)) { | |
size_t sz = 0; | |
s = zendlval->value.str.val; | |
@@ -842,7 +972,6 @@ | |
zendlval->value.str.len = sz; | |
efree(s); | |
} | |
-#endif /* ZEND_MULTIBYTE */ | |
} | |
@@ -859,6 +988,7 @@ | |
DNUM ([0-9]*"."[0-9]+)|([0-9]+"."[0-9]*) | |
EXPONENT_DNUM (({LNUM}|{DNUM})[eE][+-]?{LNUM}) | |
HNUM "0x"[0-9a-fA-F]+ | |
+BNUM "0b"[01]+ | |
LABEL [a-zA-Z_\x7f-\xff][a-zA-Z0-9_\x7f-\xff]* | |
WHITESPACE [ \n\r\t]+ | |
TABS_AND_SPACES [ \t]* | |
@@ -1006,6 +1136,10 @@ | |
return T_INTERFACE; | |
} | |
+<ST_IN_SCRIPTING>"trait" { | |
+ return T_TRAIT; | |
+} | |
+ | |
<ST_IN_SCRIPTING>"extends" { | |
return T_EXTENDS; | |
} | |
@@ -1072,11 +1206,7 @@ | |
return T_DOUBLE_CAST; | |
} | |
-<ST_IN_SCRIPTING>"("{TABS_AND_SPACES}"string"{TABS_AND_SPACES}")" { | |
- return T_STRING_CAST; | |
-} | |
- | |
-<ST_IN_SCRIPTING>"("{TABS_AND_SPACES}"binary"{TABS_AND_SPACES}")" { | |
+<ST_IN_SCRIPTING>"("{TABS_AND_SPACES}("string"|"binary"){TABS_AND_SPACES}")" { | |
return T_STRING_CAST; | |
} | |
@@ -1124,6 +1254,10 @@ | |
return T_USE; | |
} | |
+<ST_IN_SCRIPTING>"insteadof" { | |
+ return T_INSTEADOF; | |
+} | |
+ | |
<ST_IN_SCRIPTING>"global" { | |
return T_GLOBAL; | |
} | |
@@ -1180,6 +1314,10 @@ | |
return T_ARRAY; | |
} | |
+<ST_IN_SCRIPTING>"callable" { | |
+ return T_CALLABLE; | |
+} | |
+ | |
<ST_IN_SCRIPTING>"++" { | |
return T_INC; | |
} | |
@@ -1326,6 +1464,26 @@ | |
goto restart; | |
} | |
+<ST_IN_SCRIPTING>{BNUM} { | |
+ char *bin = yytext + 2; /* Skip "0b" */ | |
+ int len = yyleng - 2; | |
+ | |
+ /* Skip any leading 0s */ | |
+ while (*bin == '0') { | |
+ ++bin; | |
+ --len; | |
+ } | |
+ | |
+ if (len < SIZEOF_LONG * 8) { | |
+ zendlval->value.lval = strtol(bin, NULL, 2); | |
+ zendlval->type = IS_LONG; | |
+ return T_LNUMBER; | |
+ } else { | |
+ zendlval->value.dval = zend_bin_strtod(bin, NULL); | |
+ zendlval->type = IS_DOUBLE; | |
+ return T_DNUMBER; | |
+ } | |
+} | |
<ST_IN_SCRIPTING>{LNUM} { | |
if (yyleng < MAX_LENGTH_OF_LONG - 1) { /* Won't overflow */ | |
@@ -1381,7 +1539,7 @@ | |
return T_NUM_STRING; | |
} | |
-<ST_VAR_OFFSET>{LNUM}|{HNUM} { /* Offset must be treated as a string */ | |
+<ST_VAR_OFFSET>{LNUM}|{HNUM}|{BNUM} { /* Offset must be treated as a string */ | |
zendlval->value.str.val = (char *)estrndup(yytext, yyleng); | |
zendlval->value.str.len = yyleng; | |
zendlval->type = IS_STRING; | |
@@ -1395,8 +1553,17 @@ | |
} | |
<ST_IN_SCRIPTING>"__CLASS__" { | |
- char *class_name = NULL; | |
+ const char *class_name = NULL; | |
+ if (CG(active_class_entry) | |
+ && (ZEND_ACC_TRAIT == | |
+ (CG(active_class_entry)->ce_flags & ZEND_ACC_TRAIT))) { | |
+ // This is a hack: we abuse IS_NULL to indicate an invalid value | |
+ // if __CLASS__ is encountered in a trait; however, we also note that we | |
+ // should fix it up when we copy the method into an actual class | |
+ zendlval->value.lval = ZEND_ACC_TRAIT; | |
+ zendlval->type = IS_NULL; | |
+ } else { | |
if (CG(active_class_entry)) { | |
class_name = CG(active_class_entry)->name; | |
} | |
@@ -1404,14 +1571,36 @@ | |
if (!class_name) { | |
class_name = ""; | |
} | |
+ | |
zendlval->value.str.len = strlen(class_name); | |
zendlval->value.str.val = estrndup(class_name, zendlval->value.str.len); | |
zendlval->type = IS_STRING; | |
+ } | |
return T_CLASS_C; | |
} | |
+<ST_IN_SCRIPTING>"__TRAIT__" { | |
+ const char *trait_name = NULL; | |
+ | |
+ if (CG(active_class_entry) | |
+ && (ZEND_ACC_TRAIT == | |
+ (CG(active_class_entry)->ce_flags & ZEND_ACC_TRAIT))) { | |
+ trait_name = CG(active_class_entry)->name; | |
+ } | |
+ | |
+ if (!trait_name) { | |
+ trait_name = ""; | |
+ } | |
+ | |
+ zendlval->value.str.len = strlen(trait_name); | |
+ zendlval->value.str.val = estrndup(trait_name, zendlval->value.str.len); | |
+ zendlval->type = IS_STRING; | |
+ | |
+ return T_TRAIT_C; | |
+} | |
+ | |
<ST_IN_SCRIPTING>"__FUNCTION__" { | |
- char *func_name = NULL; | |
+ const char *func_name = NULL; | |
if (CG(active_op_array)) { | |
func_name = CG(active_op_array)->function_name; | |
@@ -1427,8 +1616,8 @@ | |
} | |
<ST_IN_SCRIPTING>"__METHOD__" { | |
- char *class_name = CG(active_class_entry) ? CG(active_class_entry)->name : NULL; | |
- char *func_name = CG(active_op_array)? CG(active_op_array)->function_name : NULL; | |
+ const char *class_name = CG(active_class_entry) ? CG(active_class_entry)->name : NULL; | |
+ const char *func_name = CG(active_op_array)? CG(active_op_array)->function_name : NULL; | |
size_t len = 0; | |
if (class_name) { | |
@@ -1503,7 +1692,7 @@ | |
} | |
<INITIAL>"<script"{WHITESPACE}+"language"{WHITESPACE}*"="{WHITESPACE}*("php"|"\"php\""|"'php'"){WHITESPACE}*">" { | |
- YYCTYPE *bracket = zend_memrchr(yytext, '<', yyleng - (sizeof("script language=php>") - 1)); | |
+ YYCTYPE *bracket = (YYCTYPE*)zend_memrchr(yytext, '<', yyleng - (sizeof("script language=php>") - 1)); | |
if (bracket != SCNG(yy_text)) { | |
/* Handle previously scanned HTML, as possible <script> tags found are assumed to not be PHP's */ | |
@@ -1534,15 +1723,11 @@ | |
<INITIAL>"<?=" { | |
- if (CG(short_tags)) { | |
zendlval->value.str.val = yytext; /* no copying - intentional */ | |
zendlval->value.str.len = yyleng; | |
zendlval->type = IS_STRING; | |
BEGIN(ST_IN_SCRIPTING); | |
return T_OPEN_TAG_WITH_ECHO; | |
- } else { | |
- goto inline_char_handler; | |
- } | |
} | |
@@ -1596,7 +1781,7 @@ | |
if (YYCURSOR < YYLIMIT) { | |
switch (*YYCURSOR) { | |
case '?': | |
- if (CG(short_tags) || !strncasecmp(YYCURSOR + 1, "php", 3)) { /* Assume [ \t\n\r] follows "php" */ | |
+ if (CG(short_tags) || !strncasecmp((char*)YYCURSOR + 1, "php", 3) || (*(YYCURSOR + 1) == '=')) { /* Assume [ \t\n\r] follows "php" */ | |
break; | |
} | |
continue; | |
@@ -1624,7 +1809,6 @@ | |
inline_html: | |
yyleng = YYCURSOR - SCNG(yy_text); | |
-#ifdef ZEND_MULTIBYTE | |
if (SCNG(output_filter)) { | |
int readsize; | |
size_t sz = 0; | |
@@ -1637,10 +1821,6 @@ | |
zendlval->value.str.val = (char *) estrndup(yytext, yyleng); | |
zendlval->value.str.len = yyleng; | |
} | |
-#else /* !ZEND_MULTIBYTE */ | |
- zendlval->value.str.val = (char *) estrndup(yytext, yyleng); | |
- zendlval->value.str.len = yyleng; | |
-#endif | |
zendlval->type = IS_STRING; | |
HANDLE_NEWLINES(yytext, yyleng); | |
return T_INLINE_HTML; | |
@@ -1847,7 +2027,6 @@ | |
} | |
*t = 0; | |
-#ifdef ZEND_MULTIBYTE | |
if (SCNG(output_filter)) { | |
size_t sz = 0; | |
s = zendlval->value.str.val; | |
@@ -1855,7 +2034,6 @@ | |
zendlval->value.str.len = sz; | |
efree(s); | |
} | |
-#endif /* ZEND_MULTIBYTE */ | |
return T_CONSTANT_ENCAPSED_STRING; | |
} | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment