Skip to content

Instantly share code, notes, and snippets.

@do-aki
Created December 5, 2011 01:56
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save do-aki/1431980 to your computer and use it in GitHub Desktop.
Save do-aki/1431980 to your computer and use it in GitHub Desktop.
Difference between 5.3.8 and 5.4.0RC2
--- php-5.3.8/Zend/zend_language_scanner.l Mon Jan 03 23:39:48 2011
+++ php-5.4.0RC2/Zend/zend_language_scanner.l Tue Sep 13 22:29:35 2011
@@ -21,7 +21,7 @@
+----------------------------------------------------------------------+
*/
-/* $Id: zend_language_scanner.l 307028 2011-01-03 14:39:48Z iliaa $ */
+/* $Id: zend_language_scanner.l 316627 2011-09-13 13:29:35Z dmitry $ */
#if 0
# define YYDEBUG(s, c) printf("state: %d char: %c\n", s, c)
@@ -120,6 +120,33 @@
BEGIN_EXTERN_C()
+static size_t encoding_filter_script_to_internal(unsigned char **to, size_t *to_length, const unsigned char *from, size_t from_length TSRMLS_DC)
+{
+ const zend_encoding *internal_encoding = zend_multibyte_get_internal_encoding(TSRMLS_C);
+ assert(internal_encoding && zend_multibyte_check_lexer_compatibility(internal_encoding));
+ return zend_multibyte_encoding_converter(to, to_length, from, from_length, internal_encoding, LANG_SCNG(script_encoding) TSRMLS_CC);
+}
+
+static size_t encoding_filter_script_to_intermediate(unsigned char **to, size_t *to_length, const unsigned char *from, size_t from_length TSRMLS_DC)
+{
+ return zend_multibyte_encoding_converter(to, to_length, from, from_length, zend_multibyte_encoding_utf8, LANG_SCNG(script_encoding) TSRMLS_CC);
+}
+
+static size_t encoding_filter_intermediate_to_script(unsigned char **to, size_t *to_length, const unsigned char *from, size_t from_length TSRMLS_DC)
+{
+ return zend_multibyte_encoding_converter(to, to_length, from, from_length,
+LANG_SCNG(script_encoding), zend_multibyte_encoding_utf8 TSRMLS_CC);
+}
+
+static size_t encoding_filter_intermediate_to_internal(unsigned char **to, size_t *to_length, const unsigned char *from, size_t from_length TSRMLS_DC)
+{
+ const zend_encoding *internal_encoding = zend_multibyte_get_internal_encoding(TSRMLS_C);
+ assert(internal_encoding && zend_multibyte_check_lexer_compatibility(internal_encoding));
+ return zend_multibyte_encoding_converter(to, to_length, from, from_length,
+internal_encoding, zend_multibyte_encoding_utf8 TSRMLS_CC);
+}
+
+
static void _yy_push_state(int new_state TSRMLS_DC)
{
zend_stack_push(&SCNG(state_stack), (void *) &YYGETCONDITION(), sizeof(int));
@@ -147,6 +174,7 @@
void startup_scanner(TSRMLS_D)
{
+ CG(parse_error) = 0;
CG(heredoc) = NULL;
CG(heredoc_len) = 0;
CG(doc_comment) = NULL;
@@ -160,6 +188,7 @@
efree(CG(heredoc));
CG(heredoc_len)=0;
}
+ CG(parse_error) = 0;
zend_stack_destroy(&SCNG(state_stack));
RESET_DOC_COMMENT();
}
@@ -181,7 +210,6 @@
lex_state->filename = zend_get_compiled_filename(TSRMLS_C);
lex_state->lineno = CG(zend_lineno);
-#ifdef ZEND_MULTIBYTE
lex_state->script_org = SCNG(script_org);
lex_state->script_org_size = SCNG(script_org_size);
lex_state->script_filtered = SCNG(script_filtered);
@@ -189,8 +217,6 @@
lex_state->input_filter = SCNG(input_filter);
lex_state->output_filter = SCNG(output_filter);
lex_state->script_encoding = SCNG(script_encoding);
- lex_state->internal_encoding = SCNG(internal_encoding);
-#endif /* ZEND_MULTIBYTE */
}
ZEND_API void zend_restore_lexical_state(zend_lex_state *lex_state TSRMLS_DC)
@@ -209,11 +235,7 @@
YYSETCONDITION(lex_state->yy_state);
CG(zend_lineno) = lex_state->lineno;
zend_restore_compiled_filename(lex_state->filename TSRMLS_CC);
-#ifdef ZEND_MULTIBYTE
- if (SCNG(script_org)) {
- efree(SCNG(script_org));
- SCNG(script_org) = NULL;
- }
+
if (SCNG(script_filtered)) {
efree(SCNG(script_filtered));
SCNG(script_filtered) = NULL;
@@ -225,8 +247,6 @@
SCNG(input_filter) = lex_state->input_filter;
SCNG(output_filter) = lex_state->output_filter;
SCNG(script_encoding) = lex_state->script_encoding;
- SCNG(internal_encoding) = lex_state->internal_encoding;
-#endif /* ZEND_MULTIBYTE */
if (CG(heredoc)) {
efree(CG(heredoc));
@@ -245,10 +265,212 @@
}
}
+#define BOM_UTF32_BE "\x00\x00\xfe\xff"
+#define BOM_UTF32_LE "\xff\xfe\x00\x00"
+#define BOM_UTF16_BE "\xfe\xff"
+#define BOM_UTF16_LE "\xff\xfe"
+#define BOM_UTF8 "\xef\xbb\xbf"
+
+static const zend_encoding *zend_multibyte_detect_utf_encoding(const unsigned char *script, size_t script_size TSRMLS_DC)
+{
+ const unsigned char *p;
+ int wchar_size = 2;
+ int le = 0;
+
+ /* utf-16 or utf-32? */
+ p = script;
+ while ((p-script) < script_size) {
+ p = memchr(p, 0, script_size-(p-script)-2);
+ if (!p) {
+ break;
+ }
+ if (*(p+1) == '\0' && *(p+2) == '\0') {
+ wchar_size = 4;
+ break;
+ }
+
+ /* searching for UTF-32 specific byte orders, so this will do */
+ p += 4;
+ }
+
+ /* BE or LE? */
+ p = script;
+ while ((p-script) < script_size) {
+ if (*p == '\0' && *(p+wchar_size-1) != '\0') {
+ /* BE */
+ le = 0;
+ break;
+ } else if (*p != '\0' && *(p+wchar_size-1) == '\0') {
+ /* LE* */
+ le = 1;
+ break;
+ }
+ p += wchar_size;
+ }
+
+ if (wchar_size == 2) {
+ return le ? zend_multibyte_encoding_utf16le : zend_multibyte_encoding_utf16be;
+ } else {
+ return le ? zend_multibyte_encoding_utf32le : zend_multibyte_encoding_utf32be;
+ }
+
+ return NULL;
+}
+
+static const zend_encoding* zend_multibyte_detect_unicode(TSRMLS_D)
+{
+ const zend_encoding *script_encoding = NULL;
+ int bom_size;
+ unsigned char *pos1, *pos2;
+
+ if (LANG_SCNG(script_org_size) < sizeof(BOM_UTF32_LE)-1) {
+ return NULL;
+ }
+
+ /* check out BOM */
+ if (!memcmp(LANG_SCNG(script_org), BOM_UTF32_BE, sizeof(BOM_UTF32_BE)-1)) {
+ script_encoding = zend_multibyte_encoding_utf32be;
+ bom_size = sizeof(BOM_UTF32_BE)-1;
+ } else if (!memcmp(LANG_SCNG(script_org), BOM_UTF32_LE, sizeof(BOM_UTF32_LE)-1)) {
+ script_encoding = zend_multibyte_encoding_utf32le;
+ bom_size = sizeof(BOM_UTF32_LE)-1;
+ } else if (!memcmp(LANG_SCNG(script_org), BOM_UTF16_BE, sizeof(BOM_UTF16_BE)-1)) {
+ script_encoding = zend_multibyte_encoding_utf16be;
+ bom_size = sizeof(BOM_UTF16_BE)-1;
+ } else if (!memcmp(LANG_SCNG(script_org), BOM_UTF16_LE, sizeof(BOM_UTF16_LE)-1)) {
+ script_encoding = zend_multibyte_encoding_utf16le;
+ bom_size = sizeof(BOM_UTF16_LE)-1;
+ } else if (!memcmp(LANG_SCNG(script_org), BOM_UTF8, sizeof(BOM_UTF8)-1)) {
+ script_encoding = zend_multibyte_encoding_utf8;
+ bom_size = sizeof(BOM_UTF8)-1;
+ }
+
+ if (script_encoding) {
+ /* remove BOM */
+ LANG_SCNG(script_org) += bom_size;
+ LANG_SCNG(script_org_size) -= bom_size;
+
+ return script_encoding;
+ }
+
+ /* script contains NULL bytes -> auto-detection */
+ if ((pos1 = memchr(LANG_SCNG(script_org), 0, LANG_SCNG(script_org_size)))) {
+ /* check if the NULL byte is after the __HALT_COMPILER(); */
+ pos2 = LANG_SCNG(script_org);
+
+ while (pos1 - pos2 >= sizeof("__HALT_COMPILER();")-1) {
+ pos2 = memchr(pos2, '_', pos1 - pos2);
+ if (!pos2) break;
+ pos2++;
+ if (strncasecmp((char*)pos2, "_HALT_COMPILER", sizeof("_HALT_COMPILER")-1) == 0) {
+ pos2 += sizeof("_HALT_COMPILER")-1;
+ while (*pos2 == ' ' ||
+ *pos2 == '\t' ||
+ *pos2 == '\r' ||
+ *pos2 == '\n') {
+ pos2++;
+ }
+ if (*pos2 == '(') {
+ pos2++;
+ while (*pos2 == ' ' ||
+ *pos2 == '\t' ||
+ *pos2 == '\r' ||
+ *pos2 == '\n') {
+ pos2++;
+ }
+ if (*pos2 == ')') {
+ pos2++;
+ while (*pos2 == ' ' ||
+ *pos2 == '\t' ||
+ *pos2 == '\r' ||
+ *pos2 == '\n') {
+ pos2++;
+ }
+ if (*pos2 == ';') {
+ return NULL;
+ }
+ }
+ }
+ }
+ }
+ /* make best effort if BOM is missing */
+ return zend_multibyte_detect_utf_encoding(LANG_SCNG(script_org), LANG_SCNG(script_org_size) TSRMLS_CC);
+ }
+
+ return NULL;
+}
+
+static const zend_encoding* zend_multibyte_find_script_encoding(TSRMLS_D)
+{
+ const zend_encoding *script_encoding;
+
+ if (CG(detect_unicode)) {
+ /* check out bom(byte order mark) and see if containing wchars */
+ script_encoding = zend_multibyte_detect_unicode(TSRMLS_C);
+ if (script_encoding != NULL) {
+ /* bom or wchar detection is prior to 'script_encoding' option */
+ return script_encoding;
+ }
+ }
+
+ /* if no script_encoding specified, just leave alone */
+ if (!CG(script_encoding_list) || !CG(script_encoding_list_size)) {
+ return NULL;
+ }
+
+ /* if multiple encodings specified, detect automagically */
+ if (CG(script_encoding_list_size) > 1) {
+ return zend_multibyte_encoding_detector(LANG_SCNG(script_org), LANG_SCNG(script_org_size), CG(script_encoding_list), CG(script_encoding_list_size) TSRMLS_CC);
+ }
+
+ return CG(script_encoding_list)[0];
+}
+
+ZEND_API int zend_multibyte_set_filter(const zend_encoding *onetime_encoding TSRMLS_DC)
+{
+ const zend_encoding *internal_encoding = zend_multibyte_get_internal_encoding(TSRMLS_C);
+ const zend_encoding *script_encoding = onetime_encoding ? onetime_encoding: zend_multibyte_find_script_encoding(TSRMLS_C);
+
+ if (!script_encoding) {
+ return FAILURE;
+ }
+
+ /* judge input/output filter */
+ LANG_SCNG(script_encoding) = script_encoding;
+ LANG_SCNG(input_filter) = NULL;
+ LANG_SCNG(output_filter) = NULL;
+
+ if (!internal_encoding || LANG_SCNG(script_encoding) == internal_encoding) {
+ if (!zend_multibyte_check_lexer_compatibility(LANG_SCNG(script_encoding))) {
+ /* and if not, work around w/ script_encoding -> utf-8 -> script_encoding conversion */
+ LANG_SCNG(input_filter) = encoding_filter_script_to_intermediate;
+ LANG_SCNG(output_filter) = encoding_filter_intermediate_to_script;
+ } else {
+ LANG_SCNG(input_filter) = NULL;
+ LANG_SCNG(output_filter) = NULL;
+ }
+ return SUCCESS;
+ }
+
+ if (zend_multibyte_check_lexer_compatibility(internal_encoding)) {
+ LANG_SCNG(input_filter) = encoding_filter_script_to_internal;
+ LANG_SCNG(output_filter) = NULL;
+ } else if (zend_multibyte_check_lexer_compatibility(LANG_SCNG(script_encoding))) {
+ LANG_SCNG(input_filter) = NULL;
+ LANG_SCNG(output_filter) = encoding_filter_script_to_internal;
+ } else {
+ /* both script and internal encodings are incompatible w/ flex */
+ LANG_SCNG(input_filter) = encoding_filter_script_to_intermediate;
+ LANG_SCNG(output_filter) = encoding_filter_intermediate_to_internal;
+ }
+
+ return 0;
+}
ZEND_API int open_file_for_scanning(zend_file_handle *file_handle TSRMLS_DC)
{
- char *file_path = NULL, *buf;
+ const char *file_path = NULL;
+ char *buf;
size_t size, offset = 0;
/* The shebang line was read, get the current position to obtain the buffer start */
@@ -275,32 +497,24 @@
SCNG(yy_start) = NULL;
if (size != -1) {
-#ifdef ZEND_MULTIBYTE
- if (zend_multibyte_read_script((unsigned char *)buf, size TSRMLS_CC) != 0) {
- return FAILURE;
- }
-
- SCNG(yy_in) = NULL;
+ if (CG(multibyte)) {
+ SCNG(script_org) = (unsigned char*)buf;
+ SCNG(script_org_size) = size;
+ SCNG(script_filtered) = NULL;
zend_multibyte_set_filter(NULL TSRMLS_CC);
- if (!SCNG(input_filter)) {
- SCNG(script_filtered) = (unsigned char*)emalloc(SCNG(script_org_size)+1);
- memcpy(SCNG(script_filtered), SCNG(script_org), SCNG(script_org_size)+1);
- SCNG(script_filtered_size) = SCNG(script_org_size);
- } else {
- SCNG(input_filter)(&SCNG(script_filtered), &SCNG(script_filtered_size), SCNG(script_org), SCNG(script_org_size) TSRMLS_CC);
- if (SCNG(script_filtered) == NULL) {
+ if (SCNG(input_filter)) {
+ if ((size_t)-1 == SCNG(input_filter)(&SCNG(script_filtered), &SCNG(script_filtered_size), SCNG(script_org), SCNG(script_org_size) TSRMLS_CC)) {
zend_error_noreturn(E_COMPILE_ERROR, "Could not convert the script from the detected "
- "encoding \"%s\" to a compatible encoding", LANG_SCNG(script_encoding)->name);
+ "encoding \"%s\" to a compatible encoding", zend_multibyte_get_encoding_name(LANG_SCNG(script_encoding)));
}
+ buf = (char*)SCNG(script_filtered);
+ size = SCNG(script_filtered_size);
}
- SCNG(yy_start) = SCNG(script_filtered) - offset;
- yy_scan_buffer((char *)SCNG(script_filtered), SCNG(script_filtered_size) TSRMLS_CC);
-#else /* !ZEND_MULTIBYTE */
- SCNG(yy_start) = buf - offset;
+ }
+ SCNG(yy_start) = (unsigned char *)buf - offset;
yy_scan_buffer(buf, size TSRMLS_CC);
-#endif /* ZEND_MULTIBYTE */
} else {
zend_error_noreturn(E_COMPILE_ERROR, "zend_stream_mmap() failed");
}
@@ -361,6 +575,7 @@
init_op_array(op_array, ZEND_USER_FUNCTION, INITIAL_OP_ARRAY_SIZE TSRMLS_CC);
CG(in_compilation) = 1;
CG(active_op_array) = op_array;
+ zend_init_compiler_context(TSRMLS_C);
compiler_result = zendparse(TSRMLS_C);
zend_do_return(&retval_znode, 0 TSRMLS_CC);
CG(in_compilation) = original_in_compilation;
@@ -428,32 +643,44 @@
ZEND_API int zend_prepare_string_for_scanning(zval *str, char *filename TSRMLS_DC)
{
+ char *buf;
+ size_t size;
+
/* enforce two trailing NULLs for flex... */
+ if (IS_INTERNED(str->value.str.val)) {
+ char *tmp = safe_emalloc(1, str->value.str.len, ZEND_MMAP_AHEAD);
+ memcpy(tmp, str->value.str.val, str->value.str.len + ZEND_MMAP_AHEAD);
+ str->value.str.val = tmp;
+ } else {
str->value.str.val = safe_erealloc(str->value.str.val, 1, str->value.str.len, ZEND_MMAP_AHEAD);
+ }
memset(str->value.str.val + str->value.str.len, 0, ZEND_MMAP_AHEAD);
- SCNG(yy_in)=NULL;
+ SCNG(yy_in) = NULL;
SCNG(yy_start) = NULL;
-#ifdef ZEND_MULTIBYTE
- SCNG(script_org) = (unsigned char *)estrdup(str->value.str.val);
- SCNG(script_org_size) = str->value.str.len;
+ buf = str->value.str.val;
+ size = str->value.str.len;
+
+ if (CG(multibyte)) {
+ SCNG(script_org) = (unsigned char*)buf;
+ SCNG(script_org_size) = size;
+ SCNG(script_filtered) = NULL;
- zend_multibyte_set_filter(CG(internal_encoding) TSRMLS_CC);
+ zend_multibyte_set_filter(zend_multibyte_get_internal_encoding(TSRMLS_C) TSRMLS_CC);
- if (!SCNG(input_filter)) {
- SCNG(script_filtered) = (unsigned char*)emalloc(SCNG(script_org_size)+1);
- memcpy(SCNG(script_filtered), SCNG(script_org), SCNG(script_org_size)+1);
- SCNG(script_filtered_size) = SCNG(script_org_size);
- } else {
- SCNG(input_filter)(&SCNG(script_filtered), &SCNG(script_filtered_size), SCNG(script_org), SCNG(script_org_size) TSRMLS_CC);
+ if (SCNG(input_filter)) {
+ if ((size_t)-1 == SCNG(input_filter)(&SCNG(script_filtered), &SCNG(script_filtered_size), SCNG(script_org), SCNG(script_org_size) TSRMLS_CC)) {
+ zend_error_noreturn(E_COMPILE_ERROR, "Could not convert the script from the detected "
+ "encoding \"%s\" to a compatible encoding", zend_multibyte_get_encoding_name(LANG_SCNG(script_encoding)));
+ }
+ buf = (char*)SCNG(script_filtered);
+ size = SCNG(script_filtered_size);
+ }
}
- yy_scan_buffer((char *)SCNG(script_filtered), SCNG(script_filtered_size) TSRMLS_CC);
-#else /* !ZEND_MULTIBYTE */
- yy_scan_buffer(str->value.str.val, str->value.str.len TSRMLS_CC);
-#endif /* ZEND_MULTIBYTE */
+ yy_scan_buffer(buf, size TSRMLS_CC);
zend_set_compiled_filename(filename TSRMLS_CC);
CG(zend_lineno) = 1;
@@ -465,13 +692,12 @@
ZEND_API size_t zend_get_scanned_file_offset(TSRMLS_D)
{
size_t offset = SCNG(yy_cursor) - SCNG(yy_start);
-#ifdef ZEND_MULTIBYTE
if (SCNG(input_filter)) {
- size_t original_offset = offset, length = 0; do {
+ size_t original_offset = offset, length = 0;
+ do {
unsigned char *p = NULL;
- SCNG(input_filter)(&p, &length, SCNG(script_org), offset TSRMLS_CC);
- if (!p) {
- break;
+ if ((size_t)-1 == SCNG(input_filter)(&p, &length, SCNG(script_org), offset TSRMLS_CC)) {
+ return (size_t)-1;
}
efree(p);
if (length > original_offset) {
@@ -481,7 +707,6 @@
}
} while (original_offset != length);
}
-#endif
return offset;
}
@@ -519,19 +744,14 @@
init_op_array(op_array, ZEND_EVAL_CODE, INITIAL_OP_ARRAY_SIZE TSRMLS_CC);
CG(interactive) = orig_interactive;
CG(active_op_array) = op_array;
+ zend_init_compiler_context(TSRMLS_C);
BEGIN(ST_IN_SCRIPTING);
compiler_result = zendparse(TSRMLS_C);
-#ifdef ZEND_MULTIBYTE
- if (SCNG(script_org)) {
- efree(SCNG(script_org));
- SCNG(script_org) = NULL;
- }
if (SCNG(script_filtered)) {
efree(SCNG(script_filtered));
SCNG(script_filtered) = NULL;
}
-#endif /* ZEND_MULTIBYTE */
if (compiler_result==1) {
CG(active_op_array) = original_active_op_array;
@@ -569,16 +789,10 @@
return FAILURE;
}
zend_highlight(syntax_highlighter_ini TSRMLS_CC);
-#ifdef ZEND_MULTIBYTE
- if (SCNG(script_org)) {
- efree(SCNG(script_org));
- SCNG(script_org) = NULL;
- }
if (SCNG(script_filtered)) {
efree(SCNG(script_filtered));
SCNG(script_filtered) = NULL;
}
-#endif /* ZEND_MULTIBYTE */
zend_destroy_file_handle(&file_handle TSRMLS_CC);
zend_restore_lexical_state(&original_lex_state TSRMLS_CC);
return SUCCESS;
@@ -598,122 +812,44 @@
}
BEGIN(INITIAL);
zend_highlight(syntax_highlighter_ini TSRMLS_CC);
-#ifdef ZEND_MULTIBYTE
- if (SCNG(script_org)) {
- efree(SCNG(script_org));
- SCNG(script_org) = NULL;
- }
if (SCNG(script_filtered)) {
efree(SCNG(script_filtered));
SCNG(script_filtered) = NULL;
}
-#endif /* ZEND_MULTIBYTE */
zend_restore_lexical_state(&original_lex_state TSRMLS_CC);
zval_dtor(str);
return SUCCESS;
}
-END_EXTERN_C()
-#ifdef ZEND_MULTIBYTE
-
-BEGIN_EXTERN_C()
-ZEND_API void zend_multibyte_yyinput_again(zend_encoding_filter old_input_filter, zend_encoding *old_encoding TSRMLS_DC)
+ZEND_API void zend_multibyte_yyinput_again(zend_encoding_filter old_input_filter, const zend_encoding *old_encoding TSRMLS_DC)
{
- size_t original_offset, offset, free_flag, new_len, length;
- unsigned char *p;
-
- /* calculate current position */
- offset = original_offset = YYCURSOR - SCNG(yy_start);
- if (old_input_filter && offset > 0) {
- zend_encoding *new_encoding = SCNG(script_encoding);
- zend_encoding_filter new_filter = SCNG(input_filter);
- SCNG(script_encoding) = old_encoding;
- SCNG(input_filter) = old_input_filter;
- offset = zend_get_scanned_file_offset(TSRMLS_C);
- SCNG(script_encoding) = new_encoding;
- SCNG(input_filter) = new_filter;
- }
+ size_t length;
+ unsigned char *new_yy_start;
/* convert and set */
if (!SCNG(input_filter)) {
- length = SCNG(script_org_size) - offset;
- p = SCNG(script_org) + offset;
- free_flag = 0;
- } else {
- SCNG(input_filter)(&p, &length, SCNG(script_org) + offset, SCNG(script_org_size) - offset TSRMLS_CC);
- free_flag = 1;
- }
-
- new_len = original_offset + length;
-
- if (new_len > YYLIMIT - SCNG(yy_start)) {
- unsigned char *new_yy_start = erealloc(SCNG(yy_start), new_len);
- SCNG(yy_cursor) = new_yy_start + (SCNG(yy_cursor) - SCNG(yy_start));
- SCNG(yy_marker) = new_yy_start + (SCNG(yy_marker) - SCNG(yy_start));
- SCNG(yy_text) = new_yy_start + (SCNG(yy_text) - SCNG(yy_start));
- SCNG(yy_start) = new_yy_start;
- SCNG(script_filtered) = new_yy_start;
- SCNG(script_filtered_size) = new_len;
- }
-
- SCNG(yy_limit) = SCNG(yy_start) + new_len;
- memmove(SCNG(yy_start) + original_offset, p, length);
-
- if (free_flag) {
- efree(p);
- }
-}
-
-
-ZEND_API int zend_multibyte_yyinput(zend_file_handle *file_handle, char *buf, size_t len TSRMLS_DC)
-{
- size_t n;
-
- if (CG(interactive) == 0) {
- if (zend_stream_fixup(file_handle, &buf, &len TSRMLS_CC) == FAILURE) {
- return FAILURE;
- }
- n = len;
- return n;
- }
-
- /* interactive */
- if (SCNG(script_org)) {
- efree(SCNG(script_org));
- }
if (SCNG(script_filtered)) {
efree(SCNG(script_filtered));
+ SCNG(script_filtered) = NULL;
}
- SCNG(script_org) = NULL;
- SCNG(script_org_size) = 0;
-
- /* TODO: support widechars */
- if (zend_stream_fixup(file_handle, &buf, &len TSRMLS_CC) == FAILURE) {
- return FAILURE;
+ SCNG(script_filtered_size) = 0;
+ length = SCNG(script_org_size);
+ new_yy_start = SCNG(script_org);
+ } else {
+ if ((size_t)-1 == SCNG(input_filter)(&new_yy_start, &length, SCNG(script_org), SCNG(script_org_size) TSRMLS_CC)) {
+ zend_error_noreturn(E_COMPILE_ERROR, "Could not convert the script from the detected "
+ "encoding \"%s\" to a compatible encoding", zend_multibyte_get_encoding_name(LANG_SCNG(script_encoding)));
}
- n = len;
-
- SCNG(script_org_size) = n;
- SCNG(script_org) = (unsigned char*)emalloc(SCNG(script_org_size) + 1);
- memcpy(SCNG(script_org), buf, n);
-
- return n;
-}
-
-
-ZEND_API int zend_multibyte_read_script(unsigned char *buf, size_t n TSRMLS_DC)
-{
- if (SCNG(script_org)) {
- efree(SCNG(script_org));
- SCNG(script_org) = NULL;
+ SCNG(script_filtered) = new_yy_start;
+ SCNG(script_filtered_size) = length;
}
- SCNG(script_org_size) = n;
- SCNG(script_org) = (unsigned char*)emalloc(SCNG(script_org_size) + 1);
- memcpy(SCNG(script_org), buf, n);
- *(SCNG(script_org)+SCNG(script_org_size)) = '\0';
+ SCNG(yy_cursor) = new_yy_start + (SCNG(yy_cursor) - SCNG(yy_start));
+ SCNG(yy_marker) = new_yy_start + (SCNG(yy_marker) - SCNG(yy_start));
+ SCNG(yy_text) = new_yy_start + (SCNG(yy_text) - SCNG(yy_start));
+ SCNG(yy_limit) = new_yy_start + (SCNG(yy_limit) - SCNG(yy_start));
- return 0;
+ SCNG(yy_start) = new_yy_start;
}
@@ -726,11 +862,6 @@
zendlval->value.str.val = (char *) estrndup(yytext, yyleng); \
zendlval->value.str.len = yyleng; \
}
-#else /* ZEND_MULTIBYTE */
-# define zend_copy_value(zendlval, yytext, yyleng) \
- zendlval->value.str.val = (char *)estrndup(yytext, yyleng); \
- zendlval->value.str.len = yyleng;
-#endif /* ZEND_MULTIBYTE */
static void zend_scan_escape_string(zval *zendlval, char *str, int len, char quote_type TSRMLS_DC)
{
@@ -834,7 +965,6 @@
s++;
}
*t = 0;
-#ifdef ZEND_MULTIBYTE
if (SCNG(output_filter)) {
size_t sz = 0;
s = zendlval->value.str.val;
@@ -842,7 +972,6 @@
zendlval->value.str.len = sz;
efree(s);
}
-#endif /* ZEND_MULTIBYTE */
}
@@ -859,6 +988,7 @@
DNUM ([0-9]*"."[0-9]+)|([0-9]+"."[0-9]*)
EXPONENT_DNUM (({LNUM}|{DNUM})[eE][+-]?{LNUM})
HNUM "0x"[0-9a-fA-F]+
+BNUM "0b"[01]+
LABEL [a-zA-Z_\x7f-\xff][a-zA-Z0-9_\x7f-\xff]*
WHITESPACE [ \n\r\t]+
TABS_AND_SPACES [ \t]*
@@ -1006,6 +1136,10 @@
return T_INTERFACE;
}
+<ST_IN_SCRIPTING>"trait" {
+ return T_TRAIT;
+}
+
<ST_IN_SCRIPTING>"extends" {
return T_EXTENDS;
}
@@ -1072,11 +1206,7 @@
return T_DOUBLE_CAST;
}
-<ST_IN_SCRIPTING>"("{TABS_AND_SPACES}"string"{TABS_AND_SPACES}")" {
- return T_STRING_CAST;
-}
-
-<ST_IN_SCRIPTING>"("{TABS_AND_SPACES}"binary"{TABS_AND_SPACES}")" {
+<ST_IN_SCRIPTING>"("{TABS_AND_SPACES}("string"|"binary"){TABS_AND_SPACES}")" {
return T_STRING_CAST;
}
@@ -1124,6 +1254,10 @@
return T_USE;
}
+<ST_IN_SCRIPTING>"insteadof" {
+ return T_INSTEADOF;
+}
+
<ST_IN_SCRIPTING>"global" {
return T_GLOBAL;
}
@@ -1180,6 +1314,10 @@
return T_ARRAY;
}
+<ST_IN_SCRIPTING>"callable" {
+ return T_CALLABLE;
+}
+
<ST_IN_SCRIPTING>"++" {
return T_INC;
}
@@ -1326,6 +1464,26 @@
goto restart;
}
+<ST_IN_SCRIPTING>{BNUM} {
+ char *bin = yytext + 2; /* Skip "0b" */
+ int len = yyleng - 2;
+
+ /* Skip any leading 0s */
+ while (*bin == '0') {
+ ++bin;
+ --len;
+ }
+
+ if (len < SIZEOF_LONG * 8) {
+ zendlval->value.lval = strtol(bin, NULL, 2);
+ zendlval->type = IS_LONG;
+ return T_LNUMBER;
+ } else {
+ zendlval->value.dval = zend_bin_strtod(bin, NULL);
+ zendlval->type = IS_DOUBLE;
+ return T_DNUMBER;
+ }
+}
<ST_IN_SCRIPTING>{LNUM} {
if (yyleng < MAX_LENGTH_OF_LONG - 1) { /* Won't overflow */
@@ -1381,7 +1539,7 @@
return T_NUM_STRING;
}
-<ST_VAR_OFFSET>{LNUM}|{HNUM} { /* Offset must be treated as a string */
+<ST_VAR_OFFSET>{LNUM}|{HNUM}|{BNUM} { /* Offset must be treated as a string */
zendlval->value.str.val = (char *)estrndup(yytext, yyleng);
zendlval->value.str.len = yyleng;
zendlval->type = IS_STRING;
@@ -1395,8 +1553,17 @@
}
<ST_IN_SCRIPTING>"__CLASS__" {
- char *class_name = NULL;
+ const char *class_name = NULL;
+ if (CG(active_class_entry)
+ && (ZEND_ACC_TRAIT ==
+ (CG(active_class_entry)->ce_flags & ZEND_ACC_TRAIT))) {
+ // This is a hack, we abuse IS_NULL to indicate an invalid value
+ // if __CLASS__ is encountered in a trait, however, we also not that we
+ // should fix it up when we copy the method into an actual class
+ zendlval->value.lval = ZEND_ACC_TRAIT;
+ zendlval->type = IS_NULL;
+ } else {
if (CG(active_class_entry)) {
class_name = CG(active_class_entry)->name;
}
@@ -1404,14 +1571,36 @@
if (!class_name) {
class_name = "";
}
+
zendlval->value.str.len = strlen(class_name);
zendlval->value.str.val = estrndup(class_name, zendlval->value.str.len);
zendlval->type = IS_STRING;
+ }
return T_CLASS_C;
}
+<ST_IN_SCRIPTING>"__TRAIT__" {
+ const char *trait_name = NULL;
+
+ if (CG(active_class_entry)
+ && (ZEND_ACC_TRAIT ==
+ (CG(active_class_entry)->ce_flags & ZEND_ACC_TRAIT))) {
+ trait_name = CG(active_class_entry)->name;
+ }
+
+ if (!trait_name) {
+ trait_name = "";
+ }
+
+ zendlval->value.str.len = strlen(trait_name);
+ zendlval->value.str.val = estrndup(trait_name, zendlval->value.str.len);
+ zendlval->type = IS_STRING;
+
+ return T_TRAIT_C;
+}
+
<ST_IN_SCRIPTING>"__FUNCTION__" {
- char *func_name = NULL;
+ const char *func_name = NULL;
if (CG(active_op_array)) {
func_name = CG(active_op_array)->function_name;
@@ -1427,8 +1616,8 @@
}
<ST_IN_SCRIPTING>"__METHOD__" {
- char *class_name = CG(active_class_entry) ? CG(active_class_entry)->name : NULL;
- char *func_name = CG(active_op_array)? CG(active_op_array)->function_name : NULL;
+ const char *class_name = CG(active_class_entry) ? CG(active_class_entry)->name : NULL;
+ const char *func_name = CG(active_op_array)? CG(active_op_array)->function_name : NULL;
size_t len = 0;
if (class_name) {
@@ -1503,7 +1692,7 @@
}
<INITIAL>"<script"{WHITESPACE}+"language"{WHITESPACE}*"="{WHITESPACE}*("php"|"\"php\""|"'php'"){WHITESPACE}*">" {
- YYCTYPE *bracket = zend_memrchr(yytext, '<', yyleng - (sizeof("script language=php>") - 1));
+ YYCTYPE *bracket = (YYCTYPE*)zend_memrchr(yytext, '<', yyleng - (sizeof("script language=php>") - 1));
if (bracket != SCNG(yy_text)) {
/* Handle previously scanned HTML, as possible <script> tags found are assumed to not be PHP's */
@@ -1534,15 +1723,11 @@
<INITIAL>"<?=" {
- if (CG(short_tags)) {
zendlval->value.str.val = yytext; /* no copying - intentional */
zendlval->value.str.len = yyleng;
zendlval->type = IS_STRING;
BEGIN(ST_IN_SCRIPTING);
return T_OPEN_TAG_WITH_ECHO;
- } else {
- goto inline_char_handler;
- }
}
@@ -1596,7 +1781,7 @@
if (YYCURSOR < YYLIMIT) {
switch (*YYCURSOR) {
case '?':
- if (CG(short_tags) || !strncasecmp(YYCURSOR + 1, "php", 3)) { /* Assume [ \t\n\r] follows "php" */
+ if (CG(short_tags) || !strncasecmp((char*)YYCURSOR + 1, "php", 3) || (*(YYCURSOR + 1) == '=')) { /* Assume [ \t\n\r] follows "php" */
break;
}
continue;
@@ -1624,7 +1809,6 @@
inline_html:
yyleng = YYCURSOR - SCNG(yy_text);
-#ifdef ZEND_MULTIBYTE
if (SCNG(output_filter)) {
int readsize;
size_t sz = 0;
@@ -1637,10 +1821,6 @@
zendlval->value.str.val = (char *) estrndup(yytext, yyleng);
zendlval->value.str.len = yyleng;
}
-#else /* !ZEND_MULTIBYTE */
- zendlval->value.str.val = (char *) estrndup(yytext, yyleng);
- zendlval->value.str.len = yyleng;
-#endif
zendlval->type = IS_STRING;
HANDLE_NEWLINES(yytext, yyleng);
return T_INLINE_HTML;
@@ -1847,7 +2027,6 @@
}
*t = 0;
-#ifdef ZEND_MULTIBYTE
if (SCNG(output_filter)) {
size_t sz = 0;
s = zendlval->value.str.val;
@@ -1855,7 +2034,6 @@
zendlval->value.str.len = sz;
efree(s);
}
-#endif /* ZEND_MULTIBYTE */
return T_CONSTANT_ENCAPSED_STRING;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment