From de9857f75d8f9d0047d33e724cda04d74a182140 Mon Sep 17 00:00:00 2001 From: Aliaksandr Bystry Date: Wed, 15 Sep 2021 00:52:44 +0200 Subject: [PATCH 1/3] Fix #70962: xml_parse_into_struct strips embedded whitespace with XML_OPTION_SKIP_WHITE. --- ext/xml/tests/bug70962.phpt | 33 +++++++++ ext/xml/xml.c | 141 ++++++++++++++++++------------------ 2 files changed, 105 insertions(+), 69 deletions(-) create mode 100644 ext/xml/tests/bug70962.phpt diff --git a/ext/xml/tests/bug70962.phpt b/ext/xml/tests/bug70962.phpt new file mode 100644 index 0000000000000..c0e976e69d0cf --- /dev/null +++ b/ext/xml/tests/bug70962.phpt @@ -0,0 +1,33 @@ +--TEST-- +Bug #70962: xml_parse_into_struct strips embedded whitespace with XML_OPTION_SKIP_WHITE +--SKIPIF-- + +--FILE-- +<d>\n <e>\n \t"; + +$parsed = parseAndOutput($xml); + +// Check embedded whitespace is not getting skipped. +echo $parsed[1]['value'] . "\n"; + +// Check XML_OPTION_SKIP_WHITE ignores values of tags containing whitespace characters only. +var_dump(isset($parsed[2]['value'])); + +?> +--EXPECT-- + + +bool(false) \ No newline at end of file diff --git a/ext/xml/xml.c b/ext/xml/xml.c index fd8aebe03a524..45fd06971d626 100644 --- a/ext/xml/xml.c +++ b/ext/xml/xml.c @@ -881,78 +881,81 @@ void _xml_characterDataHandler(void *userData, const XML_Char *s, int len) } if (!Z_ISUNDEF(parser->data)) { - size_t i; - int doprint = 0; zend_string *decoded_value; decoded_value = xml_utf8_decode(s, len, parser->target_encoding); - for (i = 0; i < ZSTR_LEN(decoded_value); i++) { - switch (ZSTR_VAL(decoded_value)[i]) { - case ' ': - case '\t': - case '\n': - continue; - default: - doprint = 1; - break; - } - if (doprint) { - break; - } - } - if (doprint || (! parser->skipwhite)) { - if (parser->lastwasopen) { - zval *myval; - - /* check if the current tag already has a value - if yes append to that! */ - if ((myval = zend_hash_str_find(Z_ARRVAL_P(parser->ctag), "value", sizeof("value") - 1))) { - int newlen = Z_STRLEN_P(myval) + ZSTR_LEN(decoded_value); - Z_STR_P(myval) = zend_string_extend(Z_STR_P(myval), newlen, 0); - strncpy(Z_STRVAL_P(myval) + Z_STRLEN_P(myval) - ZSTR_LEN(decoded_value), - ZSTR_VAL(decoded_value), ZSTR_LEN(decoded_value) + 1); - zend_string_release_ex(decoded_value, 0); - } else { - add_assoc_str(parser->ctag, "value", decoded_value); - } - - } else { - zval tag; - zval *curtag, *mytype, *myval; - - ZEND_HASH_REVERSE_FOREACH_VAL(Z_ARRVAL(parser->data), curtag) { - if ((mytype = zend_hash_str_find(Z_ARRVAL_P(curtag),"type", sizeof("type") - 1))) { - if (!strcmp(Z_STRVAL_P(mytype), "cdata")) { - if ((myval = zend_hash_str_find(Z_ARRVAL_P(curtag), "value", sizeof("value") - 1))) { - int newlen = Z_STRLEN_P(myval) + ZSTR_LEN(decoded_value); - Z_STR_P(myval) = zend_string_extend(Z_STR_P(myval), newlen, 0); - strncpy(Z_STRVAL_P(myval) + Z_STRLEN_P(myval) - ZSTR_LEN(decoded_value), - ZSTR_VAL(decoded_value), ZSTR_LEN(decoded_value) + 1); - zend_string_release_ex(decoded_value, 0); - return; - } - } - } - break; - } ZEND_HASH_FOREACH_END(); - - if (parser->level <= XML_MAXLEVEL && parser->level > 0) { - array_init(&tag); - - _xml_add_to_info(parser,SKIP_TAGSTART(parser->ltags[parser->level-1])); - - add_assoc_string(&tag, "tag", SKIP_TAGSTART(parser->ltags[parser->level-1])); - add_assoc_str(&tag, "value", decoded_value); - add_assoc_string(&tag, "type", "cdata"); - add_assoc_long(&tag, "level", parser->level); - - zend_hash_next_index_insert(Z_ARRVAL(parser->data), &tag); - } else if (parser->level == (XML_MAXLEVEL + 1)) { - php_error_docref(NULL, E_WARNING, "Maximum depth exceeded - Results truncated"); - } - } - } else { - zend_string_release_ex(decoded_value, 0); - } + if (parser->lastwasopen) { + zval *myval; + + /* check if the current tag already has a value - if yes append to that! */ + if ((myval = zend_hash_str_find(Z_ARRVAL_P(parser->ctag), "value", sizeof("value") - 1))) { + int newlen = Z_STRLEN_P(myval) + ZSTR_LEN(decoded_value); + Z_STR_P(myval) = zend_string_extend(Z_STR_P(myval), newlen, 0); + strncpy(Z_STRVAL_P(myval) + Z_STRLEN_P(myval) - ZSTR_LEN(decoded_value), + ZSTR_VAL(decoded_value), ZSTR_LEN(decoded_value) + 1); + zend_string_release_ex(decoded_value, 0); + } else { + size_t i; + int doprint = 0; + if (parser->skipwhite) { + for (i = 0; i < ZSTR_LEN(decoded_value); i++) { + switch (ZSTR_VAL(decoded_value)[i]) { + case ' ': + case '\t': + case '\n': + continue; + default: + doprint = 1; + break; + } + if (doprint) { + break; + } + } + } + + if (doprint || !parser->skipwhite) { + add_assoc_str(parser->ctag, "value", decoded_value); + } else { + zend_string_release_ex(decoded_value, 0); + } + } + + } else { + zval tag; + zval *curtag, *mytype, *myval; + + ZEND_HASH_REVERSE_FOREACH_VAL(Z_ARRVAL(parser->data), curtag) { + if ((mytype = zend_hash_str_find(Z_ARRVAL_P(curtag),"type", sizeof("type") - 1))) { + if (!strcmp(Z_STRVAL_P(mytype), "cdata")) { + if ((myval = zend_hash_str_find(Z_ARRVAL_P(curtag), "value", sizeof("value") - 1))) { + int newlen = Z_STRLEN_P(myval) + ZSTR_LEN(decoded_value); + Z_STR_P(myval) = zend_string_extend(Z_STR_P(myval), newlen, 0); + strncpy(Z_STRVAL_P(myval) + Z_STRLEN_P(myval) - ZSTR_LEN(decoded_value), + ZSTR_VAL(decoded_value), ZSTR_LEN(decoded_value) + 1); + zend_string_release_ex(decoded_value, 0); + return; + } + } + } + break; + } ZEND_HASH_FOREACH_END(); + + if (parser->level <= XML_MAXLEVEL && parser->level > 0) { + array_init(&tag); + + _xml_add_to_info(parser,SKIP_TAGSTART(parser->ltags[parser->level-1])); + + add_assoc_string(&tag, "tag", SKIP_TAGSTART(parser->ltags[parser->level-1])); + add_assoc_str(&tag, "value", decoded_value); + add_assoc_string(&tag, "type", "cdata"); + add_assoc_long(&tag, "level", parser->level); + + zend_hash_next_index_insert(Z_ARRVAL(parser->data), &tag); + } else if (parser->level == (XML_MAXLEVEL + 1)) { + php_error_docref(NULL, E_WARNING, "Maximum depth exceeded - Results truncated"); + } + } } } } From 12c2f34be2509eb79b09e9b131261e80cd4bb5fc Mon Sep 17 00:00:00 2001 From: Aliaksandr Bystry Date: Wed, 15 Sep 2021 15:19:47 +0200 Subject: [PATCH 2/3] Fix according to PR comments. --- ext/xml/tests/bug70962.phpt | 2 +- ext/xml/xml.c | 144 ++++++++++++++++++------------------ 2 files changed, 73 insertions(+), 73 deletions(-) diff --git a/ext/xml/tests/bug70962.phpt b/ext/xml/tests/bug70962.phpt index c0e976e69d0cf..f22a866ef30a4 100644 --- a/ext/xml/tests/bug70962.phpt +++ b/ext/xml/tests/bug70962.phpt @@ -30,4 +30,4 @@ var_dump(isset($parsed[2]['value'])); --EXPECT-- -bool(false) \ No newline at end of file +bool(false) diff --git a/ext/xml/xml.c b/ext/xml/xml.c index 45fd06971d626..d654a2230a111 100644 --- a/ext/xml/xml.c +++ b/ext/xml/xml.c @@ -884,78 +884,78 @@ void _xml_characterDataHandler(void *userData, const XML_Char *s, int len) zend_string *decoded_value; decoded_value = xml_utf8_decode(s, len, parser->target_encoding); - if (parser->lastwasopen) { - zval *myval; - - /* check if the current tag already has a value - if yes append to that! */ - if ((myval = zend_hash_str_find(Z_ARRVAL_P(parser->ctag), "value", sizeof("value") - 1))) { - int newlen = Z_STRLEN_P(myval) + ZSTR_LEN(decoded_value); - Z_STR_P(myval) = zend_string_extend(Z_STR_P(myval), newlen, 0); - strncpy(Z_STRVAL_P(myval) + Z_STRLEN_P(myval) - ZSTR_LEN(decoded_value), - ZSTR_VAL(decoded_value), ZSTR_LEN(decoded_value) + 1); - zend_string_release_ex(decoded_value, 0); - } else { - size_t i; - int doprint = 0; - if (parser->skipwhite) { - for (i = 0; i < ZSTR_LEN(decoded_value); i++) { - switch (ZSTR_VAL(decoded_value)[i]) { - case ' ': - case '\t': - case '\n': - continue; - default: - doprint = 1; - break; - } - if (doprint) { - break; - } - } - } - - if (doprint || !parser->skipwhite) { - add_assoc_str(parser->ctag, "value", decoded_value); - } else { - zend_string_release_ex(decoded_value, 0); - } - } - - } else { - zval tag; - zval *curtag, *mytype, *myval; - - ZEND_HASH_REVERSE_FOREACH_VAL(Z_ARRVAL(parser->data), curtag) { - if ((mytype = zend_hash_str_find(Z_ARRVAL_P(curtag),"type", sizeof("type") - 1))) { - if (!strcmp(Z_STRVAL_P(mytype), "cdata")) { - if ((myval = zend_hash_str_find(Z_ARRVAL_P(curtag), "value", sizeof("value") - 1))) { - int newlen = Z_STRLEN_P(myval) + ZSTR_LEN(decoded_value); - Z_STR_P(myval) = zend_string_extend(Z_STR_P(myval), newlen, 0); - strncpy(Z_STRVAL_P(myval) + Z_STRLEN_P(myval) - ZSTR_LEN(decoded_value), - ZSTR_VAL(decoded_value), ZSTR_LEN(decoded_value) + 1); - zend_string_release_ex(decoded_value, 0); - return; - } - } - } - break; - } ZEND_HASH_FOREACH_END(); - - if (parser->level <= XML_MAXLEVEL && parser->level > 0) { - array_init(&tag); - - _xml_add_to_info(parser,SKIP_TAGSTART(parser->ltags[parser->level-1])); - - add_assoc_string(&tag, "tag", SKIP_TAGSTART(parser->ltags[parser->level-1])); - add_assoc_str(&tag, "value", decoded_value); - add_assoc_string(&tag, "type", "cdata"); - add_assoc_long(&tag, "level", parser->level); - - zend_hash_next_index_insert(Z_ARRVAL(parser->data), &tag); - } else if (parser->level == (XML_MAXLEVEL + 1)) { - php_error_docref(NULL, E_WARNING, "Maximum depth exceeded - Results truncated"); - } - } + if (parser->lastwasopen) { + zval *myval; + + /* check if the current tag already has a value - if yes append to that! */ + if ((myval = zend_hash_str_find(Z_ARRVAL_P(parser->ctag), "value", sizeof("value") - 1))) { + size_t newlen = Z_STRLEN_P(myval) + ZSTR_LEN(decoded_value); + Z_STR_P(myval) = zend_string_extend(Z_STR_P(myval), newlen, 0); + strncpy(Z_STRVAL_P(myval) + Z_STRLEN_P(myval) - ZSTR_LEN(decoded_value), + ZSTR_VAL(decoded_value), ZSTR_LEN(decoded_value) + 1); + zend_string_release_ex(decoded_value, 0); + } else { + size_t i; + int doprint = 0; + if (parser->skipwhite) { + for (i = 0; i < ZSTR_LEN(decoded_value); i++) { + switch (ZSTR_VAL(decoded_value)[i]) { + case ' ': + case '\t': + case '\n': + continue; + default: + doprint = 1; + break; + } + if (doprint) { + break; + } + } + } + + if (doprint || !parser->skipwhite) { + add_assoc_str(parser->ctag, "value", decoded_value); + } else { + zend_string_release_ex(decoded_value, 0); + } + } + + } else { + zval tag; + zval *curtag, *mytype, *myval; + + ZEND_HASH_REVERSE_FOREACH_VAL(Z_ARRVAL(parser->data), curtag) { + if ((mytype = zend_hash_str_find(Z_ARRVAL_P(curtag),"type", sizeof("type") - 1))) { + if (!strcmp(Z_STRVAL_P(mytype), "cdata")) { + if ((myval = zend_hash_str_find(Z_ARRVAL_P(curtag), "value", sizeof("value") - 1))) { + int newlen = Z_STRLEN_P(myval) + ZSTR_LEN(decoded_value); + Z_STR_P(myval) = zend_string_extend(Z_STR_P(myval), newlen, 0); + strncpy(Z_STRVAL_P(myval) + Z_STRLEN_P(myval) - ZSTR_LEN(decoded_value), + ZSTR_VAL(decoded_value), ZSTR_LEN(decoded_value) + 1); + zend_string_release_ex(decoded_value, 0); + return; + } + } + } + break; + } ZEND_HASH_FOREACH_END(); + + if (parser->level <= XML_MAXLEVEL && parser->level > 0) { + array_init(&tag); + + _xml_add_to_info(parser,SKIP_TAGSTART(parser->ltags[parser->level-1])); + + add_assoc_string(&tag, "tag", SKIP_TAGSTART(parser->ltags[parser->level-1])); + add_assoc_str(&tag, "value", decoded_value); + add_assoc_string(&tag, "type", "cdata"); + add_assoc_long(&tag, "level", parser->level); + + zend_hash_next_index_insert(Z_ARRVAL(parser->data), &tag); + } else if (parser->level == (XML_MAXLEVEL + 1)) { + php_error_docref(NULL, E_WARNING, "Maximum depth exceeded - Results truncated"); + } + } } } } From 75a2bcdde36e6099624a1531c7e9930d6d0a2f36 Mon Sep 17 00:00:00 2001 From: Aliaksandr Bystry Date: Wed, 15 Sep 2021 19:50:16 +0200 Subject: [PATCH 3/3] Ignore empty "cdata" elements. --- ext/xml/tests/bug70962.phpt | 6 ++++- ext/xml/xml.c | 46 +++++++++++++++++++------------------ 2 files changed, 29 insertions(+), 23 deletions(-) diff --git a/ext/xml/tests/bug70962.phpt b/ext/xml/tests/bug70962.phpt index f22a866ef30a4..46a6d5dfca1b2 100644 --- a/ext/xml/tests/bug70962.phpt +++ b/ext/xml/tests/bug70962.phpt @@ -16,7 +16,7 @@ function parseAndOutput($xml) return $values; } -$xml = "<d>\n <e>\n \t"; +$xml = "<d>\n <e>\n \t"; $parsed = parseAndOutput($xml); @@ -26,8 +26,12 @@ echo $parsed[1]['value'] . "\n"; // Check XML_OPTION_SKIP_WHITE ignores values of tags containing whitespace characters only. var_dump(isset($parsed[2]['value'])); +// Check XML_OPTION_SKIP_WHITE ignores empty values. +var_dump(count($parsed)); + ?> --EXPECT-- bool(false) +int(4) diff --git a/ext/xml/xml.c b/ext/xml/xml.c index d654a2230a111..84e9bc2b96506 100644 --- a/ext/xml/xml.c +++ b/ext/xml/xml.c @@ -881,9 +881,28 @@ void _xml_characterDataHandler(void *userData, const XML_Char *s, int len) } if (!Z_ISUNDEF(parser->data)) { + size_t i; + int doprint = 0; zend_string *decoded_value; decoded_value = xml_utf8_decode(s, len, parser->target_encoding); + if (parser->skipwhite) { + for (i = 0; i < ZSTR_LEN(decoded_value); i++) { + switch (ZSTR_VAL(decoded_value)[i]) { + case ' ': + case '\t': + case '\n': + continue; + default: + doprint = 1; + break; + } + if (doprint) { + break; + } + } + } + if (parser->lastwasopen) { zval *myval; @@ -895,26 +914,7 @@ void _xml_characterDataHandler(void *userData, const XML_Char *s, int len) ZSTR_VAL(decoded_value), ZSTR_LEN(decoded_value) + 1); zend_string_release_ex(decoded_value, 0); } else { - size_t i; - int doprint = 0; - if (parser->skipwhite) { - for (i = 0; i < ZSTR_LEN(decoded_value); i++) { - switch (ZSTR_VAL(decoded_value)[i]) { - case ' ': - case '\t': - case '\n': - continue; - default: - doprint = 1; - break; - } - if (doprint) { - break; - } - } - } - - if (doprint || !parser->skipwhite) { + if (doprint || (! parser->skipwhite)) { add_assoc_str(parser->ctag, "value", decoded_value); } else { zend_string_release_ex(decoded_value, 0); @@ -941,7 +941,7 @@ void _xml_characterDataHandler(void *userData, const XML_Char *s, int len) break; } ZEND_HASH_FOREACH_END(); - if (parser->level <= XML_MAXLEVEL && parser->level > 0) { + if (parser->level <= XML_MAXLEVEL && parser->level > 0 && (doprint || (! parser->skipwhite))) { array_init(&tag); _xml_add_to_info(parser,SKIP_TAGSTART(parser->ltags[parser->level-1])); @@ -953,7 +953,9 @@ void _xml_characterDataHandler(void *userData, const XML_Char *s, int len) zend_hash_next_index_insert(Z_ARRVAL(parser->data), &tag); } else if (parser->level == (XML_MAXLEVEL + 1)) { - php_error_docref(NULL, E_WARNING, "Maximum depth exceeded - Results truncated"); + php_error_docref(NULL, E_WARNING, "Maximum depth exceeded - Results truncated"); + } else { + zend_string_release_ex(decoded_value, 0); } } }