Skip to content

Commit 4a2aa99

Browse files
committed
HTML API: Fix CDATA lookalike matching invalid CDATA
When `next_token()` was introduced to the HTML Tag Processor, it started classifying comments that look like they were intended to be CDATA sections. In one of the changes made during development, however, a typo slipped through code review that treated comments as CDATA even if they only ended in `]>` and not the required `]]>`. The consequences of this defect were minor because in all cases these are treated as HTML comments from invalid syntax, but this patch adds the missing check to ensure the proper reporting of CDATA-lookalikes. Follow-up to [57348]. Props jonsurrell. Fixes #60406. git-svn-id: https://develop.svn.wordpress.org/trunk@57506 602fd350-edb4-49c9-b593-d223f7449a82
1 parent 5e33f4b commit 4a2aa99

File tree

2 files changed

+40
-1
lines changed

2 files changed

+40
-1
lines changed

src/wp-includes/html-api/class-wp-html-tag-processor.php

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1762,7 +1762,8 @@ private function parse_next_tag() {
17621762
'T' === $html[ $this->token_starts_at + 6 ] &&
17631763
'A' === $html[ $this->token_starts_at + 7 ] &&
17641764
'[' === $html[ $this->token_starts_at + 8 ] &&
1765-
']' === $html[ $closer_at - 1 ]
1765+
']' === $html[ $closer_at - 1 ] &&
1766+
']' === $html[ $closer_at - 2 ]
17661767
) {
17671768
$this->parser_state = self::STATE_COMMENT;
17681769
$this->comment_type = self::COMMENT_AS_CDATA_LOOKALIKE;

tests/phpunit/tests/html-api/wpHtmlTagProcessor-token-scanning.php

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -347,6 +347,38 @@ public function test_basic_assertion_cdata_section() {
347347
);
348348
}
349349

350+
/**
351+
* Ensures that a CDATA-lookalike closed with `]>` instead of `]]>` is reported as an invalid HTML comment.
352+
*
353+
* @ticket 60406
354+
*
355+
* @since 6.5.0
356+
*
357+
* @covers WP_HTML_Tag_Processor::next_token
358+
*/
359+
public function test_cdata_comment_with_incorrect_closer() {
360+
$processor = new WP_HTML_Tag_Processor( '<![CDATA[this is missing a closing square bracket]>' );
361+
$processor->next_token();
362+
363+
$this->assertSame(
364+
'#comment',
365+
$processor->get_token_name(),
366+
"Should have found comment token but found {$processor->get_token_name()} instead."
367+
);
368+
369+
$this->assertSame(
370+
WP_HTML_Processor::COMMENT_AS_INVALID_HTML,
371+
$processor->get_comment_type(),
372+
'Should have detected invalid HTML comment.'
373+
);
374+
375+
$this->assertSame(
376+
'[CDATA[this is missing a closing square bracket]',
377+
$processor->get_modifiable_text(),
378+
'Found incorrect modifiable text.'
379+
);
380+
}
381+
350382
/**
351383
* Ensures that abruptly-closed CDATA sections are properly parsed as comments.
352384
*
@@ -366,6 +398,12 @@ public function test_basic_assertion_abruptly_closed_cdata_section() {
366398
"Should have found a bogus comment but found {$processor->get_token_name()} instead."
367399
);
368400

401+
$this->assertSame(
402+
WP_HTML_Processor::COMMENT_AS_INVALID_HTML,
403+
$processor->get_comment_type(),
404+
'Should have detected invalid HTML comment.'
405+
);
406+
369407
$this->assertNull(
370408
$processor->get_tag(),
371409
'Should not have been able to query tag name on non-element token.'

0 commit comments

Comments
 (0)