From 75fc2c4e684a81731f18147635862a73609233e1 Mon Sep 17 00:00:00 2001 From: "Christoph M. Becker" Date: Wed, 23 Jun 2021 15:48:48 +0200 Subject: [PATCH 1/4] Fix #74264: grapheme_sttrpos() broken for negative offsets We must not assume that `usearch_last()` gives the proper result for negative offsets. Instead we'd need to continue to search backwards (`usearch_previous`) until we find a proper match. However, apparently searching backwards is broken, so we work around by searching forward from the start of the string until we pass the `offset_pos`, and then use the previous result. --- ext/intl/grapheme/grapheme_util.c | 24 ++++++++++++++++++------ ext/intl/tests/bug74264.phpt | 26 ++++++++++++++++++++++++++ 2 files changed, 44 insertions(+), 6 deletions(-) create mode 100644 ext/intl/tests/bug74264.phpt diff --git a/ext/intl/grapheme/grapheme_util.c b/ext/intl/grapheme/grapheme_util.c index 5bc23be2093b6..926614deb768e 100644 --- a/ext/intl/grapheme/grapheme_util.c +++ b/ext/intl/grapheme/grapheme_util.c @@ -133,7 +133,7 @@ void grapheme_substr_ascii(char *str, size_t str_len, int32_t f, int32_t l, char int32_t grapheme_strpos_utf16(char *haystack, size_t haystack_len, char *needle, size_t needle_len, int32_t offset, int32_t *puchar_pos, int f_ignore_case, int last) { UChar *uhaystack = NULL, *uneedle = NULL; - int32_t uhaystack_len = 0, uneedle_len = 0, char_pos, ret_pos, offset_pos = 0; + int32_t uhaystack_len = 0, uneedle_len = 0, char_pos, ret_pos, offset_pos = 0, prev_pos = USEARCH_DONE; unsigned char u_break_iterator_buffer[U_BRK_SAFECLONE_BUFFERSIZE]; UBreakIterator* bi = NULL; UErrorCode status; @@ -179,16 +179,28 @@ int32_t grapheme_strpos_utf16(char *haystack, size_t haystack_len, char *needle, STRPOS_CHECK_STATUS(status, "Invalid search offset"); } status = U_ZERO_ERROR; - usearch_setOffset(src, offset_pos, &status); + usearch_setOffset(src, last ? 0 : offset_pos, &status); STRPOS_CHECK_STATUS(status, "Invalid search offset"); } if(last) { - char_pos = usearch_last(src, &status); - if(char_pos < offset_pos) { - /* last one is beyound our start offset */ - char_pos = USEARCH_DONE; + if (offset >= 0) { + char_pos = usearch_last(src, &status); + if(char_pos < offset_pos) { + /* last one is beyond our start offset */ + char_pos = USEARCH_DONE; + } + } else { + /* searching backwards is broken, so we search forwards, albeit it's less efficient */ + do { + char_pos = usearch_next(src, &status); + if (char_pos == USEARCH_DONE || char_pos > offset_pos) { + char_pos = prev_pos; + break; + } + prev_pos = char_pos; + } while(1); } } else { char_pos = usearch_next(src, &status); diff --git a/ext/intl/tests/bug74264.phpt b/ext/intl/tests/bug74264.phpt new file mode 100644 index 0000000000000..13826cb4b0949 --- /dev/null +++ b/ext/intl/tests/bug74264.phpt @@ -0,0 +1,26 @@ +--TEST-- +Bug #74264 (grapheme_sttrpos() broken for negative offsets) +--SKIPIF-- + +--FILE-- + +--EXPECT-- +bool(false) +bool(false) +int(3) +int(3) +int(4) +int(4) +int(5) +int(5) +int(6) +int(6) From 7f75d59f5257cf52bba57396a4dc0e813bdfc1b0 Mon Sep 17 00:00:00 2001 From: "Christoph M. Becker" Date: Mon, 5 Jul 2021 14:41:34 +0200 Subject: [PATCH 2/4] Search backwards using overlap flag --- ext/intl/grapheme/grapheme_util.c | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) diff --git a/ext/intl/grapheme/grapheme_util.c b/ext/intl/grapheme/grapheme_util.c index 926614deb768e..25468bf74beac 100644 --- a/ext/intl/grapheme/grapheme_util.c +++ b/ext/intl/grapheme/grapheme_util.c @@ -179,7 +179,7 @@ int32_t grapheme_strpos_utf16(char *haystack, size_t haystack_len, char *needle, STRPOS_CHECK_STATUS(status, "Invalid search offset"); } status = U_ZERO_ERROR; - usearch_setOffset(src, last ? 0 : offset_pos, &status); + usearch_setOffset(src, offset_pos + (last && offset < 0), &status); STRPOS_CHECK_STATUS(status, "Invalid search offset"); } @@ -192,15 +192,9 @@ int32_t grapheme_strpos_utf16(char *haystack, size_t haystack_len, char *needle, char_pos = USEARCH_DONE; } } else { - /* searching backwards is broken, so we search forwards, albeit it's less efficient */ - do { - char_pos = usearch_next(src, &status); - if (char_pos == USEARCH_DONE || char_pos > offset_pos) { - char_pos = prev_pos; - break; - } - prev_pos = char_pos; - } while(1); + usearch_setAttribute(src, USEARCH_OVERLAP, USEARCH_ON, &status); + STRPOS_CHECK_STATUS(status, "Error setting overlap flag"); + char_pos = usearch_previous(src, &status); } } else { char_pos = usearch_next(src, &status); From ec6ffc440dfb7f40c78d09f706aa5e8f601faa89 Mon Sep 17 00:00:00 2001 From: "Christoph M. Becker" Date: Mon, 5 Jul 2021 15:52:05 +0200 Subject: [PATCH 3/4] Revert "Search backwards using overlap flag" This reverts commit 7f75d59f5257cf52bba57396a4dc0e813bdfc1b0. --- ext/intl/grapheme/grapheme_util.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/ext/intl/grapheme/grapheme_util.c b/ext/intl/grapheme/grapheme_util.c index 25468bf74beac..926614deb768e 100644 --- a/ext/intl/grapheme/grapheme_util.c +++ b/ext/intl/grapheme/grapheme_util.c @@ -179,7 +179,7 @@ int32_t grapheme_strpos_utf16(char *haystack, size_t haystack_len, char *needle, STRPOS_CHECK_STATUS(status, "Invalid search offset"); } status = U_ZERO_ERROR; - usearch_setOffset(src, offset_pos + (last && offset < 0), &status); + usearch_setOffset(src, last ? 0 : offset_pos, &status); STRPOS_CHECK_STATUS(status, "Invalid search offset"); } @@ -192,9 +192,15 @@ int32_t grapheme_strpos_utf16(char *haystack, size_t haystack_len, char *needle, char_pos = USEARCH_DONE; } } else { - usearch_setAttribute(src, USEARCH_OVERLAP, USEARCH_ON, &status); - STRPOS_CHECK_STATUS(status, "Error setting overlap flag"); - char_pos = usearch_previous(src, &status); + /* searching backwards is broken, so we search forwards, albeit it's less efficient */ + do { + char_pos = usearch_next(src, &status); + if (char_pos == USEARCH_DONE || char_pos > offset_pos) { + char_pos = prev_pos; + break; + } + prev_pos = char_pos; + } while(1); } } else { char_pos = usearch_next(src, &status); From 56681033f39fe5e56f429bff3f4d74c5a3cf7cd5 Mon Sep 17 00:00:00 2001 From: "Christoph M. Becker" Date: Mon, 5 Jul 2021 16:04:14 +0200 Subject: [PATCH 4/4] Move prev_pos declaration into narrower scope --- ext/intl/grapheme/grapheme_util.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/ext/intl/grapheme/grapheme_util.c b/ext/intl/grapheme/grapheme_util.c index 926614deb768e..8a21fa51ee4a1 100644 --- a/ext/intl/grapheme/grapheme_util.c +++ b/ext/intl/grapheme/grapheme_util.c @@ -133,7 +133,7 @@ void grapheme_substr_ascii(char *str, size_t str_len, int32_t f, int32_t l, char int32_t grapheme_strpos_utf16(char *haystack, size_t haystack_len, char *needle, size_t needle_len, int32_t offset, int32_t *puchar_pos, int f_ignore_case, int last) { UChar *uhaystack = NULL, *uneedle = NULL; - int32_t uhaystack_len = 0, uneedle_len = 0, char_pos, ret_pos, offset_pos = 0, prev_pos = USEARCH_DONE; + int32_t uhaystack_len = 0, uneedle_len = 0, char_pos, ret_pos, offset_pos = 0; unsigned char u_break_iterator_buffer[U_BRK_SAFECLONE_BUFFERSIZE]; UBreakIterator* bi = NULL; UErrorCode status; @@ -193,6 +193,7 @@ int32_t grapheme_strpos_utf16(char *haystack, size_t haystack_len, char *needle, } } else { /* searching backwards is broken, so we search forwards, albeit it's less efficient */ + int32_t prev_pos = USEARCH_DONE; do { char_pos = usearch_next(src, &status); if (char_pos == USEARCH_DONE || char_pos > offset_pos) {