From 485f7914e6619acaf5622c3239107fc0fe9de52b Mon Sep 17 00:00:00 2001
From: "Christoph M. Becker"
Date: Mon, 14 Jun 2021 15:01:42 +0200
Subject: [PATCH] Fix #72809: Locale::lookup() wrong result with canonicalize option

Canonicalization converts the locale to ICU format[1].  However, the
lookup described in RFC 4647, section 3.4, is about POSIX format.  To
make that lookup work for ICU format, we also need to cater to keyword
separators.

The results are somewhat unexpected, but apparently canonical lookup is
explicitly supposed to return canonical language tags[2].

[1]
[2]
---
 ext/intl/locale/locale_methods.c    |  2 +-
 ext/intl/tests/locale/bug72809.phpt | 18 ++++++++++++++++++
 2 files changed, 19 insertions(+), 1 deletion(-)
 create mode 100644 ext/intl/tests/locale/bug72809.phpt

diff --git a/ext/intl/locale/locale_methods.c b/ext/intl/locale/locale_methods.c
index 50838de7ed8fa..1c4ba327bd83c 100644
--- a/ext/intl/locale/locale_methods.c
+++ b/ext/intl/locale/locale_methods.c
@@ -149,7 +149,7 @@ static zend_off_t getStrrtokenPos(char* str, zend_off_t savedPos)
 	zend_off_t i;
 
 	for(i=savedPos-1; i>=0; i--) {
-		if(isIDSeparator(*(str+i)) ){
+		if(isIDSeparator(*(str+i)) || isKeywordSeparator(*(str+i))){
 			/* delimiter found; check for singleton */
 			if(i>=2 && isIDSeparator(*(str+i-2)) ){
 				/* a singleton; so send the position of token before the singleton */
diff --git a/ext/intl/tests/locale/bug72809.phpt b/ext/intl/tests/locale/bug72809.phpt
new file mode 100644
index 0000000000000..98dc7d39694da
--- /dev/null
+++ b/ext/intl/tests/locale/bug72809.phpt
@@ -0,0 +1,18 @@
+--TEST--
+Bug #72809 (Locale::lookup() wrong result with canonicalize option)
+--SKIPIF--
+
+--FILE--
+
+--EXPECT--
+string(5) "en_us"
+string(5) "en_US"
+string(5) "en_us"