@@ -873,16 +873,30 @@ def test_ignore_case(self):
873873 self .assertEqual (re .match (r"((a)\s(abc|a))" , "a a" , re .I ).group (1 ), "a a" )
874874 self .assertEqual (re .match (r"((a)\s(abc|a)*)" , "a aa" , re .I ).group (1 ), "a aa" )
875875
876- assert '\u212a ' .lower () == 'k' # 'K'
876+ # Two different characters have the same lowercase.
877+ assert 'K' .lower () == '\u212a ' .lower () == 'k' # 'K'
877878 self .assertTrue (re .match (r'K' , '\u212a ' , re .I ))
878879 self .assertTrue (re .match (r'k' , '\u212a ' , re .I ))
879880 self .assertTrue (re .match (r'\u212a' , 'K' , re .I ))
880881 self .assertTrue (re .match (r'\u212a' , 'k' , re .I ))
881- assert '\u017f ' .upper () == 'S' # 'ſ'
882+
883+ # Two different characters have the same uppercase.
884+ assert 's' .upper () == '\u017f ' .upper () == 'S' # 'ſ'
882885 self .assertTrue (re .match (r'S' , '\u017f ' , re .I ))
883886 self .assertTrue (re .match (r's' , '\u017f ' , re .I ))
884887 self .assertTrue (re .match (r'\u017f' , 'S' , re .I ))
885888 self .assertTrue (re .match (r'\u017f' , 's' , re .I ))
889+
890+ # Two different characters have the same uppercase. Unicode 9.0+.
891+ assert '\u0432 ' .upper () == '\u1c80 ' .upper () == '\u0412 ' # 'в', 'ᲀ', 'В'
892+ self .assertTrue (re .match (r'\u0412' , '\u0432 ' , re .I ))
893+ self .assertTrue (re .match (r'\u0412' , '\u1c80 ' , re .I ))
894+ self .assertTrue (re .match (r'\u0432' , '\u0412 ' , re .I ))
895+ self .assertTrue (re .match (r'\u0432' , '\u1c80 ' , re .I ))
896+ self .assertTrue (re .match (r'\u1c80' , '\u0412 ' , re .I ))
897+ self .assertTrue (re .match (r'\u1c80' , '\u0432 ' , re .I ))
898+
899+ # Two different characters have the same multicharacter uppercase.
886900 assert '\ufb05 ' .upper () == '\ufb06 ' .upper () == 'ST' # 'ſt', 'st'
887901 self .assertTrue (re .match (r'\ufb05' , '\ufb06 ' , re .I ))
888902 self .assertTrue (re .match (r'\ufb06' , '\ufb05 ' , re .I ))
@@ -896,16 +910,31 @@ def test_ignore_case_set(self):
896910 self .assertTrue (re .match (br'[19a]' , b'a' , re .I ))
897911 self .assertTrue (re .match (br'[19a]' , b'A' , re .I ))
898912 self .assertTrue (re .match (br'[19A]' , b'a' , re .I ))
899- assert '\u212a ' .lower () == 'k' # 'K'
913+
914+ # Two different characters have the same lowercase.
915+ assert 'K' .lower () == '\u212a ' .lower () == 'k' # 'K'
900916 self .assertTrue (re .match (r'[19K]' , '\u212a ' , re .I ))
901917 self .assertTrue (re .match (r'[19k]' , '\u212a ' , re .I ))
902918 self .assertTrue (re .match (r'[19\u212a]' , 'K' , re .I ))
903919 self .assertTrue (re .match (r'[19\u212a]' , 'k' , re .I ))
904- assert '\u017f ' .upper () == 'S' # 'ſ'
920+
921+ # Two different characters have the same uppercase.
922+ assert 's' .upper () == '\u017f ' .upper () == 'S' # 'ſ'
905923 self .assertTrue (re .match (r'[19S]' , '\u017f ' , re .I ))
906924 self .assertTrue (re .match (r'[19s]' , '\u017f ' , re .I ))
907925 self .assertTrue (re .match (r'[19\u017f]' , 'S' , re .I ))
908926 self .assertTrue (re .match (r'[19\u017f]' , 's' , re .I ))
927+
928+ # Two different characters have the same uppercase. Unicode 9.0+.
929+ assert '\u0432 ' .upper () == '\u1c80 ' .upper () == '\u0412 ' # 'в', 'ᲀ', 'В'
930+ self .assertTrue (re .match (r'[19\u0412]' , '\u0432 ' , re .I ))
931+ self .assertTrue (re .match (r'[19\u0412]' , '\u1c80 ' , re .I ))
932+ self .assertTrue (re .match (r'[19\u0432]' , '\u0412 ' , re .I ))
933+ self .assertTrue (re .match (r'[19\u0432]' , '\u1c80 ' , re .I ))
934+ self .assertTrue (re .match (r'[19\u1c80]' , '\u0412 ' , re .I ))
935+ self .assertTrue (re .match (r'[19\u1c80]' , '\u0432 ' , re .I ))
936+
937+ # Two different characters have the same multicharacter uppercase.
909938 assert '\ufb05 ' .upper () == '\ufb06 ' .upper () == 'ST' # 'ſt', 'st'
910939 self .assertTrue (re .match (r'[19\ufb05]' , '\ufb06 ' , re .I ))
911940 self .assertTrue (re .match (r'[19\ufb06]' , '\ufb05 ' , re .I ))
@@ -929,16 +958,30 @@ def test_ignore_case_range(self):
929958 self .assertTrue (re .match (r'[\U00010400-\U00010427]' , '\U00010428 ' , re .I ))
930959 self .assertTrue (re .match (r'[\U00010400-\U00010427]' , '\U00010400 ' , re .I ))
931960
932- assert '\u212a ' .lower () == 'k' # 'K'
961+ # Two different characters have the same lowercase.
962+ assert 'K' .lower () == '\u212a ' .lower () == 'k' # 'K'
933963 self .assertTrue (re .match (r'[J-M]' , '\u212a ' , re .I ))
934964 self .assertTrue (re .match (r'[j-m]' , '\u212a ' , re .I ))
935965 self .assertTrue (re .match (r'[\u2129-\u212b]' , 'K' , re .I ))
936966 self .assertTrue (re .match (r'[\u2129-\u212b]' , 'k' , re .I ))
937- assert '\u017f ' .upper () == 'S' # 'ſ'
967+
968+ # Two different characters have the same uppercase.
969+ assert 's' .upper () == '\u017f ' .upper () == 'S' # 'ſ'
938970 self .assertTrue (re .match (r'[R-T]' , '\u017f ' , re .I ))
939971 self .assertTrue (re .match (r'[r-t]' , '\u017f ' , re .I ))
940972 self .assertTrue (re .match (r'[\u017e-\u0180]' , 'S' , re .I ))
941973 self .assertTrue (re .match (r'[\u017e-\u0180]' , 's' , re .I ))
974+
975+ # Two different characters have the same uppercase. Unicode 9.0+.
976+ assert '\u0432 ' .upper () == '\u1c80 ' .upper () == '\u0412 ' # 'в', 'ᲀ', 'В'
977+ self .assertTrue (re .match (r'[\u0411-\u0413]' , '\u0432 ' , re .I ))
978+ self .assertTrue (re .match (r'[\u0411-\u0413]' , '\u1c80 ' , re .I ))
979+ self .assertTrue (re .match (r'[\u0431-\u0433]' , '\u0412 ' , re .I ))
980+ self .assertTrue (re .match (r'[\u0431-\u0433]' , '\u1c80 ' , re .I ))
981+ self .assertTrue (re .match (r'[\u1c80-\u1c82]' , '\u0412 ' , re .I ))
982+ self .assertTrue (re .match (r'[\u1c80-\u1c82]' , '\u0432 ' , re .I ))
983+
984+ # Two different characters have the same multicharacter uppercase.
942985 assert '\ufb05 ' .upper () == '\ufb06 ' .upper () == 'ST' # 'ſt', 'st'
943986 self .assertTrue (re .match (r'[\ufb04-\ufb05]' , '\ufb06 ' , re .I ))
944987 self .assertTrue (re .match (r'[\ufb06-\ufb07]' , '\ufb05 ' , re .I ))
0 commit comments