Skip to content

Commit 4e4473c

Browse files
committed
patch 8.2.1536: cannot get the class of a character; emoji widths are wrong
Problem: Cannot get the class of a character; emoji widths are wrong in some environments. Solution: Add charclass(). Update some emoji widths. Add script to check emoji widths.
1 parent 08aac3c commit 4e4473c

File tree

9 files changed

+91
-8
lines changed

9 files changed

+91
-8
lines changed

Filelist

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -197,6 +197,7 @@ SRC_ALL = \
197197
src/testdir/samples/*.txt \
198198
src/testdir/samples/test000 \
199199
src/testdir/color_ramp.vim \
200+
src/testdir/emoji_list.vim \
200201
src/testdir/silent.wav \
201202
src/testdir/popupbounce.vim \
202203
src/proto.h \

runtime/doc/eval.txt

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2425,6 +2425,7 @@ ch_status({handle} [, {options}])
24252425
String status of channel {handle}
24262426
changenr() Number current change number
24272427
char2nr({expr} [, {utf8}]) Number ASCII/UTF8 value of first char in {expr}
2428+
charclass({string}) Number character class of {string}
24282429
chdir({dir}) String change current working directory
24292430
cindent({lnum}) Number C indent for line {lnum}
24302431
clearmatches([{win}]) none clear all matches
@@ -3520,6 +3521,18 @@ char2nr({expr} [, {utf8}]) *char2nr()*
35203521
Can also be used as a |method|: >
35213522
GetChar()->char2nr()
35223523

3524+
3525+
charclass({string}) *charclass()*
3526+
Return the character class of the first character in {string}.
3527+
The character class is one of:
3528+
0 blank
3529+
1 punctuation
3530+
2 word character
3531+
3 emoji
3532+
other specific Unicode class
3533+
The class is used in patterns and word motions.
3534+
3535+
35233536
chdir({dir}) *chdir()*
35243537
Change the current working directory to {dir}. The scope of
35253538
the directory change depends on the directory of the current

runtime/doc/usr_41.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -600,6 +600,7 @@ String manipulation: *string-functions*
600600
strtrans() translate a string to make it printable
601601
tolower() turn a string to lowercase
602602
toupper() turn a string to uppercase
603+
charclass() class of a character
603604
match() position where a pattern matches in a string
604605
matchend() position where a pattern match ends in a string
605606
matchstr() match of a pattern in a string

src/evalfunc.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -564,6 +564,7 @@ static funcentry_T global_functions[] =
564564
{"ch_status", 1, 2, FEARG_1, ret_string, JOB_FUNC(f_ch_status)},
565565
{"changenr", 0, 0, 0, ret_number, f_changenr},
566566
{"char2nr", 1, 2, FEARG_1, ret_number, f_char2nr},
567+
{"charclass", 1, 1, FEARG_1, ret_number, f_charclass},
567568
{"chdir", 1, 1, FEARG_1, ret_string, f_chdir},
568569
{"cindent", 1, 1, FEARG_1, ret_number, f_cindent},
569570
{"clearmatches", 0, 1, FEARG_1, ret_void, f_clearmatches},

src/mbyte.c

Lines changed: 43 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -132,7 +132,9 @@ static int dbcs_char2cells(int c);
132132
static int dbcs_ptr2cells_len(char_u *p, int size);
133133
static int dbcs_ptr2char(char_u *p);
134134
static int dbcs_head_off(char_u *base, char_u *p);
135+
#ifdef FEAT_EVAL
135136
static int cw_value(int c);
137+
#endif
136138

137139
/*
138140
* Lookup table to quickly get the length in bytes of a UTF-8 character from
@@ -1388,8 +1390,7 @@ utf_char2cells(int c)
13881390
{0x26ce, 0x26ce},
13891391
{0x26d4, 0x26d4},
13901392
{0x26ea, 0x26ea},
1391-
{0x26f2, 0x26f3},
1392-
{0x26f5, 0x26f5},
1393+
{0x26f2, 0x26f5},
13931394
{0x26fa, 0x26fa},
13941395
{0x26fd, 0x26fd},
13951396
{0x2705, 0x2705},
@@ -1490,6 +1491,21 @@ utf_char2cells(int c)
14901491
// based on http://unicode.org/emoji/charts/emoji-list.html
14911492
static struct interval emoji_wide[] =
14921493
{
1494+
{0x23ed, 0x23ef},
1495+
{0x23f1, 0x23f2},
1496+
{0x23f8, 0x23fa},
1497+
{0x24c2, 0x24c2},
1498+
{0x261d, 0x261d},
1499+
{0x26c8, 0x26c8},
1500+
{0x26cf, 0x26cf},
1501+
{0x26d1, 0x26d1},
1502+
{0x26d3, 0x26d3},
1503+
{0x26e9, 0x26e9},
1504+
{0x26f0, 0x26f1},
1505+
{0x26f7, 0x26f9},
1506+
{0x270c, 0x270d},
1507+
{0x2934, 0x2935},
1508+
{0x1f170, 0x1f189},
14931509
{0x1f1e6, 0x1f1ff},
14941510
{0x1f321, 0x1f321},
14951511
{0x1f324, 0x1f32c},
@@ -1533,11 +1549,15 @@ utf_char2cells(int c)
15331549

15341550
if (c >= 0x100)
15351551
{
1552+
#if defined(FEAT_EVAL) || defined(USE_WCHAR_FUNCTIONS)
15361553
int n;
1554+
#endif
15371555

1556+
#ifdef FEAT_EVAL
15381557
n = cw_value(c);
15391558
if (n != 0)
15401559
return n;
1560+
#endif
15411561

15421562
#ifdef USE_WCHAR_FUNCTIONS
15431563
/*
@@ -2667,8 +2687,7 @@ static struct interval emoji_all[] =
26672687
{0x3299, 0x3299},
26682688
{0x1f004, 0x1f004},
26692689
{0x1f0cf, 0x1f0cf},
2670-
{0x1f170, 0x1f171},
2671-
{0x1f17e, 0x1f17f},
2690+
{0x1f170, 0x1f189},
26722691
{0x1f18e, 0x1f18e},
26732692
{0x1f191, 0x1f19a},
26742693
{0x1f1e6, 0x1f1ff},
@@ -2835,6 +2854,10 @@ utf_class_buf(int c, buf_T *buf)
28352854
return 1; // punctuation
28362855
}
28372856

2857+
// emoji
2858+
if (intable(emoji_all, sizeof(emoji_all), c))
2859+
return 3;
2860+
28382861
// binary search in table
28392862
while (top >= bot)
28402863
{
@@ -2847,10 +2870,6 @@ utf_class_buf(int c, buf_T *buf)
28472870
return (int)classes[mid].class;
28482871
}
28492872

2850-
// emoji
2851-
if (intable(emoji_all, sizeof(emoji_all), c))
2852-
return 3;
2853-
28542873
// most other characters are "word" characters
28552874
return 2;
28562875
}
@@ -5352,6 +5371,8 @@ string_convert_ext(
53525371
return retval;
53535372
}
53545373

5374+
#if defined(FEAT_EVAL) || defined(PROTO)
5375+
53555376
/*
53565377
* Table set by setcellwidths().
53575378
*/
@@ -5525,3 +5546,17 @@ f_setcellwidths(typval_T *argvars, typval_T *rettv UNUSED)
55255546
cw_table = table;
55265547
cw_table_size = l->lv_len;
55275548
}
5549+
5550+
/*
 * "charclass()" function
 *
 * Stores the value returned by mb_get_class() for the string in argvars[0]
 * as the Number result in "rettv".
 * Gives the e_stringreq error and returns without writing "rettv" when the
 * argument is not a String, is a NULL string or is an empty string.
 *
 * NOTE(review): "rettv" is tagged UNUSED although it is assigned below; the
 * annotation looks stale - confirm and drop it.
 */
void
5551+
f_charclass(typval_T *argvars, typval_T *rettv UNUSED)
5552+
{
5553+
if (argvars[0].v_type != VAR_STRING	    // must be a String...
5554+
|| argvars[0].vval.v_string == NULL	    // ...that is not NULL...
5555+
|| *argvars[0].vval.v_string == NUL)	    // ...and not empty
5556+
{
5557+
emsg(_(e_stringreq));
5558+
return;
5559+
}
5560+
// The string is passed on unchanged; mb_get_class() does the classification.
rettv->vval.v_number = mb_get_class(argvars[0].vval.v_string);
5561+
}
5562+
#endif

src/proto/mbyte.pro

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -85,4 +85,5 @@ int convert_input_safe(char_u *ptr, int len, int maxlen, char_u **restp, int *re
8585
char_u *string_convert(vimconv_T *vcp, char_u *ptr, int *lenp);
8686
char_u *string_convert_ext(vimconv_T *vcp, char_u *ptr, int *lenp, int *unconvlenp);
8787
void f_setcellwidths(typval_T *argvars, typval_T *rettv);
88+
void f_charclass(typval_T *argvars, typval_T *rettv);
8889
/* vim: set ft=c : */

src/testdir/emoji_list.vim

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
" Script to fill the window with emoji characters, one per line.
" Every line has the form "|<char>| <width>", where <width> is the value
" strwidth() returns for that character.
2+
3+
" Do not replace a modified buffer: open a new window for the list in that
" case, otherwise start a new empty buffer in the current window.
if &modified
4+
new
5+
else
6+
enew
7+
endif
8+
9+
" Use a compiled Vim9 function for speed
" DoIt() goes over the code points 0x100 to 0x1ffff and writes a line for
" each character for which charclass() returns 3 (the emoji class).
10+
def DoIt()
11+
let lnum = 1
12+
for c in range(0x100, 0x1ffff)
13+
let cs = nr2char(c)
14+
if charclass(cs) == 3
15+
setline(lnum, '|' .. cs .. '| ' .. strwidth(cs))
16+
lnum += 1
17+
endif
18+
endfor
19+
enddef
20+
21+
call DoIt()
22+
" The list is generated text, reset the 'modified' flag that setline() set.
set nomodified

src/testdir/test_functions.vim

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2077,6 +2077,13 @@ func Test_char2nr()
20772077
set encoding=utf-8
20782078
endfunc
20792079

2080+
" Check charclass() with one sample character for each documented class:
" 0 blank (space), 1 punctuation ('.'), 2 word character ('x') and
" 3 emoji (U+203C).
func Test_charclass()
2081+
call assert_equal(0, charclass(' '))
2082+
call assert_equal(1, charclass('.'))
2083+
call assert_equal(2, charclass('x'))
2084+
call assert_equal(3, charclass("\u203c"))
2085+
endfunc
2086+
20802087
func Test_eventhandler()
20812088
call assert_equal(0, eventhandler())
20822089
endfunc

src/version.c

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -754,6 +754,8 @@ static char *(features[]) =
754754

755755
static int included_patches[] =
756756
{ /* Add new patch number below this line */
757+
/**/
758+
1536,
757759
/**/
758760
1535,
759761
/**/

0 commit comments

Comments
 (0)