Skip to content

Commit 9d40128

Browse files
committed
patch 8.1.1122: char2nr() does not handle composing characters
Problem: char2nr() does not handle composing characters. Solution: Add str2list() and list2str(). (Ozaki Kiichi, closes #4190)
1 parent 4a5711b commit 9d40128

File tree

5 files changed

+178
-2
lines changed

5 files changed

+178
-2
lines changed

runtime/doc/eval.txt

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2442,6 +2442,7 @@ libcallnr({lib}, {func}, {arg}) Number idem, but return a Number
24422442
line({expr}) Number line nr of cursor, last line or mark
24432443
line2byte({lnum}) Number byte count of line {lnum}
24442444
lispindent({lnum}) Number Lisp indent for line {lnum}
2445+
list2str({list} [, {utf8}]) String turn numbers in {list} into a String
24452446
localtime() Number current time
24462447
log({expr}) Float natural logarithm (base e) of {expr}
24472448
log10({expr}) Float logarithm of Float {expr} to base 10
@@ -2609,6 +2610,8 @@ split({expr} [, {pat} [, {keepempty}]])
26092610
List make |List| from {pat} separated {expr}
26102611
sqrt({expr}) Float square root of {expr}
26112612
str2float({expr}) Float convert String to Float
2613+
str2list({expr} [, {utf8}]) List convert each character of {expr} to
2614+
ASCII/UTF8 value
26122615
str2nr({expr} [, {base}]) Number convert String to Number
26132616
strchars({expr} [, {skipcc}]) Number character length of the String {expr}
26142617
strcharpart({str}, {start} [, {len}])
@@ -6193,6 +6196,20 @@ lispindent({lnum}) *lispindent()*
61936196
When {lnum} is invalid or Vim was not compiled with the
61946197
|+lispindent| feature, -1 is returned.
61956198

6199+
list2str({list} [, {utf8}]) *list2str()*
6200+
Convert each number in {list} to a character string and
6201+
concatenate them all. Examples: >
6202+
list2str([32]) returns " "
6203+
list2str([65, 66, 67]) returns "ABC"
6204+
< The same can be done (slowly) with: >
6205+
join(map(list, {nr, val -> nr2char(val)}), '')
6206+
< |str2list()| does the opposite.
6207+
6208+
When {utf8} is omitted or zero, the current 'encoding' is used.
6209+
When {utf8} is 1, always return utf-8 characters.
6210+
With utf-8 composing characters work as expected: >
6211+
list2str([97, 769]) returns "á"
6212+
<
61966213
localtime() *localtime()*
61976214
Return the current time, measured as seconds since 1st Jan
61986215
1970. See also |strftime()| and |getftime()|.
@@ -8722,6 +8739,18 @@ str2float({expr}) *str2float()*
87228739
let f = str2float(substitute(text, ',', '', 'g'))
87238740
< {only available when compiled with the |+float| feature}
87248741

8742+
str2list({expr} [, {utf8}]) *str2list()*
8743+
Return a list containing the number values which represent
8744+
each character in String {expr}. Examples: >
8745+
str2list(" ") returns [32]
8746+
str2list("ABC") returns [65, 66, 67]
8747+
< |list2str()| does the opposite.
8748+
8749+
When {utf8} is omitted or zero, the current 'encoding' is used.
8750+
With {utf8} set to 1, always treat the String as utf-8
8751+
characters. With utf-8 composing characters are handled
8752+
properly: >
8753+
str2list("á") returns [97, 769]
87258754
87268755
str2nr({expr} [, {base}]) *str2nr()*
87278756
Convert string {expr} to a number.

runtime/doc/usr_41.txt

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -577,8 +577,10 @@ used for. You can find an alphabetical list here: |functions|. Use CTRL-] on
577577
the function name to jump to detailed help on it.
578578

579579
String manipulation: *string-functions*
580-
nr2char() get a character by its ASCII value
581-
char2nr() get ASCII value of a character
580+
nr2char() get a character by its number value
581+
list2str() get a character string from a list of numbers
582+
char2nr() get number value of a character
583+
str2list() get list of numbers from a string
582584
str2nr() convert a string to a Number
583585
str2float() convert a string to a Float
584586
printf() format a string according to % items

src/evalfunc.c

Lines changed: 100 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -262,6 +262,7 @@ static void f_libcallnr(typval_T *argvars, typval_T *rettv);
262262
static void f_line(typval_T *argvars, typval_T *rettv);
263263
static void f_line2byte(typval_T *argvars, typval_T *rettv);
264264
static void f_lispindent(typval_T *argvars, typval_T *rettv);
265+
static void f_list2str(typval_T *argvars, typval_T *rettv);
265266
static void f_localtime(typval_T *argvars, typval_T *rettv);
266267
#ifdef FEAT_FLOAT
267268
static void f_log(typval_T *argvars, typval_T *rettv);
@@ -401,6 +402,7 @@ static void f_split(typval_T *argvars, typval_T *rettv);
401402
static void f_sqrt(typval_T *argvars, typval_T *rettv);
402403
static void f_str2float(typval_T *argvars, typval_T *rettv);
403404
#endif
405+
static void f_str2list(typval_T *argvars, typval_T *rettv);
404406
static void f_str2nr(typval_T *argvars, typval_T *rettv);
405407
static void f_strchars(typval_T *argvars, typval_T *rettv);
406408
#ifdef HAVE_STRFTIME
@@ -752,6 +754,7 @@ static struct fst
752754
{"line", 1, 1, f_line},
753755
{"line2byte", 1, 1, f_line2byte},
754756
{"lispindent", 1, 1, f_lispindent},
757+
{"list2str", 1, 2, f_list2str},
755758
{"localtime", 0, 0, f_localtime},
756759
#ifdef FEAT_FLOAT
757760
{"log", 1, 1, f_log},
@@ -902,6 +905,7 @@ static struct fst
902905
{"sqrt", 1, 1, f_sqrt},
903906
{"str2float", 1, 1, f_str2float},
904907
#endif
908+
{"str2list", 1, 2, f_str2list},
905909
{"str2nr", 1, 2, f_str2nr},
906910
{"strcharpart", 2, 3, f_strcharpart},
907911
{"strchars", 1, 2, f_strchars},
@@ -7849,6 +7853,61 @@ f_lispindent(typval_T *argvars UNUSED, typval_T *rettv)
78497853
rettv->vval.v_number = -1;
78507854
}
78517855

7856+
/*
7857+
* "list2str()" function
7858+
*/
7859+
static void
7860+
f_list2str(typval_T *argvars, typval_T *rettv)
7861+
{
7862+
list_T *l;
7863+
listitem_T *li;
7864+
garray_T ga;
7865+
int utf8 = FALSE;
7866+
7867+
rettv->v_type = VAR_STRING;
7868+
rettv->vval.v_string = NULL;
7869+
if (argvars[0].v_type != VAR_LIST)
7870+
{
7871+
emsg(_(e_invarg));
7872+
return;
7873+
}
7874+
7875+
l = argvars[0].vval.v_list;
7876+
if (l == NULL)
7877+
return; // empty list results in empty string
7878+
7879+
if (argvars[1].v_type != VAR_UNKNOWN)
7880+
utf8 = (int)tv_get_number_chk(&argvars[1], NULL);
7881+
7882+
ga_init2(&ga, 1, 80);
7883+
if (has_mbyte || utf8)
7884+
{
7885+
char_u buf[MB_MAXBYTES + 1];
7886+
int (*char2bytes)(int, char_u *);
7887+
7888+
if (utf8 || enc_utf8)
7889+
char2bytes = utf_char2bytes;
7890+
else
7891+
char2bytes = mb_char2bytes;
7892+
7893+
for (li = l->lv_first; li != NULL; li = li->li_next)
7894+
{
7895+
buf[(*char2bytes)(tv_get_number(&li->li_tv), buf)] = NUL;
7896+
ga_concat(&ga, buf);
7897+
}
7898+
ga_append(&ga, NUL);
7899+
}
7900+
else if (ga_grow(&ga, list_len(l) + 1) == OK)
7901+
{
7902+
for (li = l->lv_first; li != NULL; li = li->li_next)
7903+
ga_append(&ga, tv_get_number(&li->li_tv));
7904+
ga_append(&ga, NUL);
7905+
}
7906+
7907+
rettv->v_type = VAR_STRING;
7908+
rettv->vval.v_string = ga.ga_data;
7909+
}
7910+
78527911
/*
78537912
* "localtime()" function
78547913
*/
@@ -12900,6 +12959,47 @@ f_str2float(typval_T *argvars, typval_T *rettv)
1290012959
}
1290112960
#endif
1290212961

12962+
/*
12963+
* "str2list()" function
12964+
*/
12965+
static void
12966+
f_str2list(typval_T *argvars, typval_T *rettv)
12967+
{
12968+
char_u *p;
12969+
int utf8 = FALSE;
12970+
12971+
if (rettv_list_alloc(rettv) == FAIL)
12972+
return;
12973+
12974+
if (argvars[1].v_type != VAR_UNKNOWN)
12975+
utf8 = (int)tv_get_number_chk(&argvars[1], NULL);
12976+
12977+
p = tv_get_string(&argvars[0]);
12978+
12979+
if (has_mbyte || utf8)
12980+
{
12981+
int (*ptr2len)(char_u *);
12982+
int (*ptr2char)(char_u *);
12983+
12984+
if (utf8 || enc_utf8)
12985+
{
12986+
ptr2len = utf_ptr2len;
12987+
ptr2char = utf_ptr2char;
12988+
}
12989+
else
12990+
{
12991+
ptr2len = mb_ptr2len;
12992+
ptr2char = mb_ptr2char;
12993+
}
12994+
12995+
for ( ; *p != NUL; p += (*ptr2len)(p))
12996+
list_append_number(rettv->vval.v_list, (*ptr2char)(p));
12997+
}
12998+
else
12999+
for ( ; *p != NUL; ++p)
13000+
list_append_number(rettv->vval.v_list, *p);
13001+
}
13002+
1290313003
/*
1290413004
* "str2nr()" function
1290513005
*/

src/testdir/test_utf8.vim

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,49 @@ func Test_getvcol()
6262
call assert_equal(2, virtcol("']"))
6363
endfunc
6464

65+
func Test_list2str_str2list_utf8()
  " Each sample is [string, list of its character numbers].
  " The first has one Unicode codepoint per character, the second contains a
  " composing character (U+3099).
  let plain = ["\u3042\u3044", [0x3042, 0x3044]]
  let composing = ["\u304b\u3099\u3044", [0x304b, 0x3099, 0x3044]]

  for [text, codes] in [plain, composing]
    " With {utf8} set the conversion must round-trip in both directions.
    call assert_equal(codes, str2list(text, 1))
    call assert_equal(text, list2str(codes, 1))
    " When 'encoding' is utf-8, leaving out {utf8} gives the same result.
    if &enc ==# 'utf-8'
      call assert_equal(str2list(text), str2list(text, 1))
      call assert_equal(list2str(codes), list2str(codes, 1))
    endif
  endfor

  " A null list behaves like an empty list: both give an empty string.
  call assert_equal('', list2str([]))
  call assert_equal('', list2str(test_null_list()))
endfunc
90+
91+
func Test_list2str_str2list_latin1()
  " Passing {utf8} must give utf-8 results even when 'encoding' is not
  " multi-byte.  The expected utf-8 string has to be created up front, while
  " 'encoding' is still utf-8.
  let expected_str = "\u3042\u3044"
  let expected_nrs = [0x3042, 0x3044]

  let enc_backup = &encoding
  set encoding=latin1

  let got_nrs = str2list(expected_str, 1)
  let got_str = list2str(expected_nrs, 1)

  " Put 'encoding' back before running the assertions.
  let &encoding = enc_backup
  call assert_equal(expected_nrs, got_nrs)
  call assert_equal(expected_str, got_str)
endfunc
107+
65108
func Test_screenchar_utf8()
66109
new
67110

src/version.c

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -771,6 +771,8 @@ static char *(features[]) =
771771

772772
static int included_patches[] =
773773
{ /* Add new patch number below this line */
774+
/**/
775+
1122,
774776
/**/
775777
1121,
776778
/**/

0 commit comments

Comments
 (0)