@@ -81,8 +81,10 @@ bufnr([{buf} [, {create}]]) Number Number of the buffer {buf}
8181bufwinid({buf} ) Number window ID of buffer {buf}
8282bufwinnr({buf} ) Number window number of buffer {buf}
8383byte2line({byte} ) Number line number at byte count {byte}
84- byteidx({expr} , {nr} ) Number byte index of {nr} 'th char in {expr}
85- byteidxcomp({expr} , {nr} ) Number byte index of {nr} 'th char in {expr}
84+ byteidx({expr} , {nr} [, {utf16} ])
85+ Number byte index of {nr} 'th char in {expr}
86+ byteidxcomp({expr} , {nr} [, {utf16} ])
87+ Number byte index of {nr} 'th char in {expr}
8688call({func} , {arglist} [, {dict} ])
8789 any call {func} with arguments {arglist}
8890ceil({expr} ) Float round {expr} up
@@ -117,7 +119,7 @@ changenr() Number current change number
117119char2nr({expr} [, {utf8} ]) Number ASCII/UTF-8 value of first char in {expr}
118120charclass({string} ) Number character class of {string}
119121charcol({expr} [, {winid} ]) Number column number of cursor or mark
120- charidx({string} , {idx} [, {countcc} ])
122+ charidx({string} , {idx} [, {countcc} [, {utf16} ] ])
121123 Number char index of byte {idx} in {string}
122124chdir({dir} ) String change current working directory
123125cindent({lnum} ) Number C indent for line {lnum}
@@ -604,6 +606,8 @@ strptime({format}, {timestring})
604606strridx({haystack} , {needle} [, {start} ])
605607 Number last index of {needle} in {haystack}
606608strtrans({expr} ) String translate string to make it printable
609+ strutf16len({string} [, {countcc} ])
610+ Number number of UTF-16 code units in {string}
607611strwidth({expr} ) Number display cell length of the String {expr}
608612submatch({nr} [, {list} ]) String or List
609613 specific match in ":s" or substitute()
@@ -704,6 +708,8 @@ undofile({name}) String undo file name for {name}
704708undotree() List undo file tree
705709uniq({list} [, {func} [, {dict} ]])
706710 List remove adjacent duplicates from a list
711+ utf16idx({string} , {idx} [, {countcc} [, {charidx} ]])
712+ Number UTF-16 index of byte {idx} in {string}
707713values({dict} ) List values in {dict}
708714virtcol({expr} [, {list} ]) Number or List
709715 screen column of cursor or mark
@@ -1363,7 +1369,7 @@ byte2line({byte}) *byte2line()*
13631369< {not available when compiled without the | +byte_offset |
13641370 feature}
13651371
1366- byteidx({expr} , {nr} ) *byteidx()*
1372+ byteidx({expr} , {nr} [, {utf16} ]) *byteidx()*
13671373 Return byte index of the {nr} 'th character in the String
13681374 {expr} . Use zero for the first character, it then returns
13691375 zero.
@@ -1373,6 +1379,13 @@ byteidx({expr}, {nr}) *byteidx()*
13731379 length is added to the preceding base character. See
13741380 | byteidxcomp() | below for counting composing characters
13751381 separately.
1382+ When {utf16} is present and TRUE, {nr} is used as the UTF-16
1383+ index in the String {expr} instead of as the character index.
1384+ The UTF-16 index is the index in the string when it is encoded
1385+ with 16-bit words. If the specified UTF-16 index is in the
1386+ middle of a character (e.g. in a 4-byte character), then the
1387+ byte index of the first byte in the character is returned.
1388+ Refer to | string-offset-encoding | for more information.
13761389 Example : >
13771390 echo matchstr(str, ".", byteidx(str, 3))
13781391< will display the fourth character. Another way to do the
@@ -1384,11 +1397,17 @@ byteidx({expr}, {nr}) *byteidx()*
13841397 If there are fewer than {nr} characters, -1 is returned.
13851398 If there are exactly {nr} characters the length of the string
13861399 in bytes is returned.
1387-
1400+ See | charidx() | and | utf16idx() | for getting the character and
1401+ UTF-16 index respectively from the byte index.
1402+ Examples: >
1403+ echo byteidx('a😊😊', 2) returns 5
1404+ echo byteidx('a😊😊', 2, 1) returns 1
1405+ echo byteidx('a😊😊', 3, 1) returns 5
1406+ <
13881407 Can also be used as a | method | : >
13891408 GetName()->byteidx(idx)
13901409
1391- byteidxcomp({expr} , {nr} ) *byteidxcomp()*
1410+ byteidxcomp({expr} , {nr} [, {utf16} ]) *byteidxcomp()*
13921411 Like byteidx(), except that a composing character is counted
13931412 as a separate character. Example: >
13941413 let s = 'e' .. nr2char(0x301)
@@ -1493,27 +1512,36 @@ charcol({expr} [, {winid}]) *charcol()*
14931512 GetPos()->col()
14941513<
14951514 *charidx()*
1496- charidx({string} , {idx} [, {countcc} ])
1515+ charidx({string} , {idx} [, {countcc} [, {utf16} ] ])
14971516 Return the character index of the byte at {idx} in {string} .
14981517 The index of the first character is zero.
14991518 If there are no multibyte characters the returned value is
15001519 equal to {idx} .
1520+
15011521 When {countcc} is omitted or | FALSE | , then composing characters
1502- are not counted separately, their byte length is
1503- added to the preceding base character.
1522+ are not counted separately, their byte length is added to the
1523+ preceding base character.
15041524 When {countcc} is | TRUE | , then composing characters are
15051525 counted as separate characters.
1526+
1527+ When {utf16} is present and TRUE, {idx} is used as the UTF-16
1528+ index in the String {string} instead of as the byte index.
1529+
15061530 Returns -1 if the arguments are invalid or if {idx} is greater
15071531 than the index of the last byte in {string} . An error is
15081532 given if the first argument is not a string, the second
15091533 argument is not a number or when the third argument is present
15101534 and is not zero or one.
1535+
15111536 See | byteidx() | and | byteidxcomp() | for getting the byte index
1512- from the character index.
1537+ from the character index and | utf16idx() | for getting the
1538+ UTF-16 index from the character index.
1539+ Refer to | string-offset-encoding | for more information.
15131540 Examples: >
15141541 echo charidx('áb́ć', 3) returns 1
15151542 echo charidx('áb́ć', 6, 1) returns 4
15161543 echo charidx('áb́ć', 16) returns -1
1544+ echo charidx('a😊😊', 4, 0, 1) returns 2
15171545<
15181546 Can also be used as a | method | : >
15191547 GetName()->charidx(idx)
@@ -9244,6 +9272,28 @@ strtrans({string}) *strtrans()*
92449272 Can also be used as a | method | : >
92459273 GetString()->strtrans()
92469274
9275+ strutf16len({string} [, {countcc} ]) *strutf16len()*
9276+ The result is a Number, which is the number of UTF-16 code
9277+ units in String {string} (after converting it to UTF-16).
9278+
9279+ When {countcc} is TRUE, composing characters are counted
9280+ separately.
9281+ When {countcc} is omitted or FALSE, composing characters are
9282+ ignored.
9283+
9284+ Returns zero on error.
9285+
9286+ Also see | strlen() | and | strcharlen() | .
9287+ Examples: >
9288+ echo strutf16len('a') returns 1
9289+ echo strutf16len('©') returns 1
9290+ echo strutf16len('😊') returns 2
9291+ echo strutf16len('ą́') returns 1
9292+ echo strutf16len('ą́', v:true) returns 3
9293+ <
9294+ Can also be used as a |method|: >
9295+ GetText()->strutf16len()
9296+ <
92479297strwidth({string} ) *strwidth()*
92489298 The result is a Number, which is the number of display cells
92499299 String {string} occupies. A Tab character is counted as one
@@ -10059,6 +10109,34 @@ uniq({list} [, {func} [, {dict}]]) *uniq()* *E882*
1005910109
1006010110 Can also be used as a | method | : >
1006110111 mylist->uniq()
10112+ <
10113+ *utf16idx()*
10114+ utf16idx({string} , {idx} [, {countcc} [, {charidx} ]])
10115+ Same as | charidx() | but returns the UTF-16 index of the byte
10116+ at {idx} in {string} (after converting it to UTF-16).
10117+
10118+ When {charidx} is present and TRUE, {idx} is used as the
10119+ character index in the String {string} instead of as the byte
10120+ index.
10121+ An {idx} in the middle of a UTF-8 sequence is rounded upwards
10122+ to the end of that sequence.
10123+
10124+ See | byteidx() | and | byteidxcomp() | for getting the byte index
10125+ from the UTF-16 index and | charidx() | for getting the
10126+ character index from the UTF-16 index.
10127+ Refer to | string-offset-encoding | for more information.
10128+ Examples: >
10129+ echo utf16idx('a😊😊', 3) returns 2
10130+ echo utf16idx('a😊😊', 7) returns 4
10131+ echo utf16idx('a😊😊', 1, 0, 1) returns 2
10132+ echo utf16idx('a😊😊', 2, 0, 1) returns 4
10133+ echo utf16idx('aą́c', 6) returns 2
10134+ echo utf16idx('aą́c', 6, 1) returns 4
10135+ echo utf16idx('a😊😊', 9) returns -1
10136+ <
10137+ Can also be used as a | method | : >
10138+ GetName()->utf16idx(idx)
10139+
1006210140
1006310141 values({dict} ) *values()*
1006410142 Return a | List | with all the values of {dict} . The | List | is
0 commit comments