Skip to content

Commit 55e2961

Browse files
committed
patch 8.2.1933: cannot sort using locale ordering
Problem: Cannot sort using locale ordering. Solution: Add a flag for :sort and sort() to use the locale. (Dominique Pellé, closes #7237)
1 parent 963734e commit 55e2961

File tree

6 files changed

+112
-12
lines changed

6 files changed

+112
-12
lines changed

runtime/doc/change.txt

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1801,14 +1801,22 @@ Vim has a sorting function and a sorting command. The sorting function can be
18011801
found here: |sort()|, |uniq()|.
18021802

18031803
*:sor* *:sort*
1804-
:[range]sor[t][!] [b][f][i][n][o][r][u][x] [/{pattern}/]
1804+
:[range]sor[t][!] [b][f][i][l][n][o][r][u][x] [/{pattern}/]
18051805
Sort lines in [range]. When no range is given all
18061806
lines are sorted.
18071807

18081808
With [!] the order is reversed.
18091809

18101810
With [i] case is ignored.
18111811

1812+
With [l] sort uses the current locale. See
1813+
`language collate` to check or set the locale used
1814+
for ordering. For example, with "en_US.UTF8",
1815+
Ö will be ordered after O and before P,
1816+
whereas with the Swedish locale "sv_SE.UTF8",
1817+
it will be after Z.
1818+
Case is typically ignored by the locale.
1819+
18121820
Options [n][f][x][o][b] are mutually exclusive.
18131821

18141822
With [n] sorting is done on the first decimal number
@@ -1875,8 +1883,7 @@ found here: |sort()|, |uniq()|.
18751883
Note that using `:sort` with `:global` doesn't sort the matching lines, it's
18761884
quite useless.
18771885

1878-
The details about sorting depend on the library function used. There is no
1879-
guarantee that sorting obeys the current locale. You will have to try it out.
1886+
`:sort` does not use the current locale unless the l flag is used.
18801887
Vim does do a "stable" sort.
18811888

18821889
The sorting can be interrupted, but if you interrupt it too late in the

runtime/doc/eval.txt

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9700,6 +9700,13 @@ sort({list} [, {func} [, {dict}]]) *sort()* *E702*
97009700
When {func} is given and it is '1' or 'i' then case is
97019701
ignored.
97029702

9703+
When {func} is given and it is 'l' then the current locale
9704+
is used for ordering. See `language collate` to check or set
9705+
the locale used for ordering. For example, with "en_US.UTF8",
9706+
Ö will be ordered after O and before P, whereas with the
9707+
Swedish locale "sv_SE.UTF8", it will be after Z.
9708+
Case is typically ignored by the locale.
9709+
97039710
When {func} is given and it is 'n' then all items will be
97049711
sorted numerical (Implementation detail: This uses the
97059712
strtod() function to parse numbers, Strings, Lists, Dicts and

src/ex_cmds.c

Lines changed: 13 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -277,6 +277,7 @@ linelen(int *has_tab)
277277
static char_u *sortbuf1;
278278
static char_u *sortbuf2;
279279

280+
static int sort_lc; // sort using locale
280281
static int sort_ic; // ignore case
281282
static int sort_nr; // sort on number
282283
static int sort_rx; // sort on regex instead of skipping it
@@ -307,7 +308,13 @@ typedef struct
307308
} st_u;
308309
} sorti_T;
309310

310-
static int sort_compare(const void *s1, const void *s2);
311+
/*
 * Compare strings "s1" and "s2" for sorting, honoring the file-level sort
 * flags.
 * When "sort_lc" is set the strings are compared with strcoll(); this check
 * comes first, so "sort_ic" is not consulted in that case.
 * Otherwise STRICMP() is used when "sort_ic" is set and STRCMP() when not.
 * Returns the result of whichever comparison function was used.
 */
static int
312+
string_compare(const void *s1, const void *s2)
313+
{
314+
if (sort_lc)
315+
return strcoll((char *)s1, (char *)s2);
316+
return sort_ic ? STRICMP(s1, s2) : STRCMP(s1, s2);
317+
}
311318

312319
static int
313320
sort_compare(const void *s1, const void *s2)
@@ -350,8 +357,7 @@ sort_compare(const void *s1, const void *s2)
350357
l2.st_u.line.end_col_nr - l2.st_u.line.start_col_nr + 1);
351358
sortbuf2[l2.st_u.line.end_col_nr - l2.st_u.line.start_col_nr] = 0;
352359

353-
result = sort_ic ? STRICMP(sortbuf1, sortbuf2)
354-
: STRCMP(sortbuf1, sortbuf2);
360+
result = string_compare(sortbuf1, sortbuf2);
355361
}
356362

357363
// If two lines have the same value, preserve the original line order.
@@ -398,7 +404,7 @@ ex_sort(exarg_T *eap)
398404
if (nrs == NULL)
399405
goto sortend;
400406

401-
sort_abort = sort_ic = sort_rx = sort_nr = 0;
407+
sort_abort = sort_ic = sort_lc = sort_rx = sort_nr = 0;
402408
#ifdef FEAT_FLOAT
403409
sort_flt = 0;
404410
#endif
@@ -409,6 +415,8 @@ ex_sort(exarg_T *eap)
409415
;
410416
else if (*p == 'i')
411417
sort_ic = TRUE;
418+
else if (*p == 'l')
419+
sort_lc = TRUE;
412420
else if (*p == 'r')
413421
sort_rx = TRUE;
414422
else if (*p == 'n')
@@ -614,8 +622,7 @@ ex_sort(exarg_T *eap)
614622
change_occurred = TRUE;
615623

616624
s = ml_get(get_lnum);
617-
if (!unique || i == 0
618-
|| (sort_ic ? STRICMP(s, sortbuf1) : STRCMP(s, sortbuf1)) != 0)
625+
if (!unique || i == 0 || string_compare(s, sortbuf1) != 0)
619626
{
620627
// Copy the line into a buffer, it may become invalid in
621628
// ml_append(). And it's needed for "unique".

src/list.c

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1516,6 +1516,7 @@ typedef struct
15161516
typedef struct
15171517
{
15181518
int item_compare_ic;
1519+
int item_compare_lc;
15191520
int item_compare_numeric;
15201521
int item_compare_numbers;
15211522
#ifdef FEAT_FLOAT
@@ -1594,10 +1595,10 @@ item_compare(const void *s1, const void *s2)
15941595
p2 = (char_u *)"";
15951596
if (!sortinfo->item_compare_numeric)
15961597
{
1597-
if (sortinfo->item_compare_ic)
1598-
res = STRICMP(p1, p2);
1598+
if (sortinfo->item_compare_lc)
1599+
res = strcoll((char *)p1, (char *)p2);
15991600
else
1600-
res = STRCMP(p1, p2);
1601+
res = sortinfo->item_compare_ic ? STRICMP(p1, p2): STRCMP(p1, p2);
16011602
}
16021603
else
16031604
{
@@ -1706,6 +1707,7 @@ do_sort_uniq(typval_T *argvars, typval_T *rettv, int sort)
17061707
goto theend; // short list sorts pretty quickly
17071708

17081709
info.item_compare_ic = FALSE;
1710+
info.item_compare_lc = FALSE;
17091711
info.item_compare_numeric = FALSE;
17101712
info.item_compare_numbers = FALSE;
17111713
#ifdef FEAT_FLOAT
@@ -1773,6 +1775,11 @@ do_sort_uniq(typval_T *argvars, typval_T *rettv, int sort)
17731775
info.item_compare_func = NULL;
17741776
info.item_compare_ic = TRUE;
17751777
}
1778+
else if (STRCMP(info.item_compare_func, "l") == 0)
1779+
{
1780+
info.item_compare_func = NULL;
1781+
info.item_compare_lc = TRUE;
1782+
}
17761783
}
17771784
}
17781785

src/testdir/test_sort.vim

Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,25 @@ func Test_sort_strings()
1515
" numbers compared as strings
1616
call assert_equal([1, 2, 3], sort([3, 2, 1]))
1717
call assert_equal([13, 28, 3], sort([3, 28, 13]))
18+
19+
call assert_equal(['A', 'O', 'P', 'a', 'o', 'p', 'Ä', 'Ô', 'ä', 'ô', 'œ', 'œ'],
20+
\ sort(['A', 'O', 'P', 'a', 'o', 'p', 'Ä', 'Ô', 'ä', 'ô', 'œ', 'œ']))
21+
22+
call assert_equal(['A', 'a', 'o', 'O', 'p', 'P', 'Ä', 'Ô', 'ä', 'ô', 'œ', 'œ'],
23+
\ sort(['A', 'a', 'o', 'O', 'œ', 'œ', 'p', 'P', 'Ä', 'ä', 'ô', 'Ô'], 'i'))
24+
25+
let lc = execute('language collate')
26+
" With the following locales, the accentuated letters are ordered
27+
" similarly to the non-accentuated letters...
28+
if lc =~? '"\(en\|es\|de\|fr\|it\|nl\).*\.utf-\?8"'
29+
call assert_equal(['a', 'A', 'ä', 'Ä', 'o', 'O', 'ô', 'Ô', 'œ', 'œ', 'p', 'P'],
30+
\ sort(['A', 'a', 'o', 'O', 'œ', 'œ', 'p', 'P', 'Ä', 'ä', 'ô', 'Ô'], 'l'))
31+
" ... whereas with a Swedish locale, the accentuated letters are ordered
32+
" after Z.
33+
elseif lc =~? '"sv.*utf-\?8"'
34+
call assert_equal(['a', 'A', 'o', 'O', 'p', 'P', 'ä', 'Ä', 'œ', 'œ', 'ô', 'Ô'],
35+
\ sort(['A', 'a', 'o', 'O', 'œ', 'œ', 'p', 'P', 'Ä', 'ä', 'ô', 'Ô'], 'l'))
36+
endif
1837
endfunc
1938

2039
func Test_sort_numeric()
@@ -1204,6 +1223,57 @@ func Test_sort_cmd()
12041223
\ },
12051224
\ ]
12061225

1226+
" With the following locales, the accentuated letters are ordered
1227+
" similarly to the non-accentuated letters...
1228+
let lc = execute('language collate')
1229+
if lc =~? '"\(en\|es\|de\|fr\|it\|nl\).*\.utf-\?8"'
1230+
let tests += [
1231+
\ {
1232+
\ 'name' : 'sort with locale',
1233+
\ 'cmd' : '%sort l',
1234+
\ 'input' : [
1235+
\ 'A',
1236+
\ 'E',
1237+
\ 'O',
1238+
\ 'À',
1239+
\ 'È',
1240+
\ 'É',
1241+
\ 'Ô',
1242+
\ 'Œ',
1243+
\ 'Z',
1244+
\ 'a',
1245+
\ 'e',
1246+
\ 'o',
1247+
\ 'à',
1248+
\ 'è',
1249+
\ 'é',
1250+
\ 'ô',
1251+
\ 'œ',
1252+
\ 'z'
1253+
\ ],
1254+
\ 'expected' : [
1255+
\ 'a',
1256+
\ 'A',
1257+
\ 'à',
1258+
\ 'À',
1259+
\ 'e',
1260+
\ 'E',
1261+
\ 'é',
1262+
\ 'É',
1263+
\ 'è',
1264+
\ 'È',
1265+
\ 'o',
1266+
\ 'O',
1267+
\ 'ô',
1268+
\ 'Ô',
1269+
\ 'œ',
1270+
\ 'Œ',
1271+
\ 'z',
1272+
\ 'Z'
1273+
\ ]
1274+
\ },
1275+
\ ]
1276+
endif
12071277
if has('float')
12081278
let tests += [
12091279
\ {

src/version.c

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -750,6 +750,8 @@ static char *(features[]) =
750750

751751
static int included_patches[] =
752752
{ /* Add new patch number below this line */
753+
/**/
754+
1933,
753755
/**/
754756
1932,
755757
/**/

0 commit comments

Comments
 (0)