Skip to content

Commit 23059da

Browse files
authored
fix: Add option to not auto stem during split (#4310)
1 parent fae4975 commit 23059da

5 files changed

Lines changed: 45 additions & 33 deletions

File tree

packages/cspell-tools/src/compiler/legacyLineToWords.test.ts

Lines changed: 17 additions & 17 deletions
Original file line number | Diff line number | Diff line change
@@ -14,23 +14,23 @@ describe('Validate legacyLineToWords', () => {
1414
});
1515

1616
test.each`
17-
line | expectedResult
18-
${'hello'} | ${['hello']}
19-
${'AppendIterator::getArrayIterator'} | ${['append', 'iterator', 'get', 'array']}
20-
${'Austin Martin'} | ${['austin', 'martin']}
21-
${'JPEGsBLOBs'} | ${['jpegs', 'blobs']}
22-
${'CURLs CURLing' /* Sadly we cannot do this one correctly */} | ${['curls', 'curling']}
23-
${'DNSTable Lookup'} | ${['dns', 'table', 'lookup']}
24-
${'OUTRing'} | ${['outring']}
25-
${'OUTRings'} | ${['outrings']}
26-
${'DIRs'} | ${['dirs']}
27-
${'AVGAspect'} | ${['avg', 'aspect']}
28-
${'New York'} | ${['new', 'york']}
29-
${'Namespace DNSLookup'} | ${['namespace', 'dns', 'lookup']}
30-
${'well-educated'} | ${['well', 'educated']}
31-
${'CURLcode'} | ${['cur', 'lcode']}
32-
${'kDNSServiceErr_BadSig'} | ${['k', 'dns', 'service', 'err', 'bad', 'sig']}
33-
${'apd_get_active_symbols'} | ${['apd', 'get', 'active', 'symbols']}
17+
line | expectedResult
18+
${'hello'} | ${['hello']}
19+
${'AppendIterator::getArrayIterator'} | ${['append', 'iterator', 'get', 'array']}
20+
${'Austin Martin'} | ${['austin', 'martin']}
21+
${'JPEGSBlobs'} | ${['jpegs', 'blobs']}
22+
${'CURLS Curling'} | ${['curls', 'curling']}
23+
${'DNSTable Lookup'} | ${['dns', 'table', 'lookup']}
24+
${'OUTRing'} | ${['out', 'ring']}
25+
${'OUTRings'} | ${['out', 'rings']}
26+
${'DIRs'} | ${['di', 'rs']}
27+
${'AVGAspect'} | ${['avg', 'aspect']}
28+
${'New York'} | ${['new', 'york']}
29+
${'Namespace DNSLookup'} | ${['namespace', 'dns', 'lookup']}
30+
${'well-educated'} | ${['well', 'educated']}
31+
${'CURLcode'} | ${['cur', 'lcode']}
32+
${'kDNSServiceErr_BadSig'} | ${['k', 'dns', 'service', 'err', 'bad', 'sig']}
33+
${'apd_get_active_symbols'} | ${['apd', 'get', 'active', 'symbols']}
3434
`('legacy splitting lines $line', ({ line, expectedResult }: { line: string; expectedResult: string[] }) => {
3535
expect([...pipe(legacyLineToWords(line, false, allowed), opFilter(distinct()))]).toEqual(expectedResult);
3636
});

packages/cspell-tools/src/compiler/splitCamelCaseIfAllowed.test.ts

Lines changed: 13 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -3,17 +3,19 @@ import { splitCamelCaseIfAllowed } from './splitCamelCaseIfAllowed';
33

44
describe('splitCamelCaseIfAllowed', () => {
55
test.each`
6-
text | keepCase | allowed | expected
7-
${''} | ${false} | ${undefined} | ${[]}
8-
${'hello'} | ${false} | ${undefined} | ${['hello']}
9-
${'helloThere'} | ${false} | ${['hello', 'there']} | ${['hello', 'there']}
10-
${'helloThere'} | ${false} | ${['hello', 'There']} | ${['hello', 'There']}
11-
${'helloThere'} | ${true} | ${['hello', 'There']} | ${['hello', 'There']}
12-
${'ERRORCode'} | ${false} | ${['error', 'code']} | ${['error', 'code']}
13-
${'ERRORCode'} | ${true} | ${['error', 'code']} | ${['ERROR', 'code']}
14-
${'ERRORCode'} | ${true} | ${['code']} | ${['ERRORCode']}
15-
${'ERRORCode'} | ${false} | ${['code']} | ${['ERRORCode']}
16-
${'ErrorCode'} | ${true} | ${['error', 'code']} | ${['error', 'code']}
6+
text | keepCase | allowed | expected
7+
${''} | ${false} | ${undefined} | ${[]}
8+
${'hello'} | ${false} | ${undefined} | ${['hello']}
9+
${'helloThere'} | ${false} | ${['hello', 'there']} | ${['hello', 'there']}
10+
${'helloThere'} | ${false} | ${['hello', 'There']} | ${['hello', 'There']}
11+
${'helloThere'} | ${true} | ${['hello', 'There']} | ${['hello', 'There']}
12+
${'ERRORCode'} | ${false} | ${['error', 'code']} | ${['error', 'code']}
13+
${'ERRORCode'} | ${true} | ${['error', 'code']} | ${['ERROR', 'code']}
14+
${'ERRORCode'} | ${true} | ${['code']} | ${['ERRORCode']}
15+
${'ERRORCode'} | ${false} | ${['code']} | ${['ERRORCode']}
16+
${'ErrorCode'} | ${true} | ${['error', 'code']} | ${['error', 'code']}
17+
${'xmlUCSIsCatZ'} | ${true} | ${['xml', 'UCS', 'is', 'cat', 'z']} | ${['xml', 'UCS', 'is', 'cat', 'z']}
18+
${'ADP_ConnectionStateMsg_Closed'} | ${true} | ${undefined} | ${['ADP', 'connection', 'state', 'msg', 'closed']}
1719
`('splitCamelCaseIfAllowed $text $keepCase $allowed', ({ text, keepCase, allowed, expected }) => {
1820
allowed = createAllowedSplitWords(allowed);
1921
expect(splitCamelCaseIfAllowed(text, allowed, keepCase)).toEqual(expected);

packages/cspell-tools/src/compiler/splitCamelCaseIfAllowed.ts

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -34,7 +34,7 @@ function isUnknown(word: string, allowedWords: AllowedSplitWordsCollection): boo
3434
}
3535

3636
function splitCamelCase(word: string): Iterable<string> {
37-
const splitWords = Text.splitCamelCaseWord(word).filter((word) => !regExpIsNumber.test(word));
37+
const splitWords = Text.splitCamelCaseWord(word, false).filter((word) => !regExpIsNumber.test(word));
3838
// We only want to preserve this: "New York" and not "Namespace DNSLookup"
3939
if (splitWords.length > 1 && regExpSpaceOrDash.test(word)) {
4040
return splitWords.flatMap((w) => w.split(regExpSpaceOrDash));

packages/cspell-tools/src/compiler/text.ts

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -7,8 +7,8 @@ const regExSplitWords2 = /(\p{Lu})(\p{Lu}\p{Ll})/gu;
77
/**
88
* Split camelCase words into an array of strings.
99
*/
10-
export function splitCamelCaseWord(word: string): string[] {
11-
const wPrime = word.replace(regExUpperSOrIng, (s) => s[0] + s.slice(1).toLowerCase());
10+
export function splitCamelCaseWord(word: string, autoStem = true): string[] {
11+
const wPrime = autoStem ? word.replace(regExUpperSOrIng, (s) => s[0] + s.slice(1).toLowerCase()) : word;
1212
const pass1 = wPrime.replace(regExSplitWords, '$1|$2');
1313
const pass2 = pass1.replace(regExSplitWords2, '$1|$2');
1414
const pass3 = pass2.replace(/[\d_]+/g, '|');

packages/cspell-tools/src/compiler/wordListCompiler.ts

Lines changed: 12 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -23,13 +23,23 @@ export async function compileWordList(
2323
destFilename: string,
2424
options: CompileOptions
2525
): Promise<void> {
26-
const filter = normalizeTargetWords(options);
26+
const finalLines = normalize(lines, options);
2727

28-
const finalSeq = pipe(wordListHeaderLines, opAppend(pipe(lines, filter)));
28+
const finalSeq = pipe(wordListHeaderLines, opAppend(finalLines));
2929

3030
return createWordListTarget(destFilename)(finalSeq);
3131
}
3232

33+
/**
 * Normalize the source lines for the target word list.
 *
 * The lines are piped through the operator returned by `normalizeTargetWords(options)`.
 * When `options.sort` is truthy, the piped result is additionally de-duplicated and
 * sorted with the default `Array.prototype.sort` ordering; otherwise the piped
 * iterable is returned unchanged.
 *
 * @param lines - the lines to normalize.
 * @param options - compile options; `sort` selects the de-duplicated, sorted output.
 * @returns the normalized lines.
 */
function normalize(lines: Iterable<string>, options: CompileOptions): Iterable<string> {
    const normalized = pipe(lines, normalizeTargetWords(options));

    if (options.sort) {
        // Collecting into a Set drops duplicate entries before sorting.
        const unique = Array.from(new Set(normalized));
        unique.sort();
        return unique;
    }

    return normalized;
}
42+
3343
function createWordListTarget(destFilename: string): (seq: Iterable<string>) => Promise<void> {
3444
const target = createTarget(destFilename);
3545
return (seq: Iterable<string>) =>

0 commit comments

Comments
 (0)