-
-
Notifications
You must be signed in to change notification settings - Fork 118
fix: Support string prefixes when walking nodes #8259
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
Performance ReportDaily Performancexychart-beta
title Files Per Second by Day
y-axis Files per Second
x-axis Date [Dec-2, Dec-8, Dec-9, Dec-10, Dec-13, Dec-14, Dec-15, Dec-16, Dec-20, Dec-22, Dec-23, Dec-24, Dec-27, Dec-28, Dec-29, Dec-30, Jan-1]
bar [158.41, 158.24, 160.26, 160.13, 162.46, 160.30, 159.32, 159.99, 160.32, 171.41, 173.30, 173.54, 170.88, 173.10, 172.87, 170.63, 170.61]
line [39.93, 37.87, 36.13, 38.10, 36.47, 38.38, 37.81, 37.32, 38.13, 39.99, 39.24, 39.22, 39.13, 37.99, 37.43, 37.30, 37.04]
line [49.95, 51.18, 48.94, 51.93, 50.78, 49.91, 50.66, 50.34, 50.02, 51.61, 51.21, 51.38, 49.64, 48.60, 49.52, 48.09, 48.77]
line [80.27, 76.65, 75.94, 76.12, 76.57, 75.62, 71.86, 75.70, 72.52, 79.52, 79.75, 79.57, 78.19, 78.53, 81.17, 72.47, 77.74]
line [115.87, 112.86, 110.11, 114.79, 114.58, 111.22, 109.33, 106.79, 113.91, 116.30, 113.89, 119.27, 121.25, 119.32, 118.60, 119.88, 116.21]
line [6.12, 6.00, 6.03, 6.11, 5.69, 6.12, 5.85, 6.27, 5.94, 6.19, 6.23, 6.06, 5.80, 6.19, 6.04, 6.09, 6.09]
line [44.42, 44.87, 45.90, 44.67, 46.00, 48.05, 47.07, 45.85, 42.75, 47.52, 47.21, 45.81, 45.88, 45.34, 46.09, 44.04, 43.54]
line [42.00, 46.02, 43.67, 43.68, 45.02, 45.75, 43.43, 44.94, 44.85, 45.35, 43.98, 45.75, 48.03, 44.47, 43.53, 43.39, 41.42]
line [165.94, 166.49, 162.27, 148.64, 164.74, 167.10, 169.90, 168.25, 171.20, 165.98, 169.49, 175.43, 173.51, 166.13, 173.69, 176.28, 174.84]
line [115.04, 119.17, 112.82, 116.99, 116.61, 109.61, 115.93, 113.66, 115.36, 122.68, 121.26, 119.81, 119.36, 122.21, 121.33, 118.71, 116.01]
line [31.76, 31.99, 31.99, 30.84, 32.08, 31.89, 31.67, 31.01, 31.00, 31.79, 31.95, 32.72, 33.00, 31.56, 31.04, 31.19, 31.55]
line [343.35, 333.69, 321.17, 305.21, 329.78, 336.28, 339.96, 335.03, 330.81, 346.52, 335.23, 343.48, 342.97, 326.49, 346.96, 341.41, 344.23]
line [125.41, 124.96, 118.90, 125.77, 122.50, 124.97, 121.63, 124.44, 114.40, 117.66, 113.42, 116.57, 120.86, 116.04, 115.61, 111.25, 113.22]
line [95.57, 93.03, 94.71, 95.93, 94.05, 97.42, 87.40, 93.20, 92.02, 98.77, 95.06, 94.41, 95.55, 97.07, 96.22, 92.57, 87.74]
line [126.68, 128.02, 124.91, 128.75, 126.25, 125.99, 122.54, 125.77, 120.95, 129.07, 131.35, 128.71, 129.73, 130.21, 126.25, 125.32, 125.33]
line [192.21, 203.69, 197.75, 183.65, 201.42, 192.31, 202.71, 196.90, 201.49, 199.06, 199.67, 194.62, 190.28, 203.79, 203.98, 206.16, 175.26]
line [13.55, 14.37, 14.53, 14.61, 14.25, 14.42, 14.66, 14.20, 14.14, 14.58, 14.66, 13.81, 14.51, 13.39, 13.58, 14.12, 14.45]
line [96.80, 90.09, 95.39, 94.62, 99.56, 95.79, 93.99, 95.03, 91.66, 95.50, 94.32, 94.29, 94.73, 94.34, 95.29, 95.11, 97.19]
line [70.66, 70.99, 71.64, 73.38, 70.10, 71.99, 68.96, 71.11, 72.00, 74.62, 76.21, 75.66, 76.28, 80.16, 75.45, 77.29, 77.59]
line [65.65, 62.39, 60.36, 64.84, 65.11, 62.93, 65.98, 63.02, 64.70, 64.39, 64.92, 64.92, 65.76, 63.97, 65.18, 60.36, 60.68]
line [193.86, 202.32, 202.37, 184.12, 199.02, 199.50, 195.58, 198.56, 202.04, 205.76, 202.04, 203.32, 201.04, 202.44, 203.10, 195.80, 206.34]
line [18.09, 18.49, 18.92, 19.04, 18.41, 18.71, 17.35, 18.44, 18.66, 19.39, 19.24, 18.95, 18.61, 19.20, 19.16, 18.64, 18.42]
line [115.04, 114.43, 117.93, 116.17, 113.85, 113.19, 111.86, 113.23, 116.22, 119.95, 116.76, 117.75, 118.71, 118.58, 117.32, 112.42, 113.66]
line [31.13, 29.50, 29.81, 30.70, 31.34, 29.78, 30.86, 29.89, 30.20, 33.22, 32.90, 33.40, 32.96, 33.38, 32.90, 31.64, 31.57]
line [133.58, 127.82, 130.48, 134.87, 129.94, 136.83, 140.59, 128.00, 132.13, 140.04, 140.36, 136.98, 135.57, 135.50, 134.25, 129.01, 136.31]
line [206.67, 213.42, 215.72, 212.77, 206.57, 216.35, 210.81, 207.51, 213.81, 235.09, 232.12, 236.13, 235.71, 228.72, 232.65, 236.79, 237.17]
line [296.47, 313.58, 317.30, 295.96, 306.54, 308.67, 318.29, 308.47, 312.68, 311.01, 311.87, 322.19, 317.56, 317.24, 315.38, 325.99, 310.60]
line [197.01, 195.46, 191.88, 199.25, 200.18, 192.55, 195.85, 196.96, 197.31, 203.68, 215.88, 203.82, 209.47, 208.55, 206.80, 208.32, 205.46]
line [240.23, 229.14, 227.27, 231.37, 242.80, 219.56, 233.62, 231.32, 236.32, 236.26, 240.94, 253.96, 249.85, 237.45, 249.30, 236.26, 249.21]
line [76.02, 71.59, 73.55, 75.54, 73.95, 75.07, 76.77, 72.26, 69.84, 77.03, 74.27, 74.27, 73.83, 75.96, 75.41, 73.52, 72.02]
line [24.46, 23.99, 23.97, 23.56, 24.27, 23.97, 21.89, 24.77, 24.75, 25.98, 24.78, 24.02, 24.07, 23.90, 24.05, 23.79, 21.97]
line [190.71, 193.15, 186.55, 193.19, 192.23, 185.43, 191.26, 188.26, 189.10, 196.56, 199.42, 199.83, 198.90, 201.82, 194.83, 201.20, 201.62]
line [215.07, 216.21, 215.29, 206.31, 212.61, 210.51, 204.56, 210.18, 211.34, 200.66, 223.22, 223.16, 220.54, 207.84, 221.69, 222.09, 222.75]
line [143.55, 138.09, 144.82, 146.07, 144.49, 139.02, 141.78, 142.36, 140.50, 151.95, 150.33, 152.02, 150.72, 154.90, 153.82, 147.24, 152.57]
line [309.39, 295.84, 293.27, 283.38, 296.25, 298.40, 299.27, 280.90, 293.45, 278.89, 296.00, 294.23, 289.77, 295.71, 297.33, 290.64, 294.23]
line [189.43, 187.46, 185.27, 181.05, 182.93, 185.74, 173.94, 185.68, 184.19, 188.28, 188.11, 196.54, 189.53, 183.55, 193.13, 194.30, 184.36]
line [226.03, 235.13, 236.77, 244.11, 232.83, 233.41, 237.39, 235.87, 236.73, 241.85, 239.59, 240.67, 245.15, 245.04, 246.20, 235.52, 241.61]
line [84.05, 80.02, 85.30, 82.71, 84.32, 82.91, 77.95, 81.87, 81.89, 89.30, 90.84, 89.17, 86.07, 86.04, 90.94, 91.20, 87.20]
line [221.75, 237.38, 239.25, 224.99, 230.85, 236.16, 237.46, 237.06, 237.81, 238.36, 240.26, 242.42, 235.97, 234.05, 235.94, 244.60, 245.97]
line [68.01, 65.32, 69.78, 72.32, 70.99, 69.49, 68.20, 68.29, 71.18, 72.28, 72.81, 72.16, 72.22, 72.29, 71.63, 70.43, 69.28]
line [205.06, 206.03, 204.37, 194.91, 205.14, 200.36, 207.64, 202.85, 200.31, 210.29, 210.04, 207.76, 200.78, 205.77, 199.55, 199.57, 202.59]
line [21.90, 22.93, 23.26, 21.71, 22.48, 22.26, 24.16, 21.86, 22.99, 23.86, 22.95, 22.15, 22.59, 22.93, 22.71, 21.96, 21.67]
line [155.06, 164.08, 161.66, 171.02, 169.99, 161.97, 169.28, 168.79, 164.45, 163.90, 173.08, 173.52, 171.67, 165.07, 170.82, 168.89, 165.34]
line [147.19, 142.70, 144.08, 143.97, 142.44, 148.72, 148.74, 148.66, 145.81, 145.83, 151.65, 150.82, 149.41, 147.54, 148.32, 148.24, 146.69]
line [161.03, 163.37, 162.01, 157.69, 162.41, 162.92, 155.61, 157.24, 162.25, 174.81, 169.46, 179.44, 172.82, 176.92, 167.34, 176.43, 175.08]
line [182.47, 179.95, 179.31, 188.38, 180.77, 185.77, 186.35, 184.33, 182.33, 191.53, 189.75, 189.98, 177.07, 192.83, 190.79, 182.79, 188.82]
line [108.44, 104.81, 110.68, 102.65, 110.81, 107.51, 105.11, 109.29, 110.23, 118.20, 116.66, 113.51, 116.56, 110.18, 116.43, 113.57, 116.22]
line [367.49, 374.69, 369.34, 374.83, 359.05, 350.76, 385.22, 356.23, 361.41, 363.52, 371.74, 360.07, 349.58, 358.00, 357.09, 358.22, 370.47]
line [148.12, 146.38, 144.41, 148.76, 149.26, 148.44, 146.25, 144.57, 148.51, 160.26, 155.20, 157.30, 154.76, 157.76, 156.45, 156.97, 153.00]
line [153.99, 150.95, 152.99, 153.72, 160.91, 157.90, 164.74, 161.04, 164.69, 164.20, 175.57, 172.74, 173.87, 177.24, 172.99, 166.08, 170.50]
line [136.51, 139.19, 142.56, 144.70, 146.06, 142.91, 137.61, 141.27, 139.39, 164.22, 164.25, 165.65, 160.55, 167.52, 164.35, 162.61, 160.24]
Time to Process Files
Note:
Files per Second over Time
Data Throughput
|
The idea is to reduce the number of single character node. For US English this reduced the number of nodes from 72.8k to 52.3k and the size from 950k to 825k.
e0fb141 to
8276bc8
Compare
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Pull request overview
This pull request adds support for string prefixes in trie node walking to optimize trie storage and traversal. The implementation introduces a string table to store common prefixes that can be attached to nodes, reducing redundancy in single-child node chains.
Key changes:
- Introduced prefix matching logic and string table infrastructure to support prefix-based node optimization
- Modified trie node walking algorithms to handle prefix strings stored in a separate string table
- Updated binary encoding/decoding to include string table data
Reviewed changes
Copilot reviewed 15 out of 16 changed files in this pull request and generated 8 comments.
Show a summary per file
| File | Description |
|---|---|
| packages/cspell-trie-lib/src/lib/TrieBlob/prefix.ts | New file implementing prefix matching functions for UTF-8 encoded byte sequences |
| packages/cspell-trie-lib/src/lib/TrieBlob/optimizeNodes.ts | Refactored to support string table-based node optimization by collapsing single-child chains into prefix strings |
| packages/cspell-trie-lib/src/lib/TrieBlob/Utf8.ts | Added decodeBytesToString method to Utf8Accumulator for converting byte arrays to strings |
| packages/cspell-trie-lib/src/lib/TrieBlob/TypedArray.ts | New file defining type aliases for typed arrays |
| packages/cspell-trie-lib/src/lib/TrieBlob/TrieBlobInfo.ts | Added stringTable field to support prefix storage |
| packages/cspell-trie-lib/src/lib/TrieBlob/TrieBlobFormat.ts | Added constants for prefix mask, shift, and bits in node header format |
| packages/cspell-trie-lib/src/lib/TrieBlob/TrieBlobEncoder.ts | Extended encoding/decoding to include string table binary data |
| packages/cspell-trie-lib/src/lib/TrieBlob/TrieBlob.ts | Updated node finding and word iteration to match and apply prefix strings |
| packages/cspell-trie-lib/src/lib/TrieBlob/FastTrieBlobInternals.ts | Added stringTable parameter to constructor and field |
| packages/cspell-trie-lib/src/lib/TrieBlob/FastTrieBlobBuilder.ts | Integrated string table into trie building and optimization process |
| packages/cspell-trie-lib/src/lib/TrieBlob/FastTrieBlob.ts | Updated word iteration to apply prefix strings from string table |
| packages/cspell-trie-lib/src/lib/StringTable/StringTable.ts | Enhanced with length getter, values method, and optional endian parameter for binary operations |
| packages/cspell-trie-lib/src/lib/TrieBlob/FastTrieBlobBuilder.test.ts | Skipped tests related to optimization features |
| packages/cspell-trie-lib/src/lib/TrieBlob/FastTrieBlob.test.ts | Minor test adjustments and removed test method calls |
| packages/cspell-trie-lib/src/lib/TrieBlob/snapshots/TrieBlob.test.ts.snap | Updated binary format snapshots reflecting string table addition |
| packages/cspell-tools/src/snapshots/build.test.ts.snap | New snapshot file for build action tests |
Comments suppressed due to low confidence (1)
packages/cspell-trie-lib/src/lib/TrieBlob/FastTrieBlobBuilder.test.ts:198
- An entire test suite has been skipped without explanation. If these optimization tests are not working with the new prefix feature, they should be updated to work with the new implementation or removed entirely. Skipped test suites significantly reduce code coverage and confidence in the changes.
describe.skip('optimization', () => {
test.each`
comment | words
${'single word'} | ${['optimization']}
${'multiple words'} | ${['optimization', 'optimize']}
${'multiple words shared endings'} | ${['optimization', 'vacation', 'sensation']}
${'sampleWords()'} | ${sampleWords()}
`('optimize $comment $words', ({ words }) => {
const sortedUnique = [...new Set(words)].sort();
const ft = FastTrieBlobBuilder.fromWordList(words, undefined, true);
expect([...ft.words()]).toEqual(sortedUnique);
});
});
💡 Add Copilot custom instructions for smarter, more guided reviews. Learn how to get started.
packages/cspell-trie-lib/src/lib/TrieBlob/FastTrieBlobBuilder.test.ts
Outdated
Show resolved
Hide resolved
packages/cspell-trie-lib/src/lib/TrieBlob/FastTrieBlobBuilder.ts
Outdated
Show resolved
Hide resolved
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Pull request overview
Copilot reviewed 20 out of 21 changed files in this pull request and generated 6 comments.
💡 Add Copilot custom instructions for smarter, more guided reviews. Learn how to get started.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Pull request overview
Copilot reviewed 22 out of 23 changed files in this pull request and generated 7 comments.
💡 Add Copilot custom instructions for smarter, more guided reviews. Learn how to get started.
Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> Signed-off-by: Jason Dent <Jason3S@users.noreply.github.com>
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Pull request overview
Copilot reviewed 23 out of 24 changed files in this pull request and generated 9 comments.
💡 Add Copilot custom instructions for smarter, more guided reviews. Learn how to get started.
| } | ||
|
|
||
| export function encodeStringTableToBinary(table: StringTable, endian: 'LE' | 'BE'): U8Array { | ||
| export function encodeStringTableToBinary(table: StringTable, endian?: 'LE' | 'BE'): U8Array { |
Copilot
AI
Dec 31, 2025
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The endian parameter has been made optional with no default value specified. When undefined is passed to BinaryDataReader or BinaryDataBuilder constructors, it's unclear what endianness will be used. This could lead to platform-dependent behavior or errors. Either provide a default value (e.g., 'LE') or document the behavior when endian is undefined.
| } | ||
|
|
||
| export function decodeStringTableFromBinary(data: U8Array, endian: 'LE' | 'BE'): StringTable { | ||
| export function decodeStringTableFromBinary(data: U8Array, endian?: 'LE' | 'BE'): StringTable { |
Copilot
AI
Dec 31, 2025
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The same endian parameter issue exists here - when undefined, the behavior is unclear. This should either have a documented default or require an explicit value.
Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> Signed-off-by: Jason Dent <Jason3S@users.noreply.github.com>
No description provided.