Skip to content

Commit 819f1ac

Browse files
Merge commit from fork
* fix: better encoding for sparse arrays * misc improvements * Update src/stringify.js Co-authored-by: Rich Harris <richard.a.harris@gmail.com> * nits * rename * more validity, less bad --------- Co-authored-by: Rich Harris <richard.a.harris@gmail.com>
1 parent 0f04d4d commit 819f1ac

10 files changed

Lines changed: 378 additions & 12 deletions

File tree

.changeset/young-ads-push.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
---
2+
"devalue": patch
3+
---
4+
5+
fix: better encoding for sparse arrays

package.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@
2929
"changeset:version": "changeset version",
3030
"changeset:publish": "changeset publish",
3131
"build": "dts-buddy",
32-
"test": "uvu test",
32+
"test": "uvu",
3333
"prepublishOnly": "npm test && npm run build && publint"
3434
},
3535
"license": "MIT",

src/constants.js

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,3 +4,4 @@ export const NAN = -3;
44
export const POSITIVE_INFINITY = -4;
55
export const NEGATIVE_INFINITY = -5;
66
export const NEGATIVE_ZERO = -6;
7+
export const SPARSE = -7;

src/parse.js

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ import {
55
NEGATIVE_INFINITY,
66
NEGATIVE_ZERO,
77
POSITIVE_INFINITY,
8+
SPARSE,
89
UNDEFINED
910
} from './constants.js';
1011

@@ -201,6 +202,17 @@ export function unflatten(parsed, revivers) {
201202
default:
202203
throw new Error(`Unknown type ${type}`);
203204
}
205+
} else if (value[0] === SPARSE) {
206+
// Sparse array encoding: [SPARSE, length, idx, val, idx, val, ...]
207+
const len = value[1];
208+
209+
const array = new Array(len);
210+
hydrated[index] = array;
211+
212+
for (let i = 2; i < value.length; i += 2) {
213+
const idx = value[i];
214+
array[idx] = hydrate(value[i + 1]);
215+
}
204216
} else {
205217
const array = new Array(value.length);
206218
hydrated[index] = array;

src/stringify.js

Lines changed: 71 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -5,14 +5,16 @@ import {
55
is_plain_object,
66
is_primitive,
77
stringify_key,
8-
stringify_string
8+
stringify_string,
9+
valid_array_indices
910
} from './utils.js';
1011
import {
1112
HOLE,
1213
NAN,
1314
NEGATIVE_INFINITY,
1415
NEGATIVE_ZERO,
1516
POSITIVE_INFINITY,
17+
SPARSE,
1618
UNDEFINED
1719
} from './constants.js';
1820
import { encode64 } from './base64.js';
@@ -105,24 +107,89 @@ export function stringify(value, reducers) {
105107
: `["RegExp",${stringify_string(source)}]`;
106108
break;
107109

108-
case 'Array':
110+
case 'Array': {
111+
// For dense arrays (no holes), we iterate normally.
112+
// When we encounter the first hole, we call Object.keys
113+
// to determine the sparseness, then decide between:
114+
// - HOLE encoding: [-2, val, -2, ...] (default)
115+
// - Sparse encoding: [-7, length, idx, val, ...] (for very sparse arrays)
116+
// Only the sparse path avoids iterating every slot, which
117+
// is what protects against the DoS of e.g. `arr[1000000] = 1`.
118+
let mostly_dense = false;
119+
109120
str = '[';
110121

111122
for (let i = 0; i < thing.length; i += 1) {
112123
if (i > 0) str += ',';
113124

114-
if (i in thing) {
125+
if (Object.hasOwn(thing, i)) {
115126
keys.push(`[${i}]`);
116127
str += flatten(thing[i]);
117128
keys.pop();
118-
} else {
129+
} else if (mostly_dense) {
130+
// Use dense encoding. The heuristic guarantees the
131+
// array is only mildly sparse, so iterating over every
132+
// slot is fine.
119133
str += HOLE;
134+
} else {
135+
// Decide between HOLE encoding and sparse encoding.
136+
//
137+
// HOLE encoding: each hole is serialized as the HOLE
138+
// sentinel (-2). For example, [, "a", ,] becomes
139+
// [-2, 0, -2]. Each hole costs 3 chars ("-2" + comma).
140+
//
141+
// Sparse encoding: lists only populated indices.
142+
// For example, [, "a", ,] becomes [-7, 3, 1, 0] — the
143+
// -7 sentinel, the array length (3), then index-value
144+
// pairs. This avoids paying per-hole, but each element
145+
// costs extra chars to write its index.
146+
//
147+
// The values are the same size either way, so the
148+
// choice comes down to structural overhead:
149+
//
150+
// HOLE overhead:
151+
// 3 chars per hole ("-2" + comma)
152+
// = (L - P) * 3
153+
//
154+
// Sparse overhead:
155+
// "-7," — 3 chars (sparse sentinel + comma)
156+
// + length + "," — (d + 1) chars (array length + comma)
157+
// + per element: index + "," — (d + 1) chars
158+
// = (4 + d) + P * (d + 1)
159+
//
160+
// where L is the array length, P is the number of
161+
// populated elements, and d is the number of digits
162+
// in L (an upper bound on the digits in any index).
163+
//
164+
// Sparse encoding is cheaper when:
165+
// (4 + d) + P * (d + 1) < (L - P) * 3
166+
const populated_keys = valid_array_indices(/** @type {any[]} */ (thing));
167+
const population = populated_keys.length;
168+
const d = String(thing.length).length;
169+
170+
const hole_cost = (thing.length - population) * 3;
171+
const sparse_cost = 4 + d + population * (d + 1);
172+
173+
if (hole_cost > sparse_cost) {
174+
str = '[' + SPARSE + ',' + thing.length;
175+
for (let j = 0; j < populated_keys.length; j++) {
176+
const key = populated_keys[j];
177+
keys.push(`[${key}]`);
178+
str += ',' + key + ',' + flatten(thing[key]);
179+
keys.pop();
180+
}
181+
break;
182+
} else {
183+
mostly_dense = true;
184+
str += HOLE;
185+
}
120186
}
121187
}
122188

123189
str += ']';
124190

125191
break;
192+
}
126193

127194
case 'Set':
128195
str = '["Set"';

src/uneval.js

Lines changed: 80 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,8 @@ import {
66
is_plain_object,
77
is_primitive,
88
stringify_key,
9-
stringify_string
9+
stringify_string,
10+
valid_array_indices
1011
} from './utils.js';
1112

1213
const chars = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_$';
@@ -199,12 +200,85 @@ export function uneval(value, replacer) {
199200
case 'URLSearchParams':
200201
return `new URLSearchParams(${stringify_string(thing.toString())})`;
201202

202-
case 'Array':
203-
const members = /** @type {any[]} */ (thing).map((v, i) =>
204-
i in thing ? stringify(v) : ''
205-
);
203+
case 'Array': {
204+
// For dense arrays (no holes), we iterate normally.
205+
// When we encounter the first hole, we call Object.keys
206+
// to determine the sparseness, then decide between:
207+
// - Array literal with holes: [,"a",,] (default)
208+
// - Object.assign: Object.assign(Array(n),{...}) (for very sparse arrays)
209+
// Only the Object.assign path avoids iterating every slot, which
210+
// is what protects against the DoS of e.g. `arr[1000000] = 1`.
211+
let has_holes = false;
212+
213+
let result = '[';
214+
215+
for (let i = 0; i < thing.length; i += 1) {
216+
if (i > 0) result += ',';
217+
218+
if (Object.hasOwn(thing, i)) {
219+
result += stringify(thing[i]);
220+
} else if (!has_holes) {
221+
// Decide between array literal and Object.assign.
222+
//
223+
// Array literal: holes are consecutive commas.
224+
// For example, [, "a", ,] is written as [,"a",,].
225+
// Each hole costs 1 char (a comma).
226+
//
227+
// Object.assign: populated indices are listed explicitly.
228+
// For example, [, "a", ,] would be written as
229+
// Object.assign(Array(3),{1:"a"}). This avoids paying
230+
// per-hole, but has a large fixed overhead for the
231+
// "Object.assign(Array(n),{...})" wrapper, and each
232+
// element costs extra chars for its index and colon.
233+
//
234+
// The serialized values are the same size either way, so
235+
// the choice comes down to the structural overhead:
236+
//
237+
// Array literal overhead:
238+
// 1 char per element or hole (comma separators)
239+
// + 2 chars for "[" and "]"
240+
// = L + 2
241+
//
242+
// Object.assign overhead:
243+
// "Object.assign(Array(" — 20 chars
244+
// + length — d chars
245+
// + "),{" — 3 chars
246+
// + for each populated element:
247+
// index + ":" + "," — (d + 2) chars
248+
// + "})" — 2 chars
249+
// = (25 + d) + P * (d + 2)
250+
//
251+
// where L is the array length, P is the number of
252+
// populated elements, and d is the number of digits
253+
// in L (an upper bound on the digits in any index).
254+
//
255+
// Object.assign is cheaper when:
256+
// (25 + d) + P * (d + 2) < L + 2
257+
const populated_keys = valid_array_indices(/** @type {any[]} */ (thing));
258+
const population = populated_keys.length;
259+
const d = String(thing.length).length;
260+
261+
const hole_cost = thing.length + 2;
262+
const sparse_cost = (25 + d) + population * (d + 2);
263+
264+
if (hole_cost > sparse_cost) {
265+
const entries = populated_keys
266+
.map((k) => `${k}:${stringify(thing[k])}`)
267+
.join(',');
268+
return `Object.assign(Array(${thing.length}),{${entries}})`;
269+
}
270+
271+
// Re-process this index as a hole in the array literal
272+
has_holes = true;
273+
i -= 1;
274+
}
275+
// else: already decided on array literal, hole is just an empty slot
276+
// (the comma separator is all we need — no content for this position)
277+
}
278+
206279
const tail = thing.length === 0 || thing.length - 1 in thing ? '' : ',';
207-
return `[${members.join(',')}${tail}]`;
280+
return result + tail + ']';
281+
}
208282

209283
case 'Set':
210284
case 'Map':

src/utils.js

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -116,3 +116,33 @@ const is_identifier = /^[a-zA-Z_$][a-zA-Z_$0-9]*$/;
116116
export function stringify_key(key) {
117117
return is_identifier.test(key) ? '.' + key : '[' + JSON.stringify(key) + ']';
118118
}
119+
120+
/** @param {string} s */
121+
function is_valid_array_index(s) {
122+
if (s.length === 0) return false;
123+
if (s.length > 1 && s.charCodeAt(0) === 48) return false; // leading zero
124+
for (let i = 0; i < s.length; i++) {
125+
const c = s.charCodeAt(i);
126+
if (c < 48 || c > 57) return false;
127+
}
128+
// by this point we know it's a string of digits, but it has to be within the range of valid array indices
129+
const n = +s;
130+
if (n >= 2 ** 32 - 1) return false;
131+
if (n < 0) return false;
132+
return true;
133+
}
134+
135+
/**
136+
* Finds the populated indices of an array.
137+
* @param {unknown[]} array
138+
*/
139+
export function valid_array_indices(array) {
140+
const keys = Object.keys(array);
141+
for (var i = keys.length - 1; i >= 0; i--) {
142+
if (is_valid_array_index(keys[i])) {
143+
break;
144+
}
145+
}
146+
keys.length = i + 1;
147+
return keys;
148+
}

src/utils.test.js

Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,63 @@
1+
import * as assert from 'uvu/assert';
2+
import { suite } from 'uvu';
3+
import { valid_array_indices } from './utils.js';
4+
5+
const test = suite('valid_array_indices');
6+
7+
test('returns all indices for a normal dense array', () => {
8+
const arr = ['a', 'b', 'c'];
9+
assert.equal(valid_array_indices(arr), ['0', '1', '2']);
10+
});
11+
12+
test('returns empty array for an empty array', () => {
13+
assert.equal(valid_array_indices([]), []);
14+
});
15+
16+
test('returns populated indices for a sparse array', () => {
17+
const arr = [, 'b', ,];
18+
assert.equal(valid_array_indices(arr), ['1']);
19+
});
20+
21+
test('strips non-numeric properties from a dense array', () => {
22+
const arr = ['a', 'b'];
23+
arr.foo = 'x';
24+
arr.bar = 42;
25+
assert.equal(valid_array_indices(arr), ['0', '1']);
26+
});
27+
28+
test('strips non-numeric properties from a very sparse array', () => {
29+
const arr = [];
30+
arr[1_000_000] = 'x';
31+
arr.foo = 'should be ignored';
32+
assert.equal(valid_array_indices(arr), ['1000000']);
33+
});
34+
35+
test('returns empty array when only non-numeric properties exist', () => {
36+
const arr = [];
37+
arr.foo = 'x';
38+
arr.bar = 42;
39+
assert.equal(valid_array_indices(arr), []);
40+
});
41+
42+
test('handles multiple non-numeric properties after indices', () => {
43+
const arr = [1, 2, 3];
44+
arr.a = 'x';
45+
arr.b = 'y';
46+
arr.c = 'z';
47+
assert.equal(valid_array_indices(arr), ['0', '1', '2']);
48+
});
49+
50+
test('handles a single-element array with non-numeric property', () => {
51+
const arr = ['only'];
52+
arr.extra = true;
53+
assert.equal(valid_array_indices(arr), ['0']);
54+
});
55+
56+
test('handles array properties pretending to be indices', () => {
57+
const arr = ['a', 'b'];
58+
arr[-1] = 'negative index';
59+
arr[2**32 - 1] = 'too large index';
60+
assert.equal(valid_array_indices(arr), ['0', '1']);
61+
})
62+
63+
test.run();

0 commit comments

Comments
 (0)