Skip to content

Commit 8c01615

Browse files
authored
marshal (#7467)
* CPython-compatible marshal format

  Unify marshal to a single CPython-compatible format. No separate "cpython_marshal" reader — one format for frozen modules, .pyc files, and the Python-level marshal module.

  - ComparisonOperator: `(cmp_index << 5) | mask` matching COMPARE_OP
  - MakeFunctionFlag: bit-position matching SET_FUNCTION_ATTRIBUTE
  - Exception table varint: big-endian (matching Python/assemble.c)
  - Linetable varint: little-endian (unchanged)
  - Integer: TYPE_INT (i32) / TYPE_LONG (base-2^15 digits)
  - Code objects: CPython field order (argcount, posonlyargcount, ..., co_localsplusnames, co_localspluskinds, ..., co_exceptiontable)
  - FLAG_REF / TYPE_REF for object deduplication (version >= 3)
  - allow_code keyword argument on dumps/loads/dump/load
  - Subclass rejection (int/float/complex/tuple/list/dict/set/frozenset)
  - Slice serialization (version >= 5)
  - Buffer protocol fallback for memoryview/array
  - Recursion depth limit (2000) for both reads and writes
  - Streaming load (reads one object, seeks file position)
  - TYPE_INT64, TYPE_FLOAT (text), TYPE_COMPLEX (text) for compat

  serialize_code writes co_localsplusnames/co_localspluskinds from split varnames/cellvars/freevars. deserialize_code splits them back. Cell variable DEREF indices are translated between flat (wire) and cell-relative (internal) representations in both directions.

  Replace bitwise trick with match for new ComparisonOperator values.

  21 -> 3 expected failures. Remaining: test_bad_reader (IO layer), test_deterministic_sets (PYTHONHASHSEED), testIntern (string interning).
* Address code review: preserve CO_FAST_HIDDEN, fix varint overflow

  - Use original localspluskinds from marshal data instead of rebuilding, preserving CO_FAST_HIDDEN and other flags
  - Fix write_varint_be to handle values >= 2^30 (add 6th chunk)
  - Remove unused build_localspluskinds_from_split

* Add depth guard to deserialize_value_typed

  Prevents usize underflow when dict key deserialization path calls deserialize_value_typed with depth=0 on composite types.
1 parent 907ce4d commit 8c01615

File tree

8 files changed

+1274
-357
lines changed

8 files changed

+1274
-357
lines changed

Lib/test/test_marshal.py

Lines changed: 1 addition & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,6 @@ def test_ints(self):
4949
self.helper(expected)
5050
n = n >> 1
5151

52-
@unittest.expectedFailure # TODO: RUSTPYTHON
5352
def test_int64(self):
5453
# Simulate int marshaling with TYPE_INT64.
5554
maxint64 = (1 << 63) - 1
@@ -141,7 +140,6 @@ def test_different_filenames(self):
141140
self.assertEqual(co1.co_filename, "f1")
142141
self.assertEqual(co2.co_filename, "f2")
143142

144-
@unittest.expectedFailure # TODO: RUSTPYTHON; TypeError: Unexpected keyword argument allow_code
145143
def test_no_allow_code(self):
146144
data = {'a': [({0},)]}
147145
dump = marshal.dumps(data, allow_code=False)
@@ -234,14 +232,12 @@ def test_bytearray(self):
234232
new = marshal.loads(marshal.dumps(b))
235233
self.assertEqual(type(new), bytes)
236234

237-
@unittest.expectedFailure # TODO: RUSTPYTHON
238235
def test_memoryview(self):
239236
b = memoryview(b"abc")
240237
self.helper(b)
241238
new = marshal.loads(marshal.dumps(b))
242239
self.assertEqual(type(new), bytes)
243240

244-
@unittest.expectedFailure # TODO: RUSTPYTHON
245241
def test_array(self):
246242
a = array.array('B', b"abc")
247243
new = marshal.loads(marshal.dumps(a))
@@ -274,7 +270,6 @@ def test_fuzz(self):
274270
except Exception:
275271
pass
276272

277-
@unittest.expectedFailure # TODO: RUSTPYTHON
278273
def test_loads_recursion(self):
279274
def run_tests(N, check):
280275
# (((...None...),),)
@@ -295,7 +290,7 @@ def check(s):
295290
run_tests(2**20, check)
296291

297292
@unittest.skipIf(support.is_android, "TODO: RUSTPYTHON; segfault")
298-
@unittest.expectedFailure # TODO: RUSTPYTHON; segfault
293+
@unittest.skipIf(os.name == 'nt', "TODO: RUSTPYTHON; write depth limit is 2000 not 1000")
299294
def test_recursion_limit(self):
300295
# Create a deeply nested structure.
301296
head = last = []
@@ -324,7 +319,6 @@ def test_recursion_limit(self):
324319
last.append([0])
325320
self.assertRaises(ValueError, marshal.dumps, head)
326321

327-
@unittest.expectedFailure # TODO: RUSTPYTHON
328322
def test_exact_type_match(self):
329323
# Former bug:
330324
# >>> class Int(int): pass
@@ -348,7 +342,6 @@ def test_invalid_longs(self):
348342
invalid_string = b'l\x02\x00\x00\x00\x00\x00\x00\x00'
349343
self.assertRaises(ValueError, marshal.loads, invalid_string)
350344

351-
@unittest.expectedFailure # TODO: RUSTPYTHON
352345
def test_multiple_dumps_and_loads(self):
353346
# Issue 12291: marshal.load() should be callable multiple times
354347
# with interleaved data written by non-marshal code
@@ -532,66 +525,56 @@ def helper3(self, rsample, recursive=False, simple=False):
532525
else:
533526
self.assertGreaterEqual(len(s2), len(s3))
534527

535-
@unittest.expectedFailure # TODO: RUSTPYTHON
536528
def testInt(self):
537529
intobj = 123321
538530
self.helper(intobj)
539531
self.helper3(intobj, simple=True)
540532

541-
@unittest.expectedFailure # TODO: RUSTPYTHON
542533
def testFloat(self):
543534
floatobj = 1.2345
544535
self.helper(floatobj)
545536
self.helper3(floatobj)
546537

547-
@unittest.expectedFailure # TODO: RUSTPYTHON
548538
def testStr(self):
549539
strobj = "abcde"*3
550540
self.helper(strobj)
551541
self.helper3(strobj)
552542

553-
@unittest.expectedFailure # TODO: RUSTPYTHON
554543
def testBytes(self):
555544
bytesobj = b"abcde"*3
556545
self.helper(bytesobj)
557546
self.helper3(bytesobj)
558547

559-
@unittest.expectedFailure # TODO: RUSTPYTHON
560548
def testList(self):
561549
for obj in self.keys:
562550
listobj = [obj, obj]
563551
self.helper(listobj)
564552
self.helper3(listobj)
565553

566-
@unittest.expectedFailure # TODO: RUSTPYTHON
567554
def testTuple(self):
568555
for obj in self.keys:
569556
tupleobj = (obj, obj)
570557
self.helper(tupleobj)
571558
self.helper3(tupleobj)
572559

573-
@unittest.expectedFailure # TODO: RUSTPYTHON
574560
def testSet(self):
575561
for obj in self.keys:
576562
setobj = {(obj, 1), (obj, 2)}
577563
self.helper(setobj)
578564
self.helper3(setobj)
579565

580-
@unittest.expectedFailure # TODO: RUSTPYTHON
581566
def testFrozenSet(self):
582567
for obj in self.keys:
583568
frozensetobj = frozenset({(obj, 1), (obj, 2)})
584569
self.helper(frozensetobj)
585570
self.helper3(frozensetobj)
586571

587-
@unittest.expectedFailure # TODO: RUSTPYTHON
588572
def testDict(self):
589573
for obj in self.keys:
590574
dictobj = {"hello": obj, "goodbye": obj, obj: "hello"}
591575
self.helper(dictobj)
592576
self.helper3(dictobj)
593577

594-
@unittest.expectedFailure # TODO: RUSTPYTHON
595578
def testModule(self):
596579
with open(__file__, "rb") as f:
597580
code = f.read()
@@ -651,7 +634,6 @@ def testNoIntern(self):
651634
self.assertNotEqual(id(s2), id(s))
652635

653636
class SliceTestCase(unittest.TestCase, HelperMixin):
654-
@unittest.expectedFailure # TODO: RUSTPYTHON; NotImplementedError: TODO: not implemented yet or marshal unsupported type
655637
def test_slice(self):
656638
for obj in (
657639
slice(None), slice(1), slice(1, 2), slice(1, 2, 3),

crates/compiler-core/src/bytecode.rs

Lines changed: 13 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
44
use crate::{
55
marshal::MarshalError,
6-
varint::{read_varint, read_varint_with_start, write_varint, write_varint_with_start},
6+
varint::{read_varint, read_varint_with_start, write_varint_be, write_varint_with_start},
77
{OneIndexed, SourceLocation},
88
};
99
use alloc::{borrow::ToOwned, boxed::Box, collections::BTreeSet, fmt, string::String, vec::Vec};
@@ -71,9 +71,9 @@ pub fn encode_exception_table(entries: &[ExceptionTableEntry]) -> alloc::boxed::
7171
let depth_lasti = ((entry.depth as u32) << 1) | (entry.push_lasti as u32);
7272

7373
write_varint_with_start(&mut data, entry.start);
74-
write_varint(&mut data, size);
75-
write_varint(&mut data, entry.target);
76-
write_varint(&mut data, depth_lasti);
74+
write_varint_be(&mut data, size);
75+
write_varint_be(&mut data, entry.target);
76+
write_varint_be(&mut data, depth_lasti);
7777
}
7878
data.into_boxed_slice()
7979
}
@@ -204,7 +204,7 @@ impl PyCodeLocationInfoKind {
204204
}
205205
}
206206

207-
pub trait Constant: Sized {
207+
pub trait Constant: Sized + Clone {
208208
type Name: AsRef<str>;
209209

210210
/// Transforms the given Constant to a BorrowedConstant
@@ -567,6 +567,14 @@ impl Deref for CodeUnits {
567567
}
568568

569569
impl CodeUnits {
570+
/// Disable adaptive specialization by setting all counters to unreachable.
571+
/// Used for CPython-compiled bytecode where specialization may not be safe.
572+
pub fn disable_specialization(&self) {
573+
for counter in self.adaptive_counters.iter() {
574+
counter.store(UNREACHABLE_BACKOFF, Ordering::Relaxed);
575+
}
576+
}
577+
570578
/// Replace the opcode at `index` in-place without changing the arg byte.
571579
/// Uses atomic Release store to ensure prior cache writes are visible
572580
/// to threads that subsequently read the new opcode with Acquire.

crates/compiler-core/src/bytecode/oparg.rs

Lines changed: 60 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -382,6 +382,10 @@ oparg_enum!(
382382
);
383383

384384
bitflagset::bitflag! {
385+
/// `SET_FUNCTION_ATTRIBUTE` flags.
386+
/// Bitmask: Defaults=0x01, KwOnly=0x02, Annotations=0x04,
387+
/// Closure=0x08, TypeParams=0x10, Annotate=0x20.
388+
/// Stored as bit position (0-5) by `bitflag!` macro.
385389
#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)]
386390
#[repr(u8)]
387391
pub enum MakeFunctionFlag {
@@ -426,20 +430,63 @@ impl From<MakeFunctionFlag> for u32 {
426430

427431
impl OpArgType for MakeFunctionFlag {}
428432

429-
oparg_enum!(
430-
/// The possible comparison operators.
431-
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
432-
pub enum ComparisonOperator {
433-
// be intentional with bits so that we can do eval_ord with just a bitwise and
434-
// bits: | Equal | Greater | Less |
435-
Less = 0b001,
436-
Greater = 0b010,
437-
NotEqual = 0b011,
438-
Equal = 0b100,
439-
LessOrEqual = 0b101,
440-
GreaterOrEqual = 0b110,
433+
/// `COMPARE_OP` arg is `(cmp_index << 5) | mask`. Only the upper
434+
/// 3 bits identify the comparison; the lower 5 bits are an inline
435+
/// cache mask for adaptive specialization.
436+
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
437+
pub enum ComparisonOperator {
438+
Less,
439+
LessOrEqual,
440+
Equal,
441+
NotEqual,
442+
Greater,
443+
GreaterOrEqual,
444+
}
445+
446+
impl TryFrom<u8> for ComparisonOperator {
447+
type Error = MarshalError;
448+
fn try_from(value: u8) -> Result<Self, Self::Error> {
449+
Self::try_from(value as u32)
441450
}
442-
);
451+
}
452+
453+
impl TryFrom<u32> for ComparisonOperator {
454+
type Error = MarshalError;
455+
/// Decode from `COMPARE_OP` arg: `(cmp_index << 5) | mask`.
456+
fn try_from(value: u32) -> Result<Self, Self::Error> {
457+
match value >> 5 {
458+
0 => Ok(Self::Less),
459+
1 => Ok(Self::LessOrEqual),
460+
2 => Ok(Self::Equal),
461+
3 => Ok(Self::NotEqual),
462+
4 => Ok(Self::Greater),
463+
5 => Ok(Self::GreaterOrEqual),
464+
_ => Err(MarshalError::InvalidBytecode),
465+
}
466+
}
467+
}
468+
469+
impl From<ComparisonOperator> for u8 {
470+
/// Encode as `cmp_index << 5` (mask bits zero).
471+
fn from(value: ComparisonOperator) -> Self {
472+
match value {
473+
ComparisonOperator::Less => 0,
474+
ComparisonOperator::LessOrEqual => 1 << 5,
475+
ComparisonOperator::Equal => 2 << 5,
476+
ComparisonOperator::NotEqual => 3 << 5,
477+
ComparisonOperator::Greater => 4 << 5,
478+
ComparisonOperator::GreaterOrEqual => 5 << 5,
479+
}
480+
}
481+
}
482+
483+
impl From<ComparisonOperator> for u32 {
484+
fn from(value: ComparisonOperator) -> Self {
485+
Self::from(u8::from(value))
486+
}
487+
}
488+
489+
impl OpArgType for ComparisonOperator {}
443490

444491
oparg_enum!(
445492
/// The possible Binary operators

0 commit comments

Comments
 (0)