Skip to content

Commit 375b547

Browse files
authored
Fix thread-safety in GC, type cache, and instruction cache (#7355)
* Fix thread-safety in GC, type cache, and instruction cache

  GC / refcount:
  - Add safe_inc() check for strong()==0 in RefCount
  - Add try_to_owned() to PyObject for atomic refcount acquire
  - Replace strong_count()+to_owned() with try_to_owned() in GC collection
    and weakref callback paths to prevent TOCTOU races

  Type cache:
  - Add proper SeqLock (sequence counter) to TypeCacheEntry
  - Readers spin-wait on odd sequence, validate after read
  - Writers bracket updates with begin_write/end_write
  - Use try_to_owned + pointer revalidation on read path
  - Call modified() BEFORE attribute modification in set_attr

  Instruction cache:
  - Add pointer_cache (AtomicUsize array) to CodeUnits for single atomic
    pointer load/store (prevents torn reads)
  - Add try_read_cached_descriptor with try_to_owned + pointer and version
    revalidation after increment
  - Add write_cached_descriptor with version-bracketed writes

  RLock:
  - Fix release() to check is_owned_by_current_thread
  - Add _release_save/_acquire_restore methods

* Fix RLock _acquire_restore tuple handling and unxfail threading test

* Align type cache seqlock writer protocol with CPython

* RLock: use single parking_lot level, track recursion manually

  Instead of calling lock()/unlock() N times for recursion depth N, keep
  parking_lot at 1 level and manage the count ourselves. This makes
  acquire/release O(1) and matches CPython's _PyRecursiveMutex approach
  (lock once + set level directly).

* Add try_to_owned_from_ptr to avoid &PyObject on stale ptrs

  Use addr_of! to access ref_count directly from a raw pointer without
  forming &PyObject first. Applied in type cache and instruction cache hit
  paths where the pointer may be stale.

* Fix CI: spelling typo and xfail flaky test_thread_safety

  - Fix "minimising" -> "minimizing" for cspell
  - xfail test_thread_safety: dict iteration races with concurrent GC
    mutations in _finalizer_registry
1 parent 86134e1 commit 375b547

File tree

10 files changed: +374 additions, -181 deletions

Lib/test/_test_multiprocessing.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4815,9 +4815,9 @@ def test_finalize(self):
48154815
result = [obj for obj in iter(conn.recv, 'STOP')]
48164816
self.assertEqual(result, ['a', 'b', 'd10', 'd03', 'd02', 'd01', 'e'])
48174817

4818-
# TODO: RUSTPYTHON - gc.get_threshold() and gc.set_threshold() not implemented
4819-
@unittest.expectedFailure
48204818
@support.requires_resource('cpu')
4819+
# TODO: RUSTPYTHON; dict iteration races with concurrent GC mutations
4820+
@unittest.expectedFailure
48214821
def test_thread_safety(self):
48224822
# bpo-24484: _run_finalizers() should be thread-safe
48234823
def cb():

Lib/test/test_threading.py

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2141,10 +2141,6 @@ def __init__(self, a, *, b) -> None:
21412141
CustomRLock(1, b=2)
21422142
self.assertEqual(warnings_log, [])
21432143

2144-
@unittest.expectedFailure # TODO: RUSTPYTHON
2145-
def test_release_save_unacquired(self):
2146-
return super().test_release_save_unacquired()
2147-
21482144
@unittest.skip('TODO: RUSTPYTHON; flaky test')
21492145
def test_different_thread(self):
21502146
return super().test_different_thread()

crates/common/src/refcount.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -123,7 +123,7 @@ impl RefCount {
123123
pub fn safe_inc(&self) -> bool {
124124
let mut old = State::from_raw(self.state.load(Ordering::Relaxed));
125125
loop {
126-
if old.destructed() {
126+
if old.destructed() || old.strong() == 0 {
127127
return false;
128128
}
129129
if (old.strong() as usize) >= STRONG {

crates/compiler-core/src/bytecode.rs

Lines changed: 35 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ use core::{
1212
cell::UnsafeCell,
1313
hash, mem,
1414
ops::Deref,
15-
sync::atomic::{AtomicU8, AtomicU16, Ordering},
15+
sync::atomic::{AtomicU8, AtomicU16, AtomicUsize, Ordering},
1616
};
1717
use itertools::Itertools;
1818
use malachite_bigint::BigInt;
@@ -411,6 +411,10 @@ impl TryFrom<&[u8]> for CodeUnit {
411411
pub struct CodeUnits {
412412
units: UnsafeCell<Box<[CodeUnit]>>,
413413
adaptive_counters: Box<[AtomicU16]>,
414+
/// Pointer-sized cache entries for descriptor pointers.
415+
/// Single atomic load/store prevents torn reads when multiple threads
416+
/// specialize the same instruction concurrently.
417+
pointer_cache: Box<[AtomicUsize]>,
414418
}
415419

416420
// SAFETY: All cache operations use atomic read/write instructions.
@@ -432,9 +436,15 @@ impl Clone for CodeUnits {
432436
.iter()
433437
.map(|c| AtomicU16::new(c.load(Ordering::Relaxed)))
434438
.collect();
439+
let pointer_cache = self
440+
.pointer_cache
441+
.iter()
442+
.map(|c| AtomicUsize::new(c.load(Ordering::Relaxed)))
443+
.collect();
435444
Self {
436445
units: UnsafeCell::new(units),
437446
adaptive_counters,
447+
pointer_cache,
438448
}
439449
}
440450
}
@@ -472,13 +482,19 @@ impl<const N: usize> From<[CodeUnit; N]> for CodeUnits {
472482
impl From<Vec<CodeUnit>> for CodeUnits {
473483
fn from(value: Vec<CodeUnit>) -> Self {
474484
let units = value.into_boxed_slice();
475-
let adaptive_counters = (0..units.len())
485+
let len = units.len();
486+
let adaptive_counters = (0..len)
476487
.map(|_| AtomicU16::new(0))
477488
.collect::<Vec<_>>()
478489
.into_boxed_slice();
490+
let pointer_cache = (0..len)
491+
.map(|_| AtomicUsize::new(0))
492+
.collect::<Vec<_>>()
493+
.into_boxed_slice();
479494
Self {
480495
units: UnsafeCell::new(units),
481496
adaptive_counters,
497+
pointer_cache,
482498
}
483499
}
484500
}
@@ -600,25 +616,29 @@ impl CodeUnits {
600616
lo | (hi << 16)
601617
}
602618

603-
/// Write a u64 value across four consecutive CACHE code units starting at `index`.
619+
/// Store a pointer-sized value atomically in the pointer cache at `index`.
620+
///
621+
/// Uses a single `AtomicUsize` store to prevent torn writes when
622+
/// multiple threads specialize the same instruction concurrently.
604623
///
605624
/// # Safety
606-
/// Same requirements as `write_cache_u16`.
607-
pub unsafe fn write_cache_u64(&self, index: usize, value: u64) {
608-
unsafe {
609-
self.write_cache_u32(index, value as u32);
610-
self.write_cache_u32(index + 2, (value >> 32) as u32);
611-
}
625+
/// - `index` must be in bounds.
626+
/// - `value` must be `0` or a valid `*const PyObject` encoded as `usize`.
627+
/// - Callers must follow the cache invalidation/upgrade protocol:
628+
/// invalidate the version guard before writing and publish the new
629+
/// version after writing.
630+
pub unsafe fn write_cache_ptr(&self, index: usize, value: usize) {
631+
self.pointer_cache[index].store(value, Ordering::Relaxed);
612632
}
613633

614-
/// Read a u64 value from four consecutive CACHE code units starting at `index`.
634+
/// Load a pointer-sized value atomically from the pointer cache at `index`.
635+
///
636+
/// Uses a single `AtomicUsize` load to prevent torn reads.
615637
///
616638
/// # Panics
617-
/// Panics if `index + 3` is out of bounds.
618-
pub fn read_cache_u64(&self, index: usize) -> u64 {
619-
let lo = self.read_cache_u32(index) as u64;
620-
let hi = self.read_cache_u32(index + 2) as u64;
621-
lo | (hi << 32)
639+
/// Panics if `index` is out of bounds.
640+
pub fn read_cache_ptr(&self, index: usize) -> usize {
641+
self.pointer_cache[index].load(Ordering::Relaxed)
622642
}
623643

624644
/// Read adaptive counter bits for instruction at `index`.

0 commit comments

Comments (0)