Skip to content

Commit dcbc37c

Browse files
committed
Add type attribute cache (MCACHE) for MRO lookups
4096-entry direct-mapped cache keyed by (tp_version_tag, interned_name_ptr) using lock-free SeqLock pattern for concurrent access. - find_name_in_mro() checks cache before MRO walk - has_name_in_mro() avoids cloning on cache hit - Auto-assigns version tags on cache miss - Invalidation via modified() (tp_version_tag=0) with value drops - TYPE_CACHE_CLEARING flag suppresses re-population during GC drops - Clear type cache during GC collection to break reference cycles
1 parent 257b0c0 commit dcbc37c

File tree

3 files changed

+258
-24
lines changed

3 files changed

+258
-24
lines changed

.cspell.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,7 @@
7777
"kwonly",
7878
"lossily",
7979
"makeunicodedata",
80+
"mcache",
8081
"microbenchmark",
8182
"microbenchmarks",
8283
"miri",

crates/vm/src/builtins/type.rs

Lines changed: 253 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@ use core::{
3434
ops::Deref,
3535
pin::Pin,
3636
ptr::NonNull,
37-
sync::atomic::{AtomicU32, Ordering},
37+
sync::atomic::{AtomicBool, AtomicPtr, AtomicU32, Ordering},
3838
};
3939
use indexmap::{IndexMap, map::Entry};
4040
use itertools::Itertools;
@@ -61,6 +61,113 @@ pub struct PyType {
6161
/// generic path).
6262
static NEXT_TYPE_VERSION: AtomicU32 = AtomicU32::new(1);
6363

64+
// Type attribute cache (MCACHE): direct-mapped cache keyed by
65+
// (tp_version_tag, interned_name_ptr).
66+
//
67+
// Uses a lock-free SeqLock pattern:
68+
// - version acts as both cache key AND sequence counter
69+
// - Read: load version (Acquire), read value ptr, re-check version
70+
// - Write: set version=0 (invalidate), store value, store version (Release)
71+
// No mutex needed on the hot path (cache hit).
72+
73+
const TYPE_CACHE_SIZE_EXP: u32 = 12;
74+
const TYPE_CACHE_SIZE: usize = 1 << TYPE_CACHE_SIZE_EXP;
75+
const TYPE_CACHE_MASK: usize = TYPE_CACHE_SIZE - 1;
76+
77+
struct TypeCacheEntry {
78+
/// tp_version_tag at cache time. 0 = empty/invalid.
79+
version: AtomicU32,
80+
/// Interned attribute name pointer (pointer equality check).
81+
name: AtomicPtr<PyStrInterned>,
82+
/// Cached lookup result as raw pointer. null = empty.
83+
/// The cache holds a strong reference (refcount incremented).
84+
value: AtomicPtr<PyObject>,
85+
}
86+
87+
// SAFETY: TypeCacheEntry is thread-safe:
88+
// - All fields use atomic operations
89+
// - Value pointer is valid as long as version matches (SeqLock pattern)
90+
// - PyObjectRef uses atomic reference counting
91+
unsafe impl Send for TypeCacheEntry {}
92+
unsafe impl Sync for TypeCacheEntry {}
93+
94+
impl TypeCacheEntry {
95+
fn new() -> Self {
96+
Self {
97+
version: AtomicU32::new(0),
98+
name: AtomicPtr::new(core::ptr::null_mut()),
99+
value: AtomicPtr::new(core::ptr::null_mut()),
100+
}
101+
}
102+
103+
/// Take the value out of this entry, returning the owned PyObjectRef.
104+
/// Caller must ensure no concurrent reads can observe this entry
105+
/// (version should be set to 0 first).
106+
fn take_value(&self) -> Option<PyObjectRef> {
107+
let ptr = self.value.swap(core::ptr::null_mut(), Ordering::Relaxed);
108+
// SAFETY: non-null ptr was stored via PyObjectRef::into_raw
109+
NonNull::new(ptr).map(|nn| unsafe { PyObjectRef::from_raw(nn) })
110+
}
111+
}
112+
113+
// std::sync::LazyLock is used here (not crate::common::lock::LazyLock)
114+
// because TYPE_CACHE is a global shared across test threads. The common
115+
// LazyLock delegates to LazyCell in non-threading mode, which is !Sync.
116+
static TYPE_CACHE: std::sync::LazyLock<Box<[TypeCacheEntry]>> = std::sync::LazyLock::new(|| {
117+
(0..TYPE_CACHE_SIZE)
118+
.map(|_| TypeCacheEntry::new())
119+
.collect::<Vec<_>>()
120+
.into_boxed_slice()
121+
});
122+
123+
/// When true, find_name_in_mro skips populating the cache.
124+
/// Set during GC's type_cache_clear to prevent re-population from drops.
125+
static TYPE_CACHE_CLEARING: AtomicBool = AtomicBool::new(false);
126+
127+
/// MCACHE_HASH: XOR of version and name pointer hash, masked to cache size.
128+
#[inline]
129+
fn type_cache_hash(version: u32, name: &'static PyStrInterned) -> usize {
130+
let name_hash = (name as *const PyStrInterned as usize >> 3) as u32;
131+
((version ^ name_hash) as usize) & TYPE_CACHE_MASK
132+
}
133+
134+
/// Invalidate cache entries for a specific version tag and release values.
135+
/// Called from modified() when a type is changed.
136+
fn type_cache_clear_version(version: u32) {
137+
let mut to_drop = Vec::new();
138+
for entry in TYPE_CACHE.iter() {
139+
if entry.version.load(Ordering::Relaxed) == version {
140+
entry.version.store(0, Ordering::Release);
141+
if let Some(v) = entry.take_value() {
142+
to_drop.push(v);
143+
}
144+
}
145+
}
146+
drop(to_drop);
147+
}
148+
149+
/// Clear all type cache entries (_PyType_ClearCache).
150+
/// Called during GC collection to release strong references that might
151+
/// prevent cycle collection.
152+
///
153+
/// Sets TYPE_CACHE_CLEARING to suppress cache re-population from any
154+
/// Python code triggered by dropping the cached values.
155+
pub fn type_cache_clear() {
156+
// Phase 1: Invalidate all entries and collect values.
157+
let mut to_drop = Vec::new();
158+
for entry in TYPE_CACHE.iter() {
159+
entry.version.store(0, Ordering::Release);
160+
if let Some(v) = entry.take_value() {
161+
to_drop.push(v);
162+
}
163+
}
164+
// Phase 2: Drop values. Suppress cache population so that any Python
165+
// code triggered by these drops doesn't re-fill the cache.
166+
TYPE_CACHE_CLEARING.store(true, Ordering::Release);
167+
drop(to_drop);
168+
TYPE_CACHE_CLEARING.store(false, Ordering::Release);
169+
}
170+
64171
unsafe impl crate::object::Traverse for PyType {
65172
fn traverse(&self, tracer_fn: &mut crate::object::TraverseFn<'_>) {
66173
self.base.traverse(tracer_fn);
@@ -206,6 +313,18 @@ impl PyType {
206313
/// Assign a fresh version tag. Returns 0 if the version counter has been
207314
/// exhausted, in which case no new cache entries can be created.
208315
pub fn assign_version_tag(&self) -> u32 {
316+
let v = self.tp_version_tag.load(Ordering::Acquire);
317+
if v != 0 {
318+
return v;
319+
}
320+
321+
// Assign versions to all direct bases first (MRO invariant).
322+
for base in self.bases.read().iter() {
323+
if base.assign_version_tag() == 0 {
324+
return 0;
325+
}
326+
}
327+
209328
loop {
210329
let current = NEXT_TYPE_VERSION.load(Ordering::Relaxed);
211330
let Some(next) = current.checked_add(1) else {
@@ -223,7 +342,17 @@ impl PyType {
223342

224343
/// Invalidate this type's version tag and cascade to all subclasses.
225344
pub fn modified(&self) {
345+
// If already invalidated, all subclasses must also be invalidated
346+
// (guaranteed by the MRO invariant in assign_version_tag).
347+
let old_version = self.tp_version_tag.load(Ordering::Acquire);
348+
if old_version == 0 {
349+
return;
350+
}
226351
self.tp_version_tag.store(0, Ordering::Release);
352+
// Release strong references held by cache entries for this version.
353+
// We hold owned refs that would prevent GC of class attributes after
354+
// type deletion.
355+
type_cache_clear_version(old_version);
227356
let subclasses = self.subclasses.read();
228357
for weak_ref in subclasses.iter() {
229358
if let Some(sub) = weak_ref.upgrade() {
@@ -574,24 +703,104 @@ impl PyType {
574703

575704
pub fn set_attr(&self, attr_name: &'static PyStrInterned, value: PyObjectRef) {
576705
self.attributes.write().insert(attr_name, value);
706+
self.modified();
577707
}
578708

579-
/// This is the internal get_attr implementation for fast lookup on a class.
709+
/// Internal get_attr implementation for fast lookup on a class.
710+
/// Searches the full MRO (including self) with type cache acceleration.
580711
pub fn get_attr(&self, attr_name: &'static PyStrInterned) -> Option<PyObjectRef> {
581-
flame_guard!(format!("class_get_attr({:?})", attr_name));
582-
583-
self.get_direct_attr(attr_name)
584-
.or_else(|| self.get_super_attr(attr_name))
712+
self.find_name_in_mro(attr_name)
585713
}
586714

587715
pub fn get_direct_attr(&self, attr_name: &'static PyStrInterned) -> Option<PyObjectRef> {
588716
self.attributes.read().get(attr_name).cloned()
589717
}
590718

591-
/// Equivalent to CPython's find_name_in_mro
592-
/// Look in tp_dict of types in MRO - bypasses descriptors and other attribute access machinery
719+
/// find_name_in_mro with type cache (MCACHE).
720+
/// Looks in tp_dict of types in MRO, bypasses descriptors.
721+
///
722+
/// Uses a lock-free SeqLock pattern keyed by version:
723+
/// Read: load version → check name → load value → clone → re-check version
724+
/// Write: version=0 → swap value → set name → version=assigned
593725
fn find_name_in_mro(&self, name: &'static PyStrInterned) -> Option<PyObjectRef> {
594-
// mro[0] is self, so we just iterate through the entire MRO
726+
let version = self.tp_version_tag.load(Ordering::Acquire);
727+
if version != 0 {
728+
let idx = type_cache_hash(version, name);
729+
let entry = &TYPE_CACHE[idx];
730+
let v1 = entry.version.load(Ordering::Acquire);
731+
if v1 == version
732+
&& core::ptr::eq(
733+
entry.name.load(Ordering::Relaxed),
734+
name as *const _ as *mut _,
735+
)
736+
{
737+
let ptr = entry.value.load(Ordering::Acquire);
738+
if !ptr.is_null() {
739+
// SAFETY: The value pointer was stored via PyObjectRef::into_raw
740+
// and is valid as long as the version hasn't changed. We create
741+
// a temporary reference (ManuallyDrop prevents decrement), clone
742+
// it to get our own strong reference, then re-check the version
743+
// to confirm the entry wasn't invalidated during our read.
744+
let cloned = unsafe {
745+
let tmp = core::mem::ManuallyDrop::new(PyObjectRef::from_raw(
746+
NonNull::new_unchecked(ptr),
747+
));
748+
(*tmp).clone()
749+
};
750+
// SeqLock validation: if version changed, discard our clone
751+
let v2 = entry.version.load(Ordering::Acquire);
752+
if v2 == v1 {
753+
return Some(cloned);
754+
}
755+
drop(cloned);
756+
}
757+
}
758+
}
759+
760+
// Assign version BEFORE the MRO walk so that any concurrent
761+
// modified() call during the walk invalidates this version.
762+
let assigned = if version == 0 {
763+
self.assign_version_tag()
764+
} else {
765+
version
766+
};
767+
768+
// MRO walk
769+
let result = self.find_name_in_mro_uncached(name);
770+
771+
// Only cache positive results. Negative results are not cached to
772+
// avoid stale entries from transient MRO walk failures during
773+
// concurrent type modifications.
774+
if let Some(ref found) = result
775+
&& assigned != 0
776+
&& !TYPE_CACHE_CLEARING.load(Ordering::Acquire)
777+
&& self.tp_version_tag.load(Ordering::Acquire) == assigned
778+
{
779+
let idx = type_cache_hash(assigned, name);
780+
let entry = &TYPE_CACHE[idx];
781+
// Invalidate first to prevent readers from seeing partial state
782+
entry.version.store(0, Ordering::Release);
783+
// Swap in new value (refcount held by cache)
784+
let new_ptr = found.clone().into_raw().as_ptr();
785+
let old_ptr = entry.value.swap(new_ptr, Ordering::Relaxed);
786+
entry
787+
.name
788+
.store(name as *const _ as *mut _, Ordering::Relaxed);
789+
// Activate entry — Release ensures value/name writes are visible
790+
entry.version.store(assigned, Ordering::Release);
791+
// Drop previous occupant (its version was already invalidated)
792+
if !old_ptr.is_null() {
793+
unsafe {
794+
drop(PyObjectRef::from_raw(NonNull::new_unchecked(old_ptr)));
795+
}
796+
}
797+
}
798+
799+
result
800+
}
801+
802+
/// Raw MRO walk without cache.
803+
fn find_name_in_mro_uncached(&self, name: &'static PyStrInterned) -> Option<PyObjectRef> {
595804
for cls in self.mro.read().iter() {
596805
if let Some(value) = cls.attributes.read().get(name) {
597806
return Some(value.clone());
@@ -600,14 +809,9 @@ impl PyType {
600809
None
601810
}
602811

603-
/// Equivalent to CPython's _PyType_LookupRef
604-
/// Looks up a name through the MRO without setting an exception
812+
/// _PyType_LookupRef: look up a name through the MRO without setting an exception.
605813
pub fn lookup_ref(&self, name: &Py<PyStr>, vm: &VirtualMachine) -> Option<PyObjectRef> {
606-
// Get interned name for efficient lookup
607814
let interned_name = vm.ctx.interned_str(name)?;
608-
609-
// Use find_name_in_mro which matches CPython's behavior
610-
// This bypasses descriptors and other attribute access machinery
611815
self.find_name_in_mro(interned_name)
612816
}
613817

@@ -617,12 +821,37 @@ impl PyType {
617821
.find_map(|class| class.attributes.read().get(attr_name).cloned())
618822
}
619823

620-
// This is the internal has_attr implementation for fast lookup on a class.
824+
/// Fast lookup for attribute existence on a class.
621825
pub fn has_attr(&self, attr_name: &'static PyStrInterned) -> bool {
622-
self.attributes.read().contains_key(attr_name)
623-
|| self.mro.read()[1..]
624-
.iter()
625-
.any(|c| c.attributes.read().contains_key(attr_name))
826+
self.has_name_in_mro(attr_name)
827+
}
828+
829+
/// Check if attribute exists in MRO, using cache for fast check.
830+
/// Unlike find_name_in_mro, avoids cloning the value on cache hit.
831+
fn has_name_in_mro(&self, name: &'static PyStrInterned) -> bool {
832+
let version = self.tp_version_tag.load(Ordering::Acquire);
833+
if version != 0 {
834+
let idx = type_cache_hash(version, name);
835+
let entry = &TYPE_CACHE[idx];
836+
let v1 = entry.version.load(Ordering::Acquire);
837+
if v1 == version
838+
&& core::ptr::eq(
839+
entry.name.load(Ordering::Relaxed),
840+
name as *const _ as *mut _,
841+
)
842+
{
843+
let ptr = entry.value.load(Ordering::Acquire);
844+
if !ptr.is_null() {
845+
let v2 = entry.version.load(Ordering::Acquire);
846+
if v2 == v1 {
847+
return true;
848+
}
849+
}
850+
}
851+
}
852+
853+
// Cache miss — use find_name_in_mro which populates cache
854+
self.find_name_in_mro(name).is_some()
626855
}
627856

628857
pub fn get_attributes(&self) -> PyAttributes {
@@ -1270,8 +1499,8 @@ impl PyType {
12701499
PySetterValue::Assign(ref val) => {
12711500
let key = identifier!(vm, __type_params__);
12721501
self.check_set_special_type_attr(key, vm)?;
1273-
let mut attrs = self.attributes.write();
1274-
attrs.insert(key, val.clone().into());
1502+
self.attributes.write().insert(key, val.clone().into());
1503+
self.modified();
12751504
}
12761505
PySetterValue::Delete => {
12771506
// For delete, we still need to check if the type is immutable
@@ -1281,9 +1510,9 @@ impl PyType {
12811510
self.slot_name()
12821511
)));
12831512
}
1284-
let mut attrs = self.attributes.write();
12851513
let key = identifier!(vm, __type_params__);
1286-
attrs.shift_remove(&key);
1514+
self.attributes.write().shift_remove(&key);
1515+
self.modified();
12871516
}
12881517
}
12891518
Ok(())

crates/vm/src/gc_state.rs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -410,6 +410,10 @@ impl GcState {
410410
let generation = generation.min(2);
411411
let debug = self.get_debug();
412412

413+
// Clear the type attribute cache to release strong references that
414+
// might prevent cycle collection (_PyType_ClearCache).
415+
crate::builtins::type_::type_cache_clear();
416+
413417
// Step 1: Gather objects from generations 0..=generation
414418
// Hold read locks for the entire collection to prevent other threads
415419
// from untracking objects while we're iterating.

0 commit comments

Comments
 (0)