Skip to content

Commit 3a0ec60

Browse files
committed
Add method cache (MCACHE) for MRO lookups
4096-entry direct-mapped cache keyed by (tp_version_tag, interned_name_ptr) using lock-free SeqLock pattern for concurrent access.

- find_name_in_mro() checks cache before MRO walk
- has_name_in_mro() avoids cloning on cache hit
- Auto-assigns version tags on cache miss
- Invalidation via modified() (tp_version_tag=0) with value drops
- TYPE_CACHE_CLEARING flag suppresses re-population during GC drops
- Clear method cache during GC collection to break reference cycles
1 parent 62ca60a commit 3a0ec60

File tree

3 files changed

+257
-24
lines changed

3 files changed

+257
-24
lines changed

.cspell.json

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -77,6 +77,7 @@
7777
"kwonly",
7878
"lossily",
7979
"makeunicodedata",
80+
"mcache",
8081
"microbenchmark",
8182
"microbenchmarks",
8283
"miri",

crates/vm/src/builtins/type.rs

Lines changed: 252 additions & 24 deletions
Original file line number | Diff line number | Diff line change
@@ -34,7 +34,7 @@ use core::{
3434
ops::Deref,
3535
pin::Pin,
3636
ptr::NonNull,
37-
sync::atomic::{AtomicU32, Ordering},
37+
sync::atomic::{AtomicBool, AtomicPtr, AtomicU32, Ordering},
3838
};
3939
use indexmap::{IndexMap, map::Entry};
4040
use itertools::Itertools;
@@ -61,6 +61,112 @@ pub struct PyType {
6161
/// generic path).
6262
static NEXT_TYPE_VERSION: AtomicU32 = AtomicU32::new(1);
6363

64+
// Method cache (type_cache / MCACHE): direct-mapped cache keyed by
65+
// (tp_version_tag, interned_name_ptr).
66+
//
67+
// Uses a lock-free SeqLock pattern:
68+
// - version acts as both cache key AND sequence counter
69+
// - Read: load version (Acquire), read value ptr, re-check version
70+
// - Write: set version=0 (invalidate), store value, store version (Release)
71+
// No mutex needed on the hot path (cache hit).
72+
73+
const TYPE_CACHE_SIZE_EXP: u32 = 12;
74+
const TYPE_CACHE_SIZE: usize = 1 << TYPE_CACHE_SIZE_EXP;
75+
const TYPE_CACHE_MASK: usize = TYPE_CACHE_SIZE - 1;
76+
77+
struct TypeCacheEntry {
78+
/// tp_version_tag at cache time. 0 = empty/invalid.
79+
version: AtomicU32,
80+
/// Interned attribute name pointer (pointer equality check).
81+
name: AtomicPtr<PyStrInterned>,
82+
/// Cached lookup result as raw pointer. null = empty.
83+
/// The cache holds a strong reference (refcount incremented).
84+
value: AtomicPtr<PyObject>,
85+
}
86+
87+
// SAFETY: TypeCacheEntry is thread-safe:
88+
// - All fields use atomic operations
89+
// - Value pointer is valid as long as version matches (SeqLock pattern)
90+
// - PyObjectRef uses atomic reference counting
91+
unsafe impl Send for TypeCacheEntry {}
92+
unsafe impl Sync for TypeCacheEntry {}
93+
94+
impl TypeCacheEntry {
95+
fn new() -> Self {
96+
Self {
97+
version: AtomicU32::new(0),
98+
name: AtomicPtr::new(core::ptr::null_mut()),
99+
value: AtomicPtr::new(core::ptr::null_mut()),
100+
}
101+
}
102+
103+
/// Take the value out of this entry, returning the owned PyObjectRef.
104+
/// Caller must ensure no concurrent reads can observe this entry
105+
/// (version should be set to 0 first).
106+
fn take_value(&self) -> Option<PyObjectRef> {
107+
let ptr = self.value.swap(core::ptr::null_mut(), Ordering::Relaxed);
108+
// SAFETY: non-null ptr was stored via PyObjectRef::into_raw
109+
NonNull::new(ptr).map(|nn| unsafe { PyObjectRef::from_raw(nn) })
110+
}
111+
}
112+
113+
// std::sync::LazyLock is used here (not crate::common::lock::LazyLock)
114+
// because TYPE_CACHE is a global shared across test threads. The common
115+
// LazyLock delegates to LazyCell in non-threading mode, which is !Sync.
116+
static TYPE_CACHE: std::sync::LazyLock<Box<[TypeCacheEntry]>> = std::sync::LazyLock::new(|| {
117+
(0..TYPE_CACHE_SIZE)
118+
.map(|_| TypeCacheEntry::new())
119+
.collect::<Vec<_>>()
120+
.into_boxed_slice()
121+
});
122+
123+
/// When true, find_name_in_mro skips populating the cache.
124+
/// Set during GC's type_cache_clear to prevent re-population from drops.
125+
static TYPE_CACHE_CLEARING: AtomicBool = AtomicBool::new(false);
126+
127+
/// MCACHE_HASH: XOR of version and name pointer hash, masked to cache size.
128+
#[inline]
129+
fn type_cache_hash(version: u32, name: &'static PyStrInterned) -> usize {
130+
let name_hash = (name as *const PyStrInterned as usize >> 3) as u32;
131+
((version ^ name_hash) as usize) & TYPE_CACHE_MASK
132+
}
133+
134+
/// Invalidate cache entries for a specific version tag and release values.
135+
/// Called from modified() when a type is changed.
136+
fn type_cache_clear_version(version: u32) {
137+
let mut to_drop = Vec::new();
138+
for entry in TYPE_CACHE.iter() {
139+
if entry.version.load(Ordering::Relaxed) == version {
140+
entry.version.store(0, Ordering::Release);
141+
if let Some(v) = entry.take_value() {
142+
to_drop.push(v);
143+
}
144+
}
145+
}
146+
drop(to_drop);
147+
}
148+
149+
/// Clear all method cache entries (_PyType_ClearCache).
150+
/// Called during GC collection to release strong references that might
151+
/// prevent cycle collection.
152+
///
153+
/// Sets TYPE_CACHE_CLEARING to suppress cache re-population during the
154+
/// entire operation, preventing concurrent lookups from repopulating
155+
/// entries while we're clearing them.
156+
pub fn type_cache_clear() {
157+
TYPE_CACHE_CLEARING.store(true, Ordering::Release);
158+
// Invalidate all entries and collect values.
159+
let mut to_drop = Vec::new();
160+
for entry in TYPE_CACHE.iter() {
161+
entry.version.store(0, Ordering::Release);
162+
if let Some(v) = entry.take_value() {
163+
to_drop.push(v);
164+
}
165+
}
166+
drop(to_drop);
167+
TYPE_CACHE_CLEARING.store(false, Ordering::Release);
168+
}
169+
64170
unsafe impl crate::object::Traverse for PyType {
65171
fn traverse(&self, tracer_fn: &mut crate::object::TraverseFn<'_>) {
66172
self.base.traverse(tracer_fn);
@@ -206,6 +312,18 @@ impl PyType {
206312
/// Assign a fresh version tag. Returns 0 if the version counter has been
207313
/// exhausted, in which case no new cache entries can be created.
208314
pub fn assign_version_tag(&self) -> u32 {
315+
let v = self.tp_version_tag.load(Ordering::Acquire);
316+
if v != 0 {
317+
return v;
318+
}
319+
320+
// Assign versions to all direct bases first (MRO invariant).
321+
for base in self.bases.read().iter() {
322+
if base.assign_version_tag() == 0 {
323+
return 0;
324+
}
325+
}
326+
209327
loop {
210328
let current = NEXT_TYPE_VERSION.load(Ordering::Relaxed);
211329
let Some(next) = current.checked_add(1) else {
@@ -223,7 +341,17 @@ impl PyType {
223341

224342
/// Invalidate this type's version tag and cascade to all subclasses.
225343
pub fn modified(&self) {
344+
// If already invalidated, all subclasses must also be invalidated
345+
// (guaranteed by the MRO invariant in assign_version_tag).
346+
let old_version = self.tp_version_tag.load(Ordering::Acquire);
347+
if old_version == 0 {
348+
return;
349+
}
226350
self.tp_version_tag.store(0, Ordering::Release);
351+
// Release strong references held by cache entries for this version.
352+
// We hold owned refs that would prevent GC of class attributes after
353+
// type deletion.
354+
type_cache_clear_version(old_version);
227355
let subclasses = self.subclasses.read();
228356
for weak_ref in subclasses.iter() {
229357
if let Some(sub) = weak_ref.upgrade() {
@@ -574,24 +702,104 @@ impl PyType {
574702

575703
pub fn set_attr(&self, attr_name: &'static PyStrInterned, value: PyObjectRef) {
576704
self.attributes.write().insert(attr_name, value);
705+
self.modified();
577706
}
578707

579-
/// This is the internal get_attr implementation for fast lookup on a class.
708+
/// Internal get_attr implementation for fast lookup on a class.
709+
/// Searches the full MRO (including self) with method cache acceleration.
580710
pub fn get_attr(&self, attr_name: &'static PyStrInterned) -> Option<PyObjectRef> {
581-
flame_guard!(format!("class_get_attr({:?})", attr_name));
582-
583-
self.get_direct_attr(attr_name)
584-
.or_else(|| self.get_super_attr(attr_name))
711+
self.find_name_in_mro(attr_name)
585712
}
586713

587714
pub fn get_direct_attr(&self, attr_name: &'static PyStrInterned) -> Option<PyObjectRef> {
588715
self.attributes.read().get(attr_name).cloned()
589716
}
590717

591-
/// Equivalent to CPython's find_name_in_mro
592-
/// Look in tp_dict of types in MRO - bypasses descriptors and other attribute access machinery
718+
/// find_name_in_mro with method cache (MCACHE).
719+
/// Looks in tp_dict of types in MRO, bypasses descriptors.
720+
///
721+
/// Uses a lock-free SeqLock pattern keyed by version:
722+
/// Read: load version → check name → load value → clone → re-check version
723+
/// Write: version=0 → swap value → set name → version=assigned
593724
fn find_name_in_mro(&self, name: &'static PyStrInterned) -> Option<PyObjectRef> {
594-
// mro[0] is self, so we just iterate through the entire MRO
725+
let version = self.tp_version_tag.load(Ordering::Acquire);
726+
if version != 0 {
727+
let idx = type_cache_hash(version, name);
728+
let entry = &TYPE_CACHE[idx];
729+
let v1 = entry.version.load(Ordering::Acquire);
730+
if v1 == version
731+
&& core::ptr::eq(
732+
entry.name.load(Ordering::Relaxed),
733+
name as *const _ as *mut _,
734+
)
735+
{
736+
let ptr = entry.value.load(Ordering::Acquire);
737+
if !ptr.is_null() {
738+
// SAFETY: The value pointer was stored via PyObjectRef::into_raw
739+
// and is valid as long as the version hasn't changed. We create
740+
// a temporary reference (ManuallyDrop prevents decrement), clone
741+
// it to get our own strong reference, then re-check the version
742+
// to confirm the entry wasn't invalidated during our read.
743+
let cloned = unsafe {
744+
let tmp = core::mem::ManuallyDrop::new(PyObjectRef::from_raw(
745+
NonNull::new_unchecked(ptr),
746+
));
747+
(*tmp).clone()
748+
};
749+
// SeqLock validation: if version changed, discard our clone
750+
let v2 = entry.version.load(Ordering::Acquire);
751+
if v2 == v1 {
752+
return Some(cloned);
753+
}
754+
drop(cloned);
755+
}
756+
}
757+
}
758+
759+
// Assign version BEFORE the MRO walk so that any concurrent
760+
// modified() call during the walk invalidates this version.
761+
let assigned = if version == 0 {
762+
self.assign_version_tag()
763+
} else {
764+
version
765+
};
766+
767+
// MRO walk
768+
let result = self.find_name_in_mro_uncached(name);
769+
770+
// Only cache positive results. Negative results are not cached to
771+
// avoid stale entries from transient MRO walk failures during
772+
// concurrent type modifications.
773+
if let Some(ref found) = result
774+
&& assigned != 0
775+
&& !TYPE_CACHE_CLEARING.load(Ordering::Acquire)
776+
&& self.tp_version_tag.load(Ordering::Acquire) == assigned
777+
{
778+
let idx = type_cache_hash(assigned, name);
779+
let entry = &TYPE_CACHE[idx];
780+
// Invalidate first to prevent readers from seeing partial state
781+
entry.version.store(0, Ordering::Release);
782+
// Swap in new value (refcount held by cache)
783+
let new_ptr = found.clone().into_raw().as_ptr();
784+
let old_ptr = entry.value.swap(new_ptr, Ordering::Relaxed);
785+
entry
786+
.name
787+
.store(name as *const _ as *mut _, Ordering::Relaxed);
788+
// Activate entry — Release ensures value/name writes are visible
789+
entry.version.store(assigned, Ordering::Release);
790+
// Drop previous occupant (its version was already invalidated)
791+
if !old_ptr.is_null() {
792+
unsafe {
793+
drop(PyObjectRef::from_raw(NonNull::new_unchecked(old_ptr)));
794+
}
795+
}
796+
}
797+
798+
result
799+
}
800+
801+
/// Raw MRO walk without cache.
802+
fn find_name_in_mro_uncached(&self, name: &'static PyStrInterned) -> Option<PyObjectRef> {
595803
for cls in self.mro.read().iter() {
596804
if let Some(value) = cls.attributes.read().get(name) {
597805
return Some(value.clone());
@@ -600,14 +808,9 @@ impl PyType {
600808
None
601809
}
602810

603-
/// Equivalent to CPython's _PyType_LookupRef
604-
/// Looks up a name through the MRO without setting an exception
811+
/// _PyType_LookupRef: look up a name through the MRO without setting an exception.
605812
pub fn lookup_ref(&self, name: &Py<PyStr>, vm: &VirtualMachine) -> Option<PyObjectRef> {
606-
// Get interned name for efficient lookup
607813
let interned_name = vm.ctx.interned_str(name)?;
608-
609-
// Use find_name_in_mro which matches CPython's behavior
610-
// This bypasses descriptors and other attribute access machinery
611814
self.find_name_in_mro(interned_name)
612815
}
613816

@@ -617,12 +820,37 @@ impl PyType {
617820
.find_map(|class| class.attributes.read().get(attr_name).cloned())
618821
}
619822

620-
// This is the internal has_attr implementation for fast lookup on a class.
823+
/// Fast lookup for attribute existence on a class.
621824
pub fn has_attr(&self, attr_name: &'static PyStrInterned) -> bool {
622-
self.attributes.read().contains_key(attr_name)
623-
|| self.mro.read()[1..]
624-
.iter()
625-
.any(|c| c.attributes.read().contains_key(attr_name))
825+
self.has_name_in_mro(attr_name)
826+
}
827+
828+
/// Check if attribute exists in MRO, using method cache for fast check.
829+
/// Unlike find_name_in_mro, avoids cloning the value on cache hit.
830+
fn has_name_in_mro(&self, name: &'static PyStrInterned) -> bool {
831+
let version = self.tp_version_tag.load(Ordering::Acquire);
832+
if version != 0 {
833+
let idx = type_cache_hash(version, name);
834+
let entry = &TYPE_CACHE[idx];
835+
let v1 = entry.version.load(Ordering::Acquire);
836+
if v1 == version
837+
&& core::ptr::eq(
838+
entry.name.load(Ordering::Relaxed),
839+
name as *const _ as *mut _,
840+
)
841+
{
842+
let ptr = entry.value.load(Ordering::Acquire);
843+
if !ptr.is_null() {
844+
let v2 = entry.version.load(Ordering::Acquire);
845+
if v2 == v1 {
846+
return true;
847+
}
848+
}
849+
}
850+
}
851+
852+
// Cache miss — use find_name_in_mro which populates cache
853+
self.find_name_in_mro(name).is_some()
626854
}
627855

628856
pub fn get_attributes(&self) -> PyAttributes {
@@ -1270,8 +1498,8 @@ impl PyType {
12701498
PySetterValue::Assign(ref val) => {
12711499
let key = identifier!(vm, __type_params__);
12721500
self.check_set_special_type_attr(key, vm)?;
1273-
let mut attrs = self.attributes.write();
1274-
attrs.insert(key, val.clone().into());
1501+
self.attributes.write().insert(key, val.clone().into());
1502+
self.modified();
12751503
}
12761504
PySetterValue::Delete => {
12771505
// For delete, we still need to check if the type is immutable
@@ -1281,9 +1509,9 @@ impl PyType {
12811509
self.slot_name()
12821510
)));
12831511
}
1284-
let mut attrs = self.attributes.write();
12851512
let key = identifier!(vm, __type_params__);
1286-
attrs.shift_remove(&key);
1513+
self.attributes.write().shift_remove(&key);
1514+
self.modified();
12871515
}
12881516
}
12891517
Ok(())

crates/vm/src/gc_state.rs

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -410,6 +410,10 @@ impl GcState {
410410
let generation = generation.min(2);
411411
let debug = self.get_debug();
412412

413+
// Clear the method cache to release strong references that
414+
// might prevent cycle collection (_PyType_ClearCache).
415+
crate::builtins::type_::type_cache_clear();
416+
413417
// Step 1: Gather objects from generations 0..=generation
414418
// Hold read locks for the entire collection to prevent other threads
415419
// from untracking objects while we're iterating.

0 commit comments

Comments (0)