Skip to content

Commit e8556b6

Browse files
committed
address review: invalidate init cache on type modification, add cspell words
1 parent 622f3c6 commit e8556b6

File tree

6 files changed

+181
-73
lines changed

6 files changed

+181
-73
lines changed

.cspell.json

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,10 @@
6060
"dedentations",
6161
"dedents",
6262
"deduped",
63+
"compactlong",
64+
"compactlongs",
6365
"deoptimize",
66+
"descrs",
6467
"downcastable",
6568
"downcasted",
6669
"dumpable",

crates/vm/src/builtins/function.rs

Lines changed: 44 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ use crate::{
1313
bytecode,
1414
class::PyClassImpl,
1515
common::wtf8::{Wtf8Buf, wtf8_concat},
16-
frame::Frame,
16+
frame::{Frame, FrameRef},
1717
function::{FuncArgs, OptionalArg, PyComparisonValue, PySetterValue},
1818
scope::Scope,
1919
types::{
@@ -689,11 +689,11 @@ impl Py<PyFunction> {
689689
capacity.checked_mul(core::mem::size_of::<usize>())
690690
}
691691

692-
/// Fast path for calling a simple function with exact positional args.
693-
/// Skips FuncArgs allocation, prepend_arg, and fill_locals_from_args.
694-
/// Only valid when: CO_OPTIMIZED, no VARARGS, no VARKEYWORDS, no kwonlyargs,
695-
/// and nargs == co_argcount.
696-
pub fn invoke_exact_args(&self, mut args: Vec<PyObjectRef>, vm: &VirtualMachine) -> PyResult {
692+
pub(crate) fn prepare_exact_args_frame(
693+
&self,
694+
mut args: Vec<PyObjectRef>,
695+
vm: &VirtualMachine,
696+
) -> FrameRef {
697697
let code: PyRef<PyCode> = (*self.code).to_owned();
698698

699699
debug_assert_eq!(args.len(), code.arg_count as usize);
@@ -704,16 +704,11 @@ impl Py<PyFunction> {
704704
.intersects(bytecode::CodeFlags::VARARGS | bytecode::CodeFlags::VARKEYWORDS)
705705
);
706706
debug_assert_eq!(code.kwonlyarg_count, 0);
707-
708-
// Generator/coroutine code objects are SIMPLE_FUNCTION in call
709-
// specialization classification, but their call path must still
710-
// go through invoke() to produce generator/coroutine objects.
711-
if code
712-
.flags
713-
.intersects(bytecode::CodeFlags::GENERATOR | bytecode::CodeFlags::COROUTINE)
714-
{
715-
return self.invoke(FuncArgs::from(args), vm);
716-
}
707+
debug_assert!(
708+
!code
709+
.flags
710+
.intersects(bytecode::CodeFlags::GENERATOR | bytecode::CodeFlags::COROUTINE)
711+
);
717712

718713
let locals = if code.flags.contains(bytecode::CodeFlags::NEWLOCALS) {
719714
None
@@ -727,20 +722,18 @@ impl Py<PyFunction> {
727722
self.builtins.clone(),
728723
self.closure.as_ref().map_or(&[], |c| c.as_slice()),
729724
Some(self.to_owned().into()),
730-
true, // Always use datastack (invoke_exact_args is never gen/coro)
725+
true, // Exact-args fast path is only used for non-gen/coro functions.
731726
vm,
732727
)
733728
.into_ref(&vm.ctx);
734729

735-
// Move args directly into fastlocals (no clone/refcount needed)
736730
{
737731
let fastlocals = unsafe { frame.fastlocals_mut() };
738732
for (slot, arg) in fastlocals.iter_mut().zip(args.drain(..)) {
739733
*slot = Some(arg);
740734
}
741735
}
742736

743-
// Handle cell2arg
744737
if let Some(cell2arg) = code.cell2arg.as_deref() {
745738
let fastlocals = unsafe { frame.fastlocals_mut() };
746739
for (cell_idx, arg_idx) in cell2arg.iter().enumerate().filter(|(_, i)| **i != -1) {
@@ -749,6 +742,36 @@ impl Py<PyFunction> {
749742
}
750743
}
751744

745+
frame
746+
}
747+
748+
/// Fast path for calling a simple function with exact positional args.
749+
/// Skips FuncArgs allocation, prepend_arg, and fill_locals_from_args.
750+
/// Only valid when: CO_OPTIMIZED, no VARARGS, no VARKEYWORDS, no kwonlyargs,
751+
/// and nargs == co_argcount.
752+
pub fn invoke_exact_args(&self, args: Vec<PyObjectRef>, vm: &VirtualMachine) -> PyResult {
753+
let code: PyRef<PyCode> = (*self.code).to_owned();
754+
755+
debug_assert_eq!(args.len(), code.arg_count as usize);
756+
debug_assert!(code.flags.contains(bytecode::CodeFlags::OPTIMIZED));
757+
debug_assert!(
758+
!code
759+
.flags
760+
.intersects(bytecode::CodeFlags::VARARGS | bytecode::CodeFlags::VARKEYWORDS)
761+
);
762+
debug_assert_eq!(code.kwonlyarg_count, 0);
763+
764+
// Generator/coroutine code objects are SIMPLE_FUNCTION in call
765+
// specialization classification, but their call path must still
766+
// go through invoke() to produce generator/coroutine objects.
767+
if code
768+
.flags
769+
.intersects(bytecode::CodeFlags::GENERATOR | bytecode::CodeFlags::COROUTINE)
770+
{
771+
return self.invoke(FuncArgs::from(args), vm);
772+
}
773+
let frame = self.prepare_exact_args_frame(args, vm);
774+
752775
let result = vm.run_frame(frame.clone());
753776
unsafe {
754777
if let Some(base) = frame.materialize_localsplus() {
@@ -1361,37 +1384,8 @@ pub(crate) fn vectorcall_function(
13611384
if is_simple && nargs == code.arg_count as usize {
13621385
// FAST PATH: simple positional-only call, exact arg count.
13631386
// Move owned args directly into fastlocals — no clone needed.
1364-
let locals = if code.flags.contains(bytecode::CodeFlags::NEWLOCALS) {
1365-
None // lazy allocation — most frames never access locals dict
1366-
} else {
1367-
Some(ArgMapping::from_dict_exact(zelf.globals.clone()))
1368-
};
1369-
1370-
let frame = Frame::new(
1371-
code.to_owned(),
1372-
Scope::new(locals, zelf.globals.clone()),
1373-
zelf.builtins.clone(),
1374-
zelf.closure.as_ref().map_or(&[], |c| c.as_slice()),
1375-
Some(zelf.to_owned().into()),
1376-
true, // Always use datastack (is_simple excludes gen/coro)
1377-
vm,
1378-
)
1379-
.into_ref(&vm.ctx);
1380-
1381-
{
1382-
let fastlocals = unsafe { frame.fastlocals_mut() };
1383-
for (slot, arg) in fastlocals.iter_mut().zip(args.drain(..nargs)) {
1384-
*slot = Some(arg);
1385-
}
1386-
}
1387-
1388-
if let Some(cell2arg) = code.cell2arg.as_deref() {
1389-
let fastlocals = unsafe { frame.fastlocals_mut() };
1390-
for (cell_idx, arg_idx) in cell2arg.iter().enumerate().filter(|(_, i)| **i != -1) {
1391-
let x = fastlocals[*arg_idx as usize].take();
1392-
frame.set_cell_contents(cell_idx, x);
1393-
}
1394-
}
1387+
args.truncate(nargs);
1388+
let frame = zelf.prepare_exact_args_frame(args, vm);
13951389

13961390
let result = vm.run_frame(frame.clone());
13971391
unsafe {

crates/vm/src/builtins/type.rs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -329,9 +329,9 @@ impl TypeSpecializationCache {
329329
#[inline]
330330
fn invalidate_for_type_modified(&self) {
331331
let _guard = self.write_lock.lock();
332-
// Match CPython _spec_cache contract: type modification invalidates
333-
// getitem cache by NULLing the pointer. `getitem_version` becomes
334-
// meaningless when getitem is NULL.
332+
// _spec_cache contract: type modification invalidates all cached
333+
// specialization functions.
334+
self.swap_init(None, None);
335335
self.swap_getitem(None, None);
336336
}
337337

crates/vm/src/frame.rs

Lines changed: 57 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1081,6 +1081,14 @@ fn specialization_nonnegative_compact_index(i: &PyInt, vm: &VirtualMachine) -> O
10811081
}
10821082
}
10831083

1084+
fn release_datastack_frame(frame: &Py<Frame>, vm: &VirtualMachine) {
1085+
unsafe {
1086+
if let Some(base) = frame.materialize_localsplus() {
1087+
vm.datastack_pop(base);
1088+
}
1089+
}
1090+
}
1091+
10841092
type BinaryOpExtendGuard = fn(&PyObject, &PyObject, &VirtualMachine) -> bool;
10851093
type BinaryOpExtendAction = fn(&PyObject, &PyObject, &VirtualMachine) -> Option<PyObjectRef>;
10861094

@@ -1249,6 +1257,52 @@ impl fmt::Debug for ExecutingFrame<'_> {
12491257
}
12501258

12511259
impl ExecutingFrame<'_> {
1260+
fn specialization_new_init_cleanup_frame(&self, vm: &VirtualMachine) -> FrameRef {
1261+
Frame::new(
1262+
vm.ctx.init_cleanup_code.clone(),
1263+
Scope::new(
1264+
Some(ArgMapping::from_dict_exact(vm.ctx.new_dict())),
1265+
self.globals.clone(),
1266+
),
1267+
self.builtins.clone(),
1268+
&[],
1269+
None,
1270+
true,
1271+
vm,
1272+
)
1273+
.into_ref(&vm.ctx)
1274+
}
1275+
1276+
fn specialization_run_init_cleanup_shim(
1277+
&self,
1278+
new_obj: PyObjectRef,
1279+
init_func: &Py<PyFunction>,
1280+
pos_args: Vec<PyObjectRef>,
1281+
vm: &VirtualMachine,
1282+
) -> PyResult<PyObjectRef> {
1283+
let shim = self.specialization_new_init_cleanup_frame(vm);
1284+
let shim_result = vm.with_frame_untraced(shim.clone(), |shim| {
1285+
shim.with_exec(vm, |mut exec| exec.push_value(new_obj.clone()));
1286+
1287+
let mut all_args = Vec::with_capacity(pos_args.len() + 1);
1288+
all_args.push(new_obj.clone());
1289+
all_args.extend(pos_args);
1290+
1291+
let init_frame = init_func.prepare_exact_args_frame(all_args, vm);
1292+
let init_result = vm.run_frame(init_frame.clone());
1293+
release_datastack_frame(&init_frame, vm);
1294+
let init_result = init_result?;
1295+
1296+
shim.with_exec(vm, |mut exec| exec.push_value(init_result));
1297+
match shim.run(vm)? {
1298+
ExecutionResult::Return(value) => Ok(value),
1299+
ExecutionResult::Yield(_) => unreachable!("_Py_InitCleanup shim cannot yield"),
1300+
}
1301+
});
1302+
release_datastack_frame(&shim, vm);
1303+
shim_result
1304+
}
1305+
12521306
#[inline(always)]
12531307
fn update_lasti(&mut self, f: impl FnOnce(&mut u32)) {
12541308
let mut val = self.lasti.load(Relaxed);
@@ -4737,24 +4791,9 @@ impl ExecutingFrame<'_> {
47374791
let pos_args: Vec<PyObjectRef> = self.pop_multiple(nargs as usize).collect();
47384792
let _null = self.pop_value_opt(); // self_or_null (None)
47394793
let _callable = self.pop_value(); // callable (type)
4740-
4741-
let mut all_args = Vec::with_capacity(pos_args.len() + 1);
4742-
all_args.push(new_obj.clone());
4743-
all_args.extend(pos_args);
4744-
4745-
// Match CPython's `_CREATE_INIT_FRAME` fast-path shape:
4746-
// run `__init__` as an exact-args Python function call.
4747-
let init_result = init_func.invoke_exact_args(all_args, vm)?;
4748-
4749-
// EXIT_INIT_CHECK: __init__ must return None
4750-
if !vm.is_none(&init_result) {
4751-
return Err(vm.new_type_error(format!(
4752-
"__init__() should return None, not '{}'",
4753-
init_result.class().name()
4754-
)));
4755-
}
4756-
4757-
self.push_value(new_obj);
4794+
let result = self
4795+
.specialization_run_init_cleanup_shim(new_obj, &init_func, pos_args, vm)?;
4796+
self.push_value(result);
47584797
return Ok(None);
47594798
}
47604799
self.execute_call_vectorcall(nargs, vm)

crates/vm/src/vm/context.rs

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ use crate::{
1414
object, pystr,
1515
type_::PyAttributes,
1616
},
17+
bytecode::{self, CodeFlags, CodeUnit, Instruction},
1718
class::StaticType,
1819
common::rc::PyRc,
1920
exceptions,
@@ -29,6 +30,7 @@ use malachite_bigint::BigInt;
2930
use num_complex::Complex64;
3031
use num_traits::ToPrimitive;
3132
use rustpython_common::lock::PyRwLock;
33+
use rustpython_compiler_core::{OneIndexed, SourceLocation};
3234

3335
#[derive(Debug)]
3436
pub struct Context {
@@ -49,6 +51,7 @@ pub struct Context {
4951
pub int_cache_pool: Vec<PyIntRef>,
5052
pub(crate) latin1_char_cache: Vec<PyRef<PyStr>>,
5153
pub(crate) ascii_char_cache: Vec<PyRef<PyStr>>,
54+
pub(crate) init_cleanup_code: PyRef<PyCode>,
5255
// there should only be exact objects of str in here, no non-str objects and no subclasses
5356
pub(crate) string_pool: StringPool,
5457
pub(crate) slot_new_wrapper: PyMethodDef,
@@ -353,6 +356,7 @@ impl Context {
353356
PyMethodFlags::METHOD,
354357
None,
355358
);
359+
let init_cleanup_code = Self::new_init_cleanup_code(&types, &names);
356360

357361
let empty_str = unsafe { string_pool.intern("", types.str_type.to_owned()) };
358362
let empty_bytes = create_object(PyBytes::from(Vec::new()), types.bytes_type);
@@ -379,6 +383,7 @@ impl Context {
379383
int_cache_pool,
380384
latin1_char_cache,
381385
ascii_char_cache,
386+
init_cleanup_code,
382387
string_pool,
383388
slot_new_wrapper,
384389
names,
@@ -388,6 +393,51 @@ impl Context {
388393
}
389394
}
390395

396+
fn new_init_cleanup_code(types: &TypeZoo, names: &ConstName) -> PyRef<PyCode> {
397+
let loc = SourceLocation {
398+
line: OneIndexed::MIN,
399+
character_offset: OneIndexed::from_zero_indexed(0),
400+
};
401+
let instructions = [
402+
CodeUnit {
403+
op: Instruction::ExitInitCheck,
404+
arg: 0.into(),
405+
},
406+
CodeUnit {
407+
op: Instruction::ReturnValue,
408+
arg: 0.into(),
409+
},
410+
CodeUnit {
411+
op: Instruction::Resume {
412+
context: bytecode::Arg::marker(),
413+
},
414+
arg: 0.into(),
415+
},
416+
];
417+
let code = bytecode::CodeObject {
418+
instructions: instructions.into(),
419+
locations: vec![(loc, loc); instructions.len()].into_boxed_slice(),
420+
flags: CodeFlags::OPTIMIZED,
421+
posonlyarg_count: 0,
422+
arg_count: 0,
423+
kwonlyarg_count: 0,
424+
source_path: names.__init__,
425+
first_line_number: None,
426+
max_stackdepth: 2,
427+
obj_name: names.__init__,
428+
qualname: names.__init__,
429+
cell2arg: None,
430+
constants: Vec::new().into_boxed_slice(),
431+
names: Vec::new().into_boxed_slice(),
432+
varnames: Vec::new().into_boxed_slice(),
433+
cellvars: Vec::new().into_boxed_slice(),
434+
freevars: Vec::new().into_boxed_slice(),
435+
linetable: Vec::new().into_boxed_slice(),
436+
exceptiontable: Vec::new().into_boxed_slice(),
437+
};
438+
PyRef::new_ref(PyCode::new(code), types.code_type.to_owned(), None)
439+
}
440+
391441
pub fn intern_str<S: InternableString>(&self, s: S) -> &'static PyStrInterned {
392442
unsafe { self.string_pool.intern(s, self.types.str_type.to_owned()) }
393443
}

0 commit comments

Comments
 (0)