Skip to content

Commit be0c3ca

Browse files
Add vectorcall (PEP 590) dispatch for function calls (#7329)
* Add vectorcall (PEP 590) dispatch for function calls

  Add VectorCallFunc slot to PyTypeSlots and vectorcall dispatch path in the interpreter loop for Call and CallKw instructions. Implement vectorcall for PyFunction (with fast path for simple positional-only calls that fills fastlocals directly), PyBoundMethod (avoids prepend_arg O(n) shift), and PyNativeFunction. Add FuncArgs::from_vectorcall helper for fallback conversion. Vectorcall slot is inherited with call slot and cleared when __call__ is overridden in Python subclasses.

* Optimize vectorcall: move args instead of clone, use vectorcall in specialized paths

  - invoke_exact_args takes Vec by value and uses drain() to move args into fastlocals instead of cloning (eliminates refcount overhead)
  - CallPyGeneral and CallBoundMethodGeneral now call vectorcall_function directly instead of going through FuncArgs + prepend_arg + invoke
  - CallKwPy and CallKwBoundMethod use vectorcall_function with kwnames
  - vectorcall_bound_method uses insert(0) on existing Vec instead of allocating a second Vec

* Auto-format: cargo fmt --all

* Fix vectorcall_native_function kwarg slice out-of-bounds

  When needs_self was true and kwargs were present, pos_args only contained positional args (self + original positionals) but from_vectorcall expected kwarg values to follow in the slice. Build the full args array (self + all original args including kwarg values) before passing to from_vectorcall.

---------

Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com>
1 parent 2f2a7da commit be0c3ca

File tree

7 files changed

+453
-45
lines changed

7 files changed

+453
-45
lines changed

crates/vm/src/builtins/builtin_func.rs

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -224,8 +224,44 @@ impl fmt::Debug for PyNativeMethod {
224224
}
225225
}
226226

227+
/// Vectorcall for builtin functions (PEP 590).
228+
/// Avoids `prepend_arg` O(n) shift by building args with self at front.
229+
fn vectorcall_native_function(
230+
zelf_obj: &PyObject,
231+
args: Vec<PyObjectRef>,
232+
nargs: usize,
233+
kwnames: Option<&[PyObjectRef]>,
234+
vm: &VirtualMachine,
235+
) -> PyResult {
236+
let zelf: &Py<PyNativeFunction> = zelf_obj.downcast_ref().unwrap();
237+
238+
// Build FuncArgs with self already at position 0 (no insert(0) needed)
239+
let needs_self = zelf
240+
.zelf
241+
.as_ref()
242+
.is_some_and(|_| !zelf.value.flags.contains(PyMethodFlags::STATIC));
243+
244+
let func_args = if needs_self {
245+
let self_obj = zelf.zelf.as_ref().unwrap().clone();
246+
let mut all_args = Vec::with_capacity(args.len() + 1);
247+
all_args.push(self_obj);
248+
all_args.extend(args);
249+
FuncArgs::from_vectorcall(&all_args, nargs + 1, kwnames)
250+
} else {
251+
FuncArgs::from_vectorcall(&args, nargs, kwnames)
252+
};
253+
254+
(zelf.value.func)(vm, func_args)
255+
}
256+
227257
pub fn init(context: &'static Context) {
228258
PyNativeFunction::extend_class(context, context.types.builtin_function_or_method_type);
259+
context
260+
.types
261+
.builtin_function_or_method_type
262+
.slots
263+
.vectorcall
264+
.store(Some(vectorcall_native_function));
229265
}
230266

231267
/// Wrapper that provides access to the common PyNativeFunction data

crates/vm/src/builtins/function.rs

Lines changed: 103 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -648,7 +648,7 @@ impl Py<PyFunction> {
648648
/// Skips FuncArgs allocation, prepend_arg, and fill_locals_from_args.
649649
/// Only valid when: no VARARGS, no VARKEYWORDS, no kwonlyargs, not generator/coroutine,
650650
/// and nargs == co_argcount.
651-
pub fn invoke_exact_args(&self, args: &[PyObjectRef], vm: &VirtualMachine) -> PyResult {
651+
pub fn invoke_exact_args(&self, mut args: Vec<PyObjectRef>, vm: &VirtualMachine) -> PyResult {
652652
let code: PyRef<PyCode> = (*self.code).to_owned();
653653

654654
debug_assert_eq!(args.len(), code.arg_count as usize);
@@ -671,11 +671,11 @@ impl Py<PyFunction> {
671671
)
672672
.into_ref(&vm.ctx);
673673

674-
// Copy args directly into fastlocals
674+
// Move args directly into fastlocals (no clone/refcount needed)
675675
{
676676
let fastlocals = unsafe { frame.fastlocals.borrow_mut() };
677-
for (i, arg) in args.iter().enumerate() {
678-
fastlocals[i] = Some(arg.clone());
677+
for (slot, arg) in fastlocals.iter_mut().zip(args.drain(..)) {
678+
*slot = Some(arg);
679679
}
680680
}
681681

@@ -1253,8 +1253,107 @@ impl PyCell {
12531253
}
12541254
}
12551255

1256+
/// Vectorcall implementation for PyFunction (PEP 590).
1257+
/// Takes owned args to avoid cloning when filling fastlocals.
1258+
pub(crate) fn vectorcall_function(
1259+
zelf_obj: &PyObject,
1260+
mut args: Vec<PyObjectRef>,
1261+
nargs: usize,
1262+
kwnames: Option<&[PyObjectRef]>,
1263+
vm: &VirtualMachine,
1264+
) -> PyResult {
1265+
let zelf: &Py<PyFunction> = zelf_obj.downcast_ref().unwrap();
1266+
let code: &Py<PyCode> = &zelf.code;
1267+
1268+
let has_kwargs = kwnames.is_some_and(|kw| !kw.is_empty());
1269+
let is_simple = !has_kwargs
1270+
&& !code.flags.contains(bytecode::CodeFlags::VARARGS)
1271+
&& !code.flags.contains(bytecode::CodeFlags::VARKEYWORDS)
1272+
&& code.kwonlyarg_count == 0
1273+
&& !code
1274+
.flags
1275+
.intersects(bytecode::CodeFlags::GENERATOR | bytecode::CodeFlags::COROUTINE);
1276+
1277+
if is_simple && nargs == code.arg_count as usize {
1278+
// FAST PATH: simple positional-only call, exact arg count.
1279+
// Move owned args directly into fastlocals — no clone needed.
1280+
let locals = if code.flags.contains(bytecode::CodeFlags::NEWLOCALS) {
1281+
ArgMapping::from_dict_exact(vm.ctx.new_dict())
1282+
} else {
1283+
ArgMapping::from_dict_exact(zelf.globals.clone())
1284+
};
1285+
1286+
let frame = Frame::new(
1287+
code.to_owned(),
1288+
Scope::new(Some(locals), zelf.globals.clone()),
1289+
zelf.builtins.clone(),
1290+
zelf.closure.as_ref().map_or(&[], |c| c.as_slice()),
1291+
Some(zelf.to_owned().into()),
1292+
vm,
1293+
)
1294+
.into_ref(&vm.ctx);
1295+
1296+
{
1297+
let fastlocals = unsafe { frame.fastlocals.borrow_mut() };
1298+
for (slot, arg) in fastlocals.iter_mut().zip(args.drain(..nargs)) {
1299+
*slot = Some(arg);
1300+
}
1301+
}
1302+
1303+
if let Some(cell2arg) = code.cell2arg.as_deref() {
1304+
let fastlocals = unsafe { frame.fastlocals.borrow_mut() };
1305+
for (cell_idx, arg_idx) in cell2arg.iter().enumerate().filter(|(_, i)| **i != -1) {
1306+
let x = fastlocals[*arg_idx as usize].take();
1307+
frame.set_cell_contents(cell_idx, x);
1308+
}
1309+
}
1310+
1311+
return vm.run_frame(frame);
1312+
}
1313+
1314+
// SLOW PATH: construct FuncArgs from owned Vec and delegate to invoke()
1315+
let func_args = if has_kwargs {
1316+
FuncArgs::from_vectorcall(&args, nargs, kwnames)
1317+
} else {
1318+
args.truncate(nargs);
1319+
FuncArgs::from(args)
1320+
};
1321+
zelf.invoke(func_args, vm)
1322+
}
1323+
1324+
/// Vectorcall implementation for PyBoundMethod (PEP 590).
1325+
fn vectorcall_bound_method(
1326+
zelf_obj: &PyObject,
1327+
mut args: Vec<PyObjectRef>,
1328+
nargs: usize,
1329+
kwnames: Option<&[PyObjectRef]>,
1330+
vm: &VirtualMachine,
1331+
) -> PyResult {
1332+
let zelf: &Py<PyBoundMethod> = zelf_obj.downcast_ref().unwrap();
1333+
1334+
// Insert self at front of existing Vec (avoids 2nd allocation).
1335+
// O(n) memmove is cheaper than a 2nd heap alloc+dealloc for typical arg counts.
1336+
args.insert(0, zelf.object.clone());
1337+
let new_nargs = nargs + 1;
1338+
zelf.function.vectorcall(args, new_nargs, kwnames, vm)
1339+
}
1340+
12561341
pub fn init(context: &'static Context) {
12571342
PyFunction::extend_class(context, context.types.function_type);
1343+
context
1344+
.types
1345+
.function_type
1346+
.slots
1347+
.vectorcall
1348+
.store(Some(vectorcall_function));
1349+
12581350
PyBoundMethod::extend_class(context, context.types.bound_method_type);
1351+
context
1352+
.types
1353+
.bound_method_type
1354+
.slots
1355+
.vectorcall
1356+
.store(Some(vectorcall_bound_method));
1357+
12591358
PyCell::extend_class(context, context.types.cell_type);
12601359
}

0 commit comments

Comments
 (0)