Skip to content

Commit e36962b

Browse files
committed
Lazy locals dict for NEWLOCALS frames
Defer locals dict allocation for function frames until first access. Most function frames only use fastlocals and never touch the locals dict, so this skips one dict heap allocation per function call. Also remove a redundant code.clone() in invoke_with_locals. Func call ~23% faster, method call ~15% faster in benchmarks.
1 parent 0a6a6f8 commit e36962b

File tree

6 files changed

+142
-53
lines changed

6 files changed

+142
-53
lines changed

benches/microbenchmarks.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -134,6 +134,8 @@ fn bench_rustpython_code(group: &mut BenchmarkGroup<WallTime>, bench: &MicroBenc
134134
if let Some(idx) = iterations {
135135
scope
136136
.locals
137+
.as_ref()
138+
.expect("new_scope_with_builtins always provides locals")
137139
.as_object()
138140
.set_item("ITERATIONS", vm.new_pyobj(idx), vm)
139141
.expect("Error adding ITERATIONS local variable");

crates/vm/src/builtins/function.rs

Lines changed: 9 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -548,17 +548,20 @@ impl Py<PyFunction> {
548548
let code = self.code.lock().clone();
549549

550550
let locals = if code.flags.contains(bytecode::CodeFlags::NEWLOCALS) {
551-
ArgMapping::from_dict_exact(vm.ctx.new_dict())
551+
None
552552
} else if let Some(locals) = locals {
553-
locals
553+
Some(locals)
554554
} else {
555-
ArgMapping::from_dict_exact(self.globals.clone())
555+
Some(ArgMapping::from_dict_exact(self.globals.clone()))
556556
};
557557

558+
let is_gen = code.flags.contains(bytecode::CodeFlags::GENERATOR);
559+
let is_coro = code.flags.contains(bytecode::CodeFlags::COROUTINE);
560+
558561
// Construct frame:
559562
let frame = Frame::new(
560-
code.clone(),
561-
Scope::new(Some(locals), self.globals.clone()),
563+
code,
564+
Scope::new(locals, self.globals.clone()),
562565
self.builtins.clone(),
563566
self.closure.as_ref().map_or(&[], |c| c.as_slice()),
564567
Some(self.to_owned().into()),
@@ -567,10 +570,6 @@ impl Py<PyFunction> {
567570
.into_ref(&vm.ctx);
568571

569572
self.fill_locals_from_args(&frame, func_args, vm)?;
570-
571-
// If we have a generator, create a new generator
572-
let is_gen = code.flags.contains(bytecode::CodeFlags::GENERATOR);
573-
let is_coro = code.flags.contains(bytecode::CodeFlags::COROUTINE);
574573
match (is_gen, is_coro) {
575574
(true, false) => {
576575
let obj = PyGenerator::new(frame.clone(), self.__name__(), self.__qualname__())
@@ -629,11 +628,9 @@ impl Py<PyFunction> {
629628
pub fn invoke_exact_args(&self, args: &[PyObjectRef], vm: &VirtualMachine) -> PyResult {
630629
let code = self.code.lock().clone();
631630

632-
let locals = ArgMapping::from_dict_exact(vm.ctx.new_dict());
633-
634631
let frame = Frame::new(
635632
code.clone(),
636-
Scope::new(Some(locals), self.globals.clone()),
633+
Scope::new(None, self.globals.clone()),
637634
self.builtins.clone(),
638635
self.closure.as_ref().map_or(&[], |c| c.as_slice()),
639636
Some(self.to_owned().into()),

crates/vm/src/frame.rs

Lines changed: 115 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ use crate::{
1616
bytecode::{
1717
self, ADAPTIVE_BACKOFF_VALUE, Arg, Instruction, LoadAttr, LoadSuperAttr, SpecialMethod,
1818
},
19-
convert::{IntoObject, ToPyResult},
19+
convert::ToPyResult,
2020
coroutine::Coro,
2121
exceptions::ExceptionCtor,
2222
function::{ArgMapping, Either, FuncArgs},
@@ -41,7 +41,7 @@ use malachite_bigint::BigInt;
4141
use rustpython_common::atomic::{PyAtomic, Radium};
4242
use rustpython_common::{
4343
boxvec::BoxVec,
44-
lock::PyMutex,
44+
lock::{OnceCell, PyMutex},
4545
wtf8::{Wtf8, Wtf8Buf, wtf8_concat},
4646
};
4747
use rustpython_compiler_core::SourceLocation;
@@ -148,13 +148,93 @@ unsafe impl Traverse for FastLocals {
148148
}
149149
}
150150

151+
/// Lazy locals dict for frames. For NEWLOCALS frames, the dict is
152+
/// only allocated on first access (most function frames never need it).
153+
pub struct FrameLocals {
154+
inner: OnceCell<ArgMapping>,
155+
}
156+
157+
impl FrameLocals {
158+
/// Create with an already-initialized locals mapping (non-NEWLOCALS frames).
159+
fn with_locals(locals: ArgMapping) -> Self {
160+
let cell = OnceCell::new();
161+
let _ = cell.set(locals);
162+
Self { inner: cell }
163+
}
164+
165+
/// Create an empty lazy locals (for NEWLOCALS frames).
166+
/// The dict will be created on first access.
167+
fn lazy() -> Self {
168+
Self {
169+
inner: OnceCell::new(),
170+
}
171+
}
172+
173+
/// Get the locals mapping, creating it lazily if needed.
174+
#[inline]
175+
pub fn get_or_create(&self, vm: &VirtualMachine) -> &ArgMapping {
176+
self.inner
177+
.get_or_init(|| ArgMapping::from_dict_exact(vm.ctx.new_dict()))
178+
}
179+
180+
/// Get the locals mapping if already created.
181+
#[inline]
182+
pub fn get(&self) -> Option<&ArgMapping> {
183+
self.inner.get()
184+
}
185+
186+
#[inline]
187+
pub fn mapping(&self, vm: &VirtualMachine) -> crate::protocol::PyMapping<'_> {
188+
self.get_or_create(vm).mapping()
189+
}
190+
191+
#[inline]
192+
pub fn clone_mapping(&self, vm: &VirtualMachine) -> ArgMapping {
193+
self.get_or_create(vm).clone()
194+
}
195+
196+
pub fn into_object(&self, vm: &VirtualMachine) -> PyObjectRef {
197+
self.clone_mapping(vm).into()
198+
}
199+
200+
pub fn as_object(&self, vm: &VirtualMachine) -> &PyObject {
201+
self.get_or_create(vm).obj()
202+
}
203+
}
204+
205+
impl fmt::Debug for FrameLocals {
206+
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
207+
f.debug_struct("FrameLocals")
208+
.field("initialized", &self.inner.get().is_some())
209+
.finish()
210+
}
211+
}
212+
213+
impl Clone for FrameLocals {
214+
fn clone(&self) -> Self {
215+
let cell = OnceCell::new();
216+
if let Some(locals) = self.inner.get() {
217+
let _ = cell.set(locals.clone());
218+
}
219+
Self { inner: cell }
220+
}
221+
}
222+
223+
unsafe impl Traverse for FrameLocals {
224+
fn traverse(&self, tracer_fn: &mut TraverseFn<'_>) {
225+
if let Some(locals) = self.inner.get() {
226+
locals.traverse(tracer_fn);
227+
}
228+
}
229+
}
230+
151231
#[pyclass(module = false, name = "frame", traverse = "manual")]
152232
pub struct Frame {
153233
pub code: PyRef<PyCode>,
154234
pub func_obj: Option<PyObjectRef>,
155235

156236
pub fastlocals: FastLocals,
157-
pub locals: ArgMapping,
237+
pub locals: FrameLocals,
158238
pub globals: PyDictRef,
159239
pub builtins: PyObjectRef,
160240

@@ -265,7 +345,13 @@ impl Frame {
265345

266346
Self {
267347
fastlocals: FastLocals::new(fastlocals_vec.into_boxed_slice()),
268-
locals: scope.locals,
348+
locals: match scope.locals {
349+
Some(locals) => FrameLocals::with_locals(locals),
350+
None if code.flags.contains(bytecode::CodeFlags::NEWLOCALS) => FrameLocals::lazy(),
351+
None => {
352+
FrameLocals::with_locals(ArgMapping::from_dict_exact(scope.globals.clone()))
353+
}
354+
},
269355
globals: scope.globals,
270356
builtins,
271357
code,
@@ -378,11 +464,12 @@ impl Frame {
378464
let code = &**self.code;
379465
// SAFETY: Called before generator resume; no concurrent access.
380466
let fastlocals = unsafe { self.fastlocals.borrow_mut() };
467+
let locals_map = self.locals.mapping(vm);
381468
for (i, &varname) in code.varnames.iter().enumerate() {
382469
if i >= fastlocals.len() {
383470
break;
384471
}
385-
match self.locals.mapping().subscript(varname, vm) {
472+
match locals_map.subscript(varname, vm) {
386473
Ok(value) => fastlocals[i] = Some(value),
387474
Err(e) if e.fast_isinstance(vm.ctx.exceptions.key_error) => {}
388475
Err(e) => return Err(e),
@@ -401,12 +488,13 @@ impl Frame {
401488
let code = &**self.code;
402489
let map = &code.varnames;
403490
let j = core::cmp::min(map.len(), code.varnames.len());
491+
let locals_map = locals.mapping(vm);
404492
if !code.varnames.is_empty() {
405493
// SAFETY: Either _guard holds the state mutex (frame not executing),
406494
// or we're in a trace callback on the same thread that holds it.
407495
let fastlocals = unsafe { self.fastlocals.borrow() };
408496
for (&k, v) in zip(&map[..j], fastlocals) {
409-
match locals.mapping().ass_subscript(k, v.clone(), vm) {
497+
match locals_map.ass_subscript(k, v.clone(), vm) {
410498
Ok(()) => {}
411499
Err(e) if e.fast_isinstance(vm.ctx.exceptions.key_error) => {}
412500
Err(e) => return Err(e),
@@ -416,7 +504,7 @@ impl Frame {
416504
if !code.cellvars.is_empty() || !code.freevars.is_empty() {
417505
for (i, &k) in code.cellvars.iter().enumerate() {
418506
let cell_value = self.get_cell_contents(i);
419-
match locals.mapping().ass_subscript(k, cell_value, vm) {
507+
match locals_map.ass_subscript(k, cell_value, vm) {
420508
Ok(()) => {}
421509
Err(e) if e.fast_isinstance(vm.ctx.exceptions.key_error) => {}
422510
Err(e) => return Err(e),
@@ -425,15 +513,15 @@ impl Frame {
425513
if code.flags.contains(bytecode::CodeFlags::OPTIMIZED) {
426514
for (i, &k) in code.freevars.iter().enumerate() {
427515
let cell_value = self.get_cell_contents(code.cellvars.len() + i);
428-
match locals.mapping().ass_subscript(k, cell_value, vm) {
516+
match locals_map.ass_subscript(k, cell_value, vm) {
429517
Ok(()) => {}
430518
Err(e) if e.fast_isinstance(vm.ctx.exceptions.key_error) => {}
431519
Err(e) => return Err(e),
432520
}
433521
}
434522
}
435523
}
436-
Ok(locals.clone())
524+
Ok(locals.clone_mapping(vm))
437525
}
438526
}
439527

@@ -534,7 +622,7 @@ impl Py<Frame> {
534622
struct ExecutingFrame<'a> {
535623
code: &'a PyRef<PyCode>,
536624
fastlocals: &'a FastLocals,
537-
locals: &'a ArgMapping,
625+
locals: &'a FrameLocals,
538626
globals: &'a PyDictRef,
539627
builtins: &'a PyObjectRef,
540628
/// Cached downcast of builtins to PyDict for fast LOAD_GLOBAL.
@@ -1377,7 +1465,7 @@ impl ExecutingFrame<'_> {
13771465
}
13781466
Instruction::DeleteName(idx) => {
13791467
let name = self.code.names[idx.get(arg) as usize];
1380-
let res = self.locals.mapping().ass_subscript(name, None, vm);
1468+
let res = self.locals.mapping(vm).ass_subscript(name, None, vm);
13811469

13821470
match res {
13831471
Ok(()) => {}
@@ -1750,7 +1838,7 @@ impl ExecutingFrame<'_> {
17501838
}
17511839
Instruction::LoadLocals => {
17521840
// Push the locals dict onto the stack
1753-
let locals = self.locals.clone().into_object();
1841+
let locals = self.locals.into_object(vm);
17541842
self.push_value(locals);
17551843
Ok(None)
17561844
}
@@ -1977,7 +2065,7 @@ impl ExecutingFrame<'_> {
19772065
}
19782066
Instruction::LoadName(idx) => {
19792067
let name = self.code.names[idx.get(arg) as usize];
1980-
let result = self.locals.mapping().subscript(name, vm);
2068+
let result = self.locals.mapping(vm).subscript(name, vm);
19812069
match result {
19822070
Ok(x) => self.push_value(x),
19832071
Err(e) if e.fast_isinstance(vm.ctx.exceptions.key_error) => {
@@ -2483,7 +2571,9 @@ impl ExecutingFrame<'_> {
24832571
Instruction::StoreName(idx) => {
24842572
let name = self.code.names[idx.get(arg) as usize];
24852573
let value = self.pop_value();
2486-
self.locals.mapping().ass_subscript(name, Some(value), vm)?;
2574+
self.locals
2575+
.mapping(vm)
2576+
.ass_subscript(name, Some(value), vm)?;
24872577
Ok(None)
24882578
}
24892579
Instruction::StoreSlice => {
@@ -3535,20 +3625,19 @@ impl ExecutingFrame<'_> {
35353625
})
35363626
};
35373627

3628+
let locals_map = self.locals.mapping(vm);
35383629
if let Ok(all) = dict.get_item(identifier!(vm, __all__), vm) {
35393630
let items: Vec<PyObjectRef> = all.try_to_value(vm)?;
35403631
for item in items {
35413632
let name = require_str(item, "__all__")?;
35423633
let value = module.get_attr(&*name, vm)?;
3543-
self.locals
3544-
.mapping()
3545-
.ass_subscript(&name, Some(value), vm)?;
3634+
locals_map.ass_subscript(&name, Some(value), vm)?;
35463635
}
35473636
} else {
35483637
for (k, v) in dict {
35493638
let k = require_str(k, "__dict__")?;
35503639
if !k.as_bytes().starts_with(b"_") {
3551-
self.locals.mapping().ass_subscript(&k, Some(v), vm)?;
3640+
locals_map.ass_subscript(&k, Some(v), vm)?;
35523641
}
35533642
}
35543643
}
@@ -4240,23 +4329,15 @@ impl ExecutingFrame<'_> {
42404329
#[cold]
42414330
fn setup_annotations(&mut self, vm: &VirtualMachine) -> FrameResult {
42424331
let __annotations__ = identifier!(vm, __annotations__);
4332+
let locals_obj = self.locals.as_object(vm);
42434333
// Try using locals as dict first, if not, fallback to generic method.
4244-
let has_annotations = match self
4245-
.locals
4246-
.clone()
4247-
.into_object()
4248-
.downcast_exact::<PyDict>(vm)
4249-
{
4250-
Ok(d) => d.contains_key(__annotations__, vm),
4251-
Err(o) => {
4252-
let needle = __annotations__.as_object();
4253-
self._in(vm, needle, &o)?
4254-
}
4334+
let has_annotations = if let Some(d) = locals_obj.downcast_ref_if_exact::<PyDict>(vm) {
4335+
d.contains_key(__annotations__, vm)
4336+
} else {
4337+
self._in(vm, __annotations__.as_object(), locals_obj)?
42554338
};
42564339
if !has_annotations {
4257-
self.locals
4258-
.as_object()
4259-
.set_item(__annotations__, vm.ctx.new_dict().into(), vm)?;
4340+
locals_obj.set_item(__annotations__, vm.ctx.new_dict().into(), vm)?;
42604341
}
42614342
Ok(None)
42624343
}
@@ -5076,12 +5157,11 @@ impl fmt::Debug for Frame {
50765157
s
50775158
});
50785159
// TODO: fix this up
5079-
let locals = self.locals.clone();
50805160
write!(
50815161
f,
5082-
"Frame Object {{ \n Stack:{}\n Locals:{:?}\n}}",
5162+
"Frame Object {{ \n Stack:{}\n Locals initialized:{}\n}}",
50835163
stack_str,
5084-
locals.into_object()
5164+
self.locals.get().is_some()
50855165
)
50865166
}
50875167
}

crates/vm/src/scope.rs

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@ use alloc::fmt;
33

44
#[derive(Clone)]
55
pub struct Scope {
6-
pub locals: ArgMapping,
6+
pub locals: Option<ArgMapping>,
77
pub globals: PyDictRef,
88
}
99

@@ -17,7 +17,6 @@ impl fmt::Debug for Scope {
1717
impl Scope {
1818
#[inline]
1919
pub fn new(locals: Option<ArgMapping>, globals: PyDictRef) -> Self {
20-
let locals = locals.unwrap_or_else(|| ArgMapping::from_dict_exact(globals.clone()));
2120
Self { locals, globals }
2221
}
2322

@@ -31,6 +30,8 @@ impl Scope {
3130
.set_item("__builtins__", vm.builtins.clone().into(), vm)
3231
.unwrap();
3332
}
33+
// For module-level code, locals defaults to globals
34+
let locals = locals.or_else(|| Some(ArgMapping::from_dict_exact(globals.clone())));
3435
Self::new(locals, globals)
3536
}
3637

crates/vm/src/vm/mod.rs

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1271,7 +1271,10 @@ impl VirtualMachine {
12711271
.map_err(|_| self.new_import_error("__import__ not found", module.to_owned()))?;
12721272

12731273
let (locals, globals) = if let Some(frame) = self.current_frame() {
1274-
(Some(frame.locals.clone()), Some(frame.globals.clone()))
1274+
(
1275+
Some(frame.locals.clone_mapping(self)),
1276+
Some(frame.globals.clone()),
1277+
)
12751278
} else {
12761279
(None, None)
12771280
};

0 commit comments

Comments
 (0)