Skip to content

Commit c717101

Browse files
committed
Improve codegen bytecode parity
1 parent 3f9307f commit c717101

File tree

4 files changed

+426
-142
lines changed

4 files changed

+426
-142
lines changed

Lib/test/test_peepholer.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -529,7 +529,6 @@ def f(x):
529529
self.assertEqual(len(returns), 1)
530530
self.check_lnotab(f)
531531

532-
@unittest.expectedFailure # TODO: RUSTPYTHON; KeyError: 20
533532
def test_elim_jump_to_return(self):
534533
# JUMP_FORWARD to RETURN --> RETURN
535534
def f(cond, true_value, false_value):

crates/codegen/src/compile.rs

Lines changed: 186 additions & 75 deletions
Original file line numberDiff line numberDiff line change
@@ -3216,20 +3216,19 @@ impl Compiler {
32163216
emit!(self, PseudoInstruction::PopBlock);
32173217
}
32183218

3219-
// Create a block for normal path continuation (after handler body succeeds)
3220-
let handler_normal_exit = self.new_block();
3221-
emit!(
3222-
self,
3223-
PseudoInstruction::JumpNoInterrupt {
3224-
delta: handler_normal_exit,
3225-
}
3226-
);
3227-
32283219
// cleanup_end block for named handler
32293220
// IMPORTANT: In CPython, cleanup_end is within outer SETUP_CLEANUP scope.
32303221
// so when RERAISE is executed, it goes to the cleanup block which does POP_EXCEPT.
32313222
// We MUST compile cleanup_end BEFORE popping ExceptionHandler so RERAISE routes to cleanup_block.
32323223
if let Some(cleanup_end) = handler_cleanup_block {
3224+
let handler_normal_exit = self.new_block();
3225+
emit!(
3226+
self,
3227+
PseudoInstruction::JumpNoInterrupt {
3228+
delta: handler_normal_exit,
3229+
}
3230+
);
3231+
32333232
self.switch_to_block(cleanup_end);
32343233
if let Some(alias) = name {
32353234
// name = None; del name; before RERAISE
@@ -3242,10 +3241,10 @@ impl Compiler {
32423241
// This RERAISE is within ExceptionHandler scope, so it routes to cleanup_block
32433242
// which does COPY 3; POP_EXCEPT; RERAISE
32443243
emit!(self, Instruction::Reraise { depth: 1 });
3245-
}
32463244

3247-
// Switch to normal exit block - this is where handler body success continues
3248-
self.switch_to_block(handler_normal_exit);
3245+
// Switch to normal exit block - this is where handler body success continues
3246+
self.switch_to_block(handler_normal_exit);
3247+
}
32493248

32503249
// PopBlock for outer SETUP_CLEANUP (ExceptionHandler)
32513250
emit!(self, PseudoInstruction::PopBlock);
@@ -7144,8 +7143,11 @@ impl Compiler {
71447143
condition: bool,
71457144
target_block: BlockIdx,
71467145
) -> CompileResult<()> {
7146+
let prev_source_range = self.current_source_range;
7147+
self.set_source_range(expression.range());
7148+
71477149
// Compile expression for test, and jump to label if false
7148-
match &expression {
7150+
let result = match &expression {
71497151
ast::Expr::BoolOp(ast::ExprBoolOp { op, values, .. }) => {
71507152
match op {
71517153
ast::BoolOp::And => {
@@ -7191,21 +7193,20 @@ impl Compiler {
71917193
}
71927194
}
71937195
}
7196+
Ok(())
71947197
}
71957198
ast::Expr::UnaryOp(ast::ExprUnaryOp {
71967199
op: ast::UnaryOp::Not,
71977200
operand,
71987201
..
7199-
}) => {
7200-
self.compile_jump_if(operand, !condition, target_block)?;
7201-
}
7202+
}) => self.compile_jump_if(operand, !condition, target_block),
72027203
ast::Expr::Compare(ast::ExprCompare {
72037204
left,
72047205
ops,
72057206
comparators,
72067207
..
72077208
}) if ops.len() > 1 => {
7208-
self.compile_jump_if_compare(left, ops, comparators, condition, target_block)?;
7209+
self.compile_jump_if_compare(left, ops, comparators, condition, target_block)
72097210
}
72107211
// `x is None` / `x is not None` → POP_JUMP_IF_NONE / POP_JUMP_IF_NOT_NONE
72117212
ast::Expr::Compare(ast::ExprCompare {
@@ -7240,6 +7241,7 @@ impl Compiler {
72407241
}
72417242
);
72427243
}
7244+
Ok(())
72437245
}
72447246
_ => {
72457247
// Fall back case which always will work!
@@ -7263,9 +7265,12 @@ impl Compiler {
72637265
}
72647266
);
72657267
}
7268+
Ok(())
72667269
}
7267-
}
7268-
Ok(())
7270+
};
7271+
7272+
self.set_source_range(prev_source_range);
7273+
result
72697274
}
72707275

72717276
/// Compile a boolean operation as an expression.
@@ -9031,6 +9036,28 @@ impl Compiler {
90319036
}
90329037
}
90339038

9039+
fn compile_fstring_literal_value(
9040+
&self,
9041+
string: &ast::InterpolatedStringLiteralElement,
9042+
flags: ast::FStringFlags,
9043+
) -> Wtf8Buf {
9044+
if string.value.contains(char::REPLACEMENT_CHARACTER) {
9045+
let source = self.source_file.slice(string.range);
9046+
crate::string_parser::parse_fstring_literal_element(source.into(), flags.into()).into()
9047+
} else {
9048+
string.value.to_string().into()
9049+
}
9050+
}
9051+
9052+
fn compile_fstring_part_literal_value(&self, string: &ast::StringLiteral) -> Wtf8Buf {
9053+
if string.value.contains(char::REPLACEMENT_CHARACTER) {
9054+
let source = self.source_file.slice(string.range);
9055+
crate::string_parser::parse_string_literal(source, string.flags.into()).into()
9056+
} else {
9057+
string.value.to_string().into()
9058+
}
9059+
}
9060+
90349061
fn arg_constant(&mut self, constant: ConstantData) -> oparg::ConstIdx {
90359062
let info = self.current_code_info();
90369063
info.metadata.consts.insert_full(constant).0.to_u32().into()
@@ -9575,45 +9602,63 @@ impl Compiler {
95759602

95769603
fn compile_expr_fstring(&mut self, fstring: &ast::ExprFString) -> CompileResult<()> {
95779604
let fstring = &fstring.value;
9605+
let mut element_count = 0;
9606+
let mut pending_literal = None;
95789607
for part in fstring {
9579-
self.compile_fstring_part(part)?;
9608+
self.compile_fstring_part_into(part, &mut pending_literal, &mut element_count)?;
95809609
}
9581-
let part_count: u32 = fstring
9582-
.iter()
9583-
.len()
9584-
.try_into()
9585-
.expect("BuildString size overflowed");
9586-
if part_count > 1 {
9587-
emit!(self, Instruction::BuildString { count: part_count });
9588-
}
9589-
9590-
Ok(())
9610+
self.finish_fstring(pending_literal, element_count)
95919611
}
95929612

9593-
fn compile_fstring_part(&mut self, part: &ast::FStringPart) -> CompileResult<()> {
9613+
fn compile_fstring_part_into(
9614+
&mut self,
9615+
part: &ast::FStringPart,
9616+
pending_literal: &mut Option<Wtf8Buf>,
9617+
element_count: &mut u32,
9618+
) -> CompileResult<()> {
95949619
match part {
95959620
ast::FStringPart::Literal(string) => {
9596-
if string.value.contains(char::REPLACEMENT_CHARACTER) {
9597-
// might have a surrogate literal; should reparse to be sure
9598-
let source = self.source_file.slice(string.range);
9599-
let value =
9600-
crate::string_parser::parse_string_literal(source, string.flags.into());
9601-
self.emit_load_const(ConstantData::Str {
9602-
value: value.into(),
9603-
});
9621+
let value = self.compile_fstring_part_literal_value(string);
9622+
if let Some(pending) = pending_literal.as_mut() {
9623+
pending.push_wtf8(value.as_ref());
96049624
} else {
9605-
self.emit_load_const(ConstantData::Str {
9606-
value: string.value.to_string().into(),
9607-
});
9625+
*pending_literal = Some(value);
96089626
}
96099627
Ok(())
96109628
}
9611-
ast::FStringPart::FString(fstring) => self.compile_fstring(fstring),
9629+
ast::FStringPart::FString(fstring) => self.compile_fstring_elements_into(
9630+
fstring.flags,
9631+
&fstring.elements,
9632+
pending_literal,
9633+
element_count,
9634+
),
96129635
}
96139636
}
96149637

9615-
fn compile_fstring(&mut self, fstring: &ast::FString) -> CompileResult<()> {
9616-
self.compile_fstring_elements(fstring.flags, &fstring.elements)
9638+
fn finish_fstring(
9639+
&mut self,
9640+
mut pending_literal: Option<Wtf8Buf>,
9641+
mut element_count: u32,
9642+
) -> CompileResult<()> {
9643+
if let Some(value) = pending_literal.take() {
9644+
self.emit_load_const(ConstantData::Str { value });
9645+
element_count += 1;
9646+
}
9647+
9648+
if element_count == 0 {
9649+
self.emit_load_const(ConstantData::Str {
9650+
value: Wtf8Buf::new(),
9651+
});
9652+
} else if element_count > 1 {
9653+
emit!(
9654+
self,
9655+
Instruction::BuildString {
9656+
count: element_count
9657+
}
9658+
);
9659+
}
9660+
9661+
Ok(())
96179662
}
96189663

96199664
/// Optimize `'format_str' % (args,)` into f-string bytecode.
@@ -9741,24 +9786,31 @@ impl Compiler {
97419786
fstring_elements: &ast::InterpolatedStringElements,
97429787
) -> CompileResult<()> {
97439788
let mut element_count = 0;
9789+
let mut pending_literal: Option<Wtf8Buf> = None;
9790+
self.compile_fstring_elements_into(
9791+
flags,
9792+
fstring_elements,
9793+
&mut pending_literal,
9794+
&mut element_count,
9795+
)?;
9796+
self.finish_fstring(pending_literal, element_count)
9797+
}
9798+
9799+
fn compile_fstring_elements_into(
9800+
&mut self,
9801+
flags: ast::FStringFlags,
9802+
fstring_elements: &ast::InterpolatedStringElements,
9803+
pending_literal: &mut Option<Wtf8Buf>,
9804+
element_count: &mut u32,
9805+
) -> CompileResult<()> {
97449806
for element in fstring_elements {
9745-
element_count += 1;
97469807
match element {
97479808
ast::InterpolatedStringElement::Literal(string) => {
9748-
if string.value.contains(char::REPLACEMENT_CHARACTER) {
9749-
// might have a surrogate literal; should reparse to be sure
9750-
let source = self.source_file.slice(string.range);
9751-
let value = crate::string_parser::parse_fstring_literal_element(
9752-
source.into(),
9753-
flags.into(),
9754-
);
9755-
self.emit_load_const(ConstantData::Str {
9756-
value: value.into(),
9757-
});
9809+
let value = self.compile_fstring_literal_value(string, flags);
9810+
if let Some(pending) = pending_literal.as_mut() {
9811+
pending.push_wtf8(value.as_ref());
97589812
} else {
9759-
self.emit_load_const(ConstantData::Str {
9760-
value: string.value.to_string().into(),
9761-
});
9813+
*pending_literal = Some(value);
97629814
}
97639815
}
97649816
ast::InterpolatedStringElement::Interpolation(fstring_expr) => {
@@ -9779,8 +9831,10 @@ impl Compiler {
97799831
]
97809832
.concat();
97819833

9782-
self.emit_load_const(ConstantData::Str { value: text.into() });
9783-
element_count += 1;
9834+
let text: Wtf8Buf = text.into();
9835+
pending_literal
9836+
.get_or_insert_with(Wtf8Buf::new)
9837+
.push_wtf8(text.as_ref());
97849838

97859839
// If debug text is present, apply repr conversion when no `format_spec` specified.
97869840
// See action_helpers.c: fstring_find_expr_replacement
@@ -9792,6 +9846,11 @@ impl Compiler {
97929846
}
97939847
}
97949848

9849+
if let Some(value) = pending_literal.take() {
9850+
self.emit_load_const(ConstantData::Str { value });
9851+
*element_count += 1;
9852+
}
9853+
97959854
self.compile_expression(&fstring_expr.expression)?;
97969855

97979856
match conversion {
@@ -9813,22 +9872,10 @@ impl Compiler {
98139872
emit!(self, Instruction::FormatSimple);
98149873
}
98159874
}
9816-
}
9817-
}
9818-
}
98199875

9820-
if element_count == 0 {
9821-
// ensure to put an empty string on the stack if there aren't any fstring elements
9822-
self.emit_load_const(ConstantData::Str {
9823-
value: Wtf8Buf::new(),
9824-
});
9825-
} else if element_count > 1 {
9826-
emit!(
9827-
self,
9828-
Instruction::BuildString {
9829-
count: element_count
9876+
*element_count += 1;
98309877
}
9831-
);
9878+
}
98329879
}
98339880

98349881
Ok(())
@@ -10597,6 +10644,70 @@ def f(obj):
1059710644
assert!(matches!(slice[2], ConstantData::None));
1059810645
}
1059910646

10647+
#[test]
10648+
fn test_exception_cleanup_jump_to_return_is_inlined() {
10649+
let code = compile_exec(
10650+
"\
10651+
def f(names, cls):
10652+
try:
10653+
cls.attr = names
10654+
except:
10655+
pass
10656+
return names
10657+
",
10658+
);
10659+
let f = find_code(&code, "f").expect("missing function code");
10660+
let return_count = f
10661+
.instructions
10662+
.iter()
10663+
.filter(|unit| matches!(unit.op, Instruction::ReturnValue))
10664+
.count();
10665+
10666+
assert_eq!(return_count, 2);
10667+
}
10668+
10669+
#[test]
10670+
fn test_fstring_adjacent_literals_are_merged() {
10671+
let code = compile_exec(
10672+
"\
10673+
def f(cls, proto):
10674+
raise TypeError(
10675+
f\"cannot pickle {cls.__name__!r} object: \"
10676+
f\"a class that defines __slots__ without \"
10677+
f\"defining __getstate__ cannot be pickled \"
10678+
f\"with protocol {proto}\"
10679+
)
10680+
",
10681+
);
10682+
let f = find_code(&code, "f").expect("missing function code");
10683+
let string_consts = f
10684+
.instructions
10685+
.iter()
10686+
.filter_map(|unit| match unit.op {
10687+
Instruction::LoadConst { consti } => {
10688+
Some(&f.constants[consti.get(OpArg::new(u32::from(u8::from(unit.arg))))])
10689+
}
10690+
_ => None,
10691+
})
10692+
.filter_map(|constant| match constant {
10693+
ConstantData::Str { value } => Some(value.to_string()),
10694+
_ => None,
10695+
})
10696+
.collect::<Vec<_>>();
10697+
10698+
assert!(
10699+
string_consts.iter().any(|value| {
10700+
value
10701+
== " object: a class that defines __slots__ without defining __getstate__ cannot be pickled with protocol "
10702+
}),
10703+
"expected merged trailing f-string literal, got {string_consts:?}"
10704+
);
10705+
assert!(
10706+
!string_consts.iter().any(|value| value == " object: "),
10707+
"did not expect split trailing literal, got {string_consts:?}"
10708+
);
10709+
}
10710+
1060010711
#[test]
1060110712
fn test_optimized_assert_preserves_nested_scope_order() {
1060210713
compile_exec_optimized(

0 commit comments

Comments
 (0)