Skip to content

JIT: Invalid ldp optimization with locals #85765

@jakobbotsch

Description

@jakobbotsch
// Generated by Fuzzlyn v1.5 on 2023-05-03 21:56:56
// Run on Arm64 Linux
// Seed: 12429971184890412752
// Reduced from 299.4 KiB to 0.4 KiB in 00:02:13
// Debug: Outputs False
// Release: Outputs True
// Field-less struct whose only member is a constructor that ignores its argument.
// Part of the reduced repro: keep its shape unchanged.
public struct S0
{
    // Chains to the parameterless initializer via this(); f1 is never read.
    public S0(bool f1): this()
    {
    }
}

// Struct with three single-byte fields. The JIT listing in this report shows the
// promoted fields at offsets 0x0 (F0), 0x1 (F1) and 0x2 (F2), i.e. F1 and F2 are
// adjacent bytes rather than 4-byte-aligned slots.
public struct S1
{
    public byte F0;
    public bool F1; // read by Main as the right-hand operand of the OR
    public bool F2; // read, OR-ed with F1, written back, then printed by Main
}

// Repro driver. Per the header above, Debug prints False and Release prints True.
public class Program
{
    // Fetches a default-initialized S1 from M4, ORs F1 into F2 and prints F2.
    // Both fields are false in default(S1), so the expected output is False.
    public static void Main()
    {
        S1 vr2 = M4();
        // Reads the two adjacent bool fields; this is the pair of loads that the
        // JIT dump in this report shows being combined into a single ldp.
        vr2.F2 |= vr2.F1;
        System.Console.WriteLine(vr2.F2);
    }

    // Returns default(S1), so every field is zero/false.
    public static S1 M4()
    {
        S1 var1 = default(S1);
        // Unused local left in by the reducer.
        // NOTE(review): presumably required to keep the repro triggering - confirm before removing.
        var vr0 = new S0(false);
        return var1;
    }
}

The codegen for Main looks odd:

; Assembly listing for method Program:Main()
; Emitting BLENDED_CODE for generic ARM64 CPU - Windows
; optimized code
; fp based frame
; partially interruptible
; No PGO data
; invoked as altjit
; Final local variable assignments
;
;  V00 loc0         [V00    ] (  5,  5   )  struct ( 8) [fp+18H]   do-not-enreg[SB] ld-addr-op
;# V01 OutArgs      [V01    ] (  1,  1   )  struct ( 0) [sp+00H]   do-not-enreg[XS] addr-exposed "OutgoingArgSpace"
;  V02 tmp1         [V02,T03] (  1,  1   )   ubyte  ->  [fp+18H]   do-not-enreg[] V00.F0(offs=0x00) P-DEP "field V00.F0 (fldOffset=0x0)"
;  V03 tmp2         [V03,T02] (  2,  2   )    bool  ->  [fp+19H]   do-not-enreg[] V00.F1(offs=0x01) P-DEP "field V00.F1 (fldOffset=0x1)"
;  V04 tmp3         [V04,T00] (  4,  4   )    bool  ->  [fp+1AH]   do-not-enreg[] single-def V00.F2(offs=0x02) P-DEP "field V00.F2 (fldOffset=0x2)"
;  V05 rat0         [V05,T01] (  2,  4   )  struct ( 8) [fp+10H]   do-not-enreg[SF] "Return value temp for an odd struct return size"
;
; Lcl frame size = 16

G_M27646_IG01:  ;; offset=0000H
        A9BE7BFD          stp     fp, lr, [sp, #-0x20]!
        910003FD          mov     fp, sp
                                                ;; size=8 bbWeight=1 PerfScore 1.50
G_M27646_IG02:  ;; offset=0008H
        D29A6400          movz    x0, #0xD320      // code for Program:M4():S1
        F2AB0D00          movk    x0, #0x5868 LSL #16
        F2CFFF20          movk    x0, #0x7FF9 LSL #32
        F9400000          ldr     x0, [x0]
        D63F0000          blr     x0
        B90013A0          str     w0, [fp, #0x10]
        794023A0          ldrh    w0, [fp, #0x10]
        790033A0          strh    w0, [fp, #0x18]
        39404BA0          ldrb    w0, [fp, #0x12]
        39006BA0          strb    w0, [fp, #0x1A]
        294C83A1          ldp     w1, w0, [fp, #0x64]
        2A010000          orr     w0, w0, w1
        39006BA0          strb    w0, [fp, #0x1A]       // [V04 tmp3]
        39406BA0          ldrb    w0, [fp, #0x1A]       // [V04 tmp3]
        D28DC101          movz    x1, #0x6E08      // code for System.Console:WriteLine(bool)
        F2AB10A1          movk    x1, #0x5885 LSL #16
        F2CFFF21          movk    x1, #0x7FF9 LSL #32
        F9400021          ldr     x1, [x1]
        D63F0020          blr     x1
                                                ;; size=76 bbWeight=1 PerfScore 23.50
G_M27646_IG03:  ;; offset=0054H
        A8C27BFD          ldp     fp, lr, [sp], #0x20
        D65F03C0          ret     lr
                                                ;; size=8 bbWeight=1 PerfScore 2.00

; Total bytes of code 92, prolog size 8, PerfScore 36.20, instruction count 23, allocated bytes for code 92 (MethodHash=cb019401) for method Program:Main()
; ============================================================

The instruction

        294C83A1          ldp     w1, w0, [fp, #0x64]

loads from [fp, #0x64], which is far outside the 32-byte stack frame set up by the prolog.

From the dump:

Generating: N021 (  2,  2) [000008] -----------                    t8 =    LCL_VAR   int    V04 tmp3         u:2 x0 (last use) REG x0 $201
IN000b:                           ldr     w0, [fp, #0x1A]
Generating: N023 (  3,  2) [000011] -----------                   t11 =    LCL_VAR   int    V03 tmp2         u:2 x1 (last use) REG x1 $200
Removing saved instruction in current IG G_M27646_IG02:
> IN000b:                           ldr     w0, [fp, #0x1A]
IN000b:                           ldp     w1, w0, [fp, #0x64]

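Maybe the ldp optimization does not correctly handle unaligned loads from the stack frame? (For some arm64 instructions such offsets are not representable.) Here the two locals live at [fp+19H] and [fp+1AH], only one byte apart, so they cannot be read as a pair of 4-byte registers.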

cc @dotnet/jit-contrib @kunalspathak

Metadata

Metadata

Assignees

Labels

Priority:1 - Work that is critical for the release, but we could probably ship without
area-CodeGen-coreclr - CLR JIT compiler in src/coreclr/src/jit and related components such as SuperPMI

Type

No type

Projects

No projects

Milestone

Relationships

None yet

Development

No branches or pull requests

Issue actions