Skip to content

JIT does not always fold inlined constants #62604

@jasper-d

Description

@jasper-d

It appears that JIT (x64) sometimes misses opportunities for folding inlined(?) constants.
The following code demonstrates the issue. Depending on the order of the addends in the C# expression, the constants are folded either fully or only partially.
I had assumed that the operand order in C# would not affect the JIT's ability to fold them.

There are two pairs of methods, CalculateMaxSize[No]ExtraAdd and CalculateMaxSize[No]ExtraInc.
The *No* variants look like what I had expected for either implementation.

using System.Runtime.CompilerServices;
// Repro helper: exposes a constant through a static property (rather than a
// literal) so the value only becomes visible to the JIT after the getter is inlined.
internal readonly struct SubId {
    // Always returns 16; this is the "16" seen in the disassembly of the callers.
    public static int MaxSize => 16;
}

// Repro type for the constant-folding issue. Each CalculateMaxSize* method sums
// _subject.Length with the same kind of constant addends; only the ORDER of the
// operands differs between the "NoExtra" and "Extra" variants. The operand order
// is the thing under test, so it must not be rearranged.
internal readonly struct MyStruct
{
    // Two-byte CR LF sequence; EoL.Length contributes the constant 2.
    public static ReadOnlySpan<byte> EoL => new []{(byte)'\r', (byte)'\n'};
    internal readonly byte[] _subject;

    // Stores the given array as-is (no copy, no null check).
    public MyStruct(byte[] subject){
        _subject = subject;
    }

    /*
    Observed x64 codegen (constants first): 16 + 2 is folded into a single add.

    mov      rax, gword ptr [rdi]
    mov      eax, dword ptr [rax+8]
    add      eax, 18
    */
    // NoInlining keeps this method as a separate compilation unit so that its
    // own disassembly can be inspected.
    [MethodImpl(MethodImplOptions.NoInlining)]
    public int CalculateMaxSizeNoExtraAdd()
    {
        return SubId.MaxSize + EoL.Length + _subject.Length;
    }

    /*
    Observed x64 codegen (array length first): the two constants are NOT folded.

    mov      rax, gword ptr [rdi]
    mov      eax, dword ptr [rax+8]
    add      eax, 16
    add      eax, 2
    */
    [MethodImpl(MethodImplOptions.NoInlining)]
    public int CalculateMaxSizeExtraAdd()
    {
        // This will emit two add instructions even when making SubId.MaxSize a const field
        return _subject.Length + SubId.MaxSize + EoL.Length;
    }

    /*
    Observed x64 codegen (inlined constant, then literal 1): folded into a single add.

    mov      rax, gword ptr [rdi]
    mov      eax, dword ptr [rax+8]
    add      eax, 17
    */
    [MethodImpl(MethodImplOptions.NoInlining)]
    public int CalculateMaxSizeNoExtraInc()
    {
        return _subject.Length + SubId.MaxSize + 1;
    }


    /*
    Observed x64 codegen (literal 1, then inlined constant): NOT folded in the
    build this dump was taken from.

    mov      rax, gword ptr [rdi]
    mov      eax, dword ptr [rax+8]
    inc      eax                     // This does not repro on .NET 7.0, it now emits a single add eax, 17 ❤
    add      eax, 16
    */
    [MethodImpl(MethodImplOptions.NoInlining)]
    public int CalculateMaxSizeExtraInc()
    {
        return _subject.Length + 1 + SubId.MaxSize;
    }
}

SharpLab and JIT dump running 1d4b5f6

; Assembly listing for method foo.MyStruct:CalculateMaxSizeNoExtraAdd():int:this
; Emitting BLENDED_CODE for X64 CPU with AVX - Unix
; Tier-1 compilation
; optimized code
; rsp based frame
; partially interruptible
; No PGO data
; 1 inlinees with PGO data; 5 single block inlinees; 0 inlinees without PGO data
; Final local variable assignments
;
;  V00 this         [V00,T00] (  3,  3   )   byref  ->  rdi         this single-def
;* V01 loc0         [V01    ] (  0,  0   )  struct (16) zero-ref    ld-addr-op
;# V02 OutArgs      [V02    ] (  1,  1   )  lclBlk ( 0) [rsp+00H]   "OutgoingArgSpace"
;* V03 tmp1         [V03    ] (  0,  0   )     int  ->  zero-ref    "impAppendStmt"
;* V04 tmp2         [V04    ] (  0,  0   )  struct (16) zero-ref    "NewObj constructor temp"
;* V05 tmp3         [V05    ] (  0,  0   )  struct ( 8) zero-ref    "NewObj constructor temp"
;* V06 tmp4         [V06    ] (  0,  0   )   byref  ->  zero-ref    single-def V01._pointer(offs=0x00) P-INDEP "field V01._pointer (fldOffset=0x0)"
;* V07 tmp5         [V07    ] (  0,  0   )     int  ->  zero-ref    single-def V01._length(offs=0x08) P-INDEP "field V01._length (fldOffset=0x8)"
;* V08 tmp6         [V08    ] (  0,  0   )   byref  ->  zero-ref    V04._pointer(offs=0x00) P-INDEP "field V04._pointer (fldOffset=0x0)"
;* V09 tmp7         [V09    ] (  0,  0   )     int  ->  zero-ref    V04._length(offs=0x08) P-INDEP "field V04._length (fldOffset=0x8)"
;* V10 tmp8         [V10    ] (  0,  0   )   byref  ->  zero-ref    single-def V05._value(offs=0x00) P-INDEP "field V05._value (fldOffset=0x0)"
;
; Lcl frame size = 0

G_M57648_IG01:
                                                ;; bbWeight=1    PerfScore 0.00
G_M57648_IG02:
       mov      rax, gword ptr [rdi]
       mov      eax, dword ptr [rax+8]
       add      eax, 18
                                                ;; bbWeight=1    PerfScore 4.25
G_M57648_IG03:
       ret
                                                ;; bbWeight=1    PerfScore 1.00

; Total bytes of code 10, prolog size 0, PerfScore 6.25, instruction count 4, allocated bytes for code 10 (MethodHash=f2321ecf) for method foo.MyStruct:CalculateMaxSizeNoExtraAdd():int:this
; ============================================================

; Assembly listing for method foo.MyStruct:CalculateMaxSizeExtraAdd():int:this
; Emitting BLENDED_CODE for X64 CPU with AVX - Unix
; Tier-1 compilation
; optimized code
; rsp based frame
; partially interruptible
; No PGO data
; 1 inlinees with PGO data; 5 single block inlinees; 0 inlinees without PGO data
; Final local variable assignments
;
;  V00 this         [V00,T00] (  3,  3   )   byref  ->  rdi         this single-def
;* V01 loc0         [V01    ] (  0,  0   )  struct (16) zero-ref    ld-addr-op
;# V02 OutArgs      [V02    ] (  1,  1   )  lclBlk ( 0) [rsp+00H]   "OutgoingArgSpace"
;  V03 tmp1         [V03,T01] (  2,  4   )     int  ->  rax         "impAppendStmt"
;  V04 tmp2         [V04,T02] (  2,  4   )     int  ->  rax         "impAppendStmt"
;* V05 tmp3         [V05    ] (  0,  0   )  struct (16) zero-ref    "NewObj constructor temp"
;* V06 tmp4         [V06    ] (  0,  0   )  struct ( 8) zero-ref    "NewObj constructor temp"
;* V07 tmp5         [V07    ] (  0,  0   )   byref  ->  zero-ref    single-def V01._pointer(offs=0x00) P-INDEP "field V01._pointer (fldOffset=0x0)"
;* V08 tmp6         [V08    ] (  0,  0   )     int  ->  zero-ref    single-def V01._length(offs=0x08) P-INDEP "field V01._length (fldOffset=0x8)"
;* V09 tmp7         [V09    ] (  0,  0   )   byref  ->  zero-ref    V05._pointer(offs=0x00) P-INDEP "field V05._pointer (fldOffset=0x0)"
;* V10 tmp8         [V10    ] (  0,  0   )     int  ->  zero-ref    V05._length(offs=0x08) P-INDEP "field V05._length (fldOffset=0x8)"
;* V11 tmp9         [V11    ] (  0,  0   )   byref  ->  zero-ref    single-def V06._value(offs=0x00) P-INDEP "field V06._value (fldOffset=0x0)"
;
; Lcl frame size = 0

G_M14833_IG01:
                                                ;; bbWeight=1    PerfScore 0.00
G_M14833_IG02:
       mov      rax, gword ptr [rdi]
       mov      eax, dword ptr [rax+8]
       add      eax, 16
       add      eax, 2
                                                ;; bbWeight=1    PerfScore 4.50
G_M14833_IG03:
       ret
                                                ;; bbWeight=1    PerfScore 1.00

; Total bytes of code 13, prolog size 0, PerfScore 6.80, instruction count 5, allocated bytes for code 13 (MethodHash=cc45c60e) for method foo.MyStruct:CalculateMaxSizeExtraAdd():int:this
; ============================================================

; Assembly listing for method foo.MyStruct:CalculateMaxSizeNoExtraInc():int:this
; Emitting BLENDED_CODE for X64 CPU with AVX - Unix
; Tier-1 compilation
; optimized code
; rsp based frame
; partially interruptible
; No PGO data
; 0 inlinees with PGO data; 1 single block inlinees; 0 inlinees without PGO data
; Final local variable assignments
;
;  V00 this         [V00,T00] (  3,  3   )   byref  ->  rdi         this single-def
;# V01 OutArgs      [V01    ] (  1,  1   )  lclBlk ( 0) [rsp+00H]   "OutgoingArgSpace"
;  V02 tmp1         [V02,T01] (  2,  4   )     int  ->  rax         "impAppendStmt"
;
; Lcl frame size = 0

G_M11669_IG01:
                                                ;; bbWeight=1    PerfScore 0.00
G_M11669_IG02:
       mov      rax, gword ptr [rdi]
       mov      eax, dword ptr [rax+8]
       add      eax, 17
                                                ;; bbWeight=1    PerfScore 4.25
G_M11669_IG03:
       ret
                                                ;; bbWeight=1    PerfScore 1.00

; Total bytes of code 10, prolog size 0, PerfScore 6.25, instruction count 4, allocated bytes for code 10 (MethodHash=d8a9d26a) for method foo.MyStruct:CalculateMaxSizeNoExtraInc():int:this
; ============================================================

; Assembly listing for method foo.MyStruct:CalculateMaxSizeExtraInc():int:this
; Emitting BLENDED_CODE for X64 CPU with AVX - Unix
; Tier-1 compilation
; optimized code
; rsp based frame
; partially interruptible
; No PGO data
; 0 inlinees with PGO data; 1 single block inlinees; 0 inlinees without PGO data
; Final local variable assignments
;
;  V00 this         [V00,T00] (  3,  3   )   byref  ->  rdi         this single-def
;# V01 OutArgs      [V01    ] (  1,  1   )  lclBlk ( 0) [rsp+00H]   "OutgoingArgSpace"
;  V02 tmp1         [V02,T01] (  2,  4   )     int  ->  rax         "impAppendStmt"
;
; Lcl frame size = 0

G_M56724_IG01:
                                                ;; bbWeight=1    PerfScore 0.00
G_M56724_IG02:
       mov      rax, gword ptr [rdi]
       mov      eax, dword ptr [rax+8]
       inc      eax
       add      eax, 16
                                                ;; bbWeight=1    PerfScore 4.50
G_M56724_IG03:
       ret
                                                ;; bbWeight=1    PerfScore 1.00

; Total bytes of code 12, prolog size 0, PerfScore 6.70, instruction count 5, allocated bytes for code 12 (MethodHash=17e2226b) for method foo.MyStruct:CalculateMaxSizeExtraInc():int:this
; ============================================================                                                                                                   

category:implementation
theme:inlining
skill-level:intermediate
cost:medium
impact:medium

Metadata

Metadata

Assignees

No one assigned

    Labels

    area-CodeGen-coreclrCLR JIT compiler in src/coreclr/src/jit and related components such as SuperPMI

    Type

    No type

    Projects

    No projects

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions