-
Notifications
You must be signed in to change notification settings - Fork 5.3k
Open
Labels
area-CodeGen-coreclr: CLR JIT compiler in src/coreclr/src/jit and related components such as SuperPMI
Milestone
Description
It appears that JIT (x64) sometimes misses opportunities for folding inlined(?) constants.
The following code demonstrates the issue. Depending on the order of the addends in the C# source, the constants are folded completely or only partially.
I had assumed that the order of the addends in C# would not affect the JIT's ability to fold them.
There are two pairs of methods, CalculateMaxSize[No]ExtraAdd and CalculateMaxSize[No]ExtraInc.
The *No* variants produce the code I had expected from both methods of each pair.
using System.Runtime.CompilerServices;
/// <summary>
/// Helper type for the repro; its only member is the constant-valued <see cref="MaxSize"/>.
/// </summary>
internal readonly struct SubId {
/// <summary>Always returns 16.</summary>
public static int MaxSize => 16;
}
internal readonly struct MyStruct
{
/// <summary>
/// A two-byte span holding '\r' followed by '\n'. The methods in this struct read only its Length (2).
/// </summary>
public static ReadOnlySpan<byte> EoL => new []{(byte)'\r', (byte)'\n'};
/// <summary>Byte array supplied to the constructor; the methods in this struct read only its Length.</summary>
internal readonly byte[] _subject;
/// <summary>
/// Stores <paramref name="subject"/> in <see cref="_subject"/> as-is (no copy, no null check).
/// </summary>
/// <param name="subject">Array whose length is later added to the constants.</param>
public MyStruct(byte[] subject){
_subject = subject;
}
/*
mov rax, gword ptr [rdi]
mov eax, dword ptr [rax+8]
add eax, 18
*/
/// <summary>
/// Returns SubId.MaxSize + EoL.Length + _subject.Length, i.e. the array length plus 18.
/// </summary>
/// <remarks>
/// Both constant addends come first here. The operand order is deliberate, because it is
/// what the issue compares; do not reorder.
/// </remarks>
/// <returns>The length of <see cref="_subject"/> plus 18.</returns>
// NoInlining keeps this method as a separate compilation unit so its codegen can be inspected.
[MethodImpl(MethodImplOptions.NoInlining)]
public int CalculateMaxSizeNoExtraAdd()
{
return SubId.MaxSize + EoL.Length + _subject.Length;
}
/*
mov rax, gword ptr [rdi]
mov eax, dword ptr [rax+8]
add eax, 16
add eax, 2
*/
/// <summary>
/// Returns _subject.Length + SubId.MaxSize + EoL.Length, i.e. the array length plus 18.
/// </summary>
/// <remarks>
/// Same sum as CalculateMaxSizeNoExtraAdd, but with the non-constant addend first.
/// The operand order is deliberate, because it is what the issue compares; do not reorder.
/// </remarks>
/// <returns>The length of <see cref="_subject"/> plus 18.</returns>
// NoInlining keeps this method as a separate compilation unit so its codegen can be inspected.
[MethodImpl(MethodImplOptions.NoInlining)]
public int CalculateMaxSizeExtraAdd()
{
// This will emit two add instructions even when making SubId.MaxSize a const field
return _subject.Length + SubId.MaxSize + EoL.Length;
}
/*
mov rax, gword ptr [rdi]
mov eax, dword ptr [rax+8]
add eax, 17
*/
/// <summary>
/// Returns _subject.Length + SubId.MaxSize + 1, i.e. the array length plus 17.
/// </summary>
/// <remarks>
/// The literal 1 is the last addend here. The operand order is deliberate, because it is
/// what the issue compares; do not reorder.
/// </remarks>
/// <returns>The length of <see cref="_subject"/> plus 17.</returns>
// NoInlining keeps this method as a separate compilation unit so its codegen can be inspected.
[MethodImpl(MethodImplOptions.NoInlining)]
public int CalculateMaxSizeNoExtraInc()
{
return _subject.Length + SubId.MaxSize + 1;
}
/*
Codegen as originally reported:
mov rax, gword ptr [rdi]
mov eax, dword ptr [rax+8]
inc eax
add eax, 16
Note: this does not repro on .NET 7.0, which now emits a single `add eax, 17`.
*/
/// <summary>
/// Returns _subject.Length + 1 + SubId.MaxSize, i.e. the array length plus 17.
/// </summary>
/// <remarks>
/// Same sum as CalculateMaxSizeNoExtraInc, but with the literal 1 placed before SubId.MaxSize.
/// The operand order is deliberate, because it is what the issue compares; do not reorder.
/// </remarks>
/// <returns>The length of <see cref="_subject"/> plus 17.</returns>
// NoInlining keeps this method as a separate compilation unit so its codegen can be inspected.
[MethodImpl(MethodImplOptions.NoInlining)]
public int CalculateMaxSizeExtraInc()
{
return _subject.Length + 1 + SubId.MaxSize;
}
}
SharpLab and JIT dump running 1d4b5f6
; Assembly listing for method foo.MyStruct:CalculateMaxSizeNoExtraAdd():int:this
; Emitting BLENDED_CODE for X64 CPU with AVX - Unix
; Tier-1 compilation
; optimized code
; rsp based frame
; partially interruptible
; No PGO data
; 1 inlinees with PGO data; 5 single block inlinees; 0 inlinees without PGO data
; Final local variable assignments
;
; V00 this [V00,T00] ( 3, 3 ) byref -> rdi this single-def
;* V01 loc0 [V01 ] ( 0, 0 ) struct (16) zero-ref ld-addr-op
;# V02 OutArgs [V02 ] ( 1, 1 ) lclBlk ( 0) [rsp+00H] "OutgoingArgSpace"
;* V03 tmp1 [V03 ] ( 0, 0 ) int -> zero-ref "impAppendStmt"
;* V04 tmp2 [V04 ] ( 0, 0 ) struct (16) zero-ref "NewObj constructor temp"
;* V05 tmp3 [V05 ] ( 0, 0 ) struct ( 8) zero-ref "NewObj constructor temp"
;* V06 tmp4 [V06 ] ( 0, 0 ) byref -> zero-ref single-def V01._pointer(offs=0x00) P-INDEP "field V01._pointer (fldOffset=0x0)"
;* V07 tmp5 [V07 ] ( 0, 0 ) int -> zero-ref single-def V01._length(offs=0x08) P-INDEP "field V01._length (fldOffset=0x8)"
;* V08 tmp6 [V08 ] ( 0, 0 ) byref -> zero-ref V04._pointer(offs=0x00) P-INDEP "field V04._pointer (fldOffset=0x0)"
;* V09 tmp7 [V09 ] ( 0, 0 ) int -> zero-ref V04._length(offs=0x08) P-INDEP "field V04._length (fldOffset=0x8)"
;* V10 tmp8 [V10 ] ( 0, 0 ) byref -> zero-ref single-def V05._value(offs=0x00) P-INDEP "field V05._value (fldOffset=0x0)"
;
; Lcl frame size = 0
G_M57648_IG01:
;; bbWeight=1 PerfScore 0.00
G_M57648_IG02:
mov rax, gword ptr [rdi]
mov eax, dword ptr [rax+8]
add eax, 18
;; bbWeight=1 PerfScore 4.25
G_M57648_IG03:
ret
;; bbWeight=1 PerfScore 1.00
; Total bytes of code 10, prolog size 0, PerfScore 6.25, instruction count 4, allocated bytes for code 10 (MethodHash=f2321ecf) for method foo.MyStruct:CalculateMaxSizeNoExtraAdd():int:this
; ============================================================
; Assembly listing for method foo.MyStruct:CalculateMaxSizeExtraAdd():int:this
; Emitting BLENDED_CODE for X64 CPU with AVX - Unix
; Tier-1 compilation
; optimized code
; rsp based frame
; partially interruptible
; No PGO data
; 1 inlinees with PGO data; 5 single block inlinees; 0 inlinees without PGO data
; Final local variable assignments
;
; V00 this [V00,T00] ( 3, 3 ) byref -> rdi this single-def
;* V01 loc0 [V01 ] ( 0, 0 ) struct (16) zero-ref ld-addr-op
;# V02 OutArgs [V02 ] ( 1, 1 ) lclBlk ( 0) [rsp+00H] "OutgoingArgSpace"
; V03 tmp1 [V03,T01] ( 2, 4 ) int -> rax "impAppendStmt"
; V04 tmp2 [V04,T02] ( 2, 4 ) int -> rax "impAppendStmt"
;* V05 tmp3 [V05 ] ( 0, 0 ) struct (16) zero-ref "NewObj constructor temp"
;* V06 tmp4 [V06 ] ( 0, 0 ) struct ( 8) zero-ref "NewObj constructor temp"
;* V07 tmp5 [V07 ] ( 0, 0 ) byref -> zero-ref single-def V01._pointer(offs=0x00) P-INDEP "field V01._pointer (fldOffset=0x0)"
;* V08 tmp6 [V08 ] ( 0, 0 ) int -> zero-ref single-def V01._length(offs=0x08) P-INDEP "field V01._length (fldOffset=0x8)"
;* V09 tmp7 [V09 ] ( 0, 0 ) byref -> zero-ref V05._pointer(offs=0x00) P-INDEP "field V05._pointer (fldOffset=0x0)"
;* V10 tmp8 [V10 ] ( 0, 0 ) int -> zero-ref V05._length(offs=0x08) P-INDEP "field V05._length (fldOffset=0x8)"
;* V11 tmp9 [V11 ] ( 0, 0 ) byref -> zero-ref single-def V06._value(offs=0x00) P-INDEP "field V06._value (fldOffset=0x0)"
;
; Lcl frame size = 0
G_M14833_IG01:
;; bbWeight=1 PerfScore 0.00
G_M14833_IG02:
mov rax, gword ptr [rdi]
mov eax, dword ptr [rax+8]
add eax, 16
add eax, 2
;; bbWeight=1 PerfScore 4.50
G_M14833_IG03:
ret
;; bbWeight=1 PerfScore 1.00
; Total bytes of code 13, prolog size 0, PerfScore 6.80, instruction count 5, allocated bytes for code 13 (MethodHash=cc45c60e) for method foo.MyStruct:CalculateMaxSizeExtraAdd():int:this
; ============================================================
; Assembly listing for method foo.MyStruct:CalculateMaxSizeNoExtraInc():int:this
; Emitting BLENDED_CODE for X64 CPU with AVX - Unix
; Tier-1 compilation
; optimized code
; rsp based frame
; partially interruptible
; No PGO data
; 0 inlinees with PGO data; 1 single block inlinees; 0 inlinees without PGO data
; Final local variable assignments
;
; V00 this [V00,T00] ( 3, 3 ) byref -> rdi this single-def
;# V01 OutArgs [V01 ] ( 1, 1 ) lclBlk ( 0) [rsp+00H] "OutgoingArgSpace"
; V02 tmp1 [V02,T01] ( 2, 4 ) int -> rax "impAppendStmt"
;
; Lcl frame size = 0
G_M11669_IG01:
;; bbWeight=1 PerfScore 0.00
G_M11669_IG02:
mov rax, gword ptr [rdi]
mov eax, dword ptr [rax+8]
add eax, 17
;; bbWeight=1 PerfScore 4.25
G_M11669_IG03:
ret
;; bbWeight=1 PerfScore 1.00
; Total bytes of code 10, prolog size 0, PerfScore 6.25, instruction count 4, allocated bytes for code 10 (MethodHash=d8a9d26a) for method foo.MyStruct:CalculateMaxSizeNoExtraInc():int:this
; ============================================================
; Assembly listing for method foo.MyStruct:CalculateMaxSizeExtraInc():int:this
; Emitting BLENDED_CODE for X64 CPU with AVX - Unix
; Tier-1 compilation
; optimized code
; rsp based frame
; partially interruptible
; No PGO data
; 0 inlinees with PGO data; 1 single block inlinees; 0 inlinees without PGO data
; Final local variable assignments
;
; V00 this [V00,T00] ( 3, 3 ) byref -> rdi this single-def
;# V01 OutArgs [V01 ] ( 1, 1 ) lclBlk ( 0) [rsp+00H] "OutgoingArgSpace"
; V02 tmp1 [V02,T01] ( 2, 4 ) int -> rax "impAppendStmt"
;
; Lcl frame size = 0
G_M56724_IG01:
;; bbWeight=1 PerfScore 0.00
G_M56724_IG02:
mov rax, gword ptr [rdi]
mov eax, dword ptr [rax+8]
inc eax
add eax, 16
;; bbWeight=1 PerfScore 4.50
G_M56724_IG03:
ret
;; bbWeight=1 PerfScore 1.00
; Total bytes of code 12, prolog size 0, PerfScore 6.70, instruction count 5, allocated bytes for code 12 (MethodHash=17e2226b) for method foo.MyStruct:CalculateMaxSizeExtraInc():int:this
; ============================================================
category:implementation
theme:inlining
skill-level:intermediate
cost:medium
impact:medium
Metadata
Metadata
Assignees
Labels
area-CodeGen-coreclr: CLR JIT compiler in src/coreclr/src/jit and related components such as SuperPMI