Use SpanHelpers.SequenceCompareTo instead of CompareOrdinalHelper#402
Conversation
Raised issue #404 |
|
I'm just an outsider, but - as @jkotas asked in dotnet/coreclr#22479 - "Could you please share the up to date perf numbers?". |
|
@benaadams, did you have perf numbers here? |
e6ef143 to
7d6304c
Compare
|
Ah, this additionally needs the intrinsicification of SequenceCompareTo or it cuts in very late with just Have the additional change, just testing it. |
src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Char.cs
Outdated
Show resolved
Hide resolved
src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Char.cs
Outdated
Show resolved
Hide resolved
src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Char.cs
Outdated
Show resolved
Hide resolved
src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Char.cs
Outdated
Show resolved
Hide resolved
src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Char.cs
Outdated
Show resolved
Hide resolved
src/libraries/System.Private.CoreLib/src/System/String.Comparison.cs
Outdated
Show resolved
Hide resolved
29b89a7 to
385d550
Compare
G_M29673_IG01:
push rsi
vzeroupper
G_M29673_IG02:
cmp rcx, r8
+-< je SHORT G_M29673_IG10 ; Equal
|
| G_M29673_IG03:
| cmp edx, r9d
| jle SHORT G_M29673_IG04
| mov eax, r9d
| jmp SHORT G_M29673_IG05
|
| G_M29673_IG04:
| mov eax, edx
|
| G_M29673_IG05:
| movsxd r10, eax
| xor r11, r11
| cmp r10, 8
| jge G_M29673_IG15 ; IntrinsicsCompare ------------>+
| cmp r10, 4 |
| jl SHORT G_M29673_IG07 |
| |
| G_M29673_IG06: <-----+ (long) |
| mov rax, qword ptr [rcx+2*r11] | |
| mov rsi, qword ptr [r8+2*r11] | |
| xor rsi, rax L |
| test rsi, rsi O |
| jne SHORT G_M29673_IG12 O ; LongDifference ------->+ |
| add r11, 4 P | |
| lea rax, [r11+4] | | |
| cmp r10, rax | | |
| jge SHORT G_M29673_IG06 ------+ | |
| | |
| G_M29673_IG07: | |
| lea rax, [r11+2] | |
| cmp r10, rax | |
| jl SHORT G_M29673_IG08 | |
| mov eax, dword ptr [rcx+2*r11] | |
| cmp dword ptr [r8+2*r11], eax | |
| jne SHORT G_M29673_IG08 | |
| add r11, 2 | |
| | |
| G_M29673_IG08: | |
| cmp r11, r10 | |
+-< jge SHORT G_M29673_IG10 ; Equal | |
| | |
| G_M29673_IG09: <-----+ (char) | |
| lea rax, bword ptr [rcx+2*r11] | | |
| movzx rsi, word ptr [r8+2*r11] | | |
| movzx rax, word ptr [rax] L | |
| sub eax, esi O | |
| test eax, eax O | |
| jne SHORT G_M29673_IG14 P ; ResultDifference --->+ | |
| inc r11 | | | |
| cmp r11, r10 | | | |
| jl SHORT G_M29673_IG09 ------+ | | |
\ | | |
-> G_M29673_IG10: ; <--- Equal | | |
/ mov eax, edx | | |
| sub eax, r9d | | |
| | | |
| G_M29673_IG11: | | |
| vzeroupper | | |
| pop rsi | | |
| ret | | |
| | | |
| G_M29673_IG12: ; <-- LongDifference -----------+ |
| xor eax, eax | |
| tzcnt rax, rsi | |
| sar eax, 4 | |
| movsxd rax, eax | |
| add r11, rax | |
| | |
| G_M29673_IG13: ; <-- OffsetDifference -------|------+ |
| lea rax, bword ptr [rcx+2*r11] | | |
| movzx r10, word ptr [r8+2*r11] | | |
| movzx rax, word ptr [rax] | | |
| sub eax, r10d | | |
| | | |
| G_M29673_IG14: ; <-- ResultDifference--------+ | |
| vzeroupper | |
| pop rsi | |
| ret | |
| | |
| G_M29673_IG15: ; <-- IntrinsicsCompare ----------------+
| lea rax, [r10-16] |
| test rax, rax |
| jl SHORT G_M29673_IG18 |
| test rax, rax |
| jle SHORT G_M29673_IG17 |
| |
| G_M29673_IG16: <-----+ (Vector256) |
| vmovupd ymm0, ymmword ptr[rcx+2*r11] | |
| vmovupd ymm1, ymmword ptr[r8+2*r11] | |
| vpcmpeqw ymm0, ymm0, ymm1 L |
| vpmovmskb esi, ymm0 O |
| cmp esi, -1 O |
| jne G_M29673_IG21 P ; IntrinsicsDifference --->+ |
| add r11, 16 | | |
| cmp rax, r11 | | |
| jg SHORT G_M29673_IG16 ------+ | |
| | |
| G_M29673_IG17: | |
| mov r11, rax | |
| vmovupd ymm0, ymmword ptr[rcx+2*r11] | |
| vmovupd ymm1, ymmword ptr[r8+2*r11] | |
| vpcmpeqw ymm0, ymm0, ymm1 | |
| vpmovmskb esi, ymm0 | |
| cmp esi, -1 | |
| jne SHORT G_M29673_IG21 ; IntrinsicsDifference --->+ |
+-< jmp SHORT G_M29673_IG10 ; Equal | |
| | |
| G_M29673_IG18: | |
| lea rax, [r10-8] | |
| test rax, rax | |
| jle SHORT G_M29673_IG20 | |
| | |
| G_M29673_IG19: <-----+ (Vector128) | |
| vmovupd xmm0, xmmword ptr [rcx+2*r11] | | |
| vmovupd xmm1, xmmword ptr [r8+2*r11] | | |
| vpcmpeqw xmm0, xmm0, xmm1 L | |
| vpmovmskb esi, xmm0 O | |
| cmp esi, 0xFFFF O | |
| jne SHORT G_M29673_IG21 P ; IntrinsicsDifference --->+ |
| add r11, 8 | | |
| cmp rax, r11 | | |
| jg SHORT G_M29673_IG19 ------+ | |
| | |
| G_M29673_IG20: | |
| mov r11, rax | |
| vmovupd xmm0, xmmword ptr [rcx+2*r11] | |
| vmovupd xmm1, xmmword ptr [r8+2*r11] | |
| vpcmpeqw xmm0, xmm0, xmm1 | |
| vpmovmskb esi, xmm0 | |
| cmp esi, 0xFFFF | |
+-< je G_M29673_IG10 ; Equal | |
| |
G_M29673_IG21: ; <--------------------- IntrinsicsDifference ----+ |
mov eax, esi |
not eax |
tzcnt eax, eax |
sar eax, 1 |
movsxd rax, eax |
add r11, rax |
jmp G_M29673_IG13 ; OffsetDifference ------------->+
; Total bytes of code 355, prolog size 4, PerfScore 222.98, for method Program:SequenceCompareTo(byref,int,byref,int):int |
385d550 to
9431416
Compare
|
9431416 to
e278276
Compare
src/libraries/System.Private.CoreLib/src/System/String.Comparison.cs
Outdated
Show resolved
Hide resolved
src/libraries/System.Private.CoreLib/src/System/String.Comparison.cs
Outdated
Show resolved
Hide resolved
src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Char.cs
Outdated
Show resolved
Hide resolved
src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Char.cs
Outdated
Show resolved
Hide resolved
|
@stephentoub ready to go |
61c79d9 to
1f66e28
Compare
|
Rerunning failed jobs so hopefully we can merge this. |
|
@jkotas you signed off on this a while back. It’s green now. Do you believe this needs further review? |
|
I have signed off on much simpler version of this change. This should be reviewed by somebody with PhD in hardware intrinsics. |
|
@tannergooding knows such a person. He is a contended resource at the moment though.. |
|
I've marked this NO MERGE for now since the latest iteration hasn't gone through review. Once there's a review on the latest iteration feel free to remove this label. |
|
@benaadams, if you could resolve the merge conflicts then I can give this a review 😄 |
|
@tannergooding #41097 is good to go, while I resolve these conflicts 😉 |
| if (Sse2.IsSupported) | ||
| { | ||
| if (Vector.IsHardwareAccelerated && minLength >= (nuint)Vector<ushort>.Count) | ||
| // Calucate lengthToExamine here for test, rather than just testing as it used later, rather than doing it twice. |
| } | ||
| else if (Vector.IsHardwareAccelerated) | ||
| { | ||
| // Calucate lengthToExamine here for test, rather than just testing as it used later, rather than doing it twice. |
|
@benaadams do you think you'll be able to resolve those conflicts? I'm keeping an eye on this because it's one of our oldest PR's 🙂 |
|
Ping @benaadams can you please address the latest comments? |
|
Thanks for the PR, @benaadams . I'm going to close this, feel free to reopen if you plan to pick it up again. |
- `UninstallApp()` wasn't triggering for devices - mlaunch failures when running app didn't get detected Resolves dotnet#402



By string length (1- 64); difference in last position (lower is better)
By difference position (0-88) in long string (lower is better)

By difference position (0-1023) in long string (lower is better)
Coreclr PR: dotnet/coreclr#22479
Resolves: https://github.com/dotnet/coreclr/issues/22763