Skip to content

Optimize IndexOfAnyAsciiSearcher on Arm64 #126678

Merged
tannergooding merged 3 commits into dotnet:main from
tannergooding:ascii-search
Apr 12, 2026
Merged

Optimize IndexOfAnyAsciiSearcher on Arm64 #126678
tannergooding merged 3 commits into dotnet:main from
tannergooding:ascii-search

Conversation

@tannergooding
Copy link
Copy Markdown
Member

@tannergooding tannergooding commented Apr 9, 2026

This improves the codegen of the dedicated Count, IndexOf, and LastIndexOf APIs on Arm64 and correspondingly updates IndexOfAnyAsciiSearcher to consume them instead of ExtractMostSignificantBits.

This is notably not strictly the "ideal" codegen, as it still does an extra comparison, but that is something we can address in the JIT. Even so, this change should still provide a 5-50% performance increase in workloads that use this API as part of their core loop.

Copilot AI review requested due to automatic review settings April 9, 2026 00:42
@tannergooding
Copy link
Copy Markdown
Member Author

@EgorBot -arm -linux_arm

using System.Buffers;
using BenchmarkDotNet.Attributes;

/// <summary>
/// Micro-benchmark for <c>IndexOfAny</c> over a byte buffer, using a <see cref="SearchValues{T}"/>
/// set made of the bytes 0x00-0x1F plus the double quote and the backslash.
/// </summary>
public class Benchmarks
{
    // Input buffer; the only byte belonging to the search set is the trailing double quote.
    private byte[] _input = "Product description with some text that is a bit longer than usual\""u8.ToArray();

    // Needle set: bytes 0x00-0x1F, '"' and '\'.
    private static readonly SearchValues<byte> s_needles = SearchValues.Create(
        "\u0000\u0001\u0002\u0003\u0004\u0005\u0006\u0007\u0008\u0009\u000A\u000B"u8 +
        "\u000C\u000D\u000E\u000F\u0010\u0011\u0012\u0013\u0014\u0015\u0016\u0017\u0018"u8 +
        "\u0019\u001A\u001B\u001C\u001D\u001E\u001F"u8 + "\""u8 + "\\"u8);

    /// <summary>Searches the input buffer for the first byte contained in the needle set.</summary>
    /// <returns>The value returned by <c>IndexOfAny</c> for the input buffer.</returns>
    [Benchmark]
    public int Medium()
    {
        Span<byte> haystack = _input.AsSpan();
        return haystack.IndexOfAny(s_needles);
    }
}

@tannergooding
Copy link
Copy Markdown
Member Author

@EgorBot -linux_azure_arm -arm

using System.Text;
using System.Text.Json;
using BenchmarkDotNet.Attributes;
using BenchmarkDotNet.Running;

// Entry point: passes the command-line arguments to the BenchmarkSwitcher built from the assembly that declares Benchmarks.
BenchmarkSwitcher.FromAssembly(typeof(Benchmarks).Assembly).Run(args);

/// <summary>
/// Benchmark that writes a mixed list of pre-built JSON strings and dictionaries into a
/// "Catalog" array through a per-thread cached <see cref="Utf8JsonWriter"/>.
/// </summary>
[MemoryDiagnoser]
public class Benchmarks
{
    // ── TokenSerialization fields ────────────────────────────────────────────
    // Filled by Setup(): every third entry is a JSON string, the others are dictionaries.
    private List<object> _tokenObjects;

    // Writer and backing stream are cached per thread and reused across invocations.
    [ThreadStatic] static Utf8JsonWriter t_writer;
    [ThreadStatic] static MemoryStream t_stream;

    /// <summary>Builds the 200-element token list consumed by <see cref="TokenSerialization"/>.</summary>
    [GlobalSetup]
    public void Setup()
    {
        // TokenSerialization
        _tokenObjects = new List<object>(200);
        for (int i = 0; i < 200; i++)
        {
            if (i % 3 == 0)
            {
                _tokenObjects.Add(GenerateRecordJson(1));
            }
            else
            {
                _tokenObjects.Add(new Dictionary<string, object>
                {
                    ["seq"] = i,
                    ["label"] = $"item_{i}",
                    ["blob"] = new byte[100]
                });
            }
        }
    }

    /// <summary>
    /// Builds a JSON object string for a "product" record whose <c>Fields</c> array is filled
    /// with generated field entries.
    /// </summary>
    /// <param name="targetSizeKb">
    /// Approximate target size in KiB. Entries are appended while the builder length (in chars)
    /// is below <c>targetSizeKb * 1024 - 512</c>.
    /// </param>
    /// <returns>The generated JSON text.</returns>
    private static string GenerateRecordJson(int targetSizeKb = 150)
    {
        var sb = new StringBuilder(targetSizeKb * 1024 + 512);
        sb.Append("{");
        sb.Append("\"TypeName\":\"product\",");
        sb.Append("\"CategoryCode\":1,");
        sb.Append("\"Label\":\"Product\",");
        sb.Append("\"IsAction\":false,");
        sb.Append("\"IsActionMember\":false,");
        sb.Append("\"IsTrackingEnabled\":true,");
        sb.Append("\"IsAvailableLocal\":true,");
        sb.Append("\"IsChildRecord\":false,");
        sb.Append("\"IsLinksEnabled\":true,");
        sb.Append("\"IsCustomRecord\":false,");
        sb.Append("\"PrimaryKeyField\":\"productid\",");
        sb.Append("\"PrimaryLabelField\":\"title\",");
        sb.Append("\"Fields\":[");
        int targetBytes = targetSizeKb * 1024;
        int fieldIndex = 0;
        bool firstField = true;
        while (sb.Length < targetBytes - 512)
        {
            // Separate entries with a comma, except before the first one.
            if (!firstField)
            {
                sb.Append(",");
            }
            firstField = false;
            sb.Append("{");
            sb.Append($"\"TypeName\":\"field_{fieldIndex}\",");
            sb.Append($"\"InternalName\":\"Field_{fieldIndex}\",");
            sb.Append("\"FieldType\":\"String\",");
            sb.Append($"\"Label\":\"Field {fieldIndex}\",");
            sb.Append("\"MaxSize\":100,");
            sb.Append("\"IsReadable\":true,");
            sb.Append("\"IsCreatable\":true,");
            sb.Append("\"IsUpdatable\":true,");
            sb.Append("\"IsTrackingEnabled\":false,");
            sb.Append("\"IsPrimaryKey\":false,");
            sb.Append("\"IsVirtual\":false,");
            sb.Append("\"Requirement\":\"None\"");
            sb.Append("}");
            fieldIndex++;
        }
        sb.Append("]");
        sb.Append("}");
        return sb.ToString();
    }

    /// <summary>
    /// Writes every token into a "Catalog" array: non-empty strings are emitted as raw JSON
    /// values, dictionaries as objects whose values go through <see cref="JsonSerializer"/>.
    /// </summary>
    /// <exception cref="InvalidOperationException">Nothing was written to the stream after flushing.</exception>
    [Benchmark]
    public void TokenSerialization()
    {
        // Reuse the per-thread stream, rewinding and truncating it for this invocation.
        var stream = t_stream ??= new MemoryStream(64 * 1024);
        stream.Position = 0;
        stream.SetLength(0);

        // Create the writer once per thread; afterwards just re-point it at the stream.
        var writer = t_writer;
        if (writer is null)
        {
            writer = new Utf8JsonWriter(stream, new JsonWriterOptions { SkipValidation = true });
            t_writer = writer;
        }
        else
        {
            writer.Reset(stream);
        }

        writer.WriteStartObject();
        writer.WriteStartArray("Catalog");
        foreach (var token in _tokenObjects)
        {
            if (token is string strToken)
            {
                if (!string.IsNullOrEmpty(strToken))
                {
                    writer.WriteRawValue(strToken);
                }
            }
            else if (token is Dictionary<string, object> dictToken)
            {
                writer.WriteStartObject();
                foreach (var kvp in dictToken)
                {
                    writer.WritePropertyName(kvp.Key);
                    JsonSerializer.Serialize(writer, kvp.Value);
                }
                writer.WriteEndObject();
            }
        }
        writer.WriteEndArray();
        writer.WriteEndObject();
        writer.Flush();

        // Sanity check that output was produced; a specific exception type replaces the bare Exception.
        if (stream.Length == 0)
        {
            throw new InvalidOperationException("unreachable");
        }
    }
}

@dotnet-policy-service
Copy link
Copy Markdown
Contributor

Tagging subscribers to this area: @dotnet/area-system-buffers
See info in area-owners.md if you want to be subscribed.

Copy link
Copy Markdown
Contributor

Copilot AI left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Pull request overview

This PR adds an Arm64/AdvSimd-specific fast-path for computing the first match index from a Vector128<byte> match result in IndexOfAnyAsciiSearcher, aiming to reduce overhead in IndexOfAny-style searches.

Changes:

  • Extended INegator with IndexOfFirstMatch(Vector128<byte> result) to centralize “find first match” logic.
  • Implemented an AdvSimd-based first-match index computation for both DontNegate and Negate.
  • Updated IndexOfAnyResultMapper’s Vector128 code paths to use the new IndexOfFirstMatch helper.

@EgorBo
Copy link
Copy Markdown
Member

EgorBo commented Apr 9, 2026

@EgorBot -aws_arm -profiler

using System.Buffers;
using BenchmarkDotNet.Attributes;

/// <summary>
/// Micro-benchmark for <c>IndexOfAny</c> over a byte buffer, using a <see cref="SearchValues{T}"/>
/// set made of the bytes 0x00-0x1F plus the double quote and the backslash.
/// </summary>
public class Benchmarks
{
    // Input buffer; the only byte belonging to the search set is the trailing double quote.
    private byte[] _input = "Product description with some text that is a bit longer than usual\""u8.ToArray();

    // Needle set: bytes 0x00-0x1F, '"' and '\'.
    private static readonly SearchValues<byte> s_needles = SearchValues.Create(
        "\u0000\u0001\u0002\u0003\u0004\u0005\u0006\u0007\u0008\u0009\u000A\u000B"u8 +
        "\u000C\u000D\u000E\u000F\u0010\u0011\u0012\u0013\u0014\u0015\u0016\u0017\u0018"u8 +
        "\u0019\u001A\u001B\u001C\u001D\u001E\u001F"u8 + "\""u8 + "\\"u8);

    /// <summary>Searches the input buffer for the first byte contained in the needle set.</summary>
    /// <returns>The value returned by <c>IndexOfAny</c> for the input buffer.</returns>
    [Benchmark]
    public int Medium()
    {
        Span<byte> haystack = _input.AsSpan();
        return haystack.IndexOfAny(s_needles);
    }
}

@EgorBo
Copy link
Copy Markdown
Member

EgorBo commented Apr 9, 2026

@EgorBot -aws_amd -profiler

using System.Text;
using System.Text.Json;
using BenchmarkDotNet.Attributes;
using BenchmarkDotNet.Running;

// Entry point: passes the command-line arguments to the BenchmarkSwitcher built from the assembly that declares Benchmarks.
BenchmarkSwitcher.FromAssembly(typeof(Benchmarks).Assembly).Run(args);

/// <summary>
/// Benchmark that writes a mixed list of pre-built JSON strings and dictionaries into a
/// "Catalog" array through a per-thread cached <see cref="Utf8JsonWriter"/>.
/// </summary>
[MemoryDiagnoser]
public class Benchmarks
{
    // ── TokenSerialization fields ────────────────────────────────────────────
    // Filled by Setup(): every third entry is a JSON string, the others are dictionaries.
    private List<object> _tokenObjects;

    // Writer and backing stream are cached per thread and reused across invocations.
    [ThreadStatic] static Utf8JsonWriter t_writer;
    [ThreadStatic] static MemoryStream t_stream;

    /// <summary>Builds the 200-element token list consumed by <see cref="TokenSerialization"/>.</summary>
    [GlobalSetup]
    public void Setup()
    {
        // TokenSerialization
        _tokenObjects = new List<object>(200);
        for (int i = 0; i < 200; i++)
        {
            if (i % 3 == 0)
            {
                _tokenObjects.Add(GenerateRecordJson(1));
            }
            else
            {
                _tokenObjects.Add(new Dictionary<string, object>
                {
                    ["seq"] = i,
                    ["label"] = $"item_{i}",
                    ["blob"] = new byte[100]
                });
            }
        }
    }

    /// <summary>
    /// Builds a JSON object string for a "product" record whose <c>Fields</c> array is filled
    /// with generated field entries.
    /// </summary>
    /// <param name="targetSizeKb">
    /// Approximate target size in KiB. Entries are appended while the builder length (in chars)
    /// is below <c>targetSizeKb * 1024 - 512</c>.
    /// </param>
    /// <returns>The generated JSON text.</returns>
    private static string GenerateRecordJson(int targetSizeKb = 150)
    {
        var sb = new StringBuilder(targetSizeKb * 1024 + 512);
        sb.Append("{");
        sb.Append("\"TypeName\":\"product\",");
        sb.Append("\"CategoryCode\":1,");
        sb.Append("\"Label\":\"Product\",");
        sb.Append("\"IsAction\":false,");
        sb.Append("\"IsActionMember\":false,");
        sb.Append("\"IsTrackingEnabled\":true,");
        sb.Append("\"IsAvailableLocal\":true,");
        sb.Append("\"IsChildRecord\":false,");
        sb.Append("\"IsLinksEnabled\":true,");
        sb.Append("\"IsCustomRecord\":false,");
        sb.Append("\"PrimaryKeyField\":\"productid\",");
        sb.Append("\"PrimaryLabelField\":\"title\",");
        sb.Append("\"Fields\":[");
        int targetBytes = targetSizeKb * 1024;
        int fieldIndex = 0;
        bool firstField = true;
        while (sb.Length < targetBytes - 512)
        {
            // Separate entries with a comma, except before the first one.
            if (!firstField)
            {
                sb.Append(",");
            }
            firstField = false;
            sb.Append("{");
            sb.Append($"\"TypeName\":\"field_{fieldIndex}\",");
            sb.Append($"\"InternalName\":\"Field_{fieldIndex}\",");
            sb.Append("\"FieldType\":\"String\",");
            sb.Append($"\"Label\":\"Field {fieldIndex}\",");
            sb.Append("\"MaxSize\":100,");
            sb.Append("\"IsReadable\":true,");
            sb.Append("\"IsCreatable\":true,");
            sb.Append("\"IsUpdatable\":true,");
            sb.Append("\"IsTrackingEnabled\":false,");
            sb.Append("\"IsPrimaryKey\":false,");
            sb.Append("\"IsVirtual\":false,");
            sb.Append("\"Requirement\":\"None\"");
            sb.Append("}");
            fieldIndex++;
        }
        sb.Append("]");
        sb.Append("}");
        return sb.ToString();
    }

    /// <summary>
    /// Writes every token into a "Catalog" array: non-empty strings are emitted as raw JSON
    /// values, dictionaries as objects whose values go through <see cref="JsonSerializer"/>.
    /// </summary>
    /// <exception cref="InvalidOperationException">Nothing was written to the stream after flushing.</exception>
    [Benchmark]
    public void TokenSerialization()
    {
        // Reuse the per-thread stream, rewinding and truncating it for this invocation.
        var stream = t_stream ??= new MemoryStream(64 * 1024);
        stream.Position = 0;
        stream.SetLength(0);

        // Create the writer once per thread; afterwards just re-point it at the stream.
        var writer = t_writer;
        if (writer is null)
        {
            writer = new Utf8JsonWriter(stream, new JsonWriterOptions { SkipValidation = true });
            t_writer = writer;
        }
        else
        {
            writer.Reset(stream);
        }

        writer.WriteStartObject();
        writer.WriteStartArray("Catalog");
        foreach (var token in _tokenObjects)
        {
            if (token is string strToken)
            {
                if (!string.IsNullOrEmpty(strToken))
                {
                    writer.WriteRawValue(strToken);
                }
            }
            else if (token is Dictionary<string, object> dictToken)
            {
                writer.WriteStartObject();
                foreach (var kvp in dictToken)
                {
                    writer.WritePropertyName(kvp.Key);
                    JsonSerializer.Serialize(writer, kvp.Value);
                }
                writer.WriteEndObject();
            }
        }
        writer.WriteEndArray();
        writer.WriteEndObject();
        writer.Flush();

        // Sanity check that output was produced; a specific exception type replaces the bare Exception.
        if (stream.Length == 0)
        {
            throw new InvalidOperationException("unreachable");
        }
    }
}

@tannergooding tannergooding changed the title from "Try to optimize IndexOfAnyAsciiSearcher for finding the first index on Arm64" to "Optimize IndexOfAnyAsciiSearcher on Arm64" on Apr 9, 2026
Copilot AI review requested due to automatic review settings April 10, 2026 16:42
Copy link
Copy Markdown
Contributor

Copilot AI left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Pull request overview

Copilot reviewed 6 out of 6 changed files in this pull request and generated 6 comments.

Copilot AI review requested due to automatic review settings April 10, 2026 22:23
Copy link
Copy Markdown
Contributor

Copilot AI left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Pull request overview

Copilot reviewed 6 out of 6 changed files in this pull request and generated 4 comments.

@tannergooding tannergooding marked this pull request as ready for review April 11, 2026 00:27
@tannergooding
Copy link
Copy Markdown
Member Author

CC. @EgorBo

Faster on Arm64 across the board. On x64, the pattern can sometimes change codegen to use EVEX kmask registers instead, so perf can be slightly slower or faster depending on hardware. That would be something to handle separately, by avoiding kmask creation for V128/V256 in this scenario, if we care.

Copy link
Copy Markdown
Member

@EgorBo EgorBo left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I wonder if we could also optimize IndexOfWhereAllBitsSet by just flipping all bits and searching for all zeroes using normal IndexOf. Not fully optimal but better than nothing?

Although, let's see if I can optimize it via #126790

@tannergooding tannergooding enabled auto-merge (squash) April 11, 2026 16:31
Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
@tannergooding
Copy link
Copy Markdown
Member Author

GitHub failed to pick up the applied typo suggestions even though it committed them. Had to force push to get it to pick them up so the merge was actually possible; no changes.

@tannergooding
Copy link
Copy Markdown
Member Author

/ba-g machine timeout and known test errors (#126788, #126641)

Only change since last run was typo fixes in comments

@tannergooding tannergooding merged commit 2ebe5d9 into dotnet:main Apr 12, 2026
136 of 152 checks passed
@tannergooding tannergooding deleted the ascii-search branch April 12, 2026 01:50
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Projects

None yet

Development

Successfully merging this pull request may close these issues.

3 participants