-
Notifications
You must be signed in to change notification settings - Fork 5.4k
Description
I noticed that there is a large difference in parser performance between Utf8Parser and the UTF-16 parsers for Guid and TimeSpan.
Code is in the details at the end.
Timespan
For format 'c' of TimeSpan, the performance of the UTF-16 parser is pretty much equal to that of the UTF-8 parser; for the other two formats it's quite different.
Guid
Guid suffers from similar problems.
Looking into Guid.cs, I see that the checks for invalid symbols are fairly inefficient:
- Format 'N': Three full string scans (no dashes, no braces, and no parentheses)
- Format 'D': Two full string scans (no braces and no parentheses) plus a scan of a few characters up to the first dash
- Format 'B' and 'P' behave 'best' as only one symbol is missing and one symbol is the first character.
I guess that's visible in the benchmark below too, but it does not appear to be the bulk of the difference (as 'B' and 'P' are still twice as slow as the UTF8 version).
If I didn't mess up the benchmarks too much (again, both @ahsonkhan and @stephentoub fixed my bad benchmark last time), it might be useful to port the utf8 code to the utf16 code base.
BenchmarkDotNet=v0.10.14, OS=Windows 10.0.17134
Intel Core i7-4790K CPU 4.00GHz (Haswell), 1 CPU, 8 logical and 4 physical cores
Frequency=3906246 Hz, Resolution=256.0003 ns, Timer=TSC
.NET Core SDK=2.1.300
[Host] : .NET Core 2.1.0 (CoreCLR 4.6.26515.07, CoreFX 4.6.26515.06), 64bit RyuJIT
DefaultJob : .NET Core 2.1.0 (CoreCLR 4.6.26515.07, CoreFX 4.6.26515.06), 64bit RyuJIT
| Method | Mean | Error | StdDev |
|---|---|---|---|
| UTF16_Guid_TryParse_Format_D | 215.37 ns | 1.5212 ns | 1.2702 ns |
| UTF8_Guid_TryParse_Format_D | 86.62 ns | 0.3426 ns | 0.3205 ns |
| UTF16_Guid_TryParse_Format_N | 281.59 ns | 4.3998 ns | 4.1156 ns |
| UTF8_Guid_TryParse_Format_N | 69.79 ns | 0.1989 ns | 0.1763 ns |
| UTF16_Guid_TryParse_Format_B | 202.41 ns | 1.8355 ns | 1.6271 ns |
| UTF8_Guid_TryParse_Format_B | 90.40 ns | 0.1929 ns | 0.1710 ns |
| UTF16_Guid_TryParse_Format_P | 203.18 ns | 0.1306 ns | 0.1158 ns |
| UTF8_Guid_TryParse_Format_P | 90.31 ns | 0.4293 ns | 0.4015 ns |
| UTF16_TimeSpan_TryParse_Format_c | 113.62 ns | 0.0920 ns | 0.0816 ns |
| UTF8_TimeSpan_TryParse_Format_c | 96.62 ns | 0.1512 ns | 0.1415 ns |
| UTF16_TimeSpan_TryParse_Format_G | 934.01 ns | 8.0425 ns | 7.5230 ns |
| UTF8_TimeSpan_TryParse_Format_G | 57.75 ns | 0.0955 ns | 0.0893 ns |
| UTF16_TimeSpan_TryParse_Format_g | 923.88 ns | 0.9195 ns | 0.7678 ns |
| UTF8_TimeSpan_TryParse_Format_g | 95.00 ns | 0.1921 ns | 0.1797 ns |
Details
/// <summary>
/// Benchmarks that parse the same <see cref="Guid"/> and <see cref="TimeSpan"/> text twice:
/// once from a <see cref="string"/> via <c>TryParseExact</c>, and once from the UTF-8 bytes
/// of that string via <see cref="Utf8Parser"/>.
/// </summary>
/// <remarks>
/// Every benchmark ignores the boolean returned by the parse call and returns only the
/// value written to the <c>out</c> parameter.
/// </remarks>
public class ParserBenchmark
{
    // Inputs are built once, in declaration order: value -> formatted text -> UTF-8 bytes.
    private static readonly Guid s_guid = Guid.NewGuid();

    private static readonly string s_guidTextD = s_guid.ToString("D");
    private static readonly string s_guidTextN = s_guid.ToString("N");
    private static readonly string s_guidTextB = s_guid.ToString("B");
    private static readonly string s_guidTextP = s_guid.ToString("P");

    private static readonly byte[] s_guidUtf8D = ToUtf8(s_guidTextD);
    private static readonly byte[] s_guidUtf8N = ToUtf8(s_guidTextN);
    private static readonly byte[] s_guidUtf8B = ToUtf8(s_guidTextB);
    private static readonly byte[] s_guidUtf8P = ToUtf8(s_guidTextP);

    private static readonly TimeSpan s_timeSpan = TimeSpan.MinValue;

    private static readonly string s_timeSpanTextLowerC = s_timeSpan.ToString("c", CultureInfo.InvariantCulture);
    private static readonly string s_timeSpanTextUpperG = s_timeSpan.ToString("G", CultureInfo.InvariantCulture);
    private static readonly string s_timeSpanTextLowerG = s_timeSpan.ToString("g", CultureInfo.InvariantCulture);

    private static readonly byte[] s_timeSpanUtf8LowerC = ToUtf8(s_timeSpanTextLowerC);
    private static readonly byte[] s_timeSpanUtf8UpperG = ToUtf8(s_timeSpanTextUpperG);
    private static readonly byte[] s_timeSpanUtf8LowerG = ToUtf8(s_timeSpanTextLowerG);

    /// <summary>Encodes <paramref name="text"/> as UTF-8 bytes.</summary>
    private static byte[] ToUtf8(string text)
    {
        return Encoding.UTF8.GetBytes(text);
    }

    // ---------------------------------------------------------------- Guid, format "D"

    /// <summary>Parses the "D"-formatted Guid string with <c>Guid.TryParseExact</c>.</summary>
    [Benchmark]
    public Guid UTF16_Guid_TryParse_Format_D()
    {
        Guid parsed;
        Guid.TryParseExact(s_guidTextD, "D", out parsed);
        return parsed;
    }

    /// <summary>Parses the UTF-8 bytes of the "D"-formatted Guid with <c>Utf8Parser.TryParse</c>.</summary>
    [Benchmark]
    public Guid UTF8_Guid_TryParse_Format_D()
    {
        Utf8Parser.TryParse(s_guidUtf8D, out Guid parsed, out int _, standardFormat: 'D');
        return parsed;
    }

    // ---------------------------------------------------------------- Guid, format "N"

    /// <summary>Parses the "N"-formatted Guid string with <c>Guid.TryParseExact</c>.</summary>
    [Benchmark]
    public Guid UTF16_Guid_TryParse_Format_N()
    {
        Guid parsed;
        Guid.TryParseExact(s_guidTextN, "N", out parsed);
        return parsed;
    }

    /// <summary>Parses the UTF-8 bytes of the "N"-formatted Guid with <c>Utf8Parser.TryParse</c>.</summary>
    [Benchmark]
    public Guid UTF8_Guid_TryParse_Format_N()
    {
        Utf8Parser.TryParse(s_guidUtf8N, out Guid parsed, out int _, standardFormat: 'N');
        return parsed;
    }

    // ---------------------------------------------------------------- Guid, format "B"

    /// <summary>Parses the "B"-formatted Guid string with <c>Guid.TryParseExact</c>.</summary>
    [Benchmark]
    public Guid UTF16_Guid_TryParse_Format_B()
    {
        Guid parsed;
        Guid.TryParseExact(s_guidTextB, "B", out parsed);
        return parsed;
    }

    /// <summary>Parses the UTF-8 bytes of the "B"-formatted Guid with <c>Utf8Parser.TryParse</c>.</summary>
    [Benchmark]
    public Guid UTF8_Guid_TryParse_Format_B()
    {
        Utf8Parser.TryParse(s_guidUtf8B, out Guid parsed, out int _, standardFormat: 'B');
        return parsed;
    }

    // ---------------------------------------------------------------- Guid, format "P"

    /// <summary>Parses the "P"-formatted Guid string with <c>Guid.TryParseExact</c>.</summary>
    [Benchmark]
    public Guid UTF16_Guid_TryParse_Format_P()
    {
        Guid parsed;
        Guid.TryParseExact(s_guidTextP, "P", out parsed);
        return parsed;
    }

    /// <summary>Parses the UTF-8 bytes of the "P"-formatted Guid with <c>Utf8Parser.TryParse</c>.</summary>
    [Benchmark]
    public Guid UTF8_Guid_TryParse_Format_P()
    {
        Utf8Parser.TryParse(s_guidUtf8P, out Guid parsed, out int _, standardFormat: 'P');
        return parsed;
    }

    // ---------------------------------------------------------------- TimeSpan, format "c"

    /// <summary>Parses the "c"-formatted TimeSpan string with <c>TimeSpan.TryParseExact</c> (invariant culture).</summary>
    [Benchmark]
    public TimeSpan UTF16_TimeSpan_TryParse_Format_c()
    {
        TimeSpan parsed;
        TimeSpan.TryParseExact(s_timeSpanTextLowerC, "c", CultureInfo.InvariantCulture, out parsed);
        return parsed;
    }

    /// <summary>Parses the UTF-8 bytes of the "c"-formatted TimeSpan with <c>Utf8Parser.TryParse</c>.</summary>
    [Benchmark]
    public TimeSpan UTF8_TimeSpan_TryParse_Format_c()
    {
        Utf8Parser.TryParse(s_timeSpanUtf8LowerC, out TimeSpan parsed, out int _, standardFormat: 'c');
        return parsed;
    }

    // ---------------------------------------------------------------- TimeSpan, format "G"

    /// <summary>Parses the "G"-formatted TimeSpan string with <c>TimeSpan.TryParseExact</c> (invariant culture).</summary>
    [Benchmark]
    public TimeSpan UTF16_TimeSpan_TryParse_Format_G()
    {
        TimeSpan parsed;
        TimeSpan.TryParseExact(s_timeSpanTextUpperG, "G", CultureInfo.InvariantCulture, out parsed);
        return parsed;
    }

    /// <summary>Parses the UTF-8 bytes of the "G"-formatted TimeSpan with <c>Utf8Parser.TryParse</c>.</summary>
    [Benchmark]
    public TimeSpan UTF8_TimeSpan_TryParse_Format_G()
    {
        Utf8Parser.TryParse(s_timeSpanUtf8UpperG, out TimeSpan parsed, out int _, standardFormat: 'G');
        return parsed;
    }

    // ---------------------------------------------------------------- TimeSpan, format "g"

    /// <summary>Parses the "g"-formatted TimeSpan string with <c>TimeSpan.TryParseExact</c> (invariant culture).</summary>
    [Benchmark]
    public TimeSpan UTF16_TimeSpan_TryParse_Format_g()
    {
        TimeSpan parsed;
        TimeSpan.TryParseExact(s_timeSpanTextLowerG, "g", CultureInfo.InvariantCulture, out parsed);
        return parsed;
    }

    /// <summary>Parses the UTF-8 bytes of the "g"-formatted TimeSpan with <c>Utf8Parser.TryParse</c>.</summary>
    [Benchmark]
    public TimeSpan UTF8_TimeSpan_TryParse_Format_g()
    {
        Utf8Parser.TryParse(s_timeSpanUtf8LowerG, out TimeSpan parsed, out int _, standardFormat: 'g');
        return parsed;
    }
}