Skip to content

Commit c8bda3e

Browse files
Copilotadamsitnik
andauthored
Fix partial UTF-32 BOM across reads; restore non-blocking mode in finally; add test
Agent-Logs-Url: https://github.com/dotnet/runtime/sessions/8ed9ed48-a800-4d97-ac90-68590832e24e Co-authored-by: adamsitnik <6011991+adamsitnik@users.noreply.github.com>
1 parent d19dd3e commit c8bda3e

4 files changed

Lines changed: 145 additions & 11 deletions

File tree

src/libraries/System.Diagnostics.Process/src/System/Diagnostics/Process.Multiplexing.Unix.cs

Lines changed: 19 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,14 @@ private IEnumerable<ProcessOutputLine> ReadPipesToLines(
5959
bool outputDone = false, errorDone = false;
6060
bool outputPreambleChecked = false, errorPreambleChecked = false;
6161

62+
// Four-byte BOM accumulation buffers: bytes are gathered here until we have
63+
// enough to unambiguously detect the encoding (needed to distinguish
64+
// UTF-32 LE BOM FF FE 00 00 from a UTF-16 LE BOM FF FE when the first read
65+
// delivers fewer than four bytes).
66+
byte[] outputBomAccum = new byte[4];
67+
byte[] errorBomAccum = new byte[4];
68+
int outputBomAccumLen = 0, errorBomAccumLen = 0;
69+
6270
List<ProcessOutputLine> lines = new();
6371

6472
while (!outputDone || !errorDone)
@@ -81,13 +89,15 @@ private IEnumerable<ProcessOutputLine> ReadPipesToLines(
8189
{
8290
HandlePipeLineRead(currentHandle, ref errorDecoder, ref errorEncoding, byteBuffer,
8391
ref errorCharBuffer, ref errorCharStart, ref errorCharEnd,
84-
ref errorPreambleChecked, ref errorDone, standardError: true, lines);
92+
ref errorPreambleChecked, ref errorDone, standardError: true, lines,
93+
errorBomAccum, ref errorBomAccumLen);
8594
}
8695
else
8796
{
8897
HandlePipeLineRead(currentHandle, ref outputDecoder, ref outputEncoding, byteBuffer,
8998
ref outputCharBuffer, ref outputCharStart, ref outputCharEnd,
90-
ref outputPreambleChecked, ref outputDone, standardError: false, lines);
99+
ref outputPreambleChecked, ref outputDone, standardError: false, lines,
100+
outputBomAccum, ref outputBomAccumLen);
91101
}
92102
}
93103

@@ -104,11 +114,13 @@ private IEnumerable<ProcessOutputLine> ReadPipesToLines(
104114
{
105115
if (outputRefAdded)
106116
{
117+
Interop.Sys.Fcntl.DangerousSetIsNonBlocking(outputHandle.DangerousGetHandle().ToInt32(), 0);
107118
outputHandle.DangerousRelease();
108119
}
109120

110121
if (errorRefAdded)
111122
{
123+
Interop.Sys.Fcntl.DangerousSetIsNonBlocking(errorHandle.DangerousGetHandle().ToInt32(), 0);
112124
errorHandle.DangerousRelease();
113125
}
114126

@@ -217,15 +229,18 @@ private static void HandlePipeLineRead(
217229
ref bool preambleChecked,
218230
ref bool done,
219231
bool standardError,
220-
List<ProcessOutputLine> lines)
232+
List<ProcessOutputLine> lines,
233+
byte[] bomAccum,
234+
ref int bomAccumLen)
221235
{
222236
int bytesRead = ReadNonBlocking(handle, byteBuffer, 0);
223237
if (bytesRead > 0)
224238
{
225-
DecodeBytesAndParseLines(ref decoder, ref encoding, byteBuffer, bytesRead, ref charBuffer, ref charStart, ref charEnd, ref preambleChecked, standardError, lines);
239+
DecodeBytesAndParseLines(ref decoder, ref encoding, byteBuffer, bytesRead, ref charBuffer, ref charStart, ref charEnd, ref preambleChecked, bomAccum, ref bomAccumLen, standardError, lines);
226240
}
227241
else if (bytesRead == 0)
228242
{
243+
FlushBomAccumulation(ref decoder, ref encoding, bomAccum, bomAccumLen, ref preambleChecked, ref charBuffer, ref charStart, ref charEnd, standardError, lines);
229244
done = FlushDecoderAndEmitRemainingChars(decoder, ref charBuffer, ref charStart, ref charEnd, standardError, lines);
230245
}
231246
// bytesRead < 0 means EAGAIN — nothing available yet, let poll retry.

src/libraries/System.Diagnostics.Process/src/System/Diagnostics/Process.Multiplexing.Windows.cs

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,14 @@ private IEnumerable<ProcessOutputLine> ReadPipesToLines(
6363
int errorCharStart = 0, errorCharEnd = 0;
6464
bool outputPreambleChecked = false, errorPreambleChecked = false;
6565

66+
// Four-byte BOM accumulation buffers: bytes are gathered here until we have
67+
// enough to unambiguously detect the encoding (needed to distinguish
68+
// UTF-32 LE BOM FF FE 00 00 from a UTF-16 LE BOM FF FE when the first read
69+
// delivers fewer than four bytes).
70+
byte[] outputBomAccum = new byte[4];
71+
byte[] errorBomAccum = new byte[4];
72+
int outputBomAccumLen = 0, errorBomAccumLen = 0;
73+
6674
unsafe
6775
{
6876
outputDone = !QueueRead(outputHandle, outputPin.GetAddressOfArrayData(),
@@ -101,12 +109,12 @@ private IEnumerable<ProcessOutputLine> ReadPipesToLines(
101109
if (isError)
102110
{
103111
DecodeBytesAndParseLines(ref errorDecoder, ref errorEncoding, errorByteBuffer, bytesRead, ref errorCharBuffer, ref errorCharStart,
104-
ref errorCharEnd, ref errorPreambleChecked, isError, lines);
112+
ref errorCharEnd, ref errorPreambleChecked, errorBomAccum, ref errorBomAccumLen, isError, lines);
105113
}
106114
else
107115
{
108116
DecodeBytesAndParseLines(ref outputDecoder, ref outputEncoding, outputByteBuffer, bytesRead, ref outputCharBuffer, ref outputCharStart,
109-
ref outputCharEnd, ref outputPreambleChecked, isError, lines);
117+
ref outputCharEnd, ref outputPreambleChecked, outputBomAccum, ref outputBomAccumLen, isError, lines);
110118
}
111119

112120
unsafe
@@ -131,10 +139,12 @@ private IEnumerable<ProcessOutputLine> ReadPipesToLines(
131139
{
132140
if (isError)
133141
{
142+
FlushBomAccumulation(ref errorDecoder, ref errorEncoding, errorBomAccum, errorBomAccumLen, ref errorPreambleChecked, ref errorCharBuffer, ref errorCharStart, ref errorCharEnd, isError, lines);
134143
errorDone = FlushDecoderAndEmitRemainingChars(errorDecoder, ref errorCharBuffer, ref errorCharStart, ref errorCharEnd, isError, lines);
135144
}
136145
else
137146
{
147+
FlushBomAccumulation(ref outputDecoder, ref outputEncoding, outputBomAccum, outputBomAccumLen, ref outputPreambleChecked, ref outputCharBuffer, ref outputCharStart, ref outputCharEnd, isError, lines);
138148
outputDone = FlushDecoderAndEmitRemainingChars(outputDecoder, ref outputCharBuffer, ref outputCharStart, ref outputCharEnd, isError, lines);
139149
}
140150

src/libraries/System.Diagnostics.Process/src/System/Diagnostics/Process.Multiplexing.cs

Lines changed: 79 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -342,19 +342,94 @@ private static void EmitRemainingCharsAsLine(
342342
}
343343
}
344344

345-
private static void DecodeBytesAndParseLines(ref Decoder decoder, ref Encoding encoding, byte[] byteBuffer, int bytesRead, ref char[] charBuffer, ref int charStart, ref int charEnd, ref bool preambleChecked, bool standardError, List<ProcessOutputLine> lines)
345+
/// <summary>
346+
/// Decodes bytes from <paramref name="byteBuffer"/> (and any previously accumulated BOM bytes)
347+
/// into <paramref name="charBuffer"/>, then parses complete lines. On the first call(s),
348+
/// bytes are accumulated into <paramref name="bomAccum"/> until four are available so that
349+
/// <see cref="SkipPreambleOrDetectEncoding"/> can unambiguously distinguish a UTF-32 LE BOM
350+
/// (<c>FF FE 00 00</c>) from a UTF-16 LE BOM (<c>FF FE</c>) even when the first OS read
351+
/// delivers only two bytes.
352+
/// </summary>
353+
private static void DecodeBytesAndParseLines(
354+
ref Decoder decoder, ref Encoding encoding,
355+
byte[] byteBuffer, int bytesRead,
356+
ref char[] charBuffer, ref int charStart, ref int charEnd,
357+
ref bool preambleChecked,
358+
byte[] bomAccum, ref int bomAccumLen,
359+
bool standardError, List<ProcessOutputLine> lines)
346360
{
347-
int byteOffset = 0;
348361
if (!preambleChecked)
349362
{
363+
// Accumulate initial bytes into bomAccum until we have 4, enough to unambiguously
364+
// detect all supported BOMs (UTF-8 = 3 bytes, UTF-16 LE/BE = 2 bytes, but FF FE
365+
// could be the start of a 4-byte UTF-32 LE BOM, so we need all 4 before deciding).
366+
int prevBomAccumLen = bomAccumLen;
367+
if (bomAccumLen < 4 && bytesRead > 0)
368+
{
369+
int toCopy = Math.Min(4 - bomAccumLen, bytesRead);
370+
byteBuffer.AsSpan(0, toCopy).CopyTo(bomAccum.AsSpan(bomAccumLen));
371+
bomAccumLen += toCopy;
372+
}
373+
374+
if (bomAccumLen < 4)
375+
{
376+
// Not enough bytes yet for a definitive BOM/preamble check. Defer to the next read.
377+
return;
378+
}
379+
350380
preambleChecked = true;
351-
byteOffset = SkipPreambleOrDetectEncoding(byteBuffer, bytesRead, ref encoding, ref decoder);
381+
int bomSkip = SkipPreambleOrDetectEncoding(bomAccum, bomAccumLen, ref encoding, ref decoder);
382+
383+
// Decode the accumulated BOM bytes (minus the BOM prefix to skip).
384+
int bomToDecode = bomAccumLen - bomSkip;
385+
if (bomToDecode > 0)
386+
{
387+
DecodeAndAppendChars(decoder, bomAccum, bomSkip, bomToDecode, flush: false, ref charBuffer, ref charStart, ref charEnd);
388+
}
389+
390+
// Decode remaining bytes from the current read (those not consumed into bomAccum).
391+
int consumedFromByteBuffer = bomAccumLen - prevBomAccumLen;
392+
int remaining = bytesRead - consumedFromByteBuffer;
393+
if (remaining > 0)
394+
{
395+
DecodeAndAppendChars(decoder, byteBuffer, consumedFromByteBuffer, remaining, flush: false, ref charBuffer, ref charStart, ref charEnd);
396+
}
397+
398+
ParseLinesFromCharBuffer(charBuffer, ref charStart, charEnd, standardError, lines);
399+
return;
352400
}
353401

354-
DecodeAndAppendChars(decoder, byteBuffer, byteOffset, bytesRead - byteOffset, flush: false, ref charBuffer, ref charStart, ref charEnd);
402+
DecodeAndAppendChars(decoder, byteBuffer, 0, bytesRead, flush: false, ref charBuffer, ref charStart, ref charEnd);
355403
ParseLinesFromCharBuffer(charBuffer, ref charStart, charEnd, standardError, lines);
356404
}
357405

406+
/// <summary>
407+
/// Resolves any bytes accumulated in <paramref name="bomAccum"/> when the pipe closes
408+
/// before four bytes have been gathered. The accumulated bytes are BOM-checked and decoded
409+
/// into <paramref name="charBuffer"/>; any complete lines are added to
410+
/// <paramref name="lines"/>. This must be called before
411+
/// <see cref="FlushDecoderAndEmitRemainingChars"/> at EOF.
412+
/// </summary>
413+
private static void FlushBomAccumulation(
414+
ref Decoder decoder, ref Encoding encoding,
415+
byte[] bomAccum, int bomAccumLen,
416+
ref bool preambleChecked,
417+
ref char[] charBuffer, ref int charStart, ref int charEnd,
418+
bool standardError, List<ProcessOutputLine> lines)
419+
{
420+
if (!preambleChecked && bomAccumLen > 0)
421+
{
422+
preambleChecked = true;
423+
int bomSkip = SkipPreambleOrDetectEncoding(bomAccum, bomAccumLen, ref encoding, ref decoder);
424+
int toDecode = bomAccumLen - bomSkip;
425+
if (toDecode > 0)
426+
{
427+
DecodeAndAppendChars(decoder, bomAccum, bomSkip, toDecode, flush: false, ref charBuffer, ref charStart, ref charEnd);
428+
ParseLinesFromCharBuffer(charBuffer, ref charStart, charEnd, standardError, lines);
429+
}
430+
}
431+
}
432+
358433
private static bool FlushDecoderAndEmitRemainingChars(Decoder decoder, ref char[] charBuffer, ref int charStart, ref int charEnd, bool standardError, List<ProcessOutputLine> lines)
359434
{
360435
DecodeAndAppendChars(decoder, Array.Empty<byte>(), 0, 0, flush: true, ref charBuffer, ref charStart, ref charEnd);

src/libraries/System.Diagnostics.Process/tests/ProcessStreamingTests.cs

Lines changed: 35 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -588,7 +588,41 @@ public async Task ReadAllLines_HandlesMixedLineEndings(bool useAsync)
588588
Assert.True(process.WaitForExit(WaitInMS));
589589
}
590590

591-
private Process StartLinePrintingProcess(string stdOutText, string stdErrText)
591+
[ConditionalTheory(typeof(RemoteExecutor), nameof(RemoteExecutor.IsSupported))]
592+
[InlineData(true)]
593+
[InlineData(false)]
594+
public async Task ReadAllLines_HandlesPartialBomAcrossReads(bool useAsync)
595+
{
596+
// Write a UTF-32 LE BOM (FF FE 00 00) as two separate flushed writes so the
597+
// first read can deliver only the first two BOM bytes. Without BOM accumulation,
598+
// FF FE would be misclassified as a UTF-16 LE BOM and the content would be
599+
// decoded with the wrong encoding.
600+
using Process process = CreateProcess(static () =>
601+
{
602+
Stream stdout = Console.OpenStandardOutput();
603+
stdout.Write(new byte[] { 0xFF, 0xFE }); // First half of UTF-32 LE BOM
604+
stdout.Flush();
605+
stdout.Write(new byte[] { 0x00, 0x00 }); // Second half of BOM
606+
stdout.Write(Encoding.UTF32.GetBytes("hello\n")); // Content (no BOM from GetBytes)
607+
stdout.Flush();
608+
return RemoteExecutor.SuccessExitCode;
609+
});
610+
process.StartInfo.RedirectStandardOutput = true;
611+
process.StartInfo.RedirectStandardError = true;
612+
process.StartInfo.StandardOutputEncoding = Encoding.UTF32;
613+
process.Start();
614+
615+
List<string> capturedOutput = new();
616+
List<string> capturedError = new();
617+
618+
await EnumerateLines(process, useAsync, capturedOutput, capturedError);
619+
620+
Assert.Equal(new[] { "hello" }, capturedOutput);
621+
Assert.Empty(capturedError);
622+
Assert.True(process.WaitForExit(WaitInMS));
623+
}
624+
625+
592626
{
593627
Process process = CreateProcess((stdOut, stdErr) =>
594628
{

0 commit comments

Comments
 (0)