Skip to content

Commit bfc0dcb

Browse files
authored
GH-41302: [C#][Integration] Fix writing list and binary arrays with zero length offsets to IPC format (#41303)
### Rationale for this change

Fixes the integration test failures caused by #41230.

### What changes are included in this PR?

Only try to access the offset values if the array length is non-zero when writing list and binary arrays to IPC format.

### Are these changes tested?

Yes, I've manually run the integration tests with C# and Java to verify they pass (when also including the changes from #41264), and also added new unit tests for this.

### Are there any user-facing changes?

This may also be a bug that affects users but it isn't in a released version.

* GitHub Issue: #41302

Authored-by: Adam Reeve <adreeve@gmail.com>
Signed-off-by: Curt Hagenlocher <curt@hagenlocher.org>
1 parent 6cebede commit bfc0dcb

File tree

2 files changed

+66
-4
lines changed

2 files changed

+66
-4
lines changed

csharp/src/Apache.Arrow/Ipc/ArrowStreamWriter.cs

Lines changed: 14 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -165,8 +165,13 @@ public void Visit(ListArray array)
165165
_buffers.Add(CreateBitmapBuffer(array.NullBitmapBuffer, array.Offset, array.Length));
166166
_buffers.Add(CreateBuffer(GetZeroBasedValueOffsets(array.ValueOffsetsBuffer, array.Offset, array.Length)));
167167

168-
int valuesOffset = array.ValueOffsets[0];
169-
int valuesLength = array.ValueOffsets[array.Length] - valuesOffset;
168+
int valuesOffset = 0;
169+
int valuesLength = 0;
170+
if (array.Length > 0)
171+
{
172+
valuesOffset = array.ValueOffsets[0];
173+
valuesLength = array.ValueOffsets[array.Length] - valuesOffset;
174+
}
170175

171176
var values = array.Values;
172177
if (valuesOffset > 0 || valuesLength < values.Length)
@@ -206,8 +211,13 @@ public void Visit(BinaryArray array)
206211
_buffers.Add(CreateBitmapBuffer(array.NullBitmapBuffer, array.Offset, array.Length));
207212
_buffers.Add(CreateBuffer(GetZeroBasedValueOffsets(array.ValueOffsetsBuffer, array.Offset, array.Length)));
208213

209-
int valuesOffset = array.ValueOffsets[0];
210-
int valuesLength = array.ValueOffsets[array.Length] - valuesOffset;
214+
int valuesOffset = 0;
215+
int valuesLength = 0;
216+
if (array.Length > 0)
217+
{
218+
valuesOffset = array.ValueOffsets[0];
219+
valuesLength = array.ValueOffsets[array.Length] - valuesOffset;
220+
}
211221

212222
_buffers.Add(CreateSlicedBuffer<byte>(array.ValueBuffer, valuesOffset, valuesLength));
213223
}

csharp/test/Apache.Arrow.Tests/ArrowFileWriterTests.cs

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -193,5 +193,57 @@ public async Task WritesEmptyFileAsync()
193193
Assert.Null(readBatch);
194194
SchemaComparer.Compare(originalBatch.Schema, reader.Schema);
195195
}
196+
197+
[Fact]
198+
public async Task WriteBinaryArrayWithEmptyOffsets()
199+
{
200+
// Empty binary arrays generated by the C# builder have a single offset,
201+
// but some implementations may produce an empty offsets buffer.
202+
203+
var array = new BinaryArray(
204+
new BinaryType(),
205+
length: 0,
206+
valueOffsetsBuffer: ArrowBuffer.Empty,
207+
dataBuffer: ArrowBuffer.Empty,
208+
nullBitmapBuffer: ArrowBuffer.Empty,
209+
nullCount: 0);
210+
211+
var recordBatch = new RecordBatch.Builder().Append("x", true, array).Build();
212+
213+
var stream = new MemoryStream();
214+
var writer = new ArrowFileWriter(stream, recordBatch.Schema, leaveOpen: true);
215+
216+
await writer.WriteRecordBatchAsync(recordBatch);
217+
await writer.WriteEndAsync();
218+
219+
stream.Position = 0;
220+
221+
await ValidateRecordBatchFile(stream, recordBatch, strictCompare: false);
222+
}
223+
224+
[Fact]
225+
public async Task WriteListArrayWithEmptyOffsets()
226+
{
227+
var values = new Int32Array.Builder().Build();
228+
var array = new ListArray(
229+
new ListType(new Int32Type()),
230+
length: 0,
231+
valueOffsetsBuffer: ArrowBuffer.Empty,
232+
values: values,
233+
nullBitmapBuffer: ArrowBuffer.Empty,
234+
nullCount: 0);
235+
236+
var recordBatch = new RecordBatch.Builder().Append("x", true, array).Build();
237+
238+
var stream = new MemoryStream();
239+
var writer = new ArrowFileWriter(stream, recordBatch.Schema, leaveOpen: true);
240+
241+
await writer.WriteRecordBatchAsync(recordBatch);
242+
await writer.WriteEndAsync();
243+
244+
stream.Position = 0;
245+
246+
await ValidateRecordBatchFile(stream, recordBatch, strictCompare: false);
247+
}
196248
}
197249
}

0 commit comments

Comments
 (0)