Describe the bug, including details regarding any error messages, version, and platform.
Reading a nullable column that was written with delta binary packed encoding panics when the column contains more rows than the reader's batch size.
To reproduce:
// Reproduction: write 10 values into a nullable int64 column using
// DeltaBinaryPacked encoding (dictionary disabled), then read them back
// through a pqarrow record reader whose batch size (5) is smaller than the
// number of rows written.
t.Run("test", func(t *testing.T) {
	size := 10
	buf := new(bytes.Buffer)
	mem := memory.NewGoAllocator()

	// Schema with a single nullable int64 column.
	fields := []arrow.Field{
		{Name: "int64", Type: arrow.PrimitiveTypes.Int64, Nullable: true},
	}
	schema := arrow.NewSchema(fields, nil)

	// Build one record holding the values 0..size-1 (no nulls are appended).
	b := array.NewRecordBuilder(mem, schema)
	defer b.Release()
	for i := 0; i < size; i++ {
		b.Field(0).(*array.Int64Builder).Append(int64(i))
	}
	rec := b.NewRecord()
	defer rec.Release()

	// Write the record to an in-memory Parquet file.
	props := parquet.NewWriterProperties(
		parquet.WithCompression(compress.Codecs.Zstd),
		parquet.WithCompressionLevel(3),
		parquet.WithDictionaryDefault(false),
		parquet.WithEncoding(parquet.Encodings.DeltaBinaryPacked))
	writerProps := pqarrow.DefaultWriterProps()
	pw, err := pqarrow.NewFileWriter(schema, buf, props, writerProps)
	assert.NoError(t, err)
	// Check the write and close errors so that a write-side failure is not
	// mistaken for the read-side problem this test is about.
	assert.NoError(t, pw.Write(rec))
	assert.NoError(t, pw.Close())

	// Read the data back with a batch size smaller than the row count.
	reader, err := file.NewParquetReader(bytes.NewReader(buf.Bytes()))
	assert.NoError(t, err)
	defer reader.Close()
	pr, err := pqarrow.NewFileReader(reader, pqarrow.ArrowReadProperties{BatchSize: 5}, memory.DefaultAllocator)
	assert.NoError(t, err)
	rr, err := pr.GetRecordReader(context.Background(), nil, nil)
	assert.NoError(t, err)

	totalRows := 0
	for rr.Next() {
		batch := rr.Record()
		rows := int(batch.NumRows())
		col := batch.Column(0).(*array.Int64)
		for i := 0; i < rows; i++ {
			// Values were written as 0..size-1, so the expected value is the
			// row's position across all batches read so far.
			assert.Equal(t, int64(totalRows+i), col.Value(i))
		}
		totalRows += rows
	}
	if totalRows != size {
		t.Fatalf("Expected %d rows, but got %d rows", size, totalRows)
	}
})
This causes the following error:
panic: runtime error: slice bounds out of range [4:0] [recovered]
panic: runtime error: slice bounds out of range [4:0]
goroutine 178 [running]:
testing.tRunner.func1.2({0x103f56e00, 0x14000aa40a8})
/opt/homebrew/Cellar/go@1.21/1.21.11/libexec/src/testing/testing.go:1545 +0x1c4
testing.tRunner.func1()
/opt/homebrew/Cellar/go@1.21/1.21.11/libexec/src/testing/testing.go:1548 +0x360
panic({0x103f56e00?, 0x14000aa40a8?})
/opt/homebrew/Cellar/go@1.21/1.21.11/libexec/src/runtime/panic.go:914 +0x218
github.com/apache/arrow/go/v12/parquet/internal/encoding.(*DeltaBitPackInt64Decoder).Decode(0x140008af618, {0x140005e8680?, 0x0?, 0x0?})
github.com/apache/arrow/go/v12@v12.0.1/parquet/internal/encoding/delta_bit_packing.go:273 +0x240
github.com/apache/arrow/go/v12/parquet/internal/encoding.DeltaBitPackInt64Decoder.DecodeSpaced({0x1400001f1e0, {0x0, 0x0, 0x0}}, {0x140005e8680, 0x5, 0x8}, 0x0, {0x140005e8700, 0x1, ...}, ...)
github.com/apache/arrow/go/v12@v12.0.1/parquet/internal/encoding/delta_bit_packing.go:291 +0x74
github.com/apache/arrow/go/v12/parquet/file.(*primitiveRecordReader).ReadValuesSpaced(0x140005e8380?, 0x5, 0x20?)
github.com/apache/arrow/go/v12@v12.0.1/parquet/file/record_reader.go:284 +0x2e4
github.com/apache/arrow/go/v12/parquet/file.(*recordReader).ReadRecordData(0x14000b8a9c0, 0x5)
github.com/apache/arrow/go/v12@v12.0.1/parquet/file/record_reader.go:548 +0x288
github.com/apache/arrow/go/v12/parquet/file.(*recordReader).ReadRecords(0x14000b8a9c0, 0x5)
github.com/apache/arrow/go/v12@v12.0.1/parquet/file/record_reader.go:574 +0x44
github.com/apache/arrow/go/v12/parquet/pqarrow.(*leafReader).LoadBatch(0x14000b8aa20, 0x5)
github.com/apache/arrow/go/v12@v12.0.1/parquet/pqarrow/column_readers.go:109 +0xe0
github.com/apache/arrow/go/v12/parquet/pqarrow.(*ColumnReader).NextBatch(0x14000a9c160, 0x103e7de80?)
github.com/apache/arrow/go/v12@v12.0.1/parquet/pqarrow/file_reader.go:131 +0x34
github.com/apache/arrow/go/v12/parquet/pqarrow.(*recordReader).next.func2(0x0, 0x0?)
github.com/apache/arrow/go/v12@v12.0.1/parquet/pqarrow/file_reader.go:665 +0x40
github.com/apache/arrow/go/v12/parquet/pqarrow.(*recordReader).next(0x14000b8ab40)
github.com/apache/arrow/go/v12@v12.0.1/parquet/pqarrow/file_reader.go:685 +0x1d0
github.com/apache/arrow/go/v12/parquet/pqarrow.(*recordReader).Next(0x14000b8ab40?)
github.com/apache/arrow/go/v12@v12.0.1/parquet/pqarrow/file_reader.go:760 +0x74
github.com/milvus-io/milvus/internal/storage.Test.func1(0x14000185a00)
Component(s)
Go
Describe the bug, including details regarding any error messages, version, and platform.
Reading a nullable column that was written with delta binary packed encoding panics when the column contains more rows than the reader's batch size.
To reproduce:
This causes the following error:
Component(s)
Go