Skip to content

[Go][Parquet] Delta Binary Packed encoding with null read batchsize panic #43276

@shaoting-huang

Description

@shaoting-huang

Describe the bug, including details regarding any error messages, version, and platform.

Reading a nullable column that was written with Delta Binary Packed encoding panics when the column holds more rows than the reader's batch size (for example, 10 rows read with a batch size of 5).

To reproduce:

	// Reproduction: write 10 non-null values into a nullable int64 column using
	// DELTA_BINARY_PACKED encoding, then read them back through a pqarrow record
	// reader whose batch size (5) is smaller than the number of rows written.
	t.Run("test", func(t *testing.T) {
		size := 10
		buf := new(bytes.Buffer)
		mem := memory.NewGoAllocator()

		// Define the schema for the test data. The column is declared nullable
		// even though every appended value below is non-null.
		fields := []arrow.Field{
			{Name: "int64", Type: arrow.PrimitiveTypes.Int64, Nullable: true},
		}
		schema := arrow.NewSchema(fields, nil)

		// Create a record batch holding the values 0..size-1.
		b := array.NewRecordBuilder(mem, schema)
		defer b.Release()

		for i := 0; i < size; i++ {
			b.Field(0).(*array.Int64Builder).Append(int64(i))
		}
		rec := b.NewRecord()
		defer rec.Release()

		// Write the data to Parquet using the file writer, with dictionary
		// encoding disabled so the requested delta encoding is selected.
		props := parquet.NewWriterProperties(
			parquet.WithCompression(compress.Codecs.Zstd),
			parquet.WithCompressionLevel(3),
			parquet.WithDictionaryDefault(false),
			parquet.WithEncoding(parquet.Encodings.DeltaBinaryPacked))
		writerProps := pqarrow.DefaultWriterProps()
		pw, err := pqarrow.NewFileWriter(schema, buf, props, writerProps)
		assert.NoError(t, err)
		// Surface write/close failures instead of dropping them, so a broken
		// file is not mistaken for a reader-side problem.
		assert.NoError(t, pw.Write(rec))
		assert.NoError(t, pw.Close())

		// Read the data back from the Parquet file.
		reader, err := file.NewParquetReader(bytes.NewReader(buf.Bytes()))
		assert.NoError(t, err)
		defer reader.Close()

		// BatchSize is deliberately smaller than the number of rows written.
		pr, err := pqarrow.NewFileReader(reader, pqarrow.ArrowReadProperties{BatchSize: 5}, memory.DefaultAllocator)
		assert.NoError(t, err)

		rr, err := pr.GetRecordReader(context.Background(), nil, nil)
		assert.NoError(t, err)

		totalRows := 0
		for rr.Next() {
			rec := rr.Record()
			// The column type is the same for every row; assert it once per batch.
			col := rec.Column(0).(*array.Int64)
			for i := 0; i < int(rec.NumRows()); i++ {
				// Expected value first, actual second, per testify's convention.
				assert.Equal(t, int64(totalRows+i), col.Value(i))
			}
			totalRows += int(rec.NumRows())
		}

		if totalRows != size {
			t.Fatalf("Expected %d rows, but got %d rows", size, totalRows)
		}
	})

This causes the following error:

panic: runtime error: slice bounds out of range [4:0] [recovered]
        panic: runtime error: slice bounds out of range [4:0]

goroutine 178 [running]:
testing.tRunner.func1.2({0x103f56e00, 0x14000aa40a8})
        /opt/homebrew/Cellar/go@1.21/1.21.11/libexec/src/testing/testing.go:1545 +0x1c4
testing.tRunner.func1()
        /opt/homebrew/Cellar/go@1.21/1.21.11/libexec/src/testing/testing.go:1548 +0x360
panic({0x103f56e00?, 0x14000aa40a8?})
        /opt/homebrew/Cellar/go@1.21/1.21.11/libexec/src/runtime/panic.go:914 +0x218
github.com/apache/arrow/go/v12/parquet/internal/encoding.(*DeltaBitPackInt64Decoder).Decode(0x140008af618, {0x140005e8680?, 0x0?, 0x0?})
        github.com/apache/arrow/go/v12@v12.0.1/parquet/internal/encoding/delta_bit_packing.go:273 +0x240
github.com/apache/arrow/go/v12/parquet/internal/encoding.DeltaBitPackInt64Decoder.DecodeSpaced({0x1400001f1e0, {0x0, 0x0, 0x0}}, {0x140005e8680, 0x5, 0x8}, 0x0, {0x140005e8700, 0x1, ...}, ...)
        github.com/apache/arrow/go/v12@v12.0.1/parquet/internal/encoding/delta_bit_packing.go:291 +0x74
github.com/apache/arrow/go/v12/parquet/file.(*primitiveRecordReader).ReadValuesSpaced(0x140005e8380?, 0x5, 0x20?)
        github.com/apache/arrow/go/v12@v12.0.1/parquet/file/record_reader.go:284 +0x2e4
github.com/apache/arrow/go/v12/parquet/file.(*recordReader).ReadRecordData(0x14000b8a9c0, 0x5)
        github.com/apache/arrow/go/v12@v12.0.1/parquet/file/record_reader.go:548 +0x288
github.com/apache/arrow/go/v12/parquet/file.(*recordReader).ReadRecords(0x14000b8a9c0, 0x5)
        github.com/apache/arrow/go/v12@v12.0.1/parquet/file/record_reader.go:574 +0x44
github.com/apache/arrow/go/v12/parquet/pqarrow.(*leafReader).LoadBatch(0x14000b8aa20, 0x5)
        github.com/apache/arrow/go/v12@v12.0.1/parquet/pqarrow/column_readers.go:109 +0xe0
github.com/apache/arrow/go/v12/parquet/pqarrow.(*ColumnReader).NextBatch(0x14000a9c160, 0x103e7de80?)
        github.com/apache/arrow/go/v12@v12.0.1/parquet/pqarrow/file_reader.go:131 +0x34
github.com/apache/arrow/go/v12/parquet/pqarrow.(*recordReader).next.func2(0x0, 0x0?)
        github.com/apache/arrow/go/v12@v12.0.1/parquet/pqarrow/file_reader.go:665 +0x40
github.com/apache/arrow/go/v12/parquet/pqarrow.(*recordReader).next(0x14000b8ab40)
        github.com/apache/arrow/go/v12@v12.0.1/parquet/pqarrow/file_reader.go:685 +0x1d0
github.com/apache/arrow/go/v12/parquet/pqarrow.(*recordReader).Next(0x14000b8ab40?)
        github.com/apache/arrow/go/v12@v12.0.1/parquet/pqarrow/file_reader.go:760 +0x74
github.com/milvus-io/milvus/internal/storage.Test.func1(0x14000185a00)

Component(s)

Go

Metadata

Metadata

Assignees

Type

No type
No fields configured for issues without a type.

Projects

No projects

Milestone

Relationships

None yet

Development

No branches or pull requests

Issue actions