Skip to content

Commit 0aadd5a

Browse files
authored
GH-39309: [Go][Parquet] handle nil bitWriter for DeltaBinaryPacked (#39347)
### Rationale for this change

When a column uses the DeltaBinaryPacked encoding and turns out to be empty, the encoder hits a nil pointer dereference.

### What changes are included in this PR?

Add a nil check in `EstimatedDataEncodedSize` for the base `deltaBitPackEncoder`. This should only ever occur if we have an empty column with this encoding when closing a row group.

### Are these changes tested?

Yes, a unit test was added to verify the fix.

* Closes: #39309

Authored-by: Matt Topol <zotthewizard@gmail.com>
Signed-off-by: Matt Topol <zotthewizard@gmail.com>
1 parent 1362122 commit 0aadd5a

2 files changed

Lines changed: 41 additions & 0 deletions

File tree

go/parquet/internal/encoding/delta_bit_packing.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -466,6 +466,10 @@ func (enc *deltaBitPackEncoder) FlushValues() (Buffer, error) {
466466

467467
// EstimatedDataEncodedSize returns the current amount of data actually flushed out and written
468468
func (enc *deltaBitPackEncoder) EstimatedDataEncodedSize() int64 {
469+
if enc.bitWriter == nil {
470+
return 0
471+
}
472+
469473
return int64(enc.bitWriter.Written())
470474
}
471475

go/parquet/pqarrow/encode_arrow_test.go

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1983,3 +1983,40 @@ func TestWriteTableMemoryAllocation(t *testing.T) {
19831983

19841984
require.Zero(t, mem.CurrentAlloc())
19851985
}
1986+
1987+
func TestEmptyListDeltaBinaryPacked(t *testing.T) {
1988+
schema := arrow.NewSchema([]arrow.Field{
1989+
{Name: "ts", Type: arrow.ListOf(arrow.PrimitiveTypes.Uint64),
1990+
Metadata: arrow.NewMetadata([]string{"PARQUET:field_id"}, []string{"-1"})}}, nil)
1991+
builder := array.NewRecordBuilder(memory.DefaultAllocator, schema)
1992+
defer builder.Release()
1993+
1994+
listBuilder := builder.Field(0).(*array.ListBuilder)
1995+
listBuilder.Append(true)
1996+
arrowRec := builder.NewRecord()
1997+
defer arrowRec.Release()
1998+
1999+
var buf bytes.Buffer
2000+
wr, err := pqarrow.NewFileWriter(schema, &buf,
2001+
parquet.NewWriterProperties(
2002+
parquet.WithDictionaryFor("ts.list.element", false),
2003+
parquet.WithEncodingFor("ts.list.element", parquet.Encodings.DeltaBinaryPacked)),
2004+
pqarrow.DefaultWriterProps())
2005+
require.NoError(t, err)
2006+
2007+
require.NoError(t, wr.WriteBuffered(arrowRec))
2008+
require.NoError(t, wr.Close())
2009+
2010+
rdr, err := file.NewParquetReader(bytes.NewReader(buf.Bytes()))
2011+
require.NoError(t, err)
2012+
reader, err := pqarrow.NewFileReader(rdr, pqarrow.ArrowReadProperties{}, memory.DefaultAllocator)
2013+
require.NoError(t, err)
2014+
defer rdr.Close()
2015+
2016+
tbl, err := reader.ReadTable(context.Background())
2017+
require.NoError(t, err)
2018+
defer tbl.Release()
2019+
2020+
assert.True(t, schema.Equal(tbl.Schema()))
2021+
assert.EqualValues(t, 1, tbl.NumRows())
2022+
}

0 commit comments

Comments
 (0)