Skip to content

Commit 85e2a68

Browse files
authored
GH-39925: [Go][Parquet] Fix re-slicing in maybeReplaceValidity function (#39926)
### Rationale for this change

See #39925.

### What changes are included in this PR?

Fixes the re-slicing logic in `maybeReplaceValidity` for multiple data types and fixes a negative-length bug.

### Are these changes tested?

Yes, there is a new test in the PR.

### Are there any user-facing changes?

No, it just fixes a bug.

* Closes: #39925

Authored-by: Morrison-Reed Elliot (BEG/EVS1-NA) <Elliot.Morrison-Reed@de.bosch.com>
Signed-off-by: Matt Topol <zotthewizard@gmail.com>
1 parent 5856421 commit 85e2a68

2 files changed

Lines changed: 42 additions & 1 deletion

File tree

go/parquet/file/column_writer.go

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -660,7 +660,10 @@ func (w *columnWriter) maybeReplaceValidity(values arrow.Array, newNullCount int
660660

661661
if values.Data().Offset() > 0 {
662662
data := values.Data()
663-
buffers[1] = memory.NewBufferBytes(data.Buffers()[1].Bytes()[data.Offset()*arrow.Int32SizeBytes : data.Len()*arrow.Int32SizeBytes])
663+
elemSize := data.DataType().(arrow.FixedWidthDataType).Bytes()
664+
start := data.Offset() * elemSize
665+
end := start + data.Len()*elemSize
666+
buffers[1] = memory.NewBufferBytes(data.Buffers()[1].Bytes()[start:end])
664667
}
665668

666669
data := array.NewData(values.DataType(), values.Len(), buffers, nil, int(newNullCount), 0)

go/parquet/file/column_writer_test.go

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,8 @@ import (
2424
"sync"
2525
"testing"
2626

27+
"github.com/apache/arrow/go/v16/arrow"
28+
"github.com/apache/arrow/go/v16/arrow/array"
2729
"github.com/apache/arrow/go/v16/arrow/bitutil"
2830
"github.com/apache/arrow/go/v16/arrow/memory"
2931
arrutils "github.com/apache/arrow/go/v16/internal/utils"
@@ -36,6 +38,7 @@ import (
3638
"github.com/apache/arrow/go/v16/parquet/internal/testutils"
3739
"github.com/apache/arrow/go/v16/parquet/internal/utils"
3840
"github.com/apache/arrow/go/v16/parquet/metadata"
41+
"github.com/apache/arrow/go/v16/parquet/pqarrow"
3942
"github.com/apache/arrow/go/v16/parquet/schema"
4043
"github.com/stretchr/testify/assert"
4144
"github.com/stretchr/testify/mock"
@@ -736,3 +739,38 @@ func (b *BooleanValueWriterSuite) TestAlternateBooleanValues() {
736739
b.Equal(i%2 == 0, b.ValuesOut.([]bool)[i])
737740
}
738741
}
742+
743+
func TestDictionaryReslice(t *testing.T) {
744+
pts := []arrow.DataType{
745+
arrow.PrimitiveTypes.Int8,
746+
arrow.PrimitiveTypes.Int16,
747+
arrow.PrimitiveTypes.Int32,
748+
arrow.PrimitiveTypes.Int64,
749+
arrow.PrimitiveTypes.Uint8,
750+
arrow.PrimitiveTypes.Uint16,
751+
arrow.PrimitiveTypes.Uint32,
752+
arrow.PrimitiveTypes.Uint64,
753+
}
754+
for _, pt := range pts {
755+
t.Run(pt.String(), func(t *testing.T) {
756+
mem := memory.NewGoAllocator()
757+
dt := &arrow.DictionaryType{
758+
IndexType: pt,
759+
ValueType: &arrow.StringType{},
760+
}
761+
field := arrow.Field{Name: "test_field", Type: dt, Nullable: true}
762+
schema := arrow.NewSchema([]arrow.Field{field}, nil)
763+
b := array.NewRecordBuilder(mem, schema)
764+
for i := 0; i < 2000; i++ {
765+
b.Field(0).(*array.BinaryDictionaryBuilder).AppendString("test_value")
766+
}
767+
rec := b.NewRecord()
768+
out := &bytes.Buffer{}
769+
pqw, err := pqarrow.NewFileWriter(rec.Schema(), out, nil, pqarrow.NewArrowWriterProperties())
770+
assert.NoError(t, err)
771+
err = pqw.WriteBuffered(rec)
772+
assert.NoError(t, err)
773+
774+
})
775+
}
776+
}

0 commit comments

Comments
 (0)