@@ -130,7 +130,8 @@ std::shared_ptr<ArrayData> ArrayData::Make(std::shared_ptr<DataType> type, int64
130130}
131131
132132std::shared_ptr<ArrayData> ArrayData::Slice (int64_t off, int64_t len) const {
133- ARROW_CHECK_LE (off, length) << " Slice offset greater than array length" ;
133+ ARROW_CHECK_LE (off, length) << " Slice offset (" << off
134+ << " ) greater than array length (" << length << " )" ;
134135 len = std::min (length - off, len);
135136 off += offset;
136137
@@ -228,22 +229,20 @@ void ArraySpan::SetMembers(const ArrayData& data) {
228229namespace {
229230
// Offsets helper used by ArraySpan::FillFromScalar for types whose layout has
// an offsets buffer. This hunk replaces one helper with another:
//
//  * Lines marked '-' are the previous helper, SetOffsetsForScalar. It wrote
//    the pair {0, value_size} through `buffer` and then pointed
//    span->buffers[buffer_index] at those 2 * sizeof(offset_type) bytes.
//  * Lines marked '+' are the new helper, OffsetsForScalar. It writes the same
//    pair into the caller-provided `scratch_space` and returns a BufferSpan
//    initialized from {scratch_space, 2 * sizeof(offset_type)}. It no longer
//    touches an ArraySpan, so the caller assigns the result to the buffer
//    slot it wants.
//
// NOTE(review): `scratch_space` is reinterpreted as offset_type* and two
// elements are stored through it, so it presumably must be aligned for
// offset_type and at least 2 * sizeof(offset_type) bytes long. Neither is
// checked here; confirm at the call sites.
230231template <typename offset_type>
231- void SetOffsetsForScalar (ArraySpan* span, offset_type* buffer, int64_t value_size,
232- int buffer_index = 1 ) {
233- buffer[0 ] = 0 ;
234- buffer[1 ] = static_cast <offset_type>(value_size);
235- span->buffers [buffer_index].data = reinterpret_cast <uint8_t *>(buffer);
236- span->buffers [buffer_index].size = 2 * sizeof (offset_type);
232+ BufferSpan OffsetsForScalar (uint8_t * scratch_space, offset_type value_size) {
233+ auto * offsets = reinterpret_cast <offset_type*>(scratch_space);
// The first offset is always 0; the second is the value's size.
234+ offsets[0 ] = 0 ;
// `value_size` is already of type offset_type in the new signature, so this
// cast performs no conversion. Any narrowing now happens at the call site.
235+ offsets[1 ] = static_cast <offset_type>(value_size);
236+ return {scratch_space, sizeof (offset_type) * 2 };
237237}
238238
239239int GetNumBuffers (const DataType& type) {
240240 switch (type.id ()) {
241241 case Type::NA:
242242 case Type::STRUCT:
243243 case Type::FIXED_SIZE_LIST:
244- return 1 ;
245244 case Type::RUN_END_ENCODED:
246- return 0 ;
245+ return 1 ;
247246 case Type::BINARY:
248247 case Type::LARGE_BINARY:
249248 case Type::STRING:
@@ -265,16 +264,19 @@ int GetNumBuffers(const DataType& type) {
265264namespace internal {
266265
267266void FillZeroLengthArray (const DataType* type, ArraySpan* span) {
268- memset (span->scratch_space , 0x00 , sizeof (span->scratch_space ));
269-
270267 span->type = type;
271268 span->length = 0 ;
272269 int num_buffers = GetNumBuffers (*type);
273270 for (int i = 0 ; i < num_buffers; ++i) {
274- span->buffers [i].data = reinterpret_cast <uint8_t *>(span->scratch_space );
271+ alignas (int64_t ) static std::array<uint8_t , sizeof (int64_t ) * 2 > kZeros {0 };
272+ span->buffers [i].data = kZeros .data ();
275273 span->buffers [i].size = 0 ;
276274 }
277275
276+ if (!HasValidityBitmap (type->id ())) {
277+ span->buffers [0 ] = {};
278+ }
279+
278280 for (int i = num_buffers; i < 3 ; ++i) {
279281 span->buffers [i] = {};
280282 }
@@ -304,9 +306,13 @@ void ArraySpan::FillFromScalar(const Scalar& value) {
304306
305307 Type::type type_id = value.type ->id ();
306308
307- // Populate null count and validity bitmap (only for non-union/null types)
308- this ->null_count = value.is_valid ? 0 : 1 ;
309- if (!is_union (type_id) && type_id != Type::NA) {
309+ if (type_id == Type::NA) {
310+ this ->null_count = 1 ;
311+ } else if (!internal::HasValidityBitmap (type_id)) {
312+ this ->null_count = 0 ;
313+ } else {
314+ // Populate null count and validity bitmap
315+ this ->null_count = value.is_valid ? 0 : 1 ;
310316 this ->buffers [0 ].data = value.is_valid ? &kTrueBit : &kFalseBit ;
311317 this ->buffers [0 ].size = 1 ;
312318 }
@@ -329,20 +335,19 @@ void ArraySpan::FillFromScalar(const Scalar& value) {
329335 }
330336 } else if (is_base_binary_like (type_id)) {
331337 const auto & scalar = checked_cast<const BaseBinaryScalar&>(value);
332- this -> buffers [ 1 ]. data = reinterpret_cast < uint8_t *>( this -> scratch_space );
338+
333339 const uint8_t * data_buffer = nullptr ;
334340 int64_t data_size = 0 ;
335341 if (scalar.is_valid ) {
336342 data_buffer = scalar.value ->data ();
337343 data_size = scalar.value ->size ();
338344 }
339345 if (is_binary_like (type_id)) {
340- SetOffsetsForScalar< int32_t >( this , reinterpret_cast < int32_t *>( this -> scratch_space ),
341- data_size);
346+ this -> buffers [ 1 ] =
347+ OffsetsForScalar (scalar. scratch_space_ , static_cast < int32_t >( data_size) );
342348 } else {
343349 // is_large_binary_like
344- SetOffsetsForScalar<int64_t >(this , reinterpret_cast <int64_t *>(this ->scratch_space ),
345- data_size);
350+ this ->buffers [1 ] = OffsetsForScalar (scalar.scratch_space_ , data_size);
346351 }
347352 this ->buffers [2 ].data = const_cast <uint8_t *>(data_buffer);
348353 this ->buffers [2 ].size = data_size;
@@ -367,11 +372,10 @@ void ArraySpan::FillFromScalar(const Scalar& value) {
367372 }
368373
369374 if (type_id == Type::LIST || type_id == Type::MAP) {
370- SetOffsetsForScalar< int32_t >( this , reinterpret_cast < int32_t *>( this -> scratch_space ),
371- value_length);
375+ this -> buffers [ 1 ] =
376+ OffsetsForScalar (scalar. scratch_space_ , static_cast < int32_t >( value_length) );
372377 } else if (type_id == Type::LARGE_LIST) {
373- SetOffsetsForScalar<int64_t >(this , reinterpret_cast <int64_t *>(this ->scratch_space ),
374- value_length);
378+ this ->buffers [1 ] = OffsetsForScalar (scalar.scratch_space_ , value_length);
375379 } else {
376380 // FIXED_SIZE_LIST: does not have a second buffer
377381 this ->buffers [1 ] = {};
@@ -384,26 +388,31 @@ void ArraySpan::FillFromScalar(const Scalar& value) {
384388 this ->child_data [i].FillFromScalar (*scalar.value [i]);
385389 }
386390 } else if (is_union (type_id)) {
391+ // Dense union needs scratch space to store both offsets and a type code
392+ struct UnionScratchSpace {
393+ alignas (int64_t ) int8_t type_code;
394+ alignas (int64_t ) uint8_t offsets[sizeof (int32_t ) * 2 ];
395+ };
396+ static_assert (sizeof (UnionScratchSpace) <= sizeof (UnionScalar::scratch_space_));
397+ auto * union_scratch_space = reinterpret_cast <UnionScratchSpace*>(
398+ &checked_cast<const UnionScalar&>(value).scratch_space_ );
399+
387400 // First buffer is kept null since unions have no validity vector
388401 this ->buffers [0 ] = {};
389402
390- this ->buffers [1 ].data = reinterpret_cast <uint8_t *>(this ->scratch_space );
403+ union_scratch_space->type_code = checked_cast<const UnionScalar&>(value).type_code ;
404+ this ->buffers [1 ].data = reinterpret_cast <uint8_t *>(&union_scratch_space->type_code );
391405 this ->buffers [1 ].size = 1 ;
392- int8_t * type_codes = reinterpret_cast <int8_t *>(this ->scratch_space );
393- type_codes[0 ] = checked_cast<const UnionScalar&>(value).type_code ;
394406
395407 this ->child_data .resize (this ->type ->num_fields ());
396408 if (type_id == Type::DENSE_UNION) {
397409 const auto & scalar = checked_cast<const DenseUnionScalar&>(value);
398- // Has offset; start 4 bytes in so it's aligned to a 32-bit boundaries
399- SetOffsetsForScalar<int32_t >(this ,
400- reinterpret_cast <int32_t *>(this ->scratch_space ) + 1 , 1 ,
401- /* buffer_index=*/ 2 );
410+ this ->buffers [2 ] =
411+ OffsetsForScalar (union_scratch_space->offsets , static_cast <int32_t >(1 ));
402412 // We can't "see" the other arrays in the union, but we put the "active"
403413 // union array in the right place and fill zero-length arrays for the
404414 // others
405- const std::vector<int >& child_ids =
406- checked_cast<const UnionType*>(this ->type )->child_ids ();
415+ const auto & child_ids = checked_cast<const UnionType*>(this ->type )->child_ids ();
407416 DCHECK_GE (scalar.type_code , 0 );
408417 DCHECK_LT (scalar.type_code , static_cast <int >(child_ids.size ()));
409418 for (int i = 0 ; i < static_cast <int >(this ->child_data .size ()); ++i) {
@@ -429,6 +438,32 @@ void ArraySpan::FillFromScalar(const Scalar& value) {
429438
430439 // Restore the extension type
431440 this ->type = value.type .get ();
441+ } else if (type_id == Type::RUN_END_ENCODED) {
442+ const auto & scalar = checked_cast<const RunEndEncodedScalar&>(value);
443+ this ->child_data .resize (2 );
444+
445+ auto set_run_end = [&](auto run_end) {
446+ auto & e = this ->child_data [0 ];
447+ e.type = scalar.run_end_type ().get ();
448+ e.length = 1 ;
449+ e.null_count = 0 ;
450+ e.buffers [1 ].data = scalar.scratch_space_ ;
451+ e.buffers [1 ].size = sizeof (run_end);
452+ reinterpret_cast <decltype (run_end)*>(scalar.scratch_space_ )[0 ] = run_end;
453+ };
454+
455+ switch (scalar.run_end_type ()->id ()) {
456+ case Type::INT16:
457+ set_run_end (static_cast <int16_t >(1 ));
458+ break ;
459+ case Type::INT32:
460+ set_run_end (static_cast <int32_t >(1 ));
461+ break ;
462+ default :
463+ DCHECK_EQ (scalar.run_end_type ()->id (), Type::INT64);
464+ set_run_end (static_cast <int64_t >(1 ));
465+ }
466+ this ->child_data [1 ].FillFromScalar (*scalar.value );
432467 } else {
433468 DCHECK_EQ (Type::NA, type_id) << " should be unreachable: " << *value.type ;
434469 }
0 commit comments