Skip to content

Commit 37a8bf0

Browse files
authored
GH-39049: [C++] Use Cast() instead of CastTo() for Dictionary Scalar in test (#39362)
### Rationale for this change

Remove legacy code.

### What changes are included in this PR?

Replace the legacy scalar `CastTo` implementation for Dictionary Scalar in tests.

### Are these changes tested?

Yes. They are covered by existing test cases.

### Are there any user-facing changes?

No.

* Closes: #39049

Authored-by: Hyunseok Seo <hsseo0501@gmail.com>
Signed-off-by: Sutou Kouhei <kou@clear-code.com>
1 parent 1c1fa74 commit 37a8bf0

3 files changed

Lines changed: 25 additions & 12 deletions

File tree

cpp/src/arrow/compute/kernels/scalar_cast_dictionary.cc

Lines changed: 21 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -36,14 +36,22 @@ Status CastToDictionary(KernelContext* ctx, const ExecSpan& batch, ExecResult* o
3636
const CastOptions& options = CastState::Get(ctx);
3737
const auto& out_type = checked_cast<const DictionaryType&>(*out->type());
3838

39+
std::shared_ptr<ArrayData> in_array = batch[0].array.ToArrayData();
40+
3941
// if out type is same as in type, return input
4042
if (out_type.Equals(*batch[0].type())) {
4143
/// XXX: This is the wrong place to do a zero-copy optimization
42-
out->value = batch[0].array.ToArrayData();
44+
out->value = in_array;
4345
return Status::OK();
4446
}
4547

46-
std::shared_ptr<ArrayData> in_array = batch[0].array.ToArrayData();
48+
// If the input type is STRING, it is first encoded as a dictionary to facilitate
49+
// processing. This approach allows the subsequent code to uniformly handle STRING
50+
// inputs as if they were originally provided in dictionary format. Encoding as a
51+
// dictionary helps in reusing the same logic for dictionary operations.
52+
if (batch[0].type()->id() == Type::STRING) {
53+
in_array = DictionaryEncode(in_array)->array();
54+
}
4755
const auto& in_type = checked_cast<const DictionaryType&>(*in_array->type);
4856

4957
ArrayData* out_array = out->array_data().get();
@@ -77,17 +85,21 @@ Status CastToDictionary(KernelContext* ctx, const ExecSpan& batch, ExecResult* o
7785
return Status::OK();
7886
}
7987

80-
std::vector<std::shared_ptr<CastFunction>> GetDictionaryCasts() {
81-
auto func = std::make_shared<CastFunction>("cast_dictionary", Type::DICTIONARY);
82-
83-
AddCommonCasts(Type::DICTIONARY, kOutputTargetType, func.get());
84-
ScalarKernel kernel({InputType(Type::DICTIONARY)}, kOutputTargetType, CastToDictionary);
88+
template <typename SrcType>
89+
void AddDictionaryCast(CastFunction* func) {
90+
ScalarKernel kernel({InputType(SrcType::type_id)}, kOutputTargetType, CastToDictionary);
8591
kernel.null_handling = NullHandling::COMPUTED_NO_PREALLOCATE;
8692
kernel.mem_allocation = MemAllocation::NO_PREALLOCATE;
93+
DCHECK_OK(func->AddKernel(SrcType::type_id, std::move(kernel)));
94+
}
8795

88-
DCHECK_OK(func->AddKernel(Type::DICTIONARY, std::move(kernel)));
96+
std::vector<std::shared_ptr<CastFunction>> GetDictionaryCasts() {
97+
auto cast_dict = std::make_shared<CastFunction>("cast_dictionary", Type::DICTIONARY);
98+
AddCommonCasts(Type::DICTIONARY, kOutputTargetType, cast_dict.get());
99+
AddDictionaryCast<DictionaryType>(cast_dict.get());
100+
AddDictionaryCast<StringType>(cast_dict.get());
89101

90-
return {func};
102+
return {cast_dict};
91103
}
92104

93105
} // namespace internal

cpp/src/arrow/dataset/partition_test.cc

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -316,7 +316,7 @@ TEST_F(TestPartitioning, DirectoryPartitioningFormatDictionary) {
316316
ArrayVector{dictionary});
317317
written_schema_ = partitioning_->schema();
318318

319-
ASSERT_OK_AND_ASSIGN(auto dict_hello, MakeScalar("hello")->CastTo(DictStr("")->type()));
319+
ASSERT_OK_AND_ASSIGN(auto dict_hello, Cast(MakeScalar("hello"), DictStr("")->type()));
320320
AssertFormat(equal(field_ref("alpha"), literal(dict_hello)), "hello");
321321
}
322322

@@ -329,7 +329,7 @@ TEST_F(TestPartitioning, DirectoryPartitioningFormatDictionaryCustomIndex) {
329329
schema({field("alpha", dict_type)}), ArrayVector{dictionary});
330330
written_schema_ = partitioning_->schema();
331331

332-
ASSERT_OK_AND_ASSIGN(auto dict_hello, MakeScalar("hello")->CastTo(dict_type));
332+
ASSERT_OK_AND_ASSIGN(auto dict_hello, Cast(MakeScalar("hello"), dict_type));
333333
AssertFormat(equal(field_ref("alpha"), literal(dict_hello)), "hello");
334334
}
335335

cpp/src/arrow/scalar_test.cc

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1490,7 +1490,8 @@ TEST(TestDictionaryScalar, Cast) {
14901490
auto alpha =
14911491
dict->IsValid(i) ? MakeScalar(dict->GetString(i)) : MakeNullScalar(utf8());
14921492
// Cast string to dict(..., string)
1493-
ASSERT_OK_AND_ASSIGN(auto cast_alpha, alpha->CastTo(ty));
1493+
ASSERT_OK_AND_ASSIGN(auto cast_alpha_datum, Cast(alpha, ty));
1494+
const auto& cast_alpha = cast_alpha_datum.scalar();
14941495
ASSERT_OK(cast_alpha->ValidateFull());
14951496
ASSERT_OK_AND_ASSIGN(
14961497
auto roundtripped_alpha,

0 commit comments

Comments
 (0)