Skip to content

Commit c5f60a0

Browse files
Cerdore and pitrou authored
GH-39864: [C++] DataType::ToString support optionally show metadata (#39888)
### Rationale for this change

Support showing the metadata of nested DataTypes that have child fields.

### What changes are included in this PR?

Add an optional argument "show_metadata" to the ToString() of DataType and of the classes derived from DataType. The argument is also added to TypeHolder::ToString().

### Are these changes tested?

Yes, I added tests for the changes.

### Are there any user-facing changes?

No.

Closes: #39864

* Closes: #39864

Lead-authored-by: xiansen.chen <xiansen.chen@openpie.com>
Co-authored-by: Antoine Pitrou <antoine@python.org>
Signed-off-by: Antoine Pitrou <antoine@python.org>
1 parent 40a8a68 commit c5f60a0

12 files changed

Lines changed: 140 additions & 163 deletions

File tree

cpp/gdb_arrow.py

Lines changed: 4 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -956,10 +956,12 @@ def storage_type(self):
956956

957957
def to_string(self):
958958
"""
959-
The result of calling ToString().
959+
The result of calling ToString(show_metadata=True).
960960
"""
961+
# XXX `show_metadata` is an optional argument, but gdb doesn't allow
962+
# omitting it.
961963
return StdString(gdb.parse_and_eval(
962-
f"{for_evaluation(self.val)}.ToString()"))
964+
f"{for_evaluation(self.val)}.ToString(true)"))
963965

964966

965967
class Schema:

cpp/src/arrow/engine/simple_extension_type_internal.h

Lines changed: 3 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -70,8 +70,9 @@ class SimpleExtensionType : public ExtensionType {
7070

7171
std::string extension_name() const override { return std::string(kExtensionName); }
7272

73-
std::string ToString() const override { return "extension<" + this->Serialize() + ">"; }
74-
73+
std::string ToString(bool show_metadata = false) const override {
74+
return "extension<" + this->Serialize() + ">";
75+
}
7576
/// \brief A comparator which returns true iff all parameter properties are equal
7677
struct ExtensionEqualsImpl {
7778
ExtensionEqualsImpl(const Params& l, const Params& r) : left_(l), right_(r) {

cpp/src/arrow/extension/fixed_shape_tensor.cc

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -108,10 +108,10 @@ bool FixedShapeTensorType::ExtensionEquals(const ExtensionType& other) const {
108108
permutation_equivalent;
109109
}
110110

111-
std::string FixedShapeTensorType::ToString() const {
111+
std::string FixedShapeTensorType::ToString(bool show_metadata) const {
112112
std::stringstream ss;
113113
ss << "extension<" << this->extension_name()
114-
<< "[value_type=" << value_type_->ToString()
114+
<< "[value_type=" << value_type_->ToString(show_metadata)
115115
<< ", shape=" << ::arrow::internal::PrintVector{shape_, ","};
116116

117117
if (!permutation_.empty()) {

cpp/src/arrow/extension/fixed_shape_tensor.h

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -61,7 +61,7 @@ class ARROW_EXPORT FixedShapeTensorType : public ExtensionType {
6161
dim_names_(dim_names) {}
6262

6363
std::string extension_name() const override { return "arrow.fixed_shape_tensor"; }
64-
std::string ToString() const override;
64+
std::string ToString(bool show_metadata = false) const override;
6565

6666
/// Number of dimensions of tensor elements
6767
size_t ndim() const { return shape_.size(); }

cpp/src/arrow/extension_type.cc

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -41,7 +41,7 @@ using internal::checked_cast;
4141

4242
DataTypeLayout ExtensionType::layout() const { return storage_type_->layout(); }
4343

44-
std::string ExtensionType::ToString() const {
44+
std::string ExtensionType::ToString(bool show_metadata) const {
4545
std::stringstream ss;
4646
ss << "extension<" << this->extension_name() << ">";
4747
return ss.str();

cpp/src/arrow/extension_type.h

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -50,7 +50,7 @@ class ARROW_EXPORT ExtensionType : public DataType {
5050

5151
DataTypeLayout layout() const override;
5252

53-
std::string ToString() const override;
53+
std::string ToString(bool show_metadata = false) const override;
5454

5555
std::string name() const override { return "extension"; }
5656

cpp/src/arrow/testing/gtest_util.cc

Lines changed: 8 additions & 18 deletions
Original file line number | Diff line number | Diff line change
@@ -232,21 +232,12 @@ void AssertBufferEqual(const Buffer& buffer, const Buffer& expected) {
232232
ASSERT_TRUE(buffer.Equals(expected));
233233
}
234234

235-
template <typename T>
236-
std::string ToStringWithMetadata(const T& t, bool show_metadata) {
237-
return t.ToString(show_metadata);
238-
}
239-
240-
std::string ToStringWithMetadata(const DataType& t, bool show_metadata) {
241-
return t.ToString();
242-
}
243-
244235
template <typename T>
245236
void AssertFingerprintablesEqual(const T& left, const T& right, bool check_metadata,
246237
const char* types_plural) {
247238
ASSERT_TRUE(left.Equals(right, check_metadata))
248-
<< types_plural << " '" << ToStringWithMetadata(left, check_metadata) << "' and '"
249-
<< ToStringWithMetadata(right, check_metadata) << "' should have compared equal";
239+
<< types_plural << " '" << left.ToString(check_metadata) << "' and '"
240+
<< right.ToString(check_metadata) << "' should have compared equal";
250241
auto lfp = left.fingerprint();
251242
auto rfp = right.fingerprint();
252243
// Note: all types tested in this file should implement fingerprinting,
@@ -256,9 +247,8 @@ void AssertFingerprintablesEqual(const T& left, const T& right, bool check_metad
256247
rfp += right.metadata_fingerprint();
257248
}
258249
ASSERT_EQ(lfp, rfp) << "Fingerprints for " << types_plural << " '"
259-
<< ToStringWithMetadata(left, check_metadata) << "' and '"
260-
<< ToStringWithMetadata(right, check_metadata)
261-
<< "' should have compared equal";
250+
<< left.ToString(check_metadata) << "' and '"
251+
<< right.ToString(check_metadata) << "' should have compared equal";
262252
}
263253

264254
template <typename T>
@@ -274,8 +264,8 @@ template <typename T>
274264
void AssertFingerprintablesNotEqual(const T& left, const T& right, bool check_metadata,
275265
const char* types_plural) {
276266
ASSERT_FALSE(left.Equals(right, check_metadata))
277-
<< types_plural << " '" << ToStringWithMetadata(left, check_metadata) << "' and '"
278-
<< ToStringWithMetadata(right, check_metadata) << "' should have compared unequal";
267+
<< types_plural << " '" << left.ToString(check_metadata) << "' and '"
268+
<< right.ToString(check_metadata) << "' should have compared unequal";
279269
auto lfp = left.fingerprint();
280270
auto rfp = right.fingerprint();
281271
// Note: all types tested in this file should implement fingerprinting,
@@ -286,8 +276,8 @@ void AssertFingerprintablesNotEqual(const T& left, const T& right, bool check_me
286276
rfp += right.metadata_fingerprint();
287277
}
288278
ASSERT_NE(lfp, rfp) << "Fingerprints for " << types_plural << " '"
289-
<< ToStringWithMetadata(left, check_metadata) << "' and '"
290-
<< ToStringWithMetadata(right, check_metadata)
279+
<< left.ToString(check_metadata) << "' and '"
280+
<< right.ToString(check_metadata)
291281
<< "' should have compared unequal";
292282
}
293283
}

cpp/src/arrow/type.cc

Lines changed: 48 additions & 41 deletions
Original file line number | Diff line number | Diff line change
@@ -874,7 +874,7 @@ bool Field::IsCompatibleWith(const std::shared_ptr<Field>& other) const {
874874

875875
std::string Field::ToString(bool show_metadata) const {
876876
std::stringstream ss;
877-
ss << name_ << ": " << type_->ToString();
877+
ss << name_ << ": " << type_->ToString(show_metadata);
878878
if (!nullable_) {
879879
ss << " not null";
880880
}
@@ -919,14 +919,15 @@ std::ostream& operator<<(std::ostream& os, const TypeHolder& type) {
919919
// ----------------------------------------------------------------------
920920
// TypeHolder
921921

922-
std::string TypeHolder::ToString(const std::vector<TypeHolder>& types) {
922+
std::string TypeHolder::ToString(const std::vector<TypeHolder>& types,
923+
bool show_metadata) {
923924
std::stringstream ss;
924925
ss << "(";
925926
for (size_t i = 0; i < types.size(); ++i) {
926927
if (i > 0) {
927928
ss << ", ";
928929
}
929-
ss << types[i].type->ToString();
930+
ss << types[i].type->ToString(show_metadata);
930931
}
931932
ss << ")";
932933
return ss.str();
@@ -984,27 +985,27 @@ BaseBinaryType::~BaseBinaryType() {}
984985

985986
BaseListType::~BaseListType() {}
986987

987-
std::string ListType::ToString() const {
988+
std::string ListType::ToString(bool show_metadata) const {
988989
std::stringstream s;
989-
s << "list<" << value_field()->ToString() << ">";
990+
s << "list<" << value_field()->ToString(show_metadata) << ">";
990991
return s.str();
991992
}
992993

993-
std::string LargeListType::ToString() const {
994+
std::string LargeListType::ToString(bool show_metadata) const {
994995
std::stringstream s;
995-
s << "large_list<" << value_field()->ToString() << ">";
996+
s << "large_list<" << value_field()->ToString(show_metadata) << ">";
996997
return s.str();
997998
}
998999

999-
std::string ListViewType::ToString() const {
1000+
std::string ListViewType::ToString(bool show_metadata) const {
10001001
std::stringstream s;
1001-
s << "list_view<" << value_field()->ToString() << ">";
1002+
s << "list_view<" << value_field()->ToString(show_metadata) << ">";
10021003
return s.str();
10031004
}
10041005

1005-
std::string LargeListViewType::ToString() const {
1006+
std::string LargeListViewType::ToString(bool show_metadata) const {
10061007
std::stringstream s;
1007-
s << "large_list_view<" << value_field()->ToString() << ">";
1008+
s << "large_list_view<" << value_field()->ToString(show_metadata) << ">";
10081009
return s.str();
10091010
}
10101011

@@ -1047,7 +1048,7 @@ Result<std::shared_ptr<DataType>> MapType::Make(std::shared_ptr<Field> value_fie
10471048
return std::make_shared<MapType>(std::move(value_field), keys_sorted);
10481049
}
10491050

1050-
std::string MapType::ToString() const {
1051+
std::string MapType::ToString(bool show_metadata) const {
10511052
std::stringstream s;
10521053

10531054
const auto print_field_name = [](std::ostream& os, const Field& field,
@@ -1058,7 +1059,7 @@ std::string MapType::ToString() const {
10581059
};
10591060
const auto print_field = [&](std::ostream& os, const Field& field,
10601061
const char* std_name) {
1061-
os << field.type()->ToString();
1062+
os << field.type()->ToString(show_metadata);
10621063
print_field_name(os, field, std_name);
10631064
};
10641065

@@ -1074,23 +1075,24 @@ std::string MapType::ToString() const {
10741075
return s.str();
10751076
}
10761077

1077-
std::string FixedSizeListType::ToString() const {
1078+
std::string FixedSizeListType::ToString(bool show_metadata) const {
10781079
std::stringstream s;
1079-
s << "fixed_size_list<" << value_field()->ToString() << ">[" << list_size_ << "]";
1080+
s << "fixed_size_list<" << value_field()->ToString(show_metadata) << ">[" << list_size_
1081+
<< "]";
10801082
return s.str();
10811083
}
10821084

1083-
std::string BinaryType::ToString() const { return "binary"; }
1085+
std::string BinaryType::ToString(bool show_metadata) const { return "binary"; }
10841086

1085-
std::string BinaryViewType::ToString() const { return "binary_view"; }
1087+
std::string BinaryViewType::ToString(bool show_metadata) const { return "binary_view"; }
10861088

1087-
std::string LargeBinaryType::ToString() const { return "large_binary"; }
1089+
std::string LargeBinaryType::ToString(bool show_metadata) const { return "large_binary"; }
10881090

1089-
std::string StringType::ToString() const { return "string"; }
1091+
std::string StringType::ToString(bool show_metadata) const { return "string"; }
10901092

1091-
std::string StringViewType::ToString() const { return "string_view"; }
1093+
std::string StringViewType::ToString(bool show_metadata) const { return "string_view"; }
10921094

1093-
std::string LargeStringType::ToString() const { return "large_string"; }
1095+
std::string LargeStringType::ToString(bool show_metadata) const { return "large_string"; }
10941096

10951097
int FixedSizeBinaryType::bit_width() const { return CHAR_BIT * byte_width(); }
10961098

@@ -1105,7 +1107,7 @@ Result<std::shared_ptr<DataType>> FixedSizeBinaryType::Make(int32_t byte_width)
11051107
return std::make_shared<FixedSizeBinaryType>(byte_width);
11061108
}
11071109

1108-
std::string FixedSizeBinaryType::ToString() const {
1110+
std::string FixedSizeBinaryType::ToString(bool show_metadata) const {
11091111
std::stringstream ss;
11101112
ss << "fixed_size_binary[" << byte_width_ << "]";
11111113
return ss.str();
@@ -1122,9 +1124,13 @@ Date32Type::Date32Type() : DateType(Type::DATE32) {}
11221124

11231125
Date64Type::Date64Type() : DateType(Type::DATE64) {}
11241126

1125-
std::string Date64Type::ToString() const { return std::string("date64[ms]"); }
1127+
std::string Date64Type::ToString(bool show_metadata) const {
1128+
return std::string("date64[ms]");
1129+
}
11261130

1127-
std::string Date32Type::ToString() const { return std::string("date32[day]"); }
1131+
std::string Date32Type::ToString(bool show_metadata) const {
1132+
return std::string("date32[day]");
1133+
}
11281134

11291135
// ----------------------------------------------------------------------
11301136
// Time types
@@ -1137,7 +1143,7 @@ Time32Type::Time32Type(TimeUnit::type unit) : TimeType(Type::TIME32, unit) {
11371143
<< "Must be seconds or milliseconds";
11381144
}
11391145

1140-
std::string Time32Type::ToString() const {
1146+
std::string Time32Type::ToString(bool show_metadata) const {
11411147
std::stringstream ss;
11421148
ss << "time32[" << this->unit_ << "]";
11431149
return ss.str();
@@ -1148,7 +1154,7 @@ Time64Type::Time64Type(TimeUnit::type unit) : TimeType(Type::TIME64, unit) {
11481154
<< "Must be microseconds or nanoseconds";
11491155
}
11501156

1151-
std::string Time64Type::ToString() const {
1157+
std::string Time64Type::ToString(bool show_metadata) const {
11521158
std::stringstream ss;
11531159
ss << "time64[" << this->unit_ << "]";
11541160
return ss.str();
@@ -1175,7 +1181,7 @@ std::ostream& operator<<(std::ostream& os, TimeUnit::type unit) {
11751181
// ----------------------------------------------------------------------
11761182
// Timestamp types
11771183

1178-
std::string TimestampType::ToString() const {
1184+
std::string TimestampType::ToString(bool show_metadata) const {
11791185
std::stringstream ss;
11801186
ss << "timestamp[" << this->unit_;
11811187
if (this->timezone_.size() > 0) {
@@ -1186,7 +1192,7 @@ std::string TimestampType::ToString() const {
11861192
}
11871193

11881194
// Duration types
1189-
std::string DurationType::ToString() const {
1195+
std::string DurationType::ToString(bool show_metadata) const {
11901196
std::stringstream ss;
11911197
ss << "duration[" << this->unit_ << "]";
11921198
return ss.str();
@@ -1245,7 +1251,7 @@ uint8_t UnionType::max_type_code() const {
12451251
: *std::max_element(type_codes_.begin(), type_codes_.end());
12461252
}
12471253

1248-
std::string UnionType::ToString() const {
1254+
std::string UnionType::ToString(bool show_metadata) const {
12491255
std::stringstream s;
12501256

12511257
s << name() << "<";
@@ -1254,7 +1260,7 @@ std::string UnionType::ToString() const {
12541260
if (i) {
12551261
s << ", ";
12561262
}
1257-
s << children_[i]->ToString() << "=" << static_cast<int>(type_codes_[i]);
1263+
s << children_[i]->ToString(show_metadata) << "=" << static_cast<int>(type_codes_[i]);
12581264
}
12591265
s << ">";
12601266
return s.str();
@@ -1291,10 +1297,10 @@ RunEndEncodedType::RunEndEncodedType(std::shared_ptr<DataType> run_end_type,
12911297

12921298
RunEndEncodedType::~RunEndEncodedType() = default;
12931299

1294-
std::string RunEndEncodedType::ToString() const {
1300+
std::string RunEndEncodedType::ToString(bool show_metadata) const {
12951301
std::stringstream s;
1296-
s << name() << "<run_ends: " << run_end_type()->ToString()
1297-
<< ", values: " << value_type()->ToString() << ">";
1302+
s << name() << "<run_ends: " << run_end_type()->ToString(show_metadata)
1303+
<< ", values: " << value_type()->ToString(show_metadata) << ">";
12981304
return s.str();
12991305
}
13001306

@@ -1350,15 +1356,15 @@ StructType::StructType(const FieldVector& fields)
13501356

13511357
StructType::~StructType() {}
13521358

1353-
std::string StructType::ToString() const {
1359+
std::string StructType::ToString(bool show_metadata) const {
13541360
std::stringstream s;
13551361
s << "struct<";
13561362
for (int i = 0; i < this->num_fields(); ++i) {
13571363
if (i > 0) {
13581364
s << ", ";
13591365
}
13601366
std::shared_ptr<Field> field = this->field(i);
1361-
s << field->ToString();
1367+
s << field->ToString(show_metadata);
13621368
}
13631369
s << ">";
13641370
return s.str();
@@ -1523,17 +1529,18 @@ DataTypeLayout DictionaryType::layout() const {
15231529
return layout;
15241530
}
15251531

1526-
std::string DictionaryType::ToString() const {
1532+
std::string DictionaryType::ToString(bool show_metadata) const {
15271533
std::stringstream ss;
1528-
ss << this->name() << "<values=" << value_type_->ToString()
1529-
<< ", indices=" << index_type_->ToString() << ", ordered=" << ordered_ << ">";
1534+
ss << this->name() << "<values=" << value_type_->ToString(show_metadata)
1535+
<< ", indices=" << index_type_->ToString(show_metadata) << ", ordered=" << ordered_
1536+
<< ">";
15301537
return ss.str();
15311538
}
15321539

15331540
// ----------------------------------------------------------------------
15341541
// Null type
15351542

1536-
std::string NullType::ToString() const { return name(); }
1543+
std::string NullType::ToString(bool show_metadata) const { return name(); }
15371544

15381545
// ----------------------------------------------------------------------
15391546
// FieldPath
@@ -3304,13 +3311,13 @@ std::shared_ptr<DataType> decimal256(int32_t precision, int32_t scale) {
33043311
return std::make_shared<Decimal256Type>(precision, scale);
33053312
}
33063313

3307-
std::string Decimal128Type::ToString() const {
3314+
std::string Decimal128Type::ToString(bool show_metadata) const {
33083315
std::stringstream s;
33093316
s << "decimal128(" << precision_ << ", " << scale_ << ")";
33103317
return s.str();
33113318
}
33123319

3313-
std::string Decimal256Type::ToString() const {
3320+
std::string Decimal256Type::ToString(bool show_metadata) const {
33143321
std::stringstream s;
33153322
s << "decimal256(" << precision_ << ", " << scale_ << ")";
33163323
return s.str();

0 commit comments

Comments
 (0)