Skip to content

Commit 26801f1

Browse files
authored
GH-39769: [C++][Device] Fix Importing nested and string types for DeviceArray (#39770)
### Rationale for this change In my testing with libcudf and other GPU data, I discovered a deficiency in ImportDeviceArray and thus ImportDeviceRecordBatch where the device type and memory manager aren't propagated to child importers and it fails to import offset-based types such as strings. ### What changes are included in this PR? These are relatively easily handled by first ensuring that `ImportChild` propagates the device_type and memory manager from the parent. Then for importing offset based values we merely need to use the memory manager to copy the final offset value to the CPU to use for the buffer size computation. This will work for any device which has implemented CopyBufferTo/From ### Are these changes tested? A new test is added to test these situations. * Closes: #39769 Authored-by: Matt Topol <zotthewizard@gmail.com> Signed-off-by: Matt Topol <zotthewizard@gmail.com>
1 parent de53aac commit 26801f1

3 files changed

Lines changed: 44 additions & 3 deletions

File tree

cpp/src/arrow/c/bridge.cc

Lines changed: 20 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1543,6 +1543,8 @@ struct ArrayImporter {
15431543
if (recursion_level_ >= kMaxImportRecursionLevel) {
15441544
return Status::Invalid("Recursion level in ArrowArray struct exceeded");
15451545
}
1546+
device_type_ = parent->device_type_;
1547+
memory_mgr_ = parent->memory_mgr_;
15461548
// Child buffers will keep the entire parent import alive.
15471549
// Perhaps we can move the child structs to an owned area
15481550
// when the parent ImportedArrayData::Release() gets called,
@@ -1857,10 +1859,25 @@ struct ArrayImporter {
18571859
template <typename OffsetType>
18581860
Status ImportStringValuesBuffer(int32_t offsets_buffer_id, int32_t buffer_id,
18591861
int64_t byte_width = 1) {
1860-
auto offsets = data_->GetValues<OffsetType>(offsets_buffer_id);
1862+
if (device_type_ == DeviceAllocationType::kCPU) {
1863+
auto offsets = data_->GetValues<OffsetType>(offsets_buffer_id);
1864+
// Compute visible size of buffer
1865+
int64_t buffer_size =
1866+
(c_struct_->length > 0) ? byte_width * offsets[c_struct_->length] : 0;
1867+
return ImportBuffer(buffer_id, buffer_size);
1868+
}
1869+
1870+
// we only need the value of the last offset so let's just copy that
1871+
// one value from device to host.
1872+
auto single_value_buf =
1873+
SliceBuffer(data_->buffers[offsets_buffer_id],
1874+
c_struct_->length * sizeof(OffsetType), sizeof(OffsetType));
1875+
ARROW_ASSIGN_OR_RAISE(
1876+
auto cpubuf, Buffer::ViewOrCopy(single_value_buf, default_cpu_memory_manager()));
1877+
auto offsets = cpubuf->data_as<OffsetType>();
18611878
// Compute visible size of buffer
1862-
int64_t buffer_size =
1863-
(c_struct_->length > 0) ? byte_width * offsets[c_struct_->length] : 0;
1879+
int64_t buffer_size = (c_struct_->length > 0) ? byte_width * offsets[0] : 0;
1880+
18641881
return ImportBuffer(buffer_id, buffer_size);
18651882
}
18661883

cpp/src/arrow/c/bridge_test.cc

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4320,6 +4320,16 @@ TEST_F(TestDeviceArrayRoundtrip, Primitive) {
43204320
TestWithJSON(mm, int32(), "[4, 5, null]");
43214321
}
43224322

4323+
TEST_F(TestDeviceArrayRoundtrip, Struct) {
4324+
std::shared_ptr<Device> device = std::make_shared<MyDevice>(1);
4325+
auto mm = device->default_memory_manager();
4326+
auto type = struct_({field("ints", int16()), field("strs", utf8())});
4327+
4328+
TestWithJSON(mm, type, "[]");
4329+
TestWithJSON(mm, type, R"([[4, "foo"], [5, "bar"]])");
4330+
TestWithJSON(mm, type, R"([[4, null], null, [5, "foo"]])");
4331+
}
4332+
43234333
////////////////////////////////////////////////////////////////////////////
43244334
// Array stream export tests
43254335

cpp/src/arrow/device.cc

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -195,6 +195,13 @@ Result<std::shared_ptr<Buffer>> CPUMemoryManager::ViewBufferFrom(
195195
if (!from->is_cpu()) {
196196
return nullptr;
197197
}
198+
// in this case the memory manager we're coming from is visible on the CPU,
199+
// but uses an allocation type other than CPU. Since we know the data is visible
200+
// to the CPU a "View" of this should use the CPUMemoryManager as the listed memory
201+
// manager.
202+
if (buf->device_type() != DeviceAllocationType::kCPU) {
203+
return std::make_shared<Buffer>(buf->address(), buf->size(), shared_from_this(), buf);
204+
}
198205
return buf;
199206
}
200207

@@ -220,6 +227,13 @@ Result<std::shared_ptr<Buffer>> CPUMemoryManager::ViewBufferTo(
220227
if (!to->is_cpu()) {
221228
return nullptr;
222229
}
230+
// in this case the memory manager we're coming from is visible on the CPU,
231+
// but uses an allocation type other than CPU. Since we know the data is visible
232+
// to the CPU a "View" of this should use the CPUMemoryManager as the listed memory
233+
// manager.
234+
if (buf->device_type() != DeviceAllocationType::kCPU) {
235+
return std::make_shared<Buffer>(buf->address(), buf->size(), to, buf);
236+
}
223237
return buf;
224238
}
225239

0 commit comments

Comments
 (0)