Skip to content

Commit 87d1889

Browse files
authored
ARROW-16977: [R] Update dataset row counting so no integer overflow on large datasets (#13514)
Follow-up to #13482, which missed this change.

Authored-by: Nic Crane <thisisnic@gmail.com>
Signed-off-by: Neal Richardson <neal.p.richardson@gmail.com>
1 parent 5d86e9f commit 87d1889

File tree

16 files changed

+113
-80
lines changed

16 files changed

+113
-80
lines changed

r/NAMESPACE

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -388,6 +388,7 @@ importFrom(purrr,map)
388388
importFrom(purrr,map2)
389389
importFrom(purrr,map2_chr)
390390
importFrom(purrr,map_chr)
391+
importFrom(purrr,map_dbl)
391392
importFrom(purrr,map_dfr)
392393
importFrom(purrr,map_int)
393394
importFrom(purrr,map_lgl)

r/R/arrow-package.R

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@
1717

1818
#' @importFrom stats quantile median na.omit na.exclude na.pass na.fail
1919
#' @importFrom R6 R6Class
20-
#' @importFrom purrr as_mapper map map2 map_chr map2_chr map_dfr map_int map_lgl keep imap imap_chr flatten
20+
#' @importFrom purrr as_mapper map map2 map_chr map2_chr map_dbl map_dfr map_int map_lgl keep imap imap_chr flatten
2121
#' @importFrom assertthat assert_that is.string
2222
#' @importFrom rlang list2 %||% is_false abort dots_n warn enquo quo_is_null enquos is_integerish quos
2323
#' @importFrom rlang eval_tidy new_data_mask syms env new_environment env_bind set_names exec

r/R/record-batch.R

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -196,8 +196,8 @@ rbind.RecordBatch <- function(...) {
196196
}
197197

198198
cbind_check_length <- function(inputs, call = caller_env()) {
199-
sizes <- map_int(inputs, NROW)
200-
ok_lengths <- sizes %in% c(head(sizes, 1), 1L)
199+
sizes <- map_dbl(inputs, NROW)
200+
ok_lengths <- sizes %in% c(head(sizes, 1), 1)
201201
if (!all(ok_lengths)) {
202202
first_bad_one <- which.min(ok_lengths)
203203
abort(

r/R/util.R

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -158,7 +158,7 @@ as_writable_table <- function(x) {
158158
#' @keywords internal
159159
recycle_scalars <- function(arrays) {
160160
# Get lengths of items in arrays
161-
arr_lens <- map_int(arrays, NROW)
161+
arr_lens <- map_dbl(arrays, NROW)
162162

163163
is_scalar <- arr_lens == 1
164164

r/src/array.cpp

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -244,15 +244,15 @@ int32_t ListArray__value_length(const std::shared_ptr<arrow::ListArray>& array,
244244
}
245245

246246
// [[arrow::export]]
247-
int64_t LargeListArray__value_length(const std::shared_ptr<arrow::LargeListArray>& array,
248-
int64_t i) {
249-
return array->value_length(i);
247+
r_vec_size LargeListArray__value_length(
248+
const std::shared_ptr<arrow::LargeListArray>& array, int64_t i) {
249+
return r_vec_size(array->value_length(i));
250250
}
251251

252252
// [[arrow::export]]
253-
int64_t FixedSizeListArray__value_length(
253+
r_vec_size FixedSizeListArray__value_length(
254254
const std::shared_ptr<arrow::FixedSizeListArray>& array, int64_t i) {
255-
return array->value_length(i);
255+
return r_vec_size(array->value_length(i));
256256
}
257257

258258
// [[arrow::export]]
@@ -262,15 +262,15 @@ int32_t ListArray__value_offset(const std::shared_ptr<arrow::ListArray>& array,
262262
}
263263

264264
// [[arrow::export]]
265-
int64_t LargeListArray__value_offset(const std::shared_ptr<arrow::LargeListArray>& array,
266-
int64_t i) {
267-
return array->value_offset(i);
265+
r_vec_size LargeListArray__value_offset(
266+
const std::shared_ptr<arrow::LargeListArray>& array, int64_t i) {
267+
return r_vec_size(array->value_offset(i));
268268
}
269269

270270
// [[arrow::export]]
271-
int64_t FixedSizeListArray__value_offset(
271+
r_vec_size FixedSizeListArray__value_offset(
272272
const std::shared_ptr<arrow::FixedSizeListArray>& array, int64_t i) {
273-
return array->value_offset(i);
273+
return r_vec_size(array->value_offset(i));
274274
}
275275

276276
// [[arrow::export]]
@@ -319,8 +319,8 @@ bool Array__Same(const std::shared_ptr<arrow::Array>& x,
319319
}
320320

321321
// [[arrow::export]]
322-
int64_t Array__ReferencedBufferSize(const std::shared_ptr<arrow::Array>& x) {
323-
return ValueOrStop(arrow::util::ReferencedBufferSize(*x));
322+
r_vec_size Array__ReferencedBufferSize(const std::shared_ptr<arrow::Array>& x) {
323+
return r_vec_size(ValueOrStop(arrow::util::ReferencedBufferSize(*x)));
324324
}
325325

326326
// [[arrow::export]]

r/src/arrowExports.cpp

Lines changed: 25 additions & 25 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

r/src/buffer.cpp

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -28,13 +28,13 @@ void Buffer__ZeroPadding(const std::shared_ptr<arrow::Buffer>& buffer) {
2828
}
2929

3030
// [[arrow::export]]
31-
int64_t Buffer__capacity(const std::shared_ptr<arrow::Buffer>& buffer) {
32-
return buffer->capacity();
31+
r_vec_size Buffer__capacity(const std::shared_ptr<arrow::Buffer>& buffer) {
32+
return r_vec_size(buffer->capacity());
3333
}
3434

3535
// [[arrow::export]]
36-
int64_t Buffer__size(const std::shared_ptr<arrow::Buffer>& buffer) {
37-
return buffer->size();
36+
r_vec_size Buffer__size(const std::shared_ptr<arrow::Buffer>& buffer) {
37+
return r_vec_size(buffer->size());
3838
}
3939

4040
// [[arrow::export]]

r/src/chunkedarray.cpp

Lines changed: 11 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -22,18 +22,21 @@
2222
#include <arrow/util/byte_size.h>
2323

2424
// [[arrow::export]]
25-
int ChunkedArray__length(const std::shared_ptr<arrow::ChunkedArray>& chunked_array) {
26-
return chunked_array->length();
25+
r_vec_size ChunkedArray__length(
26+
const std::shared_ptr<arrow::ChunkedArray>& chunked_array) {
27+
return r_vec_size(chunked_array->length());
2728
}
2829

2930
// [[arrow::export]]
30-
int ChunkedArray__null_count(const std::shared_ptr<arrow::ChunkedArray>& chunked_array) {
31-
return chunked_array->null_count();
31+
r_vec_size ChunkedArray__null_count(
32+
const std::shared_ptr<arrow::ChunkedArray>& chunked_array) {
33+
return r_vec_size(chunked_array->null_count());
3234
}
3335

3436
// [[arrow::export]]
35-
int ChunkedArray__num_chunks(const std::shared_ptr<arrow::ChunkedArray>& chunked_array) {
36-
return chunked_array->num_chunks();
37+
r_vec_size ChunkedArray__num_chunks(
38+
const std::shared_ptr<arrow::ChunkedArray>& chunked_array) {
39+
return r_vec_size(chunked_array->num_chunks());
3740
}
3841

3942
// [[arrow::export]]
@@ -144,7 +147,7 @@ std::shared_ptr<arrow::ChunkedArray> ChunkedArray__from_list(cpp11::list chunks,
144147
}
145148

146149
// [[arrow::export]]
147-
int64_t ChunkedArray__ReferencedBufferSize(
150+
r_vec_size ChunkedArray__ReferencedBufferSize(
148151
const std::shared_ptr<arrow::ChunkedArray>& chunked_array) {
149-
return ValueOrStop(arrow::util::ReferencedBufferSize(*chunked_array));
152+
return r_vec_size(ValueOrStop(arrow::util::ReferencedBufferSize(*chunked_array)));
150153
}

r/src/dataset.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -511,8 +511,8 @@ std::shared_ptr<arrow::Table> dataset___Scanner__TakeRows(
511511
}
512512

513513
// [[dataset::export]]
514-
int64_t dataset___Scanner__CountRows(const std::shared_ptr<ds::Scanner>& scanner) {
515-
return ValueOrStop(scanner->CountRows());
514+
r_vec_size dataset___Scanner__CountRows(const std::shared_ptr<ds::Scanner>& scanner) {
515+
return r_vec_size(ValueOrStop(scanner->CountRows()));
516516
}
517517

518518
#endif

r/src/filesystem.cpp

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -71,7 +71,9 @@ void fs___FileInfo__set_path(const std::shared_ptr<fs::FileInfo>& x,
7171
}
7272

7373
// [[arrow::export]]
74-
int64_t fs___FileInfo__size(const std::shared_ptr<fs::FileInfo>& x) { return x->size(); }
74+
r_vec_size fs___FileInfo__size(const std::shared_ptr<fs::FileInfo>& x) {
75+
return r_vec_size(x->size());
76+
}
7577

7678
// [[arrow::export]]
7779
void fs___FileInfo__set_size(const std::shared_ptr<fs::FileInfo>& x, int64_t size) {

0 commit comments

Comments
 (0)