Skip to content

[C++] duration add missing kernels #39233

@randolf-scholz

Description

@randolf-scholz

Describe the enhancement requested

There are lots of kernels missing on the duration type for feature parity with e.g. numpy.timedelta64. Since duration types are basically just wrapped integers, most of the integer kernels should be transferable, with some exceptions (e.g. prod does not make sense since it would change the physical units).

I ran a script to figure out what is currently supported.
import pyarrow as pa
import pyarrow.compute as pc
from pyarrow.lib import ArrowNotImplementedError

# Arrow data types used both for building operands and as expected result types.
b = pa.bool_()
i8 = pa.int8()
i32 = pa.int32()
i64 = pa.int64()
f64 = pa.float64()
td64 = pa.duration("s")

# One set of mixed-sign values, materialised once per operand type.
_values = [-3, 2, -1, 1]
duration_arr, int_arr, float_arr = (
    pa.array(_values, type=operand_type) for operand_type in (td64, i64, f64)
)

# NOTE(review): presumably a manual toggle to run the same checks against
# plain int64 instead of duration -- confirm before removing.
# td64 = pa.int64()
# duration_arr = pa.array([-3, 2, -1, 1], type=i64)

# Single-operand kernels to probe. Each entry is
# (compute function, operand, expected result type).
# The functions are reached through the explicitly imported ``pc`` alias:
# ``import pyarrow`` alone does not guarantee that the ``compute`` submodule
# is loaded, so ``pa.compute.<name>`` can fail with AttributeError.
unary_ops = [
    # arithmetic
    (pc.negate, duration_arr, td64),
    (pc.negate_checked, duration_arr, td64),
    (pc.abs, duration_arr, td64),
    (pc.abs_checked, duration_arr, td64),
    (pc.sign, duration_arr, i8),
    # tests
    (pc.is_null, duration_arr, b),
    (pc.is_valid, duration_arr, b),
    (pc.is_finite, duration_arr, b),
    (pc.is_inf, duration_arr, b),
    (pc.is_nan, duration_arr, b),
    (pc.true_unless_null, duration_arr, b),
    # aggregations
    (pc.min_max, duration_arr, pa.struct([('min', td64), ('max', td64)])),
    (pc.max, duration_arr, td64),
    (pc.min, duration_arr, td64),
    (pc.sum, duration_arr, td64),
    (pc.mode, duration_arr, pa.struct([('mode', td64), ('count', i64)])),
    # cumulative aggregations
    (pc.cumulative_sum, duration_arr, td64),
    (pc.cumulative_sum_checked, duration_arr, td64),
    (pc.cumulative_min, duration_arr, td64),
    (pc.cumulative_max, duration_arr, td64),
]

# Two-operand kernels to probe. Each entry is
# (compute function, left operand, right operand, expected result type).
# The functions are reached through the explicitly imported ``pc`` alias:
# ``import pyarrow`` alone does not guarantee that the ``compute`` submodule
# is loaded, so ``pa.compute.<name>`` can fail with AttributeError.
binary_ops = [
    # arithmetic
    (pc.add, duration_arr, duration_arr, td64),
    (pc.add_checked, duration_arr, duration_arr, td64),
    (pc.subtract, duration_arr, duration_arr, td64),
    (pc.subtract_checked, duration_arr, duration_arr, td64),
    (pc.multiply, duration_arr, int_arr, td64),
    (pc.multiply_checked, duration_arr, int_arr, td64),
    (pc.divide, duration_arr, duration_arr, f64),
    (pc.divide, duration_arr, int_arr, td64),
    (pc.divide_checked, duration_arr, duration_arr, f64),
    (pc.divide_checked, duration_arr, int_arr, td64),
    # comparisons
    (pc.less, duration_arr, duration_arr, b),
    (pc.less_equal, duration_arr, duration_arr, b),
    (pc.greater, duration_arr, duration_arr, b),
    (pc.greater_equal, duration_arr, duration_arr, b),
    (pc.equal, duration_arr, duration_arr, b),
    (pc.not_equal, duration_arr, duration_arr, b),
    # min/max
    (pc.max_element_wise, duration_arr, duration_arr, td64),
    (pc.min_element_wise, duration_arr, duration_arr, td64),
    # containment
    (pc.is_in, duration_arr, duration_arr, b),
    (pc.index_in, duration_arr, duration_arr, i32),
]

# Kernels whose result would have to be rounded to fit a duration.
# Entries use the same layout as the other tables:
# (compute function, operand(s)..., expected result type).
# The functions are reached through the explicitly imported ``pc`` alias:
# ``import pyarrow`` alone does not guarantee that the ``compute`` submodule
# is loaded, so ``pa.compute.<name>`` can fail with AttributeError.
rounding_ops = [
    # operations that require rounding
    (pc.mean, duration_arr, td64),
    (pc.quantile, duration_arr, td64),
    (pc.approximate_median, duration_arr, td64),
    (pc.multiply, duration_arr, float_arr, td64),
    (pc.multiply_checked, duration_arr, float_arr, td64),
    (pc.divide, duration_arr, float_arr, td64),
    (pc.divide_checked, duration_arr, float_arr, td64),
]

# Probe every table entry and print one checklist line per kernel.
# Each entry unpacks to (function, *operands, expected result type).
# rounding_ops is included as well: it was defined but never iterated, so the
# rounding kernels were silently left out of the report.
for op, *operands, dtype in unary_ops + binary_ops + rounding_ops:
    try:
        result = op(*operands)
    except ArrowNotImplementedError:
        # pyarrow reported the call as not implemented: leave the box empty.
        x = " "
    else:
        x = "x"
        # The call succeeded; verify it produced the expected result type.
        assert result.type == dtype, f"{op}: got {result.type} expected {dtype}"

    # Pad each operand type to a fixed width so the printed signatures line up.
    formatted_ops = ", ".join(f"{arg.type!s:<11}" for arg in operands)
    print(f" [{x}] {op.__name__:<24}({formatted_ops}) -> {dtype}")

EDIT: updated with pyarrow 16.0

Unary Ops

  • negate(duration[s]) -> duration[s]
  • negate_checked(duration[s]) -> duration[s]
  • abs(duration[s]) -> duration[s]
  • abs_checked(duration[s]) -> duration[s]
  • sign(duration[s]) -> int8
  • is_null(duration[s]) -> bool
  • is_valid(duration[s]) -> bool
  • is_finite(duration[s]) -> bool
  • is_inf(duration[s]) -> bool
  • is_nan(duration[s]) -> bool
  • true_unless_null(duration[s]) -> bool
  • max(duration[s]) -> duration[s]
  • min(duration[s]) -> duration[s]
  • sum(duration[s]) -> duration[s]
  • mode(duration[s]) -> struct<mode: duration[s], count: int64>
  • cumulative_sum(duration[s]) -> duration[s]
  • cumulative_sum_checked(duration[s]) -> duration[s]
  • cumulative_min(duration[s]) -> duration[s]
  • cumulative_max(duration[s]) -> duration[s]

Binary Ops

  • add(duration[s], duration[s]) -> duration[s]
  • add_checked(duration[s], duration[s]) -> duration[s]
  • subtract(duration[s], duration[s]) -> duration[s]
  • subtract_checked(duration[s], duration[s]) -> duration[s]
  • multiply(duration[s], int64 ) -> duration[s]
  • multiply_checked(duration[s], int64 ) -> duration[s]
  • divide(duration[s], duration[s]) -> double
  • divide(duration[s], int64 ) -> duration[s]
  • divide_checked(duration[s], duration[s]) -> double
  • divide_checked(duration[s], int64 ) -> duration[s]
  • less(duration[s], duration[s]) -> bool
  • less_equal(duration[s], duration[s]) -> bool
  • greater(duration[s], duration[s]) -> bool
  • greater_equal(duration[s], duration[s]) -> bool
  • equal(duration[s], duration[s]) -> bool
  • not_equal(duration[s], duration[s]) -> bool
  • max_element_wise(duration[s], duration[s]) -> duration[s]
  • min_element_wise(duration[s], duration[s]) -> duration[s]
  • is_in(duration[s], duration[s]) -> bool
  • index_in(duration[s], duration[s]) -> int32

Additional Ops

These are somewhat questionable, as they require rounding. They are supported by numpy.timedelta64 arrays.

  • mean(duration[s]) -> duration[s]
  • multiply(duration[s], double) -> duration[s]
  • multiply_checked(duration[s], double) -> duration[s]
  • divide(duration[s], double) -> duration[s]
  • divide_checked(duration[s], double) -> duration[s]
  • quantile(duration[s], double) -> duration[s]
  • approximate_median(duration[s]) -> duration[s]

Component(s)

C++

Metadata

Metadata

Assignees

Type

No type

Projects

No projects

Milestone

Relationships

None yet

Development

No branches or pull requests

Issue actions