-
Notifications
You must be signed in to change notification settings - Fork 4.1k
[C++] duration add missing kernels #39233
Copy link
Copy link
Closed
Description
Describe the enhancement requested
There are lots of kernels missing on the duration type for feature parity with e.g. numpy.timedelta64. Since duration types are basically just wrapped integers, most of the integer kernels should be transferable, with some exceptions (e.g. prod does not make sense, since it would change the physical units).
I ran a script to figure out what is currently supported.
# Probe which pyarrow compute kernels are implemented for duration arrays.
#
# Every table entry has the shape ``(kernel, *operands, expected_result_type)``.
# The loop at the bottom calls each kernel and prints "[x]" when the call
# succeeded and "[ ]" when pyarrow raised ArrowNotImplementedError.
import pyarrow as pa
# Import the submodule explicitly so the compute kernels are guaranteed to be
# loaded, instead of relying on ``pa.compute`` happening to be available.
import pyarrow.compute as pc
from pyarrow.lib import ArrowNotImplementedError

# Result / operand types used in the tables below.
i8 = pa.int8()
i64 = pa.int64()
i32 = pa.int32()
f64 = pa.float64()
td64 = pa.duration("s")
b = pa.bool_()

# The same four values, once per operand type.
duration_arr = pa.array([-3, 2, -1, 1], type=td64)
int_arr = pa.array([-3, 2, -1, 1], type=i64)
float_arr = pa.array([-3, 2, -1, 1], type=f64)

# Debug toggle (presumably for comparing against plain int64 -- confirm):
# td64 = pa.int64()
# duration_arr = pa.array([-3, 2, -1, 1], type=i64)

unary_ops = [
    (pc.negate, duration_arr, td64),
    (pc.negate_checked, duration_arr, td64),
    (pc.abs, duration_arr, td64),
    (pc.abs_checked, duration_arr, td64),
    (pc.sign, duration_arr, i8),
    # tests
    (pc.is_null, duration_arr, b),
    (pc.is_valid, duration_arr, b),
    (pc.is_finite, duration_arr, b),
    (pc.is_inf, duration_arr, b),
    (pc.is_nan, duration_arr, b),
    (pc.true_unless_null, duration_arr, b),
    # aggregations
    (pc.min_max, duration_arr, pa.struct([('min', td64), ('max', td64)])),
    (pc.max, duration_arr, td64),
    (pc.min, duration_arr, td64),
    (pc.sum, duration_arr, td64),
    (pc.mode, duration_arr, pa.struct([('mode', td64), ('count', i64)])),
    # cumulative aggregations
    (pc.cumulative_sum, duration_arr, td64),
    (pc.cumulative_sum_checked, duration_arr, td64),
    (pc.cumulative_min, duration_arr, td64),
    (pc.cumulative_max, duration_arr, td64),
]

binary_ops = [
    # arithmetic
    (pc.add, duration_arr, duration_arr, td64),
    (pc.add_checked, duration_arr, duration_arr, td64),
    (pc.subtract, duration_arr, duration_arr, td64),
    (pc.subtract_checked, duration_arr, duration_arr, td64),
    (pc.multiply, duration_arr, int_arr, td64),
    (pc.multiply_checked, duration_arr, int_arr, td64),
    (pc.divide, duration_arr, duration_arr, f64),
    (pc.divide, duration_arr, int_arr, td64),
    (pc.divide_checked, duration_arr, duration_arr, f64),
    (pc.divide_checked, duration_arr, int_arr, td64),
    # comparisons
    (pc.less, duration_arr, duration_arr, b),
    (pc.less_equal, duration_arr, duration_arr, b),
    (pc.greater, duration_arr, duration_arr, b),
    (pc.greater_equal, duration_arr, duration_arr, b),
    (pc.equal, duration_arr, duration_arr, b),
    (pc.not_equal, duration_arr, duration_arr, b),
    # min/max
    (pc.max_element_wise, duration_arr, duration_arr, td64),
    (pc.min_element_wise, duration_arr, duration_arr, td64),
    # containment
    (pc.is_in, duration_arr, duration_arr, b),
    (pc.index_in, duration_arr, duration_arr, i32),
    # (functions that require rounding are collected in rounding_ops below)
]

# NOTE(review): rounding_ops is defined but is NOT iterated by the loop below,
# which only covers unary_ops + binary_ops. Confirm whether these were meant to
# be probed as well, and with which expected result types.
rounding_ops = [
    # operations that require rounding
    (pc.mean, duration_arr, td64),
    (pc.quantile, duration_arr, td64),
    (pc.approximate_median, duration_arr, td64),
    (pc.multiply, duration_arr, float_arr, td64),
    (pc.multiply_checked, duration_arr, float_arr, td64),
    (pc.divide, duration_arr, float_arr, td64),
    (pc.divide_checked, duration_arr, float_arr, td64),
]

for op, *operands, dtype in unary_ops + binary_ops:
    try:
        result = op(*operands)
    except ArrowNotImplementedError:
        # No kernel registered for these operand types: report as unsupported.
        x = " "
    else:
        x = "x"
        # A kernel exists; make sure it yields the result type we expect.
        assert result.type == dtype, f"{op}: got {result.type} expected {dtype}"
    # Use a distinct name for the generator variable so it does not shadow
    # the ``op`` that is still needed for ``op.__name__`` below.
    formatted_ops = ", ".join(f"{operand.type!s:<11}" for operand in operands)
    print(f" [{x}] {op.__name__:<24}({formatted_ops}) -> {dtype}")

# EDIT: updated with pyarrow 16.0
Unary Ops
- negate(duration[s]) -> duration[s]
- negate_checked(duration[s]) -> duration[s]
- abs(duration[s]) -> duration[s]
- abs_checked(duration[s]) -> duration[s]
- sign(duration[s]) -> int8
- is_null(duration[s]) -> bool
- is_valid(duration[s]) -> bool
- is_finite(duration[s]) -> bool
- is_inf(duration[s]) -> bool
- is_nan(duration[s]) -> bool
- true_unless_null(duration[s]) -> bool
- max(duration[s]) -> duration[s]
- min(duration[s]) -> duration[s]
- sum(duration[s]) -> duration[s]
- mode(duration[s]) -> duration[s]
- cumulative_sum(duration[s]) -> duration[s]
- cumulative_sum_checked(duration[s]) -> duration[s]
- cumulative_min(duration[s]) -> duration[s]
- cumulative_max(duration[s]) -> duration[s]
Binary Ops
- add(duration[s], duration[s]) -> duration[s]
- add_checked(duration[s], duration[s]) -> duration[s]
- subtract(duration[s], duration[s]) -> duration[s]
- subtract_checked(duration[s], duration[s]) -> duration[s]
- multiply(duration[s], int64 ) -> duration[s]
- multiply_checked(duration[s], int64 ) -> duration[s]
- divide(duration[s], duration[s]) -> double
- divide(duration[s], int64 ) -> duration[s]
- divide_checked(duration[s], duration[s]) -> double
- divide_checked(duration[s], int64 ) -> duration[s]
- less(duration[s], duration[s]) -> bool
- less_equal(duration[s], duration[s]) -> bool
- greater(duration[s], duration[s]) -> bool
- greater_equal(duration[s], duration[s]) -> bool
- equal(duration[s], duration[s]) -> bool
- not_equal(duration[s], duration[s]) -> bool
- max_element_wise(duration[s], duration[s]) -> duration[s]
- min_element_wise(duration[s], duration[s]) -> duration[s]
- is_in(duration[s], duration[s]) -> bool
- index_in(duration[s], duration[s]) -> int32
Additional Ops
These are somewhat questionable, as they require rounding. They are supported by numpy.timedelta64 arrays.
- mean(duration[s]) -> duration[s]
- multiply(duration[s], double) -> duration[s]
- multiply_checked(duration[s], double) -> duration[s]
- divide(duration[s], double) -> duration[s]
- divide_checked(duration[s], double) -> duration[s]
- quantile(duration[s], double) -> duration[s]
- approximate_median(duration[s]) -> duration[s]
Component(s)
C++
Reactions are currently unavailable