Skip to content

Commit 63b1ae6

Browse files
xwang233facebook-github-bot
authored and committed
Fix overflow in torch.remainder when dividend is very large (#37758)
Summary: This fixes the GPU implementation for #37743 and #24861. Please also check my [comment](#37743 (comment)). The fixed `remainder_kernel` follows an implementation similar to the one in NumPy. See https://github.com/numpy/numpy/blob/79d7bc276afbe89c746e462d28d4bfbb4fc56148/numpy/core/src/npymath/npy_math_internal.h.src#L649-L658 I also slightly updated the doc for `torch.remainder` to make it similar to the doc for `torch.fmod`. I'm not sure how to modify the Vec256 code of the CPU remainder_kernel, so I left it unchanged. Pull Request resolved: #37758 Differential Revision: D21388417 Pulled By: ngimel fbshipit-source-id: 770ba5801cf34619b2b68b8b0cf95d8cfa52e6f6
1 parent fdc4061 commit 63b1ae6

3 files changed

Lines changed: 26 additions & 3 deletions

File tree

aten/src/ATen/native/cuda/BinaryArithmeticKernel.cu

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -77,7 +77,9 @@ void remainder_kernel_cuda(TensorIterator& iter) {
7777
AT_DISPATCH_FLOATING_TYPES_AND_HALF(iter.dtype(), "remainder_cuda", [&]() {
7878
gpu_kernel_with_scalars(iter,
7979
[]GPU_LAMBDA(scalar_t a, scalar_t b) __ubsan_ignore_float_divide_by_zero__ -> scalar_t {
80-
return a - b * static_cast<scalar_t>(std::floor(a / b));
80+
auto mod = ::fmod(a, b);
81+
if ((mod != 0) && ((b < 0) != (mod < 0))) mod += b;
82+
return mod;
8183
});
8284
});
8385
}

test/test_torch.py

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15449,6 +15449,27 @@ def test_remainder(self, device, dtype):
1544915449
long_res1 = long_m1.clone()
1545015450
long_res1.remainder_(long_qs.unsqueeze(0).expand_as(long_res1))
1545115451

15452+
# remove onlyCUDA after the CPU impl of remainder_kernel is fixed
15453+
@onlyCUDA
15454+
@dtypes(torch.float, torch.double)
15455+
def test_remainder_fmod_large_dividend(self, device, dtype):
15456+
alarge = 1e9
15457+
pi = 3.14159265358979
15458+
for avalue in [alarge, -alarge]:
15459+
for bvalue in [pi, -pi]:
15460+
a = torch.tensor([avalue], dtype=dtype, device=device)
15461+
b = torch.tensor([bvalue], dtype=dtype, device=device)
15462+
c = torch.remainder(a, b)
15463+
d = torch.fmod(a, b)
15464+
self.assertTrue((b[0] > 0) == (c[0] > 0)) # remainder has same sign as divisor
15465+
self.assertTrue((a[0] > 0) == (d[0] > 0)) # fmod has same sign as dividend
15466+
self.assertTrue(abs(c[0]) < abs(b[0])) # remainder is within range of divisor
15467+
self.assertTrue(abs(d[0]) < abs(b[0])) # fmod is within range of divisor
15468+
if ((a[0] > 0) == (b[0] > 0)):
15469+
self.assertTrue(c[0] == d[0]) # remainder is same as fmod
15470+
else:
15471+
self.assertTrue(abs(c[0] - d[0]) == abs(b[0])) # differ by one divisor
15472+
1545215473
@dtypes(torch.int64, torch.float64)
1545315474
def test_remainder_edge_cases(self, device, dtype):
1545415475
# Test variations of negative values used as input

torch/_torch_docs.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4902,8 +4902,8 @@ def merge_dicts(*dicts):
49024902
49034903
Computes the element-wise remainder of division.
49044904
4905-
The divisor and dividend may contain both for integer and floating point
4906-
numbers. The remainder has the same sign as the divisor.
4905+
The dividend and divisor may contain both integer and floating point
4906+
numbers. The remainder has the same sign as the divisor :attr:`other`.
49074907
49084908
When :attr:`other` is a tensor, the shapes of :attr:`input` and
49094909
:attr:`other` must be :ref:`broadcastable <broadcasting-semantics>`.

0 commit comments

Comments
 (0)