Add prod_sequence to avoid creating a vector from Python sequences

okuta · okuta · commit efd6c6115829 · 2019-11-24T14:34:36.000+09:00
diff --git a/cupy/core/_routines_indexing.pyx b/cupy/core/_routines_indexing.pyx
@@ -687,14 +687,14 @@ cdef _scatter_op_single(
 
     lshape = a_shape[:li]
     rshape = a_shape[ri + 1:]
-    adim = internal.prod(a_shape[li:ri + 1])
+    adim = internal.prod_sequence(a_shape[li:ri + 1])
 
     indices_shape = indices.shape
     v_shape = lshape + indices_shape + rshape
     v = _manipulation.broadcast_to(v, v_shape)
 
     cdim = indices.size
-    rdim = internal.prod(rshape)
+    rdim = internal.prod_sequence(rshape)
     indices = _manipulation._reshape(
         indices,
         (1,) * len(lshape) + indices_shape + (1,) * len(rshape))
diff --git a/cupy/core/_routines_manipulation.pyx b/cupy/core/_routines_manipulation.pyx
@@ -765,7 +765,7 @@ cdef ndarray _concatenate_single_kernel(
 
     ret = core.ndarray(shape, dtype=dtype)
     if same_shape_and_contiguous:
-        base = internal.prod(shape[axis:]) // len(arrays)
+        base = internal.prod_sequence(shape[axis:]) // len(arrays)
         _concatenate_kernel_same_size(x, base, ret)
         return ret
 
diff --git a/cupy/core/core.pyx b/cupy/core/core.pyx
@@ -2763,7 +2763,7 @@ cpdef ndarray _convert_object_with_cuda_array_interface(a):
         for sh, st in zip(shape, strides):
             nbytes = max(nbytes, abs(sh * st))
     else:
-        nbytes = internal.prod(shape) * dtype.itemsize
+        nbytes = internal.prod_sequence(shape) * dtype.itemsize
     mem = memory_module.UnownedMemory(desc['data'][0], nbytes, a)
     memptr = memory.MemoryPointer(mem, 0)
     return ndarray(shape, dtype, memptr, strides)
diff --git a/cupy/core/internal.pxd b/cupy/core/internal.pxd
@@ -5,6 +5,8 @@ from libc.stdint cimport uint16_t
 
 cpdef Py_ssize_t prod(const vector.vector[Py_ssize_t]& args)
 
+cpdef Py_ssize_t prod_sequence(object args)
+
 cpdef tuple get_size(object size)
 
 cpdef bint vector_equal(
diff --git a/cupy/core/internal.pyx b/cupy/core/internal.pyx
@@ -19,6 +19,14 @@ cpdef inline Py_ssize_t prod(const vector.vector[Py_ssize_t]& args):
     return n
 
 
+@cython.profile(False)
+cpdef inline Py_ssize_t prod_sequence(object args):
+    cdef Py_ssize_t i, n = 1  # n starts at 1, so an empty sequence gives 1
+    for i in args:  # items are converted to Py_ssize_t when bound to i
+        n *= i
+    return n
+
+
 @cython.profile(False)
 cpdef inline tuple get_size(object size):
     if size is None:
diff --git a/cupy/cudnn.pyx b/cupy/cudnn.pyx
@@ -837,7 +837,7 @@ def get_rnn_lin_layer_matrix_params(
         cudnn.destroyFilterDescriptor(mat_desc)
     byte_size = _get_byte_size(data_type)
     offset = (ptr - w.data.ptr) // byte_size
-    size = internal.prod(dim)
+    size = internal.prod_sequence(dim)
     mat = w[offset:offset + size]
     return mat
 
@@ -855,7 +855,7 @@ def get_rnn_lin_layer_bias_params(
         cudnn.destroyFilterDescriptor(bias_desc)
     byte_size = _get_byte_size(data_type)
     offset = (ptr - w.data.ptr) // byte_size
-    size = internal.prod(dim)
+    size = internal.prod_sequence(dim)
     bias = w[offset:offset + size]
     return bias
 
diff --git a/cupy/linalg/einsum.py b/cupy/linalg/einsum.py
@@ -285,8 +285,8 @@ def _flatten_transpose(a, axeses):
         transpose_axes.extend(axes)
         shapes.append([a.shape[axis] for axis in axes])
     return (
-        a.transpose(transpose_axes).reshape(
-            tuple(cupy.core.internal.prod(shape) for shape in shapes)),
+        a.transpose(transpose_axes).reshape(tuple([
+            cupy.core.internal.prod_sequence(shape) for shape in shapes])),
         shapes
     )
 
diff --git a/cupy/linalg/product.py b/cupy/linalg/product.py
@@ -310,7 +310,7 @@ def tensordot(a, b, axes=2):
 
     ret_shape = a.shape[sum_ndim:] + b.shape[sum_ndim:]
 
-    k = internal.prod(a.shape[:sum_ndim])
+    k = internal.prod_sequence(a.shape[:sum_ndim])
     # Avoid division by zero: core.tensordot_core returns zeros without
     # checking n, m consistency, thus allowing 0-length dimensions to work
     n = a.size // k if k != 0 else 0
diff --git a/cupy/linalg/solve.py b/cupy/linalg/solve.py
@@ -163,7 +163,7 @@ def tensorsolve(a, b, axes=None):
         a = a.transpose(allaxes)
 
     oldshape = a.shape[-(a.ndim - b.ndim):]
-    prod = cupy.internal.prod(oldshape)
+    prod = cupy.internal.prod_sequence(oldshape)
 
     a = a.reshape(-1, prod)
     b = b.ravel()
@@ -458,7 +458,7 @@ def tensorinv(a, ind=2):
         raise ValueError('Invalid ind argument')
     oldshape = a.shape
     invshape = oldshape[ind:] + oldshape[:ind]
-    prod = cupy.internal.prod(oldshape[ind:])
+    prod = cupy.internal.prod_sequence(oldshape[ind:])
     a = a.reshape(prod, -1)
     a_inv = inv(a)
     return a_inv.reshape(*invshape)
diff --git a/tests/cupy_tests/core_tests/test_internal.py b/tests/cupy_tests/core_tests/test_internal.py
@@ -18,16 +18,16 @@ def test_two(self):
         self.assertEqual(internal.prod([2, 3]), 6)
 
 
-class TestProdSsizeT(unittest.TestCase):
+class TestProdSequence(unittest.TestCase):
 
     def test_empty(self):
-        self.assertEqual(internal.prod([]), 1)
+        self.assertEqual(internal.prod_sequence(()), 1)
 
     def test_one(self):
-        self.assertEqual(internal.prod([2]), 2)
+        self.assertEqual(internal.prod_sequence((2,)), 2)
 
     def test_two(self):
-        self.assertEqual(internal.prod([2, 3]), 6)
+        self.assertEqual(internal.prod_sequence((2, 3)), 6)
 
 
 class TestGetSize(unittest.TestCase):