Commit bd35e62

hariharans29 authored and wschin committed
Fix some backend tests (#2335)
* Fix some node tests
* PR comments and docs
* Update Changelog.md
1 parent 23bb6ea commit bd35e62

9 files changed: 88 additions & 54 deletions

docs/Changelog.md

Lines changed: 12 additions & 15 deletions
@@ -11434,11 +11434,9 @@ This version of the operator has been available since version 11 of the default
 ### <a name="Hardmax-11"></a>**Hardmax-11**</a>
 
 The operator computes the hardmax (1 for the first maximum value, and 0 for all others) values for each layer in the batch
-of the given input. The input is a 2-D tensor (Tensor<float>) of size
-(batch_size x input_feature_dimensions). The output tensor has the same shape
-and contains the hardmax values of the corresponding input.
+of the given input.
 
-Input does not need to explicitly be a 2D vector; rather, it will be
+The input does not need to explicitly be a 2D vector; rather, it will be
 coerced into one. For an arbitrary n-dimensional tensor
 input \in [a_0, a_1, ..., a_{k-1}, a_k, ..., a_{n-1}] and k is
 the axis provided, then input will be coerced into a 2-dimensional tensor with
@@ -11447,7 +11445,8 @@ This version of the operator has been available since version 11 of the default
 of dimensions [a_0, a_1 * ... * a_{n-1}], where a_0 is often the batch size.
 In this situation, we must have a_0 = N and a_1 * ... * a_{n-1} = D.
 Each of these dimensions must be matched correctly, or else the operator
-will throw errors.
+will throw errors. The output tensor has the same shape
+and contains the hardmax values of the corresponding input.
 
 #### Version
@@ -11484,11 +11483,9 @@ This version of the operator has been available since version 11 of the default
 ### <a name="LogSoftmax-11"></a>**LogSoftmax-11**</a>
 
 The operator computes the logsoftmax (log of softmax) values for each layer in the batch
-of the given input. The input is a 2-D tensor (Tensor<float>) of size
-(batch_size x input_feature_dimensions). The output tensor has the same shape
-and contains the logsoftmax values of the corresponding input.
+of the given input.
 
-Input does not need to explicitly be a 2D vector; rather, it will be
+The input does not need to explicitly be a 2D vector; rather, it will be
 coerced into one. For an arbitrary n-dimensional tensor
 input \in [a_0, a_1, ..., a_{k-1}, a_k, ..., a_{n-1}] and k is
 the axis provided, then input will be coerced into a 2-dimensional tensor with
@@ -11497,7 +11494,8 @@ This version of the operator has been available since version 11 of the default
 of dimensions [a_0, a_1 * ... * a_{n-1}], where a_0 is often the batch size.
 In this situation, we must have a_0 = N and a_1 * ... * a_{n-1} = D.
 Each of these dimensions must be matched correctly, or else the operator
-will throw errors.
+will throw errors. The output tensor has the same shape
+and contains the logsoftmax values of the corresponding input.
 
 #### Version
@@ -13362,11 +13360,9 @@ This version of the operator has been available since version 11 of the default
 ### <a name="Softmax-11"></a>**Softmax-11**</a>
 
 The operator computes the softmax (normalized exponential) values for each layer in the batch
-of the given input. The input is a 2-D tensor (Tensor<float>) of size
-(batch_size x input_feature_dimensions). The output tensor has the same shape
-and contains the softmax values of the corresponding input.
+of the given input.
 
-Input does not need to explicitly be a 2D vector; rather, it will be
+The input does not need to explicitly be a 2D vector; rather, it will be
 coerced into one. For an arbitrary n-dimensional tensor
 input \in [a_0, a_1, ..., a_{k-1}, a_k, ..., a_{n-1}] and k is
 the axis provided, then input will be coerced into a 2-dimensional tensor with
@@ -13375,7 +13371,8 @@ This version of the operator has been available since version 11 of the default
 of dimensions [a_0, a_1 * ... * a_{n-1}], where a_0 is often the batch size.
 In this situation, we must have a_0 = N and a_1 * ... * a_{n-1} = D.
 Each of these dimensions must be matched correctly, or else the operator
-will throw errors.
+will throw errors. The output tensor has the same shape
+and contains the softmax values of the corresponding input.
 
 #### Version
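The Hardmax semantics documented above (1 at the first occurrence of the maximum, 0 everywhere else) can be sketched in numpy for the already-coerced 2-D case. This is an illustrative sketch, not ONNX's reference implementation; `hardmax_2d` is a hypothetical helper name:

```python
import numpy as np

def hardmax_2d(x):
    # 1.0 at the first occurrence of each row's maximum, 0.0 elsewhere;
    # np.argmax returns the index of the first maximum, matching the spec.
    y = np.zeros_like(x)
    y[np.arange(x.shape[0]), np.argmax(x, axis=1)] = 1.0
    return y

x = np.array([[1., 3., 3.], [2., 0., 1.]], dtype=np.float32)
print(hardmax_2d(x))
# [[0. 1. 0.]
#  [1. 0. 0.]]
```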

docs/Operators.md

Lines changed: 14 additions & 17 deletions
@@ -5221,7 +5221,7 @@ node = onnx.helper.make_node(
     'Gather',
     inputs=['data', 'indices'],
     outputs=['y'],
-    axis=1,
+    axis=0,
 )
 data = np.arange(10).astype(np.float32)
 indices = np.array([0, -9, -10])
@@ -5738,7 +5738,7 @@ node = onnx.helper.make_node(
 )
 a = np.random.ranf([2, 3]).astype(np.float32)
 b = np.random.ranf([3, 4]).astype(np.float32)
-c = np.array(3.14)
+c = np.array(3.14).astype(np.float32)
 y = gemm_reference_implementation(a, b, c)
 expect(node, inputs=[a, b, c], outputs=[y],
        name='test_gemm_default_scalar_bias')
@@ -6218,11 +6218,9 @@ expect(node, inputs=[x], outputs=[y],
 ### <a name="Hardmax"></a><a name="hardmax">**Hardmax**</a>
 
 The operator computes the hardmax (1 for the first maximum value, and 0 for all others) values for each layer in the batch
-of the given input. The input is a 2-D tensor (Tensor<float>) of size
-(batch_size x input_feature_dimensions). The output tensor has the same shape
-and contains the hardmax values of the corresponding input.
+of the given input.
 
-Input does not need to explicitly be a 2D vector; rather, it will be
+The input does not need to explicitly be a 2D vector; rather, it will be
 coerced into one. For an arbitrary n-dimensional tensor
 input \in [a_0, a_1, ..., a_{k-1}, a_k, ..., a_{n-1}] and k is
 the axis provided, then input will be coerced into a 2-dimensional tensor with
@@ -6231,7 +6229,8 @@ expect(node, inputs=[x], outputs=[y],
 of dimensions [a_0, a_1 * ... * a_{n-1}], where a_0 is often the batch size.
 In this situation, we must have a_0 = N and a_1 * ... * a_{n-1} = D.
 Each of these dimensions must be matched correctly, or else the operator
-will throw errors.
+will throw errors. The output tensor has the same shape
+and contains the hardmax values of the corresponding input.
 
 #### Version
@@ -7303,11 +7302,9 @@ expect(node, inputs=[x], outputs=[y],
 ### <a name="LogSoftmax"></a><a name="logsoftmax">**LogSoftmax**</a>
 
 The operator computes the logsoftmax (log of softmax) values for each layer in the batch
-of the given input. The input is a 2-D tensor (Tensor<float>) of size
-(batch_size x input_feature_dimensions). The output tensor has the same shape
-and contains the logsoftmax values of the corresponding input.
+of the given input.
 
-Input does not need to explicitly be a 2D vector; rather, it will be
+The input does not need to explicitly be a 2D vector; rather, it will be
 coerced into one. For an arbitrary n-dimensional tensor
 input \in [a_0, a_1, ..., a_{k-1}, a_k, ..., a_{n-1}] and k is
 the axis provided, then input will be coerced into a 2-dimensional tensor with
@@ -7316,7 +7313,8 @@ expect(node, inputs=[x], outputs=[y],
 of dimensions [a_0, a_1 * ... * a_{n-1}], where a_0 is often the batch size.
 In this situation, we must have a_0 = N and a_1 * ... * a_{n-1} = D.
 Each of these dimensions must be matched correctly, or else the operator
-will throw errors.
+will throw errors. The output tensor has the same shape
+and contains the logsoftmax values of the corresponding input.
 
 #### Version
@@ -16427,11 +16425,9 @@ expect(node, inputs=[x, starts, ends, axes, steps], outputs=[y],
 ### <a name="Softmax"></a><a name="softmax">**Softmax**</a>
 
 The operator computes the softmax (normalized exponential) values for each layer in the batch
-of the given input. The input is a 2-D tensor (Tensor<float>) of size
-(batch_size x input_feature_dimensions). The output tensor has the same shape
-and contains the softmax values of the corresponding input.
+of the given input.
 
-Input does not need to explicitly be a 2D vector; rather, it will be
+The input does not need to explicitly be a 2D vector; rather, it will be
 coerced into one. For an arbitrary n-dimensional tensor
 input \in [a_0, a_1, ..., a_{k-1}, a_k, ..., a_{n-1}] and k is
 the axis provided, then input will be coerced into a 2-dimensional tensor with
@@ -16440,7 +16436,8 @@ expect(node, inputs=[x, starts, ends, axes, steps], outputs=[y],
 of dimensions [a_0, a_1 * ... * a_{n-1}], where a_0 is often the batch size.
 In this situation, we must have a_0 = N and a_1 * ... * a_{n-1} = D.
 Each of these dimensions must be matched correctly, or else the operator
-will throw errors.
+will throw errors. The output tensor has the same shape
+and contains the softmax values of the corresponding input.
 
 #### Version
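The LogSoftmax behavior documented above can be sketched in numpy for the 2-D case. This is a hedged illustration of the math, not the ONNX reference code; `logsoftmax_2d` is a hypothetical helper name:

```python
import numpy as np

def logsoftmax_2d(x):
    # log(softmax(x)) computed stably: subtracting the row max before
    # exponentiating avoids overflow without changing the result.
    shifted = x - x.max(axis=1, keepdims=True)
    return shifted - np.log(np.exp(shifted).sum(axis=1, keepdims=True))

x = np.array([[1., 2., 3.]], dtype=np.float32)
y = logsoftmax_2d(x)
print(np.exp(y).sum(axis=1))  # exp(logsoftmax) sums to 1 per row
```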

docs/TestCoverage.md

Lines changed: 2 additions & 2 deletions
@@ -2898,7 +2898,7 @@ node = onnx.helper.make_node(
     'Gather',
     inputs=['data', 'indices'],
     outputs=['y'],
-    axis=1,
+    axis=0,
 )
 data = np.arange(10).astype(np.float32)
 indices = np.array([0, -9, -10])
@@ -3147,7 +3147,7 @@ node = onnx.helper.make_node(
 )
 a = np.random.ranf([2, 3]).astype(np.float32)
 b = np.random.ranf([3, 4]).astype(np.float32)
-c = np.array(3.14)
+c = np.array(3.14).astype(np.float32)
 y = gemm_reference_implementation(a, b, c)
 expect(node, inputs=[a, b, c], outputs=[y],
        name='test_gemm_default_scalar_bias')

onnx/backend/test/case/node/gather.py

Lines changed: 1 addition & 1 deletion
@@ -48,7 +48,7 @@ def export_gather_negative_indices():  # type: () -> None
     'Gather',
     inputs=['data', 'indices'],
     outputs=['y'],
-    axis=1,
+    axis=0,
 )
 data = np.arange(10).astype(np.float32)
 indices = np.array([0, -9, -10])
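The corrected `axis=0` makes sense because `data` in this test is rank-1, so 0 (or -1) is the only valid gather axis. A small numpy sketch of what the fixed test computes, using `np.take` as a stand-in for the Gather op:

```python
import numpy as np

data = np.arange(10).astype(np.float32)   # rank-1 tensor: valid axes are 0 and -1
indices = np.array([0, -9, -10])          # negative indices count from the end

# Gather along axis=0; axis=1 would be out of range for rank-1 data,
# which is why the test had to be corrected.
y = np.take(data, indices, axis=0)
print(y)  # [0. 1. 0.]
```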

onnx/backend/test/case/node/gemm.py

Lines changed: 1 addition & 1 deletion
@@ -60,7 +60,7 @@ def export_default_scalar_bias():  # type: () -> None
 )
 a = np.random.ranf([2, 3]).astype(np.float32)
 b = np.random.ranf([3, 4]).astype(np.float32)
-c = np.array(3.14)
+c = np.array(3.14).astype(np.float32)
 y = gemm_reference_implementation(a, b, c)
 expect(node, inputs=[a, b, c], outputs=[y],
        name='test_gemm_default_scalar_bias')
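The added cast matters because `np.array(3.14)` defaults to float64, while the Gemm test feeds float32 matrices and declares a float32 bias. A quick numpy illustration of the dtype mismatch the fix addresses:

```python
import numpy as np

# Without an explicit cast, a Python float becomes float64:
c_bad = np.array(3.14)
print(c_bad.dtype)  # float64

# So the scalar bias must be cast to match the float32 matrices:
c = np.array(3.14).astype(np.float32)
a = np.random.ranf([2, 3]).astype(np.float32)
b = np.random.ranf([3, 4]).astype(np.float32)
y = a @ b + c       # scalar bias broadcasts; result stays float32
print(y.dtype)      # float32
```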
Binary file not shown.
Lines changed: 1 addition & 1 deletion

onnx/defs/math/defs.cc

Lines changed: 14 additions & 16 deletions
@@ -41,11 +41,9 @@ std::function<void(OpSchema&)> SoftmaxFamilyDocGenerator(
   return [=](OpSchema& schema) {
     std::string doc = R"DOC(
 The operator computes the {name} ({description}) values for each layer in the batch
-of the given input. The input is a 2-D tensor (Tensor<float>) of size
-(batch_size x input_feature_dimensions). The output tensor has the same shape
-and contains the {name} values of the corresponding input.
+of the given input.
 
-Input does not need to explicitly be a 2D vector; rather, it will be
+The input does not need to explicitly be a 2D vector; rather, it will be
 coerced into one. For an arbitrary n-dimensional tensor
 input \in [a_0, a_1, ..., a_{k-1}, a_k, ..., a_{n-1}] and k is
 the axis provided, then input will be coerced into a 2-dimensional tensor with
@@ -54,7 +52,8 @@ case where axis=1, this means the input tensor will be coerced into a 2D tensor
 of dimensions [a_0, a_1 * ... * a_{n-1}], where a_0 is often the batch size.
 In this situation, we must have a_0 = N and a_1 * ... * a_{n-1} = D.
 Each of these dimensions must be matched correctly, or else the operator
-will throw errors.
+will throw errors. The output tensor has the same shape
+and contains the {name} values of the corresponding input.
 )DOC";
     ReplaceAll(doc, "{name}", name);
     ReplaceAll(doc, "{description}", description);
@@ -84,27 +83,26 @@ will throw errors.
         {"tensor(float16)", "tensor(float)", "tensor(double)"},
         "Constrain input and output types to float tensors.");
     schema.TypeAndShapeInferenceFunction([](InferenceContext& ctx) {
+      // Type inference
       propagateElemTypeFromInputToOutput(ctx, 0, 0);
+
+      // Shape inference starts
       if (!hasNInputShapes(ctx, 1)) {
         return;
       }
-      propagateShapeFromInputToOutput(ctx, 0, 0);
+
+      // Validate the value of 'axis'
       const TensorShapeProto& input_shape =
           ctx.getInputType(0)->tensor_type().shape();
       int r = input_shape.dim_size();
-      if (r != 2) {
-        fail_shape_inference("Input tensor must have rank == 2");
-      }
       int axis = static_cast<int>(getAttribute(ctx, "axis", 1));
-      if (axis) {
-        if (axis < -r || axis >= r) {
-          fail_shape_inference(
-              "'axis' must be in [-rank(indices), rank(indices)-1]");
-        }
-        if (axis < 0) {
-          axis += r;
-        }
+      if (axis < -r || axis >= r) {
+        fail_shape_inference(
+            "'axis' must be in [", -r, " , ", (r - 1), "]. Its actual value is: ", axis);
       }
+
+      // Shape inference
+      propagateShapeFromInputToOutput(ctx, 0, 0);
     });
   };
 }
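The 2-D coercion that the regenerated doc string describes can be sketched in numpy: flatten the leading axes into N and the trailing axes into D, apply the row-wise softmax, and restore the original shape. This is why the output always has the input's shape, which is what the schema's shape inference now simply propagates. `softmax_nd` is a hypothetical helper for illustration, not ONNX code:

```python
import numpy as np

def softmax_nd(x, axis=1):
    # Coerce the n-D input into 2-D: [a_0*...*a_{axis-1}, a_axis*...*a_{n-1}]
    axis = axis % x.ndim                      # normalize a negative axis
    flat = x.reshape(int(np.prod(x.shape[:axis], dtype=np.int64)), -1)
    e = np.exp(flat - flat.max(axis=1, keepdims=True))  # numerically stable
    # The output keeps the input's shape, matching what
    # propagateShapeFromInputToOutput expresses in the schema.
    return (e / e.sum(axis=1, keepdims=True)).reshape(x.shape)

x = np.random.rand(4, 5, 6).astype(np.float32)
print(softmax_nd(x).shape)  # (4, 5, 6)
```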

onnx/test/shape_inference_test.py

Lines changed: 43 additions & 1 deletion
@@ -1135,13 +1135,55 @@ def test_GLU(self):  # type: () -> None
             make_tensor_value_info('a', TensorProto.FLOAT, (5, 3, 7)),
             make_tensor_value_info('b', TensorProto.FLOAT, (5, 3, 7))])
 
-    def test_softmax(self):  # type: () -> None
+    def test_softmax_2d(self):  # type: () -> None
         graph = self._make_graph(
             [('x', TensorProto.FLOAT, (4, 5))],
             [make_node('Softmax', ['x'], 'z')],
             [])
         self._assert_inferred(graph, [make_tensor_value_info('z', TensorProto.FLOAT, (4, 5))])
 
+    def test_softmax_3d(self):  # type: () -> None
+        graph = self._make_graph(
+            [('x', TensorProto.FLOAT, (4, 5, 6))],
+            [make_node('Softmax', ['x'], 'z')],
+            [])
+        self._assert_inferred(graph, [make_tensor_value_info('z', TensorProto.FLOAT, (4, 5, 6))])
+
+    def test_hardmax_2d(self):  # type: () -> None
+        graph = self._make_graph(
+            [('x', TensorProto.FLOAT, (4, 5))],
+            [make_node('Hardmax', ['x'], 'z')],
+            [])
+        self._assert_inferred(graph, [make_tensor_value_info('z', TensorProto.FLOAT, (4, 5))])
+
+    def test_hardmax_3d(self):  # type: () -> None
+        graph = self._make_graph(
+            [('x', TensorProto.FLOAT, (4, 5, 6))],
+            [make_node('Hardmax', ['x'], 'z')],
+            [])
+        self._assert_inferred(graph, [make_tensor_value_info('z', TensorProto.FLOAT, (4, 5, 6))])
+
+    def test_logsoftmax_2d(self):  # type: () -> None
+        graph = self._make_graph(
+            [('x', TensorProto.FLOAT, (4, 5))],
+            [make_node('LogSoftmax', ['x'], 'z')],
+            [])
+        self._assert_inferred(graph, [make_tensor_value_info('z', TensorProto.FLOAT, (4, 5))])
+
+    def test_logsoftmax_3d(self):  # type: () -> None
+        graph = self._make_graph(
+            [('x', TensorProto.FLOAT, (4, 5, 6))],
+            [make_node('LogSoftmax', ['x'], 'z')],
+            [])
+        self._assert_inferred(graph, [make_tensor_value_info('z', TensorProto.FLOAT, (4, 5, 6))])
+
+    def test_logsoftmax_3d_negative_axis(self):  # type: () -> None
+        graph = self._make_graph(
+            [('x', TensorProto.FLOAT, (4, 5, 6))],
+            [make_node('LogSoftmax', ['x'], 'z', axis=-1)],
+            [])
+        self._assert_inferred(graph, [make_tensor_value_info('z', TensorProto.FLOAT, (4, 5, 6))])
+
     def test_maxpool(self):  # type: () -> None
         graph = self._make_graph(
             [("X", TensorProto.FLOAT, (5, 3, 4, 4))],
