Skip to content

Commit ea694bf

Browse files
ArmenAghouseroad
authored and committed
implement fuse reduce->unsqueeze + fix assumption in nop_dropout pass (#1565)
* implement fuse reduce->unsqueeze + fix assumption in nop_dropout pass * fix bugs * remove nop code * correct output shape calculation * fix linting issue
1 parent 6db386e commit ea694bf

File tree

6 files changed

+129
-4
lines changed

6 files changed

+129
-4
lines changed

onnx/common/interned_strings.h

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -79,6 +79,7 @@ namespace ONNX_NAMESPACE {
7979
_(ratio) \
8080
_(size) \
8181
_(dim) \
82+
_(keepdims) \
8283
_(perm) \
8384
_(shape) \
8485
_(axes) \
@@ -145,7 +146,17 @@ namespace ONNX_NAMESPACE {
145146
_(__control_inputs) \
146147
_(count_include_pad) \
147148
_(storage_order) \
148-
_(Unsqueeze)
149+
_(Unsqueeze) \
150+
_(ReduceL1) \
151+
_(ReduceL2) \
152+
_(ReduceLogSum) \
153+
_(ReduceLogSumExp) \
154+
_(ReduceMax) \
155+
_(ReduceMean) \
156+
_(ReduceMin) \
157+
_(ReduceProd) \
158+
_(ReduceSum) \
159+
_(ReduceSumSquare)
149160

150161
enum BuiltinSymbol {
151162
#define DEFINE_SYMBOL(s) k##s,

onnx/examples/optimize_onnx.ipynb

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,7 @@
6060
"\tfuse_bn_into_conv\n",
6161
"\tfuse_consecutive_concats\n",
6262
"\tfuse_consecutive_log_softmax\n",
63+
"\tfuse_consecutive_reduce_unsqueeze\n",
6364
"\tfuse_consecutive_squeezes\n",
6465
"\tfuse_consecutive_transposes\n",
6566
"\tfuse_transpose_into_gemm\n",
@@ -120,7 +121,7 @@
120121
"name": "python",
121122
"nbconvert_exporter": "python",
122123
"pygments_lexer": "ipython3",
123-
"version": "3.6.4"
124+
"version": "3.7.1"
124125
}
125126
},
126127
"nbformat": 4,

onnx/optimizer/pass_registry.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,8 +7,8 @@
77
#include "onnx/common/ir_pb_converter.h"
88
#include "onnx/common/stl_backports.h"
99
#include "onnx/optimizer/passes/eliminate_deadend.h"
10-
#include "onnx/optimizer/passes/eliminate_nop_dropout.h"
1110
#include "onnx/optimizer/passes/eliminate_identity.h"
11+
#include "onnx/optimizer/passes/eliminate_nop_dropout.h"
1212
#include "onnx/optimizer/passes/eliminate_nop_monotone_argmax.h"
1313
#include "onnx/optimizer/passes/eliminate_nop_pad.h"
1414
#include "onnx/optimizer/passes/eliminate_nop_transpose.h"
@@ -18,6 +18,7 @@
1818
#include "onnx/optimizer/passes/fuse_bn_into_conv.h"
1919
#include "onnx/optimizer/passes/fuse_consecutive_concats.h"
2020
#include "onnx/optimizer/passes/fuse_consecutive_log_softmax.h"
21+
#include "onnx/optimizer/passes/fuse_consecutive_reduce_unsqueeze.h"
2122
#include "onnx/optimizer/passes/fuse_consecutive_squeezes.h"
2223
#include "onnx/optimizer/passes/fuse_consecutive_transposes.h"
2324
#include "onnx/optimizer/passes/fuse_transpose_into_gemm.h"
@@ -51,6 +52,7 @@ struct GlobalPassRegistry {
5152
registerPass<FuseBNIntoConv>();
5253
registerPass<FuseConsecutiveConcats>();
5354
registerPass<FuseConsecutiveLogSoftmax>();
55+
registerPass<FuseConsecutiveReduceUnsqueeze>();
5456
registerPass<FuseConsecutiveSqueezes>();
5557
registerPass<FuseConsecutiveTransposes>();
5658
registerPass<FuseTransposeIntoGemm>();

onnx/optimizer/passes/eliminate_nop_dropout.h

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,10 @@ struct EliminateNopDropout final : public PredicateBasedPass {
2626

2727
bool runTransform(Node* node, Graph&, NodeDestroyType& destroy_current)
2828
override {
29-
node->output()->replaceAllUsesWith(node->input());
29+
// Don't assume that theres only one output.
30+
for (size_t i = 0; i < node->outputs().size(); ++i) {
31+
node->outputs()[i]->replaceAllUsesWith(node->input());
32+
}
3033
destroy_current = NodeDestroyType::DestroyOne;
3134
return true;
3235
}
Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,65 @@
1+
// ATTENTION: The code in this file is highly EXPERIMENTAL.
// Adventurous users should note that the APIs will probably change.

#pragma once

#include <unordered_set>

#include "onnx/optimizer/pass.h"

namespace ONNX_NAMESPACE {
namespace optimization {

// All ONNX reduction operators that carry the axes/keepdims attributes.
const std::unordered_set<NodeKind> reduction_operators{kReduceL1,
                                                       kReduceL2,
                                                       kReduceLogSum,
                                                       kReduceLogSumExp,
                                                       kReduceMax,
                                                       kReduceMean,
                                                       kReduceMin,
                                                       kReduceProd,
                                                       kReduceSum,
                                                       kReduceSumSquare};

// Fuses Reduce*(keepdims=0) -> Unsqueeze over the same axes into a single
// Reduce*(keepdims=1). The two forms produce identical shapes, making the
// Unsqueeze redundant.
struct FuseConsecutiveReduceUnsqueeze final : public PredicateBasedPass {
  explicit FuseConsecutiveReduceUnsqueeze()
      : PredicateBasedPass(
            PassType::Fuse,
            PassEfficiency::Complete,
            PassOptimizationType::Compute) {}

  std::string getPassName() const override {
    return "fuse_consecutive_reduce_unsqueeze";
  }
  bool patternMatchPredicate(Node* node) override {
    // check that the current node is of type Unsqueeze and has defined axes
    bool cur_node_check =
        node->kind() == kUnsqueeze && node->hasAttribute(kaxes);
    if (cur_node_check) {
      // The fusion flips the reduction's keepdims flag, which changes the
      // shape seen by every consumer of its output — so the Unsqueeze must
      // be the reduction's only consumer.
      if (node->input()->uses().size() != 1) {
        return false;
      }
      Node* prev_node = node->input()->node();
      // check that the previous node is a reduction operator and has defined
      // axes/keepdims
      bool reduction_node_check = reduction_operators.find(prev_node->kind()) !=
              reduction_operators.end() &&
          prev_node->hasAttribute(kaxes) && prev_node->hasAttribute(kkeepdims);
      if (reduction_node_check) {
        // ensure that keepdims is currently set to false and that the
        // Unsqueeze reinserts exactly the dimensions the reduction removed
        // (NOTE(review): assumes both axes lists are non-negative and sorted
        // the same way — negative axes simply won't match and are skipped)
        return prev_node->i(kkeepdims) == 0 &&
            node->is(kaxes) == prev_node->is(kaxes);
      }
    }
    return false;
  }
  bool runTransform(Node* node, Graph&, NodeDestroyType& destroy_current)
      override {
    Node* reduction_op = node->input()->node();
    // set keepdims flag to be true
    reduction_op->i_(kkeepdims, 1);
    // the reduction now produces the Unsqueeze's output shape/type directly,
    // so propagate that metadata and drop the unnecessary Unsqueeze
    reduction_op->output()->setSizes(node->output()->sizes());
    reduction_op->output()->setElemType(node->output()->elemType());
    node->output()->replaceAllUsesWith(node->input());
    destroy_current = NodeDestroyType::DestroyOne;
    return true;
  }
};

} // namespace optimization
} // namespace ONNX_NAMESPACE

onnx/test/optimizer_test.py

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1120,6 +1120,49 @@ def test_eliminate_nop_dropout(self): # type: () -> None
11201120
assert len(optimized_model.graph.node) == 1
11211121
assert optimized_model.graph.node[0].op_type == "Log"
11221122

1123+
def test_fuse_reduction_unsqueeze(self):  # type: () -> None
    """Reduce*(keepdims=0) + Unsqueeze over the same axes must fuse into a
    single Reduce*(keepdims=1); every other combination is left untouched."""

    def _expected_shape(input_shape, reduction_axes, unsqueeze_axes, keepdim):
        # type: (Tuple[int, ...], List[int], List[int], bool) -> Tuple[int, ...]
        # Shape after the reduction step.
        if keepdim:
            shape = [1 if i in reduction_axes else d
                     for i, d in enumerate(input_shape)]
        else:
            shape = [d for i, d in enumerate(input_shape)
                     if i not in reduction_axes]
        # Shape after Unsqueeze reinserts singleton dimensions.
        for ax in unsqueeze_axes:
            shape.insert(ax, 1)
        return tuple(shape)

    reductions = ["ReduceL1", "ReduceL2", "ReduceLogSum", "ReduceLogSumExp",
                  "ReduceMax", "ReduceMean", "ReduceMin", "ReduceProd",
                  "ReduceSum", "ReduceSumSquare"]
    for reduction in reductions:
        for axes1 in [[1], [1, 2], [2]]:
            for axes2 in [[1], [1, 2], [2]]:
                for keepdim in [False, True]:
                    input_shape = (5, 7, 9)
                    output_shape = _expected_shape(
                        input_shape, axes1, axes2, keepdim)  # type: Tuple[int, ...]
                    node = helper.make_node(
                        reduction, ["X"], ["Y"], axes=axes1, keepdims=keepdim)
                    node1 = helper.make_node(
                        "Unsqueeze", ["Y"], ["Z"], axes=axes2)
                    graph = helper.make_graph(
                        [node, node1],
                        "test",
                        [helper.make_tensor_value_info(
                            "X", TensorProto.FLOAT, input_shape)],
                        [helper.make_tensor_value_info(
                            "Z", TensorProto.FLOAT, output_shape)])
                    optimized_model = self._optimized(
                        graph, ["fuse_consecutive_reduce_unsqueeze"], False)

                    if keepdim or axes1 != axes2:
                        # Fusion must not fire: graph stays byte-identical.
                        assert optimized_model.graph == graph
                    else:
                        # Fused into one Reduce* node with the original axes
                        # and the fully unsqueezed output shape.
                        assert len(optimized_model.graph.output) == 1
                        assert len(optimized_model.graph.node) == 1
                        assert optimized_model.graph.output[0].type.tensor_type.elem_type == TensorProto.FLOAT
                        assert optimized_model.graph.node[-1].op_type == reduction
                        assert optimized_model.graph.node[-1].attribute[0].name == "axes"
                        assert optimized_model.graph.node[-1].attribute[0].ints == axes1
                        optimized_output_shape = tuple(
                            x.dim_value
                            for x in optimized_model.graph.output[0].type.tensor_type.shape.dim)
                        assert optimized_output_shape == output_shape
1165+
11231166

11241167
if __name__ == '__main__':
11251168
unittest.main()

0 commit comments

Comments
 (0)