Skip to content

Commit 6232481

Browse files
jerryzh168facebook-github-bot
authored and committed
[quant][graphmode] Add RemoveRedundantDequantize pass (#38434)
Summary: Pull Request resolved: #38434 We insert dequantize for each use in order to produce quantization patterns that will later be fused, after that we should also remove extra dequantize node produced by this operation. Test Plan: Imported from OSS Differential Revision: D21597834 fbshipit-source-id: 18dfb2760bbb08932aa4e1d06f96cfc5fb37ed88
1 parent dd7eed5 commit 6232481

5 files changed

Lines changed: 57 additions & 5 deletions

File tree

test/quantization/test_quantize_script.py

Lines changed: 20 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -27,9 +27,10 @@
2727
from torch.testing._internal.common_quantization import test_only_eval_fn as _test_only_eval_fn
2828
from torch.testing._internal.common_quantized import override_qengines
2929

30+
from torch.testing._internal.common_quantization import QuantizationTestCase
31+
3032
from torch.testing import FileCheck
3133
from torch.testing._internal.jit_utils import attrs_with_prefix
32-
from torch.testing._internal.jit_utils import JitTestCase
3334
from torch.testing._internal.jit_utils import get_forward
3435
from torch.testing._internal.jit_utils import get_forward_graph
3536
from torch.testing._internal.jit_utils import get_module_method
@@ -40,7 +41,7 @@
4041
import itertools
4142
import unittest
4243

43-
class TestQuantizeScriptJitPasses(JitTestCase):
44+
class TestQuantizeScriptJitPasses(QuantizationTestCase):
4445
""" Test graph mode quantization passes used by quantize_script
4546
"""
4647
def test_foldbn_trivial(self):
@@ -1015,6 +1016,21 @@ def forward(self, x):
10151016
.check("aten::dequantize") \
10161017
.run(model.graph)
10171018

1019+
def test_finalize_no_extra_dequantize(self):
1020+
class M(torch.nn.Module):
1021+
def __init__(self):
1022+
super(M, self).__init__()
1023+
self.conv = torch.nn.Conv2d(3, 3, 3).float()
1024+
1025+
def forward(self, x):
1026+
x = self.conv(x)
1027+
return x.size(0) * x
1028+
1029+
model = torch.jit.script(M()).eval()
1030+
model = quantize_script(model, {'': default_qconfig}, _test_only_eval_fn, [self.img_data])
1031+
FileCheck().check_not("aten::dequantize(") \
1032+
.run(model.graph)
1033+
10181034
def test_module_list(self):
10191035
class SimpleLinearLayer(torch.nn.Module):
10201036
def __init__(self):
@@ -1096,7 +1112,7 @@ def forward(self, x):
10961112
.check_not("aten::mul") \
10971113
.run(m.graph)
10981114

1099-
class TestQuantizeScriptPTSQOps(JitTestCase):
1115+
class TestQuantizeScriptPTSQOps(QuantizationTestCase):
11001116
""" Test graph mode post training static quantization works
11011117
for individual ops end to end.
11021118
"""
@@ -1737,7 +1753,7 @@ def forward(self, x):
17371753
.check("aten::dequantize(") \
17381754
.run(m2.graph)
17391755

1740-
class TestQuantizeDynamicScript(JitTestCase):
1756+
class TestQuantizeDynamicScript(QuantizationTestCase):
17411757
def test_prepare_dynamic(self):
17421758
class M(torch.nn.Module):
17431759
def __init__(self):

torch/csrc/jit/passes/quantization/helper.cpp

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -179,6 +179,9 @@ std::unordered_map<NodeKind, std::tuple<c10::QScheme, QParamVector>>
179179
AtenFuncArgs _observe_inputs_aten_func = {};
180180
CallFuncArgs _observe_inputs_call_func = {{"batch_norm", 1}};
181181

182+
// Aten functions for getting tensor information
183+
std::vector<std::string> _tensor_info_funcs = {"size"};
184+
182185
// Check if `use` is an aten function of name `func_name` and if value
183186
// `v` is the nth argument (if provided) of the function.
184187
bool matchAtenFuncToUse(
@@ -347,6 +350,10 @@ bool isSingleInputGeneralAtenFunction(Node* n) {
347350
isAtenFunc(n, fixed_qparams_aten_funcs);
348351
}
349352

353+
bool isTensorInfoNode(Node* n) {
354+
return isAtenFunc(n, _tensor_info_funcs);
355+
}
356+
350357
c10::optional<std::tuple<c10::QScheme, QParamVector>> getFixedQParams(Node* n) {
351358
static std::vector<NodeKind> fixed_qparam_funcs;
352359
std::transform(

torch/csrc/jit/passes/quantization/helper.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,10 @@ TORCH_API bool isSingleInputGeneralCallFunction(Node* n);
3939

4040
TORCH_API bool isSingleInputGeneralAtenFunction(Node* n);
4141

42+
// Check if the node will produce the same result regardless of whether
43+
// the input tensor is quantized or not, example: aten::size
44+
TORCH_API bool isTensorInfoNode(Node* n);
45+
4246
TORCH_API c10::optional<std::tuple<c10::QScheme, QParamVector>> getFixedQParams(
4347
Node* n);
4448

torch/csrc/jit/passes/quantization/insert_quant_dequant.cpp

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -289,6 +289,30 @@ void ReplicateChooseQParamsQuantDequant(std::shared_ptr<Graph>& graph) {
289289
}
290290
}
291291

292+
void RemoveRedundantDequantize(std::shared_ptr<Graph>& graph) {
293+
const std::string dequantize = R"(
294+
graph(%a_quant):
295+
%a_dequant = aten::dequantize(%a_quant)
296+
return (%a_dequant) )";
297+
const std::string dequantize_replacement = R"(
298+
graph(%a):
299+
return (%a) )";
300+
auto filter = [&](const Match& match,
301+
const std::unordered_map<std::string, Value*>& vmap) {
302+
const auto& match_vmap = match.values_map;
303+
auto dequant_node = match_vmap.at(vmap.at("a_dequant"))->node();
304+
Value* dequant_out = dequant_node->output();
305+
TORCH_CHECK(
306+
dequant_out->uses().size() == 1,
307+
"Expect dequant output to have single use");
308+
Node* user = dequant_out->uses()[0].user;
309+
return isTensorInfoNode(user);
310+
};
311+
SubgraphRewriter rewriter;
312+
rewriter.RegisterRewritePattern(dequantize, dequantize_replacement);
313+
rewriter.runOnGraph(graph, filter);
314+
}
315+
292316
void RemoveRedundantQuantizationOps(std::shared_ptr<Graph>& graph) {
293317
const std::string dynamic_quant_ops = R"(
294318
graph(%a, %reduce_range, %a_dtype):
@@ -812,6 +836,7 @@ void InsertQuantDeQuantHelper::propagateQuantizationOps(Module& module) {
812836
RemoveRedundantQuantizationOps(graph);
813837
ReplicateQuant(graph);
814838
ReplicateDeQuant(graph);
839+
RemoveRedundantDequantize(graph);
815840
PropagateQuantizationOps(graph);
816841
}
817842

torch/testing/_internal/common_quantization.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -137,7 +137,7 @@ def setUp(self):
137137
super().setUp()
138138
self.calib_data = [(torch.rand(2, 5, dtype=torch.float), torch.randint(0, 1, (2,), dtype=torch.long)) for _ in range(2)]
139139
self.train_data = [(torch.rand(2, 5, dtype=torch.float), torch.randint(0, 1, (2,), dtype=torch.long)) for _ in range(2)]
140-
self.img_data = [(torch.rand(2, 3, 10, 10, dtype=torch.float), torch.randint(0, 1, (2,), dtype=torch.long))
140+
self.img_data = [(torch.rand(1, 3, 10, 10, dtype=torch.float), torch.randint(0, 1, (1,), dtype=torch.long))
141141
for _ in range(2)]
142142
self.img_data_1d = [(torch.rand(2, 3, 10, dtype=torch.float), torch.randint(0, 1, (1,), dtype=torch.long))
143143
for _ in range(2)]

0 commit comments

Comments
 (0)