Skip to content

Commit 1ef77f9

Browse files
jerryzh168 and facebook-github-bot
authored and committed
[quant][graphmode] Different rule for handling aten::cat (#38570)
Summary: Pull Request resolved: #38570 We changed the rule for quantizing `aten::cat`. Previously `aten::cat` was considered to be an op that should always be quantized, like `aten::conv2d`, but this is not ideal. A better way is to quantize the output of `aten::cat` depending on whether the input is quantized: if it is, then we'll quantize the output; if not, then we will not quantize the output, since `aten::cat` works on both quantized and non-quantized tensors. Test Plan: Imported from OSS Differential Revision: D21600160 fbshipit-source-id: efa957e0eaa608fffefcdfefa7f442fab45605eb
1 parent dfbf9f3 commit 1ef77f9

4 files changed

Lines changed: 54 additions & 10 deletions

File tree

test/quantization/test_quantize_script.py

Lines changed: 27 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1383,13 +1383,12 @@ def forward(self, x):
13831383
" Quantized operations require FBGEMM. FBGEMM is only optimized for CPUs"
13841384
" with instruction set support avx2 or newer.")
13851385
def test_quantized_cat(self):
1386-
""" Note that we to support the case that torch.cat is quantized
1387-
indepdently, we need to have an observer that works
1388-
for list of Tensors.
1386+
""" quantization of the output of cat will be depend on the
1387+
input of cat. we only quantize the output of cat when its inputs are quantized.
13891388
"""
1390-
class M(torch.nn.Module):
1389+
class QuantizedCat(torch.nn.Module):
13911390
def __init__(self):
1392-
super(M, self).__init__()
1391+
super(QuantizedCat, self).__init__()
13931392
self.conv1 = torch.nn.Conv2d(1, 1, 1).float()
13941393
self.conv2 = torch.nn.Conv2d(1, 1, 1).float()
13951394

@@ -1398,7 +1397,15 @@ def forward(self, x, y):
13981397
y = self.conv2(y)
13991398
return torch.cat([x, y], 1)
14001399

1401-
m = torch.jit.script(M().eval())
1400+
class NonQuantizedCat(torch.nn.Module):
1401+
def __init__(self):
1402+
super(NonQuantizedCat, self).__init__()
1403+
1404+
def forward(self, x, y):
1405+
return torch.cat([x, y], 1)
1406+
1407+
# quantized cat
1408+
m = torch.jit.script(QuantizedCat()).eval()
14021409
m = prepare_script(m, {'': default_qconfig}, True)
14031410
# four for input and output of conv and one for output of cat
14041411
# this also tests the ListConstruct can preserve the observed property so that
@@ -1410,7 +1417,20 @@ def forward(self, x, y):
14101417

14111418
FileCheck().check_not("aten::cat") \
14121419
.check("quantized::cat") \
1413-
.run(m.graph_for(data, data))
1420+
.run(m.graph)
1421+
1422+
# non quantized cat
1423+
m = torch.jit.script(NonQuantizedCat()).eval()
1424+
m = prepare_script(m, {'': default_qconfig}, True)
1425+
assert len(attrs_with_prefix(m, '_observer_')) == 0
1426+
data = torch.randn(1, 1, 10, 10, dtype=torch.float)
1427+
m(data, data)
1428+
m = convert_script(m, True)
1429+
1430+
FileCheck().check_not("quantized::cat") \
1431+
.check("aten::cat") \
1432+
.run(m.graph)
1433+
14141434

14151435
def test_qbatch_norm(self):
14161436
class M(torch.nn.Module):

torch/csrc/jit/passes/quantization/helper.cpp

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,6 @@ std::vector<std::string> _static_quantizable_aten_funcs = {
3232
"matmul",
3333
"add_",
3434
"add",
35-
"cat",
3635
"mul",
3736
"mul_",
3837
"hardswish",
@@ -181,6 +180,10 @@ CallFuncArgs _observe_inputs_call_func = {{"batch_norm", 1}};
181180
// Aten functions for getting tensor information
182181
std::vector<std::string> _tensor_info_funcs = {"size"};
183182

183+
// Aten functions whose output will be quantized or not quantized depending
184+
// on input tensor
185+
std::vector<std::string> _propagate_quant_ops = {"cat"};
186+
184187
// Check if `use` is an aten function of name `func_name` and if value
185188
// `v` is the nth argument (if provided) of the function.
186189
bool matchAtenFuncToUse(
@@ -350,6 +353,10 @@ bool isTensorInfoNode(Node* n) {
350353
return isAtenFunc(n, _tensor_info_funcs);
351354
}
352355

356+
bool isPropagateQuantNode(Node* n) {
357+
return isAtenFunc(n, _propagate_quant_ops);
358+
}
359+
353360
c10::optional<std::tuple<c10::QScheme, QParamVector>> getFixedQParams(Node* n) {
354361
static std::vector<NodeKind> fixed_qparam_funcs;
355362
std::transform(

torch/csrc/jit/passes/quantization/helper.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,10 @@ TORCH_API bool isSingleInputGeneralAtenFunction(Node* n);
4343
// the input tensor is quantized or not, example: aten::size
4444
TORCH_API bool isTensorInfoNode(Node* n);
4545

46+
// Check if this is the node that we'll quantize or not quantize depending on
47+
// whether the input of the node is quantized, example: aten::cat
48+
TORCH_API bool isPropagateQuantNode(Node* n);
49+
4650
TORCH_API c10::optional<std::tuple<c10::QScheme, QParamVector>> getFixedQParams(
4751
Node* n);
4852

torch/csrc/jit/passes/quantization/insert_observers.cpp

Lines changed: 15 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -336,6 +336,12 @@ class InsertObserversHelper {
336336
Value* output,
337337
std::unordered_set<Value*>& block_observed_values);
338338

339+
bool shouldPropagateQuant(
340+
Node* n, const std::unordered_set<Value*>& block_observed_values) {
341+
return isObserved(n->input(0), block_observed_values);
342+
}
343+
344+
339345
void delayObservingValuesInPattern(Graph& graph, const PatternInfo& pattern);
340346

341347
void addValuesToDelayObservation(
@@ -732,7 +738,8 @@ bool InsertObserversHelper::valueNeedsToBeQuantized(Value* v) {
732738
// of the quantizable function.
733739
if (!is_dynamic_) {
734740
// Check whether producer is quantizable
735-
if (mayRequireObservation(v) && nodeQuantizable(v->node())) {
741+
if ((mayRequireObservation(v) && nodeQuantizable(v->node())) ||
742+
isPropagateQuantNode(v->node())) {
736743
return true;
737744
}
738745
}
@@ -1026,7 +1033,13 @@ InsertObserversHelper::insertObserversFor(
10261033
propagateObservedProperty(v, block_observed_values);
10271034
if (!inputs_outputs.count(v) &&
10281035
!isObserved(v, block_observed_values)) {
1029-
if (auto observer_opt = getObserverFor(v)) {
1036+
auto observer_opt = getObserverFor(v);
1037+
// If the node is one of the propagate quant node, e.g.
1038+
// aten::cat, we should observe its output only
1039+
// if the input of the node is observed
1040+
if (observer_opt &&
1041+
(!isPropagateQuantNode(n) ||
1042+
shouldPropagateQuant(n, block_observed_values))) {
10301043
recordObserved(
10311044
v, *observer_opt, values_to_observe, block_observed_values);
10321045
}

0 commit comments

Comments
 (0)