Commit eff68bc

jerryzh168 authored and facebook-github-bot committed
[quant][graphmode] quantization support for aten::add (#34572)
Summary: Pull Request resolved: #34572

Test Plan: python test/test_jit.py

Imported from OSS

Differential Revision: D20519607

fbshipit-source-id: c57e062cffc24a47a76b73b58aff7f9ef80183fa
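
In short, the commit makes aten::add / aten::add_ quantizable in graph-mode quantization: prepare inserts an observer for the output of the add, and convert rewrites the dequantize -> aten::add -> aten::quantize_per_tensor pattern into quantized::add. A minimal sketch of that flow, mirroring the test added below (prepare_script, convert_script and script_qconfig are the helpers the test uses; their import location is not part of this diff):

    import torch
    from torch.quantization import default_qconfig

    class Add(torch.nn.Module):
        def forward(self, x, y):
            return x + y

    m = torch.jit.script(Add()).eval()
    # prepare_script / convert_script / script_qconfig come from the test harness, not from this diff.
    m = prepare_script(m, {'': script_qconfig(default_qconfig)}, True)
    data = torch.randn(1, 1, 10, 10)
    m(data, data)                   # calibration run so the observers record ranges
    m = convert_script(m, True)
    print(m.graph_for(data, data))  # should show quantized::add instead of aten::add
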
1 parent b2dcedf commit eff68bc

3 files changed: 59 additions & 4 deletions

test/test_jit.py

Lines changed: 30 additions & 3 deletions
@@ -1187,8 +1187,8 @@ def forward(self, x, y, weight):
         m = torch.jit.script(M()).eval()
         qconfig_dict = {'' : script_qconfig(default_qconfig)}
         m = prepare_script(m, qconfig_dict, False)
-        # 3 for x, y, weight, one for output of each F.conv2d
-        assert len(attrs_with_prefix(m, '_observer')) == 5
+        # 3 for x, y, weight, one for output of each F.conv2d and one for output of add
+        assert len(attrs_with_prefix(m, '_observer')) == 6
 
     def test_insert_observers_shared_class_type(self):
         class M(torch.nn.Module):
@@ -1404,7 +1404,7 @@ def forward(self, x, w0, w1, w2):
 
         # we just check we have one dequant on every op input, even input
         # is sharded as multi uses
-        FileCheck().check_count("aten::dequantize", 8, exactly=True) \
+        FileCheck().check_count("aten::dequantize", 9, exactly=True) \
             .run(str(get_forward_graph(m._c)))
 
     def test_insert_quant_dequant_shared_class_type(self):
@@ -1568,6 +1568,33 @@ def forward(self, x):
                 .check("quantized::conv2d_relu") \
                 .run(m.graph_for(data))
 
+    def test_quantized_add_fusion(self):
+        class Add(torch.nn.Module):
+            def __init__(self):
+                super(Add, self).__init__()
+
+            def forward(self, x, y):
+                return x + y
+
+        class InplaceAdd(torch.nn.Module):
+            def __init__(self):
+                super(InplaceAdd, self).__init__()
+
+            def forward(self, x, y):
+                x += y
+                return x
+
+        for M in [Add, InplaceAdd]:
+            m = torch.jit.script(M()).eval()
+            m = prepare_script(m, {'': script_qconfig(default_qconfig)}, True)
+            data = torch.randn(1, 1, 10, 10, dtype=torch.float)
+            m(data, data)
+            m = convert_script(m, True)
+            FileCheck().check_not("aten::add") \
+                .check_not("aten::add_") \
+                .check("quantized::add") \
+                .run(m.graph_for(data, data))
+
     def test_quantized_add_relu_fusion(self):
         class M(torch.nn.Module):
             def __init__(self, inplace):

torch/csrc/jit/passes/quantization.cpp

Lines changed: 2 additions & 1 deletion
@@ -176,7 +176,8 @@ bool nodeQuantizable(Node* n) {
       "relu",
       "addmm",
       "matmul",
-      "add_"
+      "add_",
+      "add",
   });
 }
 
torch/csrc/jit/passes/quantization_patterns.h

Lines changed: 27 additions & 0 deletions
@@ -127,6 +127,31 @@ graph(%packed_params, %a_quant, %r_scale, %r_zero_point, %r_dtype):
   %r = quantized::linear(%a_quant, %packed_params, %r_scale, %r_zero_point)
   return (%r) )";
 
+  std::string add = R"(
+graph(%a_quant, %b_quant, %alpha, %scale, %zero_point, %dtype):
+     %a_dequant = aten::dequantize(%a_quant)
+     %b_dequant = aten::dequantize(%b_quant)
+     %r_add = aten::add(%a_dequant, %b_dequant, %alpha)
+     %r = aten::quantize_per_tensor(%r_add, %scale, %zero_point, %dtype)
+     return (%r) )";
+
+  // TODO: add %dtype after when https://github.com/pytorch/pytorch/issues/34351
+  // is fixed
+  // TODO: add filter for %alpha
+  std::string quantized_add = R"(
+graph(%a_quant, %b_quant, %alpha, %scale, %zero_point, %dtype):
+     %r_add = quantized::add(%a_quant, %b_quant, %scale, %zero_point)
+     return (%r_add) )";
+
+  std::string inplace_add = R"(
+graph(%a_quant, %b_quant, %alpha, %scale, %zero_point, %dtype):
+     %a_dequant = aten::dequantize(%a_quant)
+     %b_dequant = aten::dequantize(%b_quant)
+     %r_add = aten::add_(%a_dequant, %b_dequant, %alpha)
+     %r = aten::quantize_per_tensor(%r_add, %scale, %zero_point, %dtype)
+     return (%r) )";
+  // We don't have quantized inplace add right now
+
   return {
       {conv2d, quantized_conv2d},
       {conv2d_relu, quantized_conv2d_relu},
@@ -137,6 +162,8 @@ graph(%packed_params, %a_quant, %r_scale, %r_zero_point, %r_dtype):
       {aten_linear, quantized_aten_linear},
       {add_relu, quantized_add_relu},
       {add_inplace_relu, quantized_add_relu},
+      {add, quantized_add},
+      {inplace_add, quantized_add},
   };
 
 }
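
For reference, the replacement op in the quantized_add pattern above is the existing quantized::add kernel, which takes two quantized tensors plus the requantization scale and zero point for the output. A minimal sketch of calling it directly from Python, assuming the op is exposed as torch.ops.quantized.add (the scale/zero_point values here are arbitrary illustration choices):

    import torch

    x = torch.randn(1, 1, 10, 10)
    y = torch.randn(1, 1, 10, 10)
    # Quantize the inputs per tensor, as aten::quantize_per_tensor does in the pattern above.
    qx = torch.quantize_per_tensor(x, scale=0.1, zero_point=128, dtype=torch.quint8)
    qy = torch.quantize_per_tensor(y, scale=0.1, zero_point=128, dtype=torch.quint8)
    # quantized::add(%a_quant, %b_quant, %scale, %zero_point): the sum is requantized
    # with the given output scale and zero point.
    qz = torch.ops.quantized.add(qx, qy, 0.2, 128)
    print(qz.dequantize())  # approximately x + y, up to quantization error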
