[ONNX] fix export of embedding with padding_idx (#53053) (#53530)

BowenBao · facebook-github-bot · commit ee4ce8e9d9a4 · 2021-03-12T02:49:46.000-08:00
Summary: Pull Request resolved: #53530. Fix export of embedding with padding_idx. Test Plan: Imported from OSS. Reviewed By: navahgar, jamesr66a, malfet. Differential Revision: D26922420. Pulled By: SplitInfinity. fbshipit-source-id: b8b867a96a13cf810f9c0ae88fcc5c95072bb390
diff --git a/test/onnx/test_pytorch_onnx_caffe2.py b/test/onnx/test_pytorch_onnx_caffe2.py
@@ -809,6 +809,7 @@ def forward(self, x):
         x = torch.randn(2, 3, 4)
         self.run_model_test(ArithmeticModule(), input=x, train=False, batch_size=BATCH_SIZE)
 
+    @skipIfUnsupportedMinOpsetVersion(9)  # Where op not supported for lower opsets
     def test_embedding(self):
         model = nn.Embedding(10, 3, padding_idx=-1)
         input = torch.LongTensor(list(range(10))[::-1])
diff --git a/test/onnx/test_pytorch_onnx_onnxruntime.py b/test/onnx/test_pytorch_onnx_onnxruntime.py
@@ -6077,6 +6077,23 @@ def run_model():
 
         self.assertRaises(TypeError, run_model)
 
+    @skipIfUnsupportedMinOpsetVersion(9)  # Where op not supported for lower opsets
+    def test_embedding(self):
+        class EmbedModel(torch.nn.Module):
+            def forward(self, input, emb):
+                return torch.nn.functional.embedding(input, emb, padding_idx=1)
+
+        model = EmbedModel()
+        x = torch.randint(4, (4, ))
+        x[2] = x[0] = 1  # make sure some indices equal padding_idx
+        embedding_matrix = torch.rand(10, 3)
+        self.run_test(model, (x, embedding_matrix))
+
+        x = torch.randint(4, (4, 3, 2))  # repeat with a multi-dimensional index tensor
+        x[2] = 1
+        x[0][1] = 1
+        self.run_test(model, (x, embedding_matrix))
+
     def _dispatch_rnn_test(self, name, *args, **kwargs):
         if name == 'elman':
             self._elman_rnn_test(*args, **kwargs)
diff --git a/torch/onnx/symbolic_opset9.py b/torch/onnx/symbolic_opset9.py
@@ -471,8 +471,27 @@ def expand_as(g, self, other):
     return g.op("Expand", self, shape)
 
 
+@parse_args('v', 'v', 'i', 'b', 'v')
 def embedding(g, weight, indices, padding_idx, scale_grad_by_freq, sparse):
-    return g.op("Gather", weight, indices)
+    if scale_grad_by_freq and sym_help._training_mode:
+        raise RuntimeError('Unsupported: ONNX export of embedding with scale_grad_by_freq=True '
+                           'for training mode. ONNX does not support scaling the gradients.')
+    # To match the torch operator behavior for padding_idx:
+    # if (padding_idx >= 0) {
+    #   embedding.masked_fill_((indices == padding_idx).reshape({-1, 1}), 0);
+    # }
+    # auto out = weight.index_select(0, indices.reshape(-1));
+    # zerofill_padding(out);
+    # return out.view(size);
+    weight = g.op("Gather", weight, indices)  # from here on `weight` holds the gathered output
+    if (padding_idx >= 0):  # a negative padding_idx skips the masking entirely
+        mask = eq(g, indices, g.op("Constant", value_t=torch.tensor(padding_idx)))
+        if sym_help._export_onnx_opset_version < 11:
+            mask = unsqueeze(g, mask, -1)  # trailing axis, cf. reshape({-1, 1}) above
+        else:
+            mask = sym_help._unsqueeze_helper(g, mask, [-1])  # same trailing axis, helper form
+        weight = masked_fill(g, weight, mask, torch.tensor(0.))  # fill masked positions with 0
+    return weight
 
 
 @parse_args('v', 'v', 'v', 'i', 'i', 'i', 'v', 'i')