[NNC] Added some more external function bindings (#53420)

Chillee · facebook-github-bot · commit 067ad3121094 · 2021-03-08T14:18:30.000-08:00
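Summary: Adds NNC external function bindings for at::mv, at::mm, at::adaptive_avg_pool2d and at::mean, and replaces the Matmul test with table-driven BinaryFloat and UnaryFloat tests. Pull Request resolved: #53420 Reviewed By: navahgar Differential Revision: D26876784 Pulled By: Chillee fbshipit-source-id: 05e7c782a72de5159879f88a104f1a273e0345eb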
diff --git a/test/cpp/tensorexpr/test_external_calls.cpp b/test/cpp/tensorexpr/test_external_calls.cpp
@@ -191,50 +191,157 @@ TEST(ExternalCall, Conv2d_nobias_noargs) {
   ASSERT_TRUE(at::allclose(nnc_result, ref));
 }
 
-TEST(ExternalCall, Matmul) {
+// Table-driven check of the two-input external calls (matmul, mv, mm). Each
+// case wraps an ExternalCall over placeholders A and B, runs it through the
+// LLVM backend (when compiled in) and the IR interpreter, and compares the
+// output buffer against the corresponding ATen function.
+TEST(ExternalCall, BinaryFloat) {
   KernelScope kernel_scope;
-  Placeholder A("A", kFloat, {10, 3, 100, 200});
-  Placeholder B("", kFloat, {10, 3, 200, 300});
-  BufHandle ResultBuf("Result", {10, 3, 100, 300}, kFloat);
+  using TensorFunc = std::function<at::Tensor(at::Tensor, at::Tensor)>;
+  // (A shape, B shape, result shape, ATen reference, external function name)
+  using Test = std::tuple<
+      std::vector<int64_t>,
+      std::vector<int64_t>,
+      std::vector<int64_t>,
+      TensorFunc,
+      std::string>;
+  std::vector<Test> tests = {};
+  tests.push_back(
+      Test{{100, 200}, {200, 300}, {100, 300}, at::matmul, "nnc_aten_matmul"});
+  tests.push_back(Test{{100, 300}, {300}, {100}, at::mv, "nnc_aten_mv"});
+  tests.push_back(
+      Test{{100, 200}, {200, 300}, {100, 300}, at::mm, "nnc_aten_mm"});
+  for (const auto& curTest : tests) {
+    std::vector<int64_t> aShape, bShape, resShape;
+    TensorFunc torchFunc;
+    std::string externCallName;
+    std::tie(aShape, bShape, resShape, torchFunc, externCallName) = curTest;
+    // Narrows every extent to int before wrapping it in an ExprHandle.
+    auto toExprHandleVec = [](const std::vector<int64_t>& v) {
+      auto intV = std::vector<int>(v.begin(), v.end());
+      return std::vector<ExprHandle>(intV.begin(), intV.end());
+    };
+    Placeholder A("A", kFloat, toExprHandleVec(aShape));
+    Placeholder B("", kFloat, toExprHandleVec(bShape));
+    BufHandle ResultBuf("Result", toExprHandleVec(resShape), kFloat);
+
+    Tensor* Result = new Tensor(
+        ResultBuf.node(),
+        ExternalCall::make(
+            ResultBuf,
+            externCallName,
+            {BufHandle(A.data()), BufHandle(B.data())},
+            {}));
+    LoopNest l({Result});
+    l.prepareForCodegen();
+    l.simplify();
+
+    auto options = at::TensorOptions()
+                       .dtype(at::kFloat)
+                       .layout(at::kStrided)
+                       .device(at::kCPU)
+                       .requires_grad(false);
+    at::Tensor a = at::ones(c10::IntArrayRef(aShape), options) * 5.f;
+    at::Tensor b = at::ones(c10::IntArrayRef(bShape), options) * 6.f;
+    at::Tensor ref = torchFunc(a, b);
+
+    // Element count of a shape. std::accumulate's accumulator has the type of
+    // the initial value, so seed it with an int64_t to keep the running
+    // product from being narrowed to int.
+    auto prod = [](const std::vector<int64_t>& v) {
+      return std::accumulate(
+          v.begin(), v.end(), int64_t{1}, std::multiplies<int64_t>());
+    };
+
+    at::Tensor nnc_result;
+    std::vector<float> a_buf(prod(aShape), 5.f);
+    std::vector<float> b_buf(prod(bShape), 6.f);
+    std::vector<float> result_buf(prod(resShape), -1.f);
 
-  Tensor* Result = new Tensor(
-      ResultBuf.node(),
-      ExternalCall::make(
-          ResultBuf,
-          "nnc_aten_matmul",
-          {BufHandle(A.data()), BufHandle(B.data())},
-          {}));
-  LoopNest l({Result});
-  l.prepareForCodegen();
-  l.simplify();
+#ifdef TORCH_ENABLE_LLVM
+    LLVMCodeGen llvm_codegen(l.root_stmt(), {A, B, Result});
 
-  auto options = at::TensorOptions()
-                     .dtype(at::kFloat)
-                     .layout(at::kStrided)
-                     .device(at::kCPU)
-                     .requires_grad(false);
-  at::Tensor a = at::ones({10, 3, 100, 200}, options) * 5.f;
-  at::Tensor b = at::ones({10, 3, 200, 300}, options) * 6.f;
-  at::Tensor ref = at::matmul(a, b);
+    llvm_codegen.call({a_buf, b_buf, result_buf});
+    nnc_result =
+        at::from_blob(result_buf.data(), c10::IntArrayRef(resShape), options);
+    ASSERT_TRUE(at::allclose(nnc_result, ref));
+#endif
 
-  at::Tensor nnc_result;
-  std::vector<float> a_buf(10 * 3 * 100 * 200, 5.f);
-  std::vector<float> b_buf(10 * 3 * 200 * 300, 6.f);
-  std::vector<float> result_buf(10 * 3 * 100 * 300, -1.f);
+    SimpleIREvaluator ir_eval(l.root_stmt(), {A, B, Result});
+    // Restore the sentinel so that a result left behind by the LLVM run above
+    // cannot satisfy the interpreter's check on its own.
+    result_buf.assign(result_buf.size(), -1.f);
+    ir_eval.call({a_buf, b_buf, result_buf});
+    nnc_result =
+        at::from_blob(result_buf.data(), c10::IntArrayRef(resShape), options);
+    ASSERT_TRUE(at::allclose(nnc_result, ref));
+  }
+}
+
+// Table-driven check of the single-input external calls (adaptive_avg_pool2d,
+// mean). Each case wraps an ExternalCall over placeholder A plus its extra
+// integer arguments, runs it through the LLVM backend (when compiled in) and
+// the IR interpreter, and compares the output buffer against ATen.
+TEST(ExternalCall, UnaryFloat) {
+  KernelScope kernel_scope;
+  using TensorFunc = std::function<at::Tensor(at::Tensor)>;
+  // Narrows every extent to int before wrapping it in an ExprHandle.
+  auto toExprHandleVec = [](const std::vector<int64_t>& v) {
+    auto intV = std::vector<int>(v.begin(), v.end());
+    return std::vector<ExprHandle>(intV.begin(), intV.end());
+  };
+  // (input shape, result shape, ATen reference, external function name,
+  //  extra arguments handed to ExternalCall::make)
+  using Test = std::tuple<
+      std::vector<int64_t>,
+      std::vector<int64_t>,
+      TensorFunc,
+      std::string,
+      std::vector<ExprHandle>>;
+  std::vector<Test> tests = {};
+  tests.push_back(Test{
+      {1, 64, 8, 9},
+      {1, 64, 5, 7},
+      [](at::Tensor x) {
+        return at::adaptive_avg_pool2d(x, {5, 7});
+      },
+      "nnc_aten_adaptive_avg_pool2d",
+      toExprHandleVec({5, 7})});
+  tests.push_back(Test{
+      {100, 200},
+      {100},
+      [](at::Tensor x) { return at::mean(x, {1}); },
+      "nnc_aten_mean",
+      toExprHandleVec({1})});
+  for (const auto& curTest : tests) {
+    std::vector<int64_t> aShape, resShape;
+    TensorFunc torchFunc;
+    std::string externCallName;
+    std::vector<ExprHandle> externCallArgs;
+    std::tie(aShape, resShape, torchFunc, externCallName, externCallArgs) =
+        curTest;
+    Placeholder A("A", kFloat, toExprHandleVec(aShape));
+    BufHandle ResultBuf("Result", toExprHandleVec(resShape), kFloat);
+
+    Tensor* Result = new Tensor(
+        ResultBuf.node(),
+        ExternalCall::make(
+            ResultBuf, externCallName, {BufHandle(A.data())}, externCallArgs));
+    LoopNest l({Result});
+    l.prepareForCodegen();
+    l.simplify();
+
+    auto options = at::TensorOptions()
+                       .dtype(at::kFloat)
+                       .layout(at::kStrided)
+                       .device(at::kCPU)
+                       .requires_grad(false);
+    at::Tensor a = at::ones(c10::IntArrayRef(aShape), options) * 5.f;
+    at::Tensor ref = torchFunc(a);
+
+    // Element count of a shape. std::accumulate's accumulator has the type of
+    // the initial value, so seed it with an int64_t to keep the running
+    // product from being narrowed to int.
+    auto prod = [](const std::vector<int64_t>& v) {
+      return std::accumulate(
+          v.begin(), v.end(), int64_t{1}, std::multiplies<int64_t>());
+    };
+
+    at::Tensor nnc_result;
+    std::vector<float> a_buf(prod(aShape), 5.f);
+    std::vector<float> result_buf(prod(resShape), -1.f);
 
 #ifdef TORCH_ENABLE_LLVM
-  LLVMCodeGen llvm_codegen(l.root_stmt(), {A, B, Result});
+    LLVMCodeGen llvm_codegen(l.root_stmt(), {A, Result});
 
-  llvm_codegen.call({a_buf, b_buf, result_buf});
-  nnc_result = at::from_blob(result_buf.data(), {10, 3, 100, 300}, options);
-  ASSERT_TRUE(at::allclose(nnc_result, ref));
+    llvm_codegen.call({a_buf, result_buf});
+    nnc_result =
+        at::from_blob(result_buf.data(), c10::IntArrayRef(resShape), options);
+    ASSERT_TRUE(at::allclose(nnc_result, ref));
 #endif
 
-  SimpleIREvaluator ir_eval(l.root_stmt(), {A, B, Result});
-
-  ir_eval.call({a_buf, b_buf, result_buf});
-  nnc_result = at::from_blob(result_buf.data(), {10, 3, 100, 300}, options);
-  ASSERT_TRUE(at::allclose(nnc_result, ref));
+    SimpleIREvaluator ir_eval(l.root_stmt(), {A, Result});
+    // Restore the sentinel so that a result left behind by the LLVM run above
+    // cannot satisfy the interpreter's check on its own.
+    result_buf.assign(result_buf.size(), -1.f);
+    ir_eval.call({a_buf, result_buf});
+    nnc_result =
+        at::from_blob(result_buf.data(), c10::IntArrayRef(resShape), options);
+    ASSERT_TRUE(at::allclose(nnc_result, ref));
+  }
 }
 
 TEST(ExternalCall, ComputeInterop) {
diff --git a/torch/csrc/jit/tensorexpr/external_functions.cpp b/torch/csrc/jit/tensorexpr/external_functions.cpp
@@ -111,12 +111,101 @@ void nnc_aten_matmul(
   }
 }
 
-static RegisterNNCExternalFunction nnc_conv2d(
+// NNC external-call entry point that forwards to at::mv_out.
+// Buffer 0 is the output; buffers 1 and 2 are the two operands, in that
+// order. args_num and extra_args are not used by this binding.
+void nnc_aten_mv(
+    int64_t bufs_num,
+    void** buf_data,
+    int64_t* buf_ranks,
+    int64_t* buf_dims,
+    int8_t* buf_dtypes,
+    int64_t args_num,
+    int64_t* extra_args) {
+  std::vector<at::Tensor> bufs =
+      constructTensors(bufs_num, buf_data, buf_ranks, buf_dims, buf_dtypes);
+
+  at::Tensor& out = bufs[0];
+  const at::Tensor& mat = bufs[1];
+  const at::Tensor& vec = bufs[2];
+  try {
+    at::mv_out(out, mat, vec);
+  } catch (...) {
+    // Exceptions from the ATen call are swallowed, not reported to the caller.
+  }
+}
+
+// NNC external-call entry point that forwards to at::mm_out.
+// Buffer 0 is the output; buffers 1 and 2 are the left and right operands.
+// args_num and extra_args are not used by this binding.
+void nnc_aten_mm(
+    int64_t bufs_num,
+    void** buf_data,
+    int64_t* buf_ranks,
+    int64_t* buf_dims,
+    int8_t* buf_dtypes,
+    int64_t args_num,
+    int64_t* extra_args) {
+  std::vector<at::Tensor> bufs =
+      constructTensors(bufs_num, buf_data, buf_ranks, buf_dims, buf_dtypes);
+
+  at::Tensor& out = bufs[0];
+  const at::Tensor& lhs = bufs[1];
+  const at::Tensor& rhs = bufs[2];
+  try {
+    at::mm_out(out, lhs, rhs);
+  } catch (...) {
+    // Exceptions from the ATen call are swallowed, not reported to the caller.
+  }
+}
+
+// NNC external-call entry point that forwards to
+// at::adaptive_avg_pool2d_out. Buffer 0 is the output and buffer 1 the input.
+// extra_args carries the output size: {H, W}, or a single value that is used
+// for both H and W. With no extra arguments there is no output size to read,
+// so the call returns without touching the output.
+void nnc_aten_adaptive_avg_pool2d(
+    int64_t bufs_num,
+    void** buf_data,
+    int64_t* buf_ranks,
+    int64_t* buf_dims,
+    int8_t* buf_dtypes,
+    int64_t args_num,
+    int64_t* extra_args) {
+  if (args_num < 1) {
+    // Never read past the end of extra_args.
+    return;
+  }
+
+  std::vector<at::Tensor> tensors =
+      constructTensors(bufs_num, buf_data, buf_ranks, buf_dims, buf_dtypes);
+
+  at::Tensor& r = tensors[0];
+  const at::Tensor& x = tensors[1];
+  const int64_t H = extra_args[0];
+  // Only touch extra_args[1] when the caller actually supplied it.
+  const int64_t W = args_num > 1 ? extra_args[1] : H;
+  try {
+    at::adaptive_avg_pool2d_out(r, x, {H, W});
+  } catch (...) {
+    // Exceptions from the ATen call are swallowed, not reported to the caller.
+  }
+}
+
+// NNC external-call entry point that forwards to at::mean_out.
+// Buffer 0 is the output and buffer 1 the input. extra_args holds args_num
+// reduction dimensions, all of which are passed on; with exactly one entry
+// this is the same call as reducing over that single dimension. An empty list
+// is handed to at::mean_out unchanged (presumably a reduction over every
+// dimension -- confirm against ATen).
+void nnc_aten_mean(
+    int64_t bufs_num,
+    void** buf_data,
+    int64_t* buf_ranks,
+    int64_t* buf_dims,
+    int8_t* buf_dtypes,
+    int64_t args_num,
+    int64_t* extra_args) {
+  std::vector<at::Tensor> tensors =
+      constructTensors(bufs_num, buf_data, buf_ranks, buf_dims, buf_dtypes);
+
+  at::Tensor& r = tensors[0];
+  const at::Tensor& x = tensors[1];
+  // Copy exactly args_num entries so extra_args is never read out of bounds.
+  std::vector<int64_t> dims;
+  if (args_num > 0) {
+    dims.assign(extra_args, extra_args + args_num);
+  }
+  try {
+    at::mean_out(r, x, dims);
+  } catch (...) {
+    // Exceptions from the ATen call are swallowed, not reported to the caller.
+  }
+}
+
+const static RegisterNNCExternalFunction nnc_conv2d(
     "nnc_aten_conv2d",
     nnc_aten_conv2d);
-static RegisterNNCExternalFunction nnc_matmul(
+const static RegisterNNCExternalFunction nnc_matmul(
     "nnc_aten_matmul",
     nnc_aten_matmul);
+const static RegisterNNCExternalFunction nnc_mv("nnc_aten_mv", nnc_aten_mv);
+const static RegisterNNCExternalFunction nnc_mm("nnc_aten_mm", nnc_aten_mm);
+const static RegisterNNCExternalFunction nnc_adaptive_avg_pool2d(
+    "nnc_aten_adaptive_avg_pool2d",
+    nnc_aten_adaptive_avg_pool2d);
+const static RegisterNNCExternalFunction nnc_mean(
+    "nnc_aten_mean",
+    nnc_aten_mean);
 
 } // namespace tensorexpr
 } // namespace jit