Skip to content

Commit 0216366

Browse files
ezyang authored and facebook-github-bot committed
Make use_c10_dispatcher: full mandatory for structured kernels (#49490)
Summary: Pull Request resolved: #49490 No reason to let people to do the legacy thing for the brand new kernel. This simplifies the codegen. I have to port the two structured kernels to this new format. Signed-off-by: Edward Z. Yang <ezyang@fb.com> Test Plan: Imported from OSS Reviewed By: smessmer Differential Revision: D25595406 Pulled By: ezyang fbshipit-source-id: b5931873379afdd0f3b00a012e0066af05de0a69
1 parent 6c833ef commit 0216366

10 files changed

Lines changed: 33 additions & 35 deletions

File tree

aten/src/ATen/native/BinaryOps.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -71,7 +71,7 @@ static Tensor wrapped_scalar_tensor(Scalar scalar) {
7171
}
7272

7373
TORCH_IMPL_FUNC(add_out) (
74-
Tensor& result, const Tensor& self, const Tensor& other, Scalar alpha
74+
const Tensor& self, const Tensor& other, Scalar alpha, Tensor& result
7575
) {
7676
add_stub(device_type(), *this, alpha);
7777
TORCH_INTERNAL_ASSERT(result.scalar_type() == output().dtype());

aten/src/ATen/native/UpSampleNearest1d.cpp

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -66,19 +66,21 @@ TORCH_META_FUNC(upsample_nearest1d_backward) (
6666
namespace native {
6767

6868
TORCH_IMPL_FUNC(upsample_nearest1d_out_cpu) (
69-
Tensor& output,
7069
const Tensor& input,
7170
IntArrayRef output_size,
72-
c10::optional<double> scales) {
71+
c10::optional<double> scales,
72+
Tensor& output
73+
) {
7374
upsample_nearest1d_kernel(kCPU, output, input, scales);
7475
}
7576

7677
TORCH_IMPL_FUNC(upsample_nearest1d_backward_out_cpu) (
77-
Tensor& grad_input,
7878
const Tensor& grad_output,
7979
IntArrayRef output_size,
8080
IntArrayRef input_size,
81-
c10::optional<double> scales) {
81+
c10::optional<double> scales,
82+
Tensor& grad_input
83+
) {
8284
grad_input.zero_();
8385
upsample_nearest1d_backward_kernel(kCPU, grad_input, grad_output, scales);
8486
}

aten/src/ATen/native/cuda/UpSampleNearest1d.cu

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -197,19 +197,21 @@ static void upsample_nearest1d_backward_out_cuda_template(
197197
} // namespace
198198

199199
TORCH_IMPL_FUNC(upsample_nearest1d_out_cuda) (
200-
Tensor& output,
201200
const Tensor& input,
202201
IntArrayRef output_size,
203-
c10::optional<double> scales) {
202+
c10::optional<double> scales,
203+
Tensor& output
204+
) {
204205
upsample_nearest1d_out_cuda_template(output, input, output_size, scales);
205206
}
206207

207208
TORCH_IMPL_FUNC(upsample_nearest1d_backward_out_cuda) (
208-
Tensor& grad_input,
209209
const Tensor& grad_output,
210210
IntArrayRef output_size,
211211
IntArrayRef input_size,
212-
c10::optional<double> scales) {
212+
c10::optional<double> scales,
213+
Tensor& grad_input
214+
) {
213215
upsample_nearest1d_backward_out_cuda_template(
214216
grad_input, grad_output, output_size, input_size, scales);
215217
}

aten/src/ATen/native/mkldnn/BinaryOps.cpp

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -8,10 +8,11 @@ namespace at {
88
namespace native {
99

1010
Tensor& mkldnn_add_out(
11-
Tensor& result,
1211
const Tensor& self,
1312
const Tensor& other,
14-
Scalar alpha) {
13+
Scalar alpha,
14+
Tensor& result
15+
) {
1516
TORCH_CHECK(false, "mkldnn_add_out: ATen not compiled with MKLDNN support");
1617
}
1718

@@ -46,10 +47,11 @@ namespace at {
4647
namespace native {
4748

4849
Tensor& mkldnn_add_out(
49-
Tensor& result,
5050
const Tensor& self,
5151
const Tensor& other,
52-
Scalar alpha) {
52+
Scalar alpha,
53+
Tensor& result
54+
) {
5355
ideep::tensor& x = itensor_from_mkldnn(self);
5456
ideep::tensor& y = itensor_from_mkldnn(other);
5557

@@ -73,7 +75,7 @@ Tensor mkldnn_add(const Tensor& self, const Tensor& other, Scalar alpha) {
7375
}
7476

7577
Tensor& mkldnn_add_(Tensor& self, const Tensor& other, Scalar alpha) {
76-
return native::mkldnn_add_out(self, self, other, alpha);
78+
return native::mkldnn_add_out(self, other, alpha, self);
7779
}
7880

7981
Tensor& mkldnn_mul_out(Tensor& result, const Tensor& self, const Tensor& other) {

aten/src/ATen/native/native_functions.yaml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -412,7 +412,7 @@
412412
MkldnnCPU: mkldnn_add_
413413

414414
- func: add.out(Tensor self, Tensor other, *, Scalar alpha=1, Tensor(a!) out) -> Tensor(a!)
415-
use_c10_dispatcher: hacky_wrapper_for_legacy_signatures
415+
use_c10_dispatcher: full
416416
structured: True
417417
structured_inherits: TensorIteratorBase
418418
dispatch:
@@ -9439,7 +9439,7 @@
94399439
CUDA: upsample_trilinear3d_backward_cuda
94409440

94419441
- func: upsample_nearest1d.out(Tensor self, int[1] output_size, float? scales=None, *, Tensor(a!) out) -> Tensor(a!)
9442-
use_c10_dispatcher: hacky_wrapper_for_legacy_signatures
9442+
use_c10_dispatcher: full
94439443
python_module: nn
94449444
structured: True
94459445
dispatch:
@@ -9452,7 +9452,7 @@
94529452
structured_delegate: upsample_nearest1d.out
94539453

94549454
- func: upsample_nearest1d_backward.grad_input(Tensor grad_output, int[1] output_size, int[3] input_size, float? scales=None, *, Tensor(a!) grad_input) -> Tensor(a!)
9455-
use_c10_dispatcher: hacky_wrapper_for_legacy_signatures
9455+
use_c10_dispatcher: full
94569456
python_module: nn
94579457
structured: True
94589458
dispatch:

aten/src/ATen/native/sparse/SparseTensorMath.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -544,7 +544,7 @@ SparseTensor& add_out_sparse_non_contiguous(SparseTensor& r, const SparseTensor&
544544

545545
Tensor& add_out_dense_sparse_cpu(Tensor& r, const Tensor& dense, const SparseTensor& sparse_, Scalar value);
546546

547-
SparseTensor& add_out_sparse_cpu(SparseTensor& r, const SparseTensor& t, const SparseTensor& src, Scalar value) {
547+
SparseTensor& add_out_sparse_cpu(const SparseTensor& t, const SparseTensor& src, Scalar value, SparseTensor& r) {
548548
if (!t.is_sparse()) {
549549
return add_out_dense_sparse_cpu(r, t, src, value);
550550
}

aten/src/ATen/native/sparse/cuda/SparseCUDATensorMath.cu

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -399,7 +399,7 @@ Tensor& add_out_dense_sparse_cuda(Tensor& r_, const Tensor& dense, const SparseT
399399

400400
Tensor& add_out_dense_sparse_cuda(Tensor& r, const Tensor& dense, const SparseTensor& sparse_, Scalar value);
401401

402-
SparseTensor& add_out_sparse_cuda(SparseTensor& r_, const SparseTensor& t, const SparseTensor& src, Scalar value) {
402+
SparseTensor& add_out_sparse_cuda(const SparseTensor& t, const SparseTensor& src, Scalar value, SparseTensor& r_) {
403403
if (!t.is_sparse()) {
404404
return add_out_dense_sparse_cuda(r_, t, src, value);
405405
}

tools/codegen/gen.py

Lines changed: 4 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -432,7 +432,7 @@ def gen_one(f: NativeFunction) -> Optional[str]:
432432
if self.dispatch_key == 'Meta':
433433
impl_call = ""
434434
else:
435-
impl_call = f"op.impl({out_expr}, {functional_exprs});"
435+
impl_call = f"op.impl({functional_exprs}, {out_expr});"
436436

437437
# For an overview of what this template code looks like, see
438438
# https://github.com/pytorch/rfcs/pull/9
@@ -455,19 +455,8 @@ def gen_one(f: NativeFunction) -> Optional[str]:
455455
elif self.target is Target.REGISTRATION:
456456
dispatcher_sig = DispatcherSignature.from_schema(f.func)
457457

458-
if local.use_c10_dispatcher() is UseC10Dispatcher.full:
459-
payload = f"TORCH_FN({sig.name()})"
460-
elif local.use_c10_dispatcher() is UseC10Dispatcher.hacky_wrapper_for_legacy_signatures:
461-
payload = f"""
462-
c10::impl::hacky_wrapper_for_legacy_signatures<
463-
{dispatcher_sig.type()},
464-
{len(f.func.arguments.out)}
465-
>(TORCH_FN({sig.name()}))
466-
"""
467-
else:
468-
assert local.use_c10_dispatcher() is UseC10Dispatcher.with_codegenerated_unboxing_wrapper
469-
payload = f"torch::CppFunction::makeUnboxedOnly(&{sig.name()})"
470-
return f'm.impl("{f.func.name}", {payload});'
458+
assert local.use_c10_dispatcher() is UseC10Dispatcher.full
459+
return f'm.impl("{f.func.name}", TORCH_FN({sig.name()}));'
471460
else:
472461
assert_never(self.target)
473462
# Silence mypy's "Missing return statement" error
@@ -760,7 +749,7 @@ def compute_meta_function_declaration(g: StructuredNativeFunctions) -> str:
760749
sig = g.signature()
761750
name = meta.name(g)
762751
args = native.arguments(sig)
763-
args_str = ', '.join(a.defn() for a in args)
752+
args_str = ', '.join(a.decl() for a in args)
764753
parent_class = g.out.structured_inherits
765754
if parent_class is None:
766755
parent_class = "at::impl::MetaBase"

tools/codegen/model.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -306,6 +306,9 @@ def __post_init__(self) -> None:
306306
if a.default is not None}
307307
invalid_args = set.difference(self.cpp_no_default_args, defaulted_arguments)
308308
assert len(invalid_args) == 0, f'Invalid cpp_no_default_args: {invalid_args}'
309+
if self.structured or self.structured_delegate:
310+
assert self.use_c10_dispatcher is UseC10Dispatcher.full, \
311+
"Structured kernels MUST be use_c10_dispatcher: full; port your argument order"
309312

310313
SchemaKind = Enum('SchemaKind', ('functional', 'inplace', 'out'))
311314

torch/csrc/jit/runtime/static/ops.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -88,7 +88,7 @@ REGISTER_OPERATOR_FUNCTOR(aten::add, aten_add, [](Node* n) -> SROperator {
8888
auto out_t = p_node->Output(0, reg).toTensor();
8989
static_add op{out_t};
9090
op.meta(in0_t, in1_t, in2_s);
91-
op.impl(out_t, in0_t, in1_t, in2_s);
91+
op.impl(in0_t, in1_t, in2_s, out_t);
9292
};
9393
});
9494

0 commit comments

Comments (0)