Skip to content

Commit 288c97d

Browse files
author
Mikhail Zolotukhin
committed
Update on "[TensorExpr] Benchmarks: set up profiling executor and fuser according to the given arguments."
Differential Revision: [D21525741](https://our.internmc.facebook.com/intern/diff/D21525741) [ghstack-poisoned]
2 parents d8b3b2c + cfe3c79 commit 288c97d

83 files changed

Lines changed: 814 additions & 1008 deletions

File tree

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

.circleci/config.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -509,7 +509,7 @@ jobs:
509509
if [ "${BUILD_ENVIRONMENT}" == "pytorch-linux-bionic-py3.6-clang9-test" ]; then
510510
return 0
511511
fi
512-
if [ "${BUILD_ENVIRONMENT}" == "pytorch-linux-xenial-py3.6-gcc-5.4-test" ]; then
512+
if [ "${BUILD_ENVIRONMENT}" == "pytorch-linux-xenial-py3.6-gcc5.4-test" ]; then
513513
return 0
514514
fi
515515
return 1

.circleci/verbatim-sources/pytorch-job-specs.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -114,7 +114,7 @@ jobs:
114114
if [ "${BUILD_ENVIRONMENT}" == "pytorch-linux-bionic-py3.6-clang9-test" ]; then
115115
return 0
116116
fi
117-
if [ "${BUILD_ENVIRONMENT}" == "pytorch-linux-xenial-py3.6-gcc-5.4-test" ]; then
117+
if [ "${BUILD_ENVIRONMENT}" == "pytorch-linux-xenial-py3.6-gcc5.4-test" ]; then
118118
return 0
119119
fi
120120
return 1

aten/src/ATen/AccumulateType.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,8 @@ template <> struct AccumulateType<bool, true> {using type = bool; };
3737
template <> struct AccumulateType<BFloat16, false> { using type = float; };
3838
template <> struct AccumulateType<std::complex<float>, false> { using type = std::complex<double>; };
3939
template <> struct AccumulateType<std::complex<double>, false> { using type = std::complex<double>; };
40+
template <> struct AccumulateType<c10::complex<float>, false> { using type = c10::complex<double>; };
41+
template <> struct AccumulateType<c10::complex<double>, false> { using type = c10::complex<double>; };
4042
template <> struct AccumulateType<float, false> { using type = double; };
4143
template <> struct AccumulateType<double, false> { using type = double; };
4244
template <> struct AccumulateType<int8_t, false> { using type = int64_t; };

aten/src/ATen/Declarations.cwrap

Lines changed: 0 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -348,20 +348,6 @@
348348
output: True
349349
- THTensor* self
350350
]]
351-
[[
352-
name: _th_atan
353-
cname: atan
354-
types:
355-
- floating_point
356-
backends:
357-
- CUDA
358-
variants: function
359-
return: argument 0
360-
arguments:
361-
- arg: THTensor* result
362-
output: True
363-
- THTensor* self
364-
]]
365351
[[
366352
name: _th_erfc
367353
cname: erfc

aten/src/ATen/core/DistributionsHelper.h

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -57,9 +57,9 @@ struct uniform_int_from_to_distribution {
5757
std::is_same<T, float>::value ||
5858
std::is_same<T, at::BFloat16>::value) && range_ >= 1ULL << 32)
5959
{
60-
return uniform_int_from_to_transformation<T>(generator->random64(), range_, base_);
60+
return transformation::uniform_int_from_to<T>(generator->random64(), range_, base_);
6161
} else {
62-
return uniform_int_from_to_transformation<T>(generator->random(), range_, base_);
62+
return transformation::uniform_int_from_to<T>(generator->random(), range_, base_);
6363
}
6464
}
6565

@@ -76,7 +76,7 @@ struct uniform_int_full_range_distribution {
7676

7777
template <typename RNG>
7878
C10_HOST_DEVICE inline T operator()(RNG generator) {
79-
return uniform_int_full_range_transformation<T>(generator->random64());
79+
return transformation::uniform_int_full_range<T>(generator->random64());
8080
}
8181

8282
};
@@ -91,9 +91,9 @@ struct uniform_int_distribution {
9191
template <typename RNG>
9292
C10_HOST_DEVICE inline T operator()(RNG generator) {
9393
if (std::is_same<T, double>::value || std::is_same<T, int64_t>::value) {
94-
return uniform_int_transformation<T>(generator->random64());
94+
return transformation::uniform_int<T>(generator->random64());
9595
} else {
96-
return uniform_int_transformation<T>(generator->random());
96+
return transformation::uniform_int<T>(generator->random());
9797
}
9898
}
9999

@@ -115,9 +115,9 @@ struct uniform_real_distribution {
115115
template <typename RNG>
116116
C10_HOST_DEVICE inline dist_acctype<T> operator()(RNG generator){
117117
if(std::is_same<T, double>::value) {
118-
return uniform_real_transformation<T>(generator->random64(), from_, to_);
118+
return transformation::uniform_real<T>(generator->random64(), from_, to_);
119119
} else {
120-
return uniform_real_transformation<T>(generator->random(), from_, to_);
120+
return transformation::uniform_real<T>(generator->random(), from_, to_);
121121
}
122122
}
123123

aten/src/ATen/core/TransformationHelper.h

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -25,29 +25,31 @@ template <> struct DistAccumType<double> { using type = double; };
2525
template <typename T>
2626
using dist_acctype = typename DistAccumType<T>::type;
2727

28+
namespace transformation {
29+
2830
/**
2931
* A transformation function for `torch.Tensor.random_()`, when both `from` and `to` are specified.
3032
* `range` is `to - from`
3133
* `base` is `from`
3234
*/
3335
template <typename T, typename V>
34-
C10_HOST_DEVICE inline T uniform_int_from_to_transformation(V val, uint64_t range, int64_t base) {
36+
C10_HOST_DEVICE inline T uniform_int_from_to(V val, uint64_t range, int64_t base) {
3537
return static_cast<T>(static_cast<int64_t>((val % range) + base));
3638
}
3739

3840
/**
3941
* A transformation function for `torch.Tensor.random_()`, when `from=min_value(int64_t)` and to=None
4042
*/
4143
template <typename T, typename V>
42-
C10_HOST_DEVICE inline T uniform_int_full_range_transformation(V val) {
44+
C10_HOST_DEVICE inline T uniform_int_full_range(V val) {
4345
return static_cast<T>(static_cast<int64_t>(val));
4446
}
4547

4648
/**
4749
* A transformation function for `torch.Tensor.random_()`, when used without specifying `from` and `to`.
4850
*/
4951
template <typename T, typename V>
50-
C10_HOST_DEVICE inline T uniform_int_transformation(V val) {
52+
C10_HOST_DEVICE inline T uniform_int(V val) {
5153
if (std::is_same<T, bool>::value) {
5254
return static_cast<bool>(val & 1);
5355
} else if (std::is_same<T, double>::value) {
@@ -65,11 +67,11 @@ C10_HOST_DEVICE inline T uniform_int_transformation(V val) {
6567
}
6668

6769
template <typename T, typename V>
68-
C10_HOST_DEVICE inline dist_acctype<T> uniform_real_transformation(V val, T from, T to) {
70+
C10_HOST_DEVICE inline dist_acctype<T> uniform_real(V val, T from, T to) {
6971
constexpr auto MASK = static_cast<V>((static_cast<uint64_t>(1) << std::numeric_limits<T>::digits) - 1);
7072
constexpr auto DIVISOR = static_cast<dist_acctype<T>>(1) / (static_cast<uint64_t>(1) << std::numeric_limits<T>::digits);
7173
dist_acctype<T> x = (val & MASK) * DIVISOR;
7274
return (x * (to - from) + from);
7375
}
7476

75-
} // namespace at
77+
}} // namespace at::transformation

aten/src/ATen/cpu/vec256/vec256_base.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -157,7 +157,7 @@ struct Vec256 {
157157
static Vec256<T> arange(T base = static_cast<T>(0), step_t step = static_cast<step_t>(1)) {
158158
Vec256 vec;
159159
for (int64_t i = 0; i < size(); i++) {
160-
vec.values[i] = base + i * step;
160+
vec.values[i] = base + static_cast<step_t>(i) * step;
161161
}
162162
return vec;
163163
}

aten/src/ATen/function_wrapper.py

Lines changed: 20 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,7 @@ def TypedDict(name, attrs, total=True): # type: ignore
4444
default:
4545
AT_ERROR("${api_name} not supported on ${Type} for ", dispatch_scalar_type);
4646
}
47+
${switch_epilogue}
4748
""")
4849

4950
LEGACY_TH_DEFINITION_CASE = CodeTemplate("""\
@@ -1414,19 +1415,7 @@ def emit_body(env, option, scalar_type_cases):
14141415

14151416
if ret['kind'] == 'arguments':
14161417
case_body.extend([call + ';' for call in calls])
1417-
arguments_indices = ret['arguments']
1418-
arguments = [option['arguments'][argi]
1419-
for argi in arguments_indices]
1420-
if len(arguments_indices) == 1:
1421-
arg = arguments[0]
1422-
case_body.append("return {};".format(arg['name']))
1423-
else:
1424-
types = [to_return_type(arg, option)['type']
1425-
for arg in arguments]
1426-
# TODO: check for move semantics...
1427-
names = [arg['name'] for arg in arguments]
1428-
case_body.append(CodeTemplate("return std::tuple<${types}>(${names});").substitute(
1429-
types=types, names=names))
1418+
# return handled later
14301419
elif ret['kind'] == 'type':
14311420
assert len(calls) == 1
14321421
call = calls[0]
@@ -1444,7 +1433,24 @@ def emit_body(env, option, scalar_type_cases):
14441433
raise Exception("NYI - return handling")
14451434

14461435
cases.append(LEGACY_TH_DEFINITION_CASE.substitute(case_env, case_body=case_body))
1447-
body.append(LEGACY_TH_DEFINITION_SWITCH_STATEMENT.substitute(env, cases=cases, switch_prologue=switch_prologue))
1436+
switch_epilogue = ''
1437+
if ret['kind'] == 'arguments':
1438+
arguments_indices = ret['arguments']
1439+
arguments = [option['arguments'][argi]
1440+
for argi in arguments_indices]
1441+
if len(arguments_indices) == 1:
1442+
arg = arguments[0]
1443+
switch_epilogue = "return {};".format(arg['name'])
1444+
else:
1445+
types = [to_return_type(arg, option)['type']
1446+
for arg in arguments]
1447+
# TODO: check for move semantics...
1448+
names = [arg['name'] for arg in arguments]
1449+
switch_epilogue = CodeTemplate("return std::tuple<${types}>(${names});").substitute(
1450+
types=types, names=names)
1451+
body.append(LEGACY_TH_DEFINITION_SWITCH_STATEMENT.substitute(env, cases=cases,
1452+
switch_prologue=switch_prologue,
1453+
switch_epilogue=switch_epilogue))
14481454
return body
14491455

14501456
def process_legacy_th_option(option):

aten/src/ATen/native/DistributionTemplates.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -124,7 +124,8 @@ at::Tensor& random_from_to_impl(at::Tensor& self, int64_t from, c10::optional<in
124124
int64_t to_inc = 0;
125125
if (isFloatingType(iter.dtype())) {
126126
AT_DISPATCH_FLOATING_TYPES_AND2(at::ScalarType::Half, at::ScalarType::BFloat16, self.scalar_type(), "random_from_to_range_calc", [&] {
127-
to_inc = std::numeric_limits<scalar_t>::max() > std::numeric_limits<int64_t>::max() ? std::numeric_limits<int64_t>::max() : static_cast<int64_t>(std::numeric_limits<scalar_t>::max());
127+
constexpr int64_t scalar_t_max = static_cast<int64_t>(1) << std::numeric_limits<scalar_t>::digits;
128+
to_inc = scalar_t_max > std::numeric_limits<int64_t>::max() ? std::numeric_limits<int64_t>::max() : static_cast<int64_t>(scalar_t_max);
128129
from = update_from<scalar_t>(from);
129130
TORCH_CHECK(from < to_inc, "random_ expects 'from' casted to dtype to be less than or equal to 'to_inc' casted to dtype, but got from=", from, " > to_inc=", to_inc);
130131
});

aten/src/ATen/native/Pow.cpp

Lines changed: 21 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -22,9 +22,10 @@ Tensor& pow_out(Tensor& result, const Tensor& base, Scalar exp) {
2222
TORCH_CHECK(!(isIntegralType(base.scalar_type(), true) &&
2323
exp.isIntegral(true) && exp.toLong() < 0),
2424
"Integers to negative integer powers are not allowed.");
25-
if (exp.toDouble() == 0.0) {
25+
// Avoid runtime error when typecasting
26+
if (!exp.isComplex() && (exp.toDouble() == 0.0)) {
2627
result.resize_as_(base).fill_(1);
27-
} else if (exp.toDouble() == 1.0) {
28+
} else if (!exp.isComplex() && (exp.toDouble() == 1.0)) {
2829
result.resize_as_(base).copy_(base);
2930
} else {
3031
auto iter = TensorIterator::unary_op(result, base,
@@ -52,12 +53,28 @@ Tensor& pow_(Tensor& base, Scalar alpha) {
5253
}
5354

5455
Tensor pow(const Tensor& base, const Tensor& exp) {
55-
Tensor result = at::empty({0}, base.options());
56+
// If the exponent is complex, the result needs to be complex
57+
// we can't rely on result_type because it will break current
58+
// handling
59+
// TODO: change it to use type promotion after #37098 is merged
60+
ScalarType dtype = (exp.is_complex() ? exp.scalar_type() : base.scalar_type());
61+
Tensor result = at::empty({0}, base.options().dtype(dtype));
5662
return native::pow_out(result, base, exp);
5763
}
5864

5965
Tensor pow(const Tensor& base, Scalar exp) {
60-
Tensor result = at::empty_like(base, MemoryFormat::Preserve);
66+
// If the exponent is complex, the result needs to be complex
67+
// we can't rely on result_type because it will break current
68+
// handling for other datatypes
69+
// TODO: change it to use type promotion after #37098 is merged
70+
ScalarType dtype = (exp.isComplex() ? exp.type() : base.scalar_type());
71+
Tensor result = at::empty({0}, base.options().dtype(dtype));
72+
if (exp.isComplex()) {
73+
// The type checking logic in unary_op TensorIterator does not allow
74+
// a float tensor to output to a complex tensor, but binary ops allow it
75+
// so we create a tensor for the exponent to avoid using this iterator until its fixed
76+
return native::pow_out(result, base, c10::scalar_to_tensor(exp, base.device()));
77+
}
6178
return native::pow_out(result, base, exp);
6279
}
6380

0 commit comments

Comments
 (0)