Commit 69bfabb

Update on "[wip] Refactor foreach unary ops tests to use OpInfo"
Differential Revision: [D25673712](https://our.internmc.facebook.com/intern/diff/D25673712)

-----

- Updated foreach unary ops tests to use OpInfo

[ghstack-poisoned]
2 parents b938132 + 4d48a34 commit 69bfabb

33 files changed: 761 additions & 238 deletions

.github/workflows/lint.yml

Lines changed: 1 addition & 4 deletions
@@ -218,17 +218,14 @@ jobs:
     runs-on: ubuntu-18.04
     steps:
       - name: Setup Python
-        uses: actions/setup-python@v1
+        uses: actions/setup-python@v2
         with:
           python-version: 3.8
           architecture: x64
       - name: Fetch PyTorch
         uses: actions/checkout@v2
         with:
           ref: ${{ github.event.pull_request.head.sha }}
-      - name: Get HEAD commit SHA
-        run: echo ::set-output name=commit-sha::$(git rev-parse HEAD)
-        id: get-commit-sha
       - name: Install dependencies
         run: |
           set -eux

.github/workflows/test_tools.yml

Lines changed: 6 additions & 1 deletion
@@ -19,7 +19,12 @@ jobs:
         uses: actions/checkout@v2
         with:
           ref: ${{ github.event.pull_request.head.sha }}
+          fetch-depth: 0 # deep clone, to allow us to use git log
       - name: Install dependencies
-        run: pip install -r requirements.txt
+        # boto3 version copied from .circleci/docker/common/install_conda.sh
+        run: |
+          set -eux
+          pip install -r requirements.txt
+          pip install boto3==1.16.34
       - name: Run tests
         run: python -m unittest discover -vs tools/test -p 'test_*.py'

.gitignore

Lines changed: 1 addition & 1 deletion
@@ -14,7 +14,7 @@ coverage.xml
 .gradle
 .hypothesis
 .mypy_cache
-.pytorch-test-times
+**/.pytorch-test-times
 */*.pyc
 */*.so*
 */**/__pycache__

CMakeLists.txt

Lines changed: 13 additions & 0 deletions
@@ -295,6 +295,19 @@ option(HAVE_SOVERSION "Whether to add SOVERSION to the shared objects" OFF)
 cmake_dependent_option(
     USE_DEPLOY "Build embedded torch::deploy interpreter" OFF
     "BUILD_PYTHON" OFF)
+cmake_dependent_option(USE_CCACHE "Attempt using CCache to wrap the compilation" ON "UNIX" OFF)
+
+if(USE_CCACHE)
+  find_program(CCACHE_PROGRAM ccache)
+  if(CCACHE_PROGRAM)
+    set(CMAKE_C_COMPILER_LAUNCHER "${CCACHE_PROGRAM}")
+    set(CMAKE_CXX_COMPILER_LAUNCHER "${CCACHE_PROGRAM}")
+    set(CMAKE_CUDA_COMPILER_LAUNCHER "${CCACHE_PROGRAM}")
+  else()
+    message(STATUS "Could not find ccache. Consider installing ccache to speed up compilation.")
+  endif()
+endif()
+
 # Since TensorPipe does not support Windows, set it to OFF when WIN32 detected
 # On Windows platform, if user does not install libuv in build conda env and
 # does not set libuv_ROOT environment variable. Set USE_DISTRIBUTED to OFF.

aten/src/ATen/core/aten_interned_strings.h

Lines changed: 2 additions & 0 deletions
@@ -276,6 +276,8 @@ _(aten, cudnn_convolution_transpose_backward) \
 _(aten, cudnn_convolution_transpose_backward_bias) \
 _(aten, cudnn_convolution_transpose_backward_input) \
 _(aten, cudnn_convolution_transpose_backward_weight) \
+_(aten, cudnn_convolution_relu) \
+_(aten, cudnn_convolution_add_relu) \
 _(aten, cudnn_grid_sampler) \
 _(aten, cudnn_grid_sampler_backward) \
 _(aten, cudnn_is_acceptable) \

aten/src/ATen/cudnn/Descriptors.h

Lines changed: 17 additions & 0 deletions
@@ -301,6 +301,23 @@ struct TORCH_CUDA_CPP_API CTCLossDescriptor
 #endif
 };

+struct TORCH_CUDA_CPP_API ActivationDescriptor
+    : public Descriptor<
+          cudnnActivationStruct,
+          &cudnnCreateActivationDescriptor,
+          &cudnnDestroyActivationDescriptor> {
+  void set(cudnnActivationMode_t mode) {
+    AT_ASSERT(
+        mode == CUDNN_ACTIVATION_RELU,
+        "TODO: support more cuDNN activation modes");
+    AT_CUDNN_CHECK(cudnnSetActivationDescriptor(
+        mut_desc(),
+        mode,
+        cudnnNanPropagation_t::CUDNN_NOT_PROPAGATE_NAN,
+        std::numeric_limits<double>::max()));
+  }
+};
+
 union Constant
 {
   float f;
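The new ActivationDescriptor follows the RAII Descriptor pattern already used in this header: the base class creates and destroys the underlying cuDNN object, and set() configures it (currently only for ReLU). A minimal usage sketch, an illustration rather than code from this commit:

```cpp
#include <ATen/cudnn/Descriptors.h>

// Hypothetical caller; the fused-convolution code that would consume the raw
// descriptor (e.g. cudnnConvolutionBiasActivationForward) is assumed, not shown here.
void configure_relu_activation() {
  at::native::ActivationDescriptor act;
  act.set(CUDNN_ACTIVATION_RELU);  // anything other than RELU trips the AT_ASSERT above
  cudnnActivationDescriptor_t raw = act.desc();  // raw handle for the cuDNN call
  (void)raw;
}
```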

aten/src/ATen/native/BatchLinearAlgebra.cpp

Lines changed: 3 additions & 3 deletions
@@ -2461,7 +2461,7 @@ struct LapackLstsqHelper {
   }
   self_type& set_ldb(int ldb) { this->ldb = ldb; return *this; }
   self_type& set_work() {
-    lwork = static_cast<int>(real_impl<scalar_t, value_t>(work_opt));
+    lwork = std::max<int>(1, real_impl<scalar_t, value_t>(work_opt));
     work = at::empty({lwork}, scalar_type);
     work_ptr = work.data_ptr<scalar_t>();
     return *this;
@@ -2507,7 +2507,7 @@ struct LapackLstsqHelper {
         break;
       // case LapackLstsqDriverType::Gelsd:
       default:
-        rwork_len = static_cast<int64_t>(rwork_opt);
+        rwork_len = std::max<int64_t>(1, rwork_opt);
     }
     rwork = at::empty({rwork_len}, c10::toValueType(scalar_type));
     rwork_ptr = rwork.data_ptr<value_t>();
@@ -2530,7 +2530,7 @@
   self_type& set_iwork() {
     // handle `iwork` workspace array (relevant only for `?gelsd`)
     if (LapackLstsqDriverType::Gelsd == driver_type) {
-      iwork = at::empty({iwork_opt}, at::kInt);
+      iwork = at::empty({std::max<int>(1, iwork_opt)}, at::kInt);
       iwork_ptr = iwork.data_ptr<int>();
     }
     return *this;
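The common thread in these three changes is clamping LAPACK workspace sizes to at least one element: a workspace query can legitimately report 0 (for example, for empty inputs), but the array handed back to the driver must still be allocatable. A standalone sketch of the pattern, with illustrative names that are not from this file:

```cpp
#include <algorithm>
#include <cstdint>

// Round a queried optimal workspace size up to a whole element count and never
// return less than 1, mirroring the std::max<...>(1, ...) clamps above.
int64_t clamped_workspace_size(double queried_optimal_size) {
  return std::max<int64_t>(1, static_cast<int64_t>(queried_optimal_size));
}
```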

aten/src/ATen/native/cuda/BatchLinearAlgebra.cu

Lines changed: 9 additions & 10 deletions
@@ -2088,15 +2088,15 @@ AT_ERROR("symeig: MAGMA library not found in "

   scalar_t* work;
   magma_int_t* iwork;
-  lwork = magma_int_cast(real_impl<scalar_t, value_t>(wkopt), "work_size");
-  liwork = magma_int_cast(iwkopt, "iwork_size");
+  lwork = magma_int_cast(std::max<int64_t>(1, real_impl<scalar_t, value_t>(wkopt)), "work_size");
+  liwork = magma_int_cast(std::max<int64_t>(1, iwkopt), "iwork_size");
   ALLOCATE_ARRAY(work, scalar_t, lwork);
   ALLOCATE_ARRAY(iwork, magma_int_t, liwork);

   value_t* rwork = nullptr;
   c10::Storage storage_rwork;
   if (isComplexType(at::typeMetaToScalarType(self.dtype()))) {
-    lrwork = magma_int_cast(rwkopt, "rwork_size");
+    lrwork = magma_int_cast(std::max<int64_t>(1, rwkopt), "rwork_size");
     storage_rwork = pin_memory<value_t>(lrwork);
     rwork = static_cast<value_t*>(storage_rwork.data());
   }
@@ -2288,9 +2288,9 @@ AT_ERROR("svd: MAGMA library not found in "
   value_t* rwork = nullptr;

   magma_int_t* iwork;
-  ALLOCATE_ARRAY(iwork, magma_int_t, 8 * mn);
+  ALLOCATE_ARRAY(iwork, magma_int_t, std::max<magma_int_t>(1, 8 * mn));
   if (isComplexType(at::typeMetaToScalarType(self.dtype()))) {
-    auto lrwork = computeLRWorkDim(jobchar, m, n);
+    auto lrwork = std::max<int64_t>(1, computeLRWorkDim(jobchar, m, n));
     storage_rwork = pin_memory<value_t>(lrwork);
     rwork = static_cast<value_t*>(storage_rwork.data());
   }
@@ -2303,7 +2303,7 @@ AT_ERROR("svd: MAGMA library not found in "
   magma_int_t lwork = -1;
   scalar_t wkopt;
   magmaSvd<scalar_t, value_t>(jobz, m, n, self_data, lda, S_data, U_data, lda, VT_data, ldvt, &wkopt, lwork, rwork, iwork, &info);
-  lwork = magma_int_cast(real_impl<scalar_t, value_t>(wkopt), "work_size");
+  lwork = magma_int_cast(std::max<int64_t>(1, real_impl<scalar_t, value_t>(wkopt)), "work_size");
   scalar_t* work;
   ALLOCATE_ARRAY(work, scalar_t, lwork);

@@ -2475,9 +2475,9 @@ Tensor _lu_solve_helper_cuda(const Tensor& self, const Tensor& LU_data, const Te
   TORCH_CHECK(info == 0, "MAGMA lu_solve : invalid argument: ", -info);
   return self_working_copy;
 }
-// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

 // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ lstsq ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
 std::tuple<Tensor, Tensor, Tensor> _lstsq_helper_cuda(
     const Tensor& a, const Tensor& b, double cond, c10::optional<std::string> driver_name) {
 #ifndef USE_MAGMA
@@ -2492,8 +2492,8 @@ AT_ERROR("torch.linalg.lstsq: MAGMA library not found in "
   auto ldda = std::max<magma_int_t>(1, m);
   auto lddb = std::max<magma_int_t>(1, std::max(m, n));
   auto nb = magmaGeqrfOptimalBlocksize<scalar_t>(m, n);
-  auto lwork = (m - n + nb) * (nrhs + nb) + nrhs * nb;
-  Tensor hwork = at::empty({static_cast<int64_t>(lwork)}, a.scalar_type());
+  magma_int_t lwork = magma_int_cast(std::max<int64_t>(1, (m - n + nb) * (nrhs + nb) + nrhs * nb), "work_size");
+  Tensor hwork = at::empty({lwork}, a.scalar_type());
   auto* hwork_ptr = hwork.data_ptr<scalar_t>();
   magma_int_t info;

@@ -2512,7 +2512,6 @@ AT_ERROR("torch.linalg.lstsq: MAGMA library not found in "
   return std::make_tuple(b, rank, singular_values);
 #endif
 }
-// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

 }} // namespace at::native
aten/src/ATen/native/cudnn/ConvPlaceholders.cpp

Lines changed: 25 additions & 1 deletion
@@ -1,5 +1,5 @@
-#include <ATen/cuda/CUDAConfig.h> // for the definition of AT_CUDNN_ENABLED
 #include <ATen/ATen.h>
+#include <ATen/cuda/CUDAConfig.h> // for the definition of AT_CUDNN_ENABLED
 #include <ATen/native/ConvUtils.h>

 namespace at { namespace native {
@@ -93,6 +93,30 @@ void raw_cudnn_convolution_backward_weight_out(
   AT_ERROR("raw_cudnn_convolution_backward_weight_out: ATen not compiled with cuDNN support");
 }

+Tensor cudnn_convolution_relu(
+    const Tensor& input_t,
+    const Tensor& weight_t,
+    const c10::optional<Tensor>& bias_t,
+    IntArrayRef stride,
+    IntArrayRef padding,
+    IntArrayRef dilation,
+    int64_t groups) {
+  AT_ERROR("cudnn_convolution_relu: ATen not compiled with cuDNN support");
+}
+
+Tensor cudnn_convolution_add_relu(
+    const Tensor& input_t,
+    const Tensor& weight_t,
+    const Tensor& z_t,
+    const c10::optional<Scalar>& alpha,
+    const c10::optional<Tensor>& bias_t,
+    IntArrayRef stride,
+    IntArrayRef padding,
+    IntArrayRef dilation,
+    int64_t groups) {
+  AT_ERROR("cudnn_convolution_add_relu: ATen not compiled with cuDNN support");
+}
+
 #endif // AT_CUDNN_ENABLED

 // ---------------------------------------------------------------------

aten/src/ATen/native/cudnn/ConvShared.cpp

Lines changed: 83 additions & 0 deletions
@@ -491,6 +491,89 @@ Tensor cudnn_convolution_transpose_backward_weight(
       padding, stride, dilation, groups, benchmark, deterministic, allow_tf32);
 }

+Tensor cudnn_convolution_relu(
+    const Tensor& input_t,
+    const Tensor& weight_t,
+    const c10::optional<Tensor>& bias_t,
+    IntArrayRef stride,
+    IntArrayRef padding,
+    IntArrayRef dilation,
+    int64_t groups) {
+  // FuseFrozenConvAddRelu performs some tensor shape checking
+  auto output_t = at::native::empty_cuda(
+      conv_output_size(
+          input_t.sizes(), weight_t.sizes(), padding, stride, dilation),
+      /*dtype=*/input_t.scalar_type(),
+      /*layout=*/c10::nullopt,
+      /*device=*/kCUDA,
+      /*pin_memory=*/c10::nullopt,
+      /*memory_format=*/at::MemoryFormat::Contiguous);
+  if (output_t.numel() == 0) {
+    return output_t;
+  }
+
+  raw_cudnn_convolution_add_relu_out(
+      output_t,
+      input_t,
+      weight_t,
+      output_t, // use output_t as z to satisfy CUDNN API
+      0, // alpha
+      bias_t.has_value() ? bias_t.value()
+                         : zeros({output_t.size(1)}, output_t.options()),
+      stride,
+      padding,
+      dilation,
+      groups,
+      false, // benchmark
+      false, // deterministic
+      true // allow_tf32
+  );
+
+  return output_t;
+}
+
+Tensor cudnn_convolution_add_relu(
+    const Tensor& input_t,
+    const Tensor& weight_t,
+    const Tensor& z_t,
+    const c10::optional<Scalar>& alpha,
+    const c10::optional<Tensor>& bias_t,
+    IntArrayRef stride,
+    IntArrayRef padding,
+    IntArrayRef dilation,
+    int64_t groups) {
+  // FuseFrozenConvAddRelu performs some tensor shape checking
+  auto output_t = at::native::empty_cuda(
+      conv_output_size(
+          input_t.sizes(), weight_t.sizes(), padding, stride, dilation),
+      /*dtype=*/input_t.scalar_type(),
+      /*layout=*/c10::nullopt,
+      /*device=*/kCUDA,
+      /*pin_memory=*/c10::nullopt,
+      /*memory_format=*/at::MemoryFormat::Contiguous);
+  if (output_t.numel() == 0) {
+    return output_t;
+  }
+
+  raw_cudnn_convolution_add_relu_out(
+      output_t,
+      input_t,
+      weight_t,
+      z_t,
+      alpha.has_value() ? alpha.value().to<float>() : 1.0,
+      bias_t.has_value() ? bias_t.value()
+                         : zeros({output_t.size(1)}, output_t.options()),
+      stride,
+      padding,
+      dilation,
+      groups,
+      false, // benchmark
+      false, // deterministic
+      true // allow_tf32
+  );
+
+  return output_t;
+}
 }}

 #endif // AT_CUDNN_ENABLED
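Both entry points allocate a contiguous CUDA output with the usual convolution output shape, return early when the output is empty, and otherwise dispatch to raw_cudnn_convolution_add_relu_out; cudnn_convolution_relu reuses the output tensor as z with alpha = 0 so the add term contributes nothing. A hedged call-site sketch, assuming these functions are also registered with the dispatcher under the same names (the aten_interned_strings.h change above points that way, but the registration itself is not shown in this excerpt):

```cpp
#include <ATen/ATen.h>

// Sketch only: fuse convolution + bias + ReLU into one cuDNN call on CUDA tensors.
at::Tensor fused_conv_relu_example() {
  auto input  = at::randn({1, 3, 8, 8}, at::kCUDA);
  auto weight = at::randn({4, 3, 3, 3}, at::kCUDA);
  auto bias   = at::randn({4}, at::kCUDA);
  // arguments: input, weight, bias, stride, padding, dilation, groups
  return at::cudnn_convolution_relu(input, weight, bias, {1, 1}, {1, 1}, {1, 1}, 1);
}
```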
