Conversation
7e0530e to
4e6db58
Compare
|
@pytorchbot retest this please |
1 similar comment
|
@pytorchbot retest this please |
d5afde0 to
767ac6f
Compare
facebook-github-bot
left a comment
There was a problem hiding this comment.
SsnL has imported this pull request. If you are a Facebook employee, you can view this diff on Phabricator.
facebook-github-bot
left a comment
There was a problem hiding this comment.
SsnL has imported this pull request. If you are a Facebook employee, you can view this diff on Phabricator.
| C = random.randint(1, 8) | ||
| IH = random.randint(1, 8) | ||
| IW = random.randint(1, 8) | ||
| N = random.randint(2, 8) |
This comment was marked as off-topic.
This comment was marked as off-topic.
Sorry, something went wrong.
| // (c, iy_nw, ix_nw) * nw + (c, iy_ne, ix_ne) * ne | ||
| // + (c, iy_sw, ix_sw) * sw + (c, iy_se, ix_se) * se | ||
| *out_ptr_NCHW = static_cast<scalar_t>(0); | ||
| if (padding_mode != GridSamplerPadding::Zeros || within_bounds_2d(iy_nw, ix_nw, inp_H, inp_W)) { |
This comment was marked as off-topic.
This comment was marked as off-topic.
Sorry, something went wrong.
| CUDA_KERNEL_LOOP(index, nthreads) { | ||
| const int w = index % out_W; | ||
| const int h = (index / out_W) % out_H; | ||
| const int n = index / (out_H * out_W); |
This comment was marked as off-topic.
This comment was marked as off-topic.
Sorry, something went wrong.
This comment was marked as off-topic.
This comment was marked as off-topic.
Sorry, something went wrong.
This comment was marked as off-topic.
This comment was marked as off-topic.
Sorry, something went wrong.
This comment was marked as off-topic.
This comment was marked as off-topic.
Sorry, something went wrong.
| giy = giy * (inp_H - 1) / 2; | ||
|
|
||
| // assuming grad_grid is contiguous | ||
| gGrid_ptr_NHW[0] = gix; |
This comment was marked as off-topic.
This comment was marked as off-topic.
Sorry, something went wrong.
| GridSamplerInterpolation interpolation_mode, | ||
| GridSamplerPadding padding_mode) { | ||
| auto grad_input = at::zeros_like(input); | ||
| auto grad_grid = at::empty_like(grid); |
This comment was marked as off-topic.
This comment was marked as off-topic.
Sorry, something went wrong.
|
|
||
| namespace { | ||
| static inline int64_t clip_coordinates(int64_t in, int64_t clip_limit) { | ||
| return std::min(clip_limit - 1, std::max(in, static_cast<int64_t>(0))); |
This comment was marked as off-topic.
This comment was marked as off-topic.
Sorry, something went wrong.
| int64_t sH, int64_t sW, int64_t H, int64_t W, | ||
| scalar_t delta) { | ||
| if (h >= 0 && h < H && w >= 0 && w < W) { | ||
| data[h * sH + w * sW] += delta; |
This comment was marked as off-topic.
This comment was marked as off-topic.
Sorry, something went wrong.
This comment was marked as off-topic.
This comment was marked as off-topic.
Sorry, something went wrong.
This comment was marked as off-topic.
This comment was marked as off-topic.
Sorry, something went wrong.
This comment was marked as off-topic.
This comment was marked as off-topic.
Sorry, something went wrong.
| GridSamplerPadding padding_mode) { | ||
| int64_t N = input.size(0); | ||
| int64_t C = input.size(1); | ||
| int64_t inp_H = input.size(2); |
This comment was marked as off-topic.
This comment was marked as off-topic.
Sorry, something went wrong.
This comment was marked as off-topic.
This comment was marked as off-topic.
Sorry, something went wrong.
This comment was marked as off-topic.
This comment was marked as off-topic.
Sorry, something went wrong.
| for (int64_t h = 0; h < out_H; ++h) { | ||
| for (int64_t w = 0; w < out_W; ++w) { | ||
| // get the corresponding input x, y, z co-ordinates from grid | ||
| scalar_t *grid_ptr_NDHW = grid_ptr_N + d * grid_sD + h * grid_sH + w * grid_sW; |
This comment was marked as off-topic.
This comment was marked as off-topic.
Sorry, something went wrong.
| namespace { | ||
| static __forceinline__ __device__ | ||
| int clip_coordinates(int in, int clip_limit) { | ||
| return ::min(clip_limit - 1, ::max(in, static_cast<int>(0))); |
This comment was marked as off-topic.
This comment was marked as off-topic.
Sorry, something went wrong.
| void safe_add_2d(scalar_t *data, int h, int w, | ||
| int sH, int sW, int H, int W, | ||
| scalar_t delta) { | ||
| if (h >= 0 && h < H && w >= 0 && w < W) { |
This comment was marked as off-topic.
This comment was marked as off-topic.
Sorry, something went wrong.
This comment was marked as off-topic.
This comment was marked as off-topic.
Sorry, something went wrong.
| TensorInfo<scalar_t, int> grad_grid, // initialized to empty | ||
| const GridSamplerPadding padding_mode) { | ||
|
|
||
| int C = input.sizes[1]; |
This comment was marked as off-topic.
This comment was marked as off-topic.
Sorry, something went wrong.
| scalar_t se = (ix - ix_nw) * (iy - iy_nw); | ||
|
|
||
| // calculate bilinear weighted pixel value and set output pixel | ||
| if (padding_mode == GridSamplerPadding::Border) { |
This comment was marked as off-topic.
This comment was marked as off-topic.
Sorry, something went wrong.
This comment was marked as off-topic.
This comment was marked as off-topic.
Sorry, something went wrong.
This comment was marked as off-topic.
This comment was marked as off-topic.
Sorry, something went wrong.
| gix = gix * (inp_W - 1.f) / 2; | ||
| giy = giy * (inp_H - 1.f) / 2; | ||
|
|
||
| // assuming grad_grid is contiguous |
This comment was marked as off-topic.
This comment was marked as off-topic.
Sorry, something went wrong.
This comment was marked as off-topic.
This comment was marked as off-topic.
Sorry, something went wrong.
This comment was marked as off-topic.
This comment was marked as off-topic.
Sorry, something went wrong.
| test_cpu_against_cuda(N, C, H, W, padding_mode) | ||
|
|
||
| # test channels >1024, which doesn't work on cudnn 7102 and further | ||
| N, C, H, W = 1, 1025, 3, 3 |
This comment was marked as off-topic.
This comment was marked as off-topic.
Sorry, something went wrong.
This comment was marked as off-topic.
This comment was marked as off-topic.
Sorry, something went wrong.
ezyang
left a comment
There was a problem hiding this comment.
Please fix the tabs v. spaces
|
It could be interesting to consider adding the timings to our benchmark library. I'm happy to review or advise etc. |
|
@cpuhrsch Good idea. Will try to code up a spatial transformer for benchmark. :) |
facebook-github-bot
left a comment
There was a problem hiding this comment.
SsnL has imported this pull request. If you are a Facebook employee, you can view this diff on Phabricator.
|
@ezyang does this look better? :) |
Summary: Spatial version benchmark | | CPUFloat THNN | CPUFloat ATen | CPUDouble THNN | CPUDouble ATen | CUDAHalf THNN | CUDAHalf ATen | CUDAFloat THNN | CUDAFloat ATen | CUDADouble THNN | CUDADouble ATen | |---------------------------|---------------|---------------|----------------|----------------|---------------|---------------|----------------|----------------|-----------------|-----------------| | [1024x1x28x28] zero pad | 2.19281888s | 0.21280479s | 2.52922535s | 0.23944831s | 0.17494774s | 0.06242800s | 0.31270599s | 0.03706479s | 0.40542483s | 0.07391024s | | [1024x1x28x28] border pad | 3.04329610s | 0.24705672s | 2.29205394s | 0.22336411s | 0.17980361s | 0.06212497s | 0.31415701s | 0.03847790s | 0.43020391s | 0.07540464s | | [32x3x244x244] zero pad | 18.29301333s | 2.18566656s | 19.01662397s | 3.51552224s | 1.72487235s | 0.28933954s | 2.02466702s | 0.18178749s | 2.63671613s | 0.41391206s | | [32x3x244x244] border pad | 18.72205329s | 2.02600884s | 20.13017297s | 3.25979590s | 1.96455693s | 0.33070564s | 2.18666625s | 0.19546938s | 2.91268897s | 0.38465047s | For #9702 basics: + grid tensors have dimensions `[N, H, W, 2]` (or `[N, D, H, W, 3]` for 3d). + input/output tensors have dimensions `[N, C, H, W]` (or `[N, C, D, H ,W]` for 3d) + grid sampler maps `input([N, C, inp_H, inp_W]), grid([N, H, W, 2])` to `output([N, C, H, W])` (3d case is similar). variable naming: + `tensor_sH` means the stride of `tensor` at the dimension of `H`. + `tensor_ptr_NCH` is a data pointer that always points to the beginning of the `tensor[n][c][h]` slice in the loop. Pull Request resolved: pytorch/pytorch#9961 Differential Revision: D9057175 Pulled By: SsnL fbshipit-source-id: 9ed8f1dc376ed10229f047fdcf3c90dbd250bee6
Summary: Spatial version benchmark | | CPUFloat THNN | CPUFloat ATen | CPUDouble THNN | CPUDouble ATen | CUDAHalf THNN | CUDAHalf ATen | CUDAFloat THNN | CUDAFloat ATen | CUDADouble THNN | CUDADouble ATen | |---------------------------|---------------|---------------|----------------|----------------|---------------|---------------|----------------|----------------|-----------------|-----------------| | [1024x1x28x28] zero pad | 2.19281888s | 0.21280479s | 2.52922535s | 0.23944831s | 0.17494774s | 0.06242800s | 0.31270599s | 0.03706479s | 0.40542483s | 0.07391024s | | [1024x1x28x28] border pad | 3.04329610s | 0.24705672s | 2.29205394s | 0.22336411s | 0.17980361s | 0.06212497s | 0.31415701s | 0.03847790s | 0.43020391s | 0.07540464s | | [32x3x244x244] zero pad | 18.29301333s | 2.18566656s | 19.01662397s | 3.51552224s | 1.72487235s | 0.28933954s | 2.02466702s | 0.18178749s | 2.63671613s | 0.41391206s | | [32x3x244x244] border pad | 18.72205329s | 2.02600884s | 20.13017297s | 3.25979590s | 1.96455693s | 0.33070564s | 2.18666625s | 0.19546938s | 2.91268897s | 0.38465047s | For pytorch#9702 basics: + grid tensors have dimensions `[N, H, W, 2]` (or `[N, D, H, W, 3]` for 3d). + input/output tensors have dimensions `[N, C, H, W]` (or `[N, C, D, H ,W]` for 3d) + grid sampler maps `input([N, C, inp_H, inp_W]), grid([N, H, W, 2])` to `output([N, C, H, W])` (3d case is similar). variable naming: + `tensor_sH` means the stride of `tensor` at the dimension of `H`. + `tensor_ptr_NCH` is a data pointer that always points to the beginning of the `tensor[n][c][h]` slice in the loop. Pull Request resolved: pytorch#9961 Differential Revision: D9057175 Pulled By: SsnL fbshipit-source-id: 9ed8f1dc376ed10229f047fdcf3c90dbd250bee6
Spatial version benchmark
For #9702
basics:
- grid tensors have dimensions `[N, H, W, 2]` (or `[N, D, H, W, 3]` for 3d).
- input/output tensors have dimensions `[N, C, H, W]` (or `[N, C, D, H, W]` for 3d).
- grid sampler maps `input([N, C, inp_H, inp_W]), grid([N, H, W, 2])` to `output([N, C, H, W])` (3d case is similar).

variable naming:
- `tensor_sH` means the stride of `tensor` at the dimension of `H`.
- `tensor_ptr_NCH` is a data pointer that always points to the beginning of the `tensor[n][c][h]` slice in the loop.