Commit b34e55a

Update on "Move default or no default logic into native.argument"
Previously, it was done at a use site, but that meant other use sites don't get the right logic. Pushing it in makes sure everyone gets it. I also fixed one case of confusion where defn() was used to define a decl(). If you want to define a declaration with no defaults, say no_default().decl() which is more direct and will give us code reviewers a clue if you should have pushed this logic in. Signed-off-by: Edward Z. Yang <ezyang@fb.com> Differential Revision: [D25595407](https://our.internmc.facebook.com/intern/diff/D25595407) [ghstack-poisoned]
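To make the pattern in the message concrete, here is a minimal Python sketch of the decl()/defn()/no_default() idea. The class and field names below are hypothetical illustrations of the pattern, not the actual tools/codegen API.

```python
# Hypothetical sketch of the decl()/defn()/no_default() pattern; the names
# below are illustrative, not the real tools/codegen classes.
from dataclasses import dataclass, replace
from typing import Optional, Tuple


@dataclass(frozen=True)
class Argument:
    type: str
    name: str
    default: Optional[str] = None


@dataclass(frozen=True)
class Signature:
    name: str
    args: Tuple[Argument, ...]

    def no_default(self) -> "Signature":
        # Strip all defaults. Saying no_default().decl() is the direct way to
        # ask for a declaration without defaults, and it flags that choice to
        # code reviewers.
        return replace(
            self, args=tuple(replace(a, default=None) for a in self.args)
        )

    def decl(self) -> str:
        # A declaration renders whatever defaults the arguments carry.
        parts = [
            f"{a.type} {a.name}" + (f"={a.default}" if a.default else "")
            for a in self.args
        ]
        return f"{self.name}({', '.join(parts)})"

    def defn(self) -> str:
        # A definition never repeats defaults in C++, so it delegates to
        # no_default() instead of re-implementing the stripping logic.
        return self.no_default().decl()


sig = Signature(
    "clamp",
    (
        Argument("const Tensor&", "self"),
        Argument("c10::optional<Scalar>", "min", "c10::nullopt"),
    ),
)
print(sig.decl())               # declaration, defaults included
print(sig.no_default().decl())  # declaration, defaults explicitly stripped
print(sig.defn())               # definition, never has defaults
```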
2 parents: 37efc05 + d42a6ba

503 files changed: 11952 additions & 4597 deletions


.circleci/cimodel/data/dimensions.py

Lines changed: 1 addition & 1 deletion
@@ -8,8 +8,8 @@
 ]
 
 ROCM_VERSIONS = [
-    "3.9",
     "3.10",
+    "4.0",
 ]
 
 ROCM_VERSION_LABELS = ["rocm" + v for v in ROCM_VERSIONS]

.circleci/config.yml

Lines changed: 104 additions & 104 deletions
(Large diff not rendered.)

BUILD.bazel

Lines changed: 0 additions & 1 deletion
@@ -373,7 +373,6 @@ filegroup(
 filegroup(
     name = "thc_srcs_cu",
     srcs = [
-        "aten/src/THC/THCBlas.cu.cc",
         "aten/src/THC/THCReduceApplyUtils.cu.cc",
         "aten/src/THC/THCSleep.cu.cc",
         "aten/src/THC/THCSortUtils.cu.cc",

CONTRIBUTING.md

Lines changed: 10 additions & 0 deletions
@@ -903,6 +903,16 @@ You'll need to install an appropriately configured flake8; see
 [Lint as you type](https://github.com/pytorch/pytorch/wiki/Lint-as-you-type)
 for documentation on how to do this.
 
+If you haven't set up the pre-commit hook and have already committed files and
+CI reports `flake8` errors, you can run the check locally in your PR branch with:
+
+```bash
+flake8 $(git diff --name-only $(git merge-base --fork-point master))
+```
+
+Fix the code so that no errors are reported when you re-run the check,
+and then commit the fix.
+
 ## Building PyTorch with ASAN
 
 [ASAN](https://github.com/google/sanitizers/wiki/AddressSanitizer) is very

android/gradle/android_tasks.gradle

Lines changed: 0 additions & 1 deletion
@@ -1,4 +1,3 @@
-
 import java.nio.file.Files
 import java.nio.file.Paths
 import java.io.FileOutputStream

android/pytorch_android/host/build.gradle

Lines changed: 0 additions & 1 deletion
@@ -38,4 +38,3 @@ dependencies {
 }
 
 apply from: rootProject.file('gradle/release.gradle')
-

android/settings.gradle

Lines changed: 0 additions & 1 deletion
@@ -4,4 +4,3 @@ project(':pytorch_android_torchvision').projectDir = file('pytorch_android_torch
 
 project(':pytorch_host').projectDir = file('pytorch_android/host')
 project(':test_app').projectDir = file('test_app/app')
-

aten/src/ATen/LegacyTHFunctionsCUDA.h

Lines changed: 0 additions & 1 deletion
@@ -75,7 +75,6 @@ Tensor & _thnn_log_sigmoid_backward_out(Tensor & grad_input, const Tensor & grad
 Tensor _thnn_log_sigmoid_backward(const Tensor & grad_output, const Tensor & self, const Tensor & buffer);
 Tensor & _thnn_rrelu_with_noise_forward_out(Tensor & output, const Tensor & self, const Tensor & noise, Scalar lower, Scalar upper, bool training, c10::optional<at::Generator> generator);
 Tensor _thnn_rrelu_with_noise_forward(const Tensor & self, const Tensor & noise, Scalar lower, Scalar upper, bool training, c10::optional<at::Generator> generator);
-Tensor & _thnn_rrelu_with_noise_backward_out(Tensor & grad_input, const Tensor & grad_output, const Tensor & self, const Tensor & noise, Scalar lower, Scalar upper, bool training);
 Tensor _thnn_rrelu_with_noise_backward(const Tensor & grad_output, const Tensor & self, const Tensor & noise, Scalar lower, Scalar upper, bool training);
 Tensor & _thnn_rrelu_with_noise_forward_(Tensor & self, const Tensor & noise, Scalar lower, Scalar upper, bool training, c10::optional<at::Generator> generator);
 std::tuple<Tensor &,Tensor &,Tensor &> _thnn_conv2d_forward_out(Tensor & output, Tensor & columns, Tensor & ones, const Tensor & self, const Tensor & weight, IntArrayRef kernel_size, const Tensor & bias, IntArrayRef stride, IntArrayRef padding);

aten/src/ATen/MemoryOverlap.cpp

Lines changed: 3 additions & 0 deletions
@@ -48,6 +48,9 @@ MemOverlapStatus get_overlap_status(TensorImpl* a, TensorImpl* b) {
   if (!a->is_contiguous() || !b->is_contiguous()) {
     return MemOverlapStatus::TOO_HARD;
   }
+  if (!a->has_storage() || !b->has_storage()) {
+    return MemOverlapStatus::NO;
+  }
   if (a->storage().data() == b->storage().data()) {
     const auto a_begin = static_cast<char*>(a->data());
     const auto a_end = a_begin + a->numel() * a->itemsize();
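The new has_storage() guard bails out early for tensors that have no storage at all; everything below it can then assume storage exists. For context, get_overlap_status() is what feeds PyTorch's partial-overlap errors on out= arguments. A small Python demonstration of that user-visible behavior, assuming current torch overlap checking (this shows the general check, not this specific fix):

```python
import torch

base = torch.arange(8.)
a = base[0:6]
b = base[2:8]  # shares storage with `a` and partially overlaps it

try:
    # Writing into memory that is simultaneously being read trips the
    # overlap check built on top of get_overlap_status().
    torch.add(a, 1, out=b)
except RuntimeError as err:
    print("partial overlap rejected:", err)
```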
aten/src/ATen/SparseTensorUtils.cpp (new file)

Lines changed: 113 additions & 0 deletions

@@ -0,0 +1,113 @@
#include <ATen/SparseTensorUtils.h>

#include <ATen/ATen.h>
#include <ATen/SparseTensorImpl.h>
#include <ATen/Parallel.h>

namespace at { namespace sparse {

// NOTE [ Flatten Sparse Indices ]
// This helper function flattens a sparse indices tensor (a Tensor) into a 1D
// indices tensor. E.g.,
//   input = [[2, 4, 0],
//            [3, 1, 10]]
//   full_size = [2, 12]
//   output = [ 2 * 12 + 3, 4 * 12 + 1, 0 * 12 + 10 ] = [27, 49, 10]
//
// In other words, assuming that each `indices[i, :]` is a valid index into a
// tensor `t` of shape `full_size`, this returns the corresponding indices into
// the flattened tensor `t.reshape( prod(full_size[:indices.size(0)]), -1 )`.
// If force_clone is true, the result will be forced to be a clone of self.
Tensor flatten_indices(const Tensor& indices, IntArrayRef full_size, bool force_clone /*= false*/) {
  int64_t sparse_dim = indices.size(0);
  if (sparse_dim == 1) {
    if (force_clone) {
      return indices.squeeze(0).clone(at::MemoryFormat::Contiguous);
    } else {
      return indices.squeeze(0);
    }
  } else {
    std::vector<int64_t> indices_mult_cpu_vec;
    // resize (not reserve) so the element writes below stay within bounds
    indices_mult_cpu_vec.resize(sparse_dim);
    int64_t mult = 1;
    for (int64_t i = sparse_dim - 1; i >= 0; i--) {
      indices_mult_cpu_vec[i] = mult;
      mult *= full_size[i];
    }
    auto indices_mult_cpu = at::from_blob(
        indices_mult_cpu_vec.data(),
        /*size=*/{sparse_dim, 1},
        indices.options().device(kCPU));
    // NB: must be blocking because this blob may be freed after this closure,
    // and non_blocking copy will see garbage.
    auto indices_mult = indices_mult_cpu.to(indices.device(), /*non_blocking=*/false);
    // Ideally we want matmul, but matmul is slow on CPU Long and not
    // implemented on CUDA Long. So mul is faster.
    return indices.mul(indices_mult).sum(0);
  }
}

// Flatten sparse tensor's indices from nD to 1D, similar to NOTE [ Flatten Sparse Indices ],
// except this one allows partial flatten: only flatten on specified dims. Note that
// the flattened indices might be uncoalesced if dims_to_flatten.size() < sparse_dim.
// Also, if the input indices are already coalesced, the flattened indices will be sorted.
//
// args:
//   indices: sparse tensor indices
//   sizes: sparse tensor sizes
//   dims_to_flatten: a list of dims to flatten
//
// Ex1:
//   indices = [[2, 4, 0],
//              [3, 1, 3]]
//   sizes = [2, 12]
//   dims_to_flatten = [0, 1]
//   new_indices = [ 2 * 12 + 3, 4 * 12 + 1, 0 * 12 + 3 ] = [27, 49, 3]
//
// Ex2:
//   dims_to_flatten = [1]
//   new_indices = [ 3, 1, 3 ]  # uncoalesced
Tensor flatten_indices_by_dims(const Tensor& indices, const IntArrayRef& sizes, const IntArrayRef& dims_to_flatten) {
  Tensor new_indices = at::zeros({indices.size(1)}, indices.options());
  for (auto d : dims_to_flatten) {
    new_indices.mul_(sizes[d]);
    new_indices.add_(indices.select(0, d));
  }
  return new_indices;
}

Tensor coo_to_csr(const int64_t* indices, int64_t dim, int64_t nnz) {
  /*
    Find the CSR representation for a row `indices` from the COO format
    Inputs:
      `indices` is the row pointer from COO indices
      `dim` is the row dimensionality
      `nnz` is the number of non-zeros

    Output:
      `csr` is a compressed row array in a CSR format
  */
  Tensor csr = at::zeros({dim + 1}, kLong);

  // TODO: eliminate this conditional when zero-size dims are supported correctly
  if (nnz > 0) {
    auto csr_accessor = csr.accessor<int64_t, 1>();
    // Convert the sparse matrix to CSR format
    at::parallel_for(0, nnz, 10000, [&](int64_t start, int64_t end) {
      int64_t h, hp0, hp1;
      for (auto i = start; i < end; i++) {
        hp0 = indices[i];
        hp1 = (i + 1 == nnz) ? dim : indices[i + 1];
        if (hp0 != hp1) {
          for (h = hp0; h < hp1; h++) {
            csr_accessor[h + 1] = i + 1;
          }
        }
      }
    });
  }
  return csr;
}

}} // namespace at::sparse
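For readers who want to experiment with the logic above outside of C++, here is a minimal Python sketch of the three helpers using plain torch ops. These re-implementations mirror the C++ code for illustration only and are not the exported ATen entry points.

```python
import torch


def flatten_indices(indices, full_size):
    # NOTE [ Flatten Sparse Indices ]: row-major linearization of nD indices.
    sparse_dim = indices.size(0)
    mult = [1] * sparse_dim
    for i in range(sparse_dim - 2, -1, -1):
        mult[i] = mult[i + 1] * full_size[i + 1]
    mult = torch.tensor(mult).unsqueeze(1)
    # mul + sum instead of matmul, matching the C++ comment about Long matmul.
    return (indices * mult).sum(0)


def flatten_indices_by_dims(indices, sizes, dims_to_flatten):
    # Partial flatten: fold only the requested dims, Horner-style.
    new_indices = torch.zeros(indices.size(1), dtype=indices.dtype)
    for d in dims_to_flatten:
        new_indices = new_indices * sizes[d] + indices[d]
    return new_indices


def coo_to_csr(row, dim, nnz):
    # csr[h + 1] ends up holding the count of nonzeros in rows 0..h,
    # assuming `row` holds sorted COO row indices.
    csr = torch.zeros(dim + 1, dtype=torch.long)
    for i in range(nnz):
        hp0 = int(row[i])
        hp1 = int(row[i + 1]) if i + 1 < nnz else dim
        for h in range(hp0, hp1):
            csr[h + 1] = i + 1
    return csr


indices = torch.tensor([[2, 4, 0], [3, 1, 10]])
print(flatten_indices(indices, [2, 12]))               # tensor([27, 49, 10])
print(flatten_indices_by_dims(indices, [2, 12], [1]))  # tensor([ 3,  1, 10])
row = torch.tensor([0, 0, 1, 3])
print(coo_to_csr(row, 4, 4))                           # tensor([0, 2, 3, 3, 4])
```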
