#include <ATen/SparseTensorUtils.h>

#include <ATen/ATen.h>
#include <ATen/SparseTensorImpl.h>
#include <ATen/Parallel.h>

namespace at { namespace sparse {

// NOTE [ Flatten Sparse Indices ]
// This helper function flattens a sparse indices tensor (a Tensor) into a 1D
// indices tensor. E.g.,
//   input = [[2, 4, 0],
//            [3, 1, 10]]
//   full_size = [2, 12]
//   output = [ 2 * 12 + 3, 4 * 12 + 1, 0 * 12 + 10 ] = [27, 49, 10]
//
// In other words, assuming that each column `indices[:, i]` is a valid index
// into a tensor `t` of shape `full_size`, this returns the corresponding
// indices into the flattened tensor `t.reshape( prod(full_size[:indices.size(0)]), -1 )`.
// If force_clone is true, the result is forced to be a clone of the input indices.
Tensor flatten_indices(const Tensor& indices, IntArrayRef full_size, bool force_clone /*= false*/) {
  int64_t sparse_dim = indices.size(0);
  if (sparse_dim == 1) {
    if (force_clone) {
      return indices.squeeze(0).clone(at::MemoryFormat::Contiguous);
    } else {
      return indices.squeeze(0);
    }
  } else {
    std::vector<int64_t> indices_mult_cpu_vec;
    // resize (rather than reserve) so that the operator[] writes below stay
    // within the vector's size; writing past size() of a merely-reserved
    // vector is undefined behavior.
    indices_mult_cpu_vec.resize(sparse_dim);
    int64_t mult = 1;
    for (int64_t i = sparse_dim - 1; i >= 0; i--) {
      indices_mult_cpu_vec[i] = mult;
      mult *= full_size[i];
    }
    auto indices_mult_cpu = at::from_blob(
        indices_mult_cpu_vec.data(),
        /*size=*/{sparse_dim, 1},
        indices.options().device(kCPU));
    // NB: must be blocking because this blob may be freed after this closure,
    // and non_blocking copy will see garbage.
    auto indices_mult = indices_mult_cpu.to(indices.device(), /*non_blocking=*/false);
    // Ideally we would use matmul, but matmul is slow on CPU Long and not
    // implemented for CUDA Long, so an elementwise mul followed by a sum is
    // faster here.
    return indices.mul(indices_mult).sum(0);
  }
}

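// The block below is a hypothetical usage sketch added for illustration; it is
// not part of the original file, and the helper name flatten_indices_example is
// invented here. It replays the worked example from the NOTE above, assuming an
// ordinary ATen build.
static void flatten_indices_example() {
  // Two sparse dims, three non-zeros; each column is one (row, col) index.
  int64_t idx_data[] = {2, 4, 0,
                        3, 1, 10};
  Tensor indices = at::from_blob(idx_data, {2, 3}, at::kLong).clone();
  Tensor flat = flatten_indices(indices, /*full_size=*/{2, 12});
  // flat == [2 * 12 + 3, 4 * 12 + 1, 0 * 12 + 10] == [27, 49, 10]
}
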
// Flatten a sparse tensor's indices from nD to 1D, similar to NOTE [ Flatten Sparse Indices ],
// except this one allows a partial flatten: only the specified dims are flattened. Note that
// the flattened indices might be uncoalesced if dims_to_flatten.size() < sparse_dim.
// Also, if the input indices are already coalesced, the flattened indices will be sorted.
//
// args:
//   indices: sparse tensor indices
//   sizes: sparse tensor sizes
//   dims_to_flatten: a list of dimension indices to flatten
//
// Ex1:
//   indices = [[2, 4, 0],
//              [3, 1, 3]]
//   sizes = [2, 12]
//   dims_to_flatten = [0, 1]
//   new_indices = [ 2 * 12 + 3, 4 * 12 + 1, 0 * 12 + 3 ] = [27, 49, 3]
//
// Ex2:
//   dims_to_flatten = [1]
//   new_indices = [ 3, 1, 3 ]  # uncoalesced
Tensor flatten_indices_by_dims(const Tensor& indices, const IntArrayRef& sizes, const IntArrayRef& dims_to_flatten){
  Tensor new_indices = at::zeros({indices.size(1)}, indices.options());
  for (auto d : dims_to_flatten) {
    new_indices.mul_(sizes[d]);
    new_indices.add_(indices.select(0, d));
  }
  return new_indices;
}

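// Hypothetical usage sketch, added for illustration only (the helper name
// flatten_indices_by_dims_example is invented here). It reproduces Ex1 and Ex2
// from the comment above.
static void flatten_indices_by_dims_example() {
  int64_t idx_data[] = {2, 4, 0,
                        3, 1, 3};
  Tensor indices = at::from_blob(idx_data, {2, 3}, at::kLong).clone();
  // Ex1: flatten both dims -> [2 * 12 + 3, 4 * 12 + 1, 0 * 12 + 3] = [27, 49, 3]
  Tensor ex1 = flatten_indices_by_dims(indices, /*sizes=*/{2, 12}, /*dims_to_flatten=*/{0, 1});
  // Ex2: flatten only dim 1 -> [3, 1, 3] (possibly uncoalesced)
  Tensor ex2 = flatten_indices_by_dims(indices, /*sizes=*/{2, 12}, /*dims_to_flatten=*/{1});
}
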
Tensor coo_to_csr(const int64_t* indices, int64_t dim, int64_t nnz) {
  /*
    Find the CSR representation for the row indices `indices` of a COO-format tensor.
    Inputs:
      `indices` is the array of row indices from the COO format
      `dim` is the number of rows (the size of the row dimension)
      `nnz` is the number of non-zeros

    Output:
      `csr` is the compressed row pointer array of the CSR format
  */
  Tensor csr = at::zeros({dim + 1}, kLong);

  // TODO: eliminate this conditional when zero-size dims supported correctly
  if (nnz > 0) {
    auto csr_accessor = csr.accessor<int64_t, 1>();
    // Convert the sparse matrix to CSR format
    at::parallel_for(0, nnz, 10000, [&](int64_t start, int64_t end) {
      int64_t h, hp0, hp1;
      for (auto i = start; i < end; i++) {
        hp0 = indices[i];
        hp1 = (i+1 == nnz) ? dim : indices[i+1];
        if (hp0 != hp1) {
          for (h = hp0; h < hp1; h++) {
            csr_accessor[h+1] = i+1;
          }
        }
      }
    });
  }
  return csr;
}
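
// Hypothetical usage sketch, added for illustration only (the helper name
// coo_to_csr_example is invented here). It converts a tiny sorted COO
// row-index array to CSR row pointers.
static void coo_to_csr_example() {
  // Row index of each of the 4 non-zeros of a 4-row matrix, sorted as in a
  // coalesced COO tensor.
  int64_t rows[] = {0, 0, 1, 3};
  Tensor crow = coo_to_csr(rows, /*dim=*/4, /*nnz=*/4);
  // crow == [0, 2, 3, 3, 4]: row 0 has two non-zeros, row 1 has one,
  // row 2 has none, row 3 has one.
}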

}} // namespace at::sparse