Commit b34e55a

Update on "Move default or no default logic into native.argument"
Previously, it was done at a use site, but that meant other use sites don't get the right logic. Pushing it in makes sure everyone gets it. I also fixed one case of confusion where defn() was used to define a decl(). If you want to define a declaration with no defaults, say no_default().decl() which is more direct and will give us code reviewers a clue if you should have pushed this logic in. Signed-off-by: Edward Z. Yang <ezyang@fb.com> Differential Revision: [D25595407](https://our.internmc.facebook.com/intern/diff/D25595407) [ghstack-poisoned]
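To make the pattern in the message concrete, here is a minimal Python sketch of the decl()/defn()/no_default() idea. The class and field names below are hypothetical illustrations of the pattern, not the actual tools/codegen API.

```python
# Hypothetical sketch of the decl()/defn()/no_default() pattern; the names
# below are illustrative, not the real tools/codegen classes.
from dataclasses import dataclass, replace
from typing import Optional, Tuple


@dataclass(frozen=True)
class Argument:
    type: str
    name: str
    default: Optional[str] = None


@dataclass(frozen=True)
class Signature:
    name: str
    args: Tuple[Argument, ...]

    def no_default(self) -> "Signature":
        # Strip all defaults. Saying no_default().decl() is the direct way to
        # ask for a declaration without defaults, and it flags that choice to
        # code reviewers.
        return replace(
            self, args=tuple(replace(a, default=None) for a in self.args)
        )

    def decl(self) -> str:
        # A declaration renders whatever defaults the arguments carry.
        parts = [
            f"{a.type} {a.name}" + (f"={a.default}" if a.default else "")
            for a in self.args
        ]
        return f"{self.name}({', '.join(parts)})"

    def defn(self) -> str:
        # A definition never repeats defaults in C++, so it delegates to
        # no_default() instead of re-implementing the stripping logic.
        return self.no_default().decl()


sig = Signature(
    "clamp",
    (
        Argument("const Tensor&", "self"),
        Argument("c10::optional<Scalar>", "min", "c10::nullopt"),
    ),
)
print(sig.decl())               # declaration, defaults included
print(sig.no_default().decl())  # declaration, defaults explicitly stripped
print(sig.defn())               # definition, never has defaults
```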
2 parents: 37efc05 + d42a6ba

503 files changed: 11952 additions & 4597 deletions


.circleci/cimodel/data/dimensions.py

Lines changed: 1 addition & 1 deletion
@@ -8,8 +8,8 @@
 ]
 
 ROCM_VERSIONS = [
-    "3.9",
     "3.10",
+    "4.0",
 ]
 
 ROCM_VERSION_LABELS = ["rocm" + v for v in ROCM_VERSIONS]

.circleci/config.yml

Lines changed: 104 additions & 104 deletions
(Large diff not rendered.)

BUILD.bazel

Lines changed: 0 additions & 1 deletion
@@ -373,7 +373,6 @@ filegroup(
 filegroup(
     name = "thc_srcs_cu",
     srcs = [
-        "aten/src/THC/THCBlas.cu.cc",
         "aten/src/THC/THCReduceApplyUtils.cu.cc",
         "aten/src/THC/THCSleep.cu.cc",
         "aten/src/THC/THCSortUtils.cu.cc",

CONTRIBUTING.md

Lines changed: 10 additions & 0 deletions
@@ -903,6 +903,16 @@ You'll need to install an appropriately configured flake8; see
 [Lint as you type](https://github.com/pytorch/pytorch/wiki/Lint-as-you-type)
 for documentation on how to do this.
 
+If you haven't set up the pre-commit hook and have already committed files and
+CI reports `flake8` errors, you can run the check locally in your PR branch with:
+
+```bash
+flake8 $(git diff --name-only $(git merge-base --fork-point master))
+```
+
+Fix the code so that no errors are reported when you re-run the check,
+and then commit the fix.
+
 ## Building PyTorch with ASAN
 
 [ASAN](https://github.com/google/sanitizers/wiki/AddressSanitizer) is very

android/gradle/android_tasks.gradle

Lines changed: 0 additions & 1 deletion
@@ -1,4 +1,3 @@
-
 import java.nio.file.Files
 import java.nio.file.Paths
 import java.io.FileOutputStream

android/pytorch_android/host/build.gradle

Lines changed: 0 additions & 1 deletion
@@ -38,4 +38,3 @@ dependencies {
 }
 
 apply from: rootProject.file('gradle/release.gradle')
-

android/settings.gradle

Lines changed: 0 additions & 1 deletion
@@ -4,4 +4,3 @@ project(':pytorch_android_torchvision').projectDir = file('pytorch_android_torch
 
 project(':pytorch_host').projectDir = file('pytorch_android/host')
 project(':test_app').projectDir = file('test_app/app')
-

aten/src/ATen/LegacyTHFunctionsCUDA.h

Lines changed: 0 additions & 1 deletion
@@ -75,7 +75,6 @@ Tensor & _thnn_log_sigmoid_backward_out(Tensor & grad_input, const Tensor & grad
 Tensor _thnn_log_sigmoid_backward(const Tensor & grad_output, const Tensor & self, const Tensor & buffer);
 Tensor & _thnn_rrelu_with_noise_forward_out(Tensor & output, const Tensor & self, const Tensor & noise, Scalar lower, Scalar upper, bool training, c10::optional<at::Generator> generator);
 Tensor _thnn_rrelu_with_noise_forward(const Tensor & self, const Tensor & noise, Scalar lower, Scalar upper, bool training, c10::optional<at::Generator> generator);
-Tensor & _thnn_rrelu_with_noise_backward_out(Tensor & grad_input, const Tensor & grad_output, const Tensor & self, const Tensor & noise, Scalar lower, Scalar upper, bool training);
 Tensor _thnn_rrelu_with_noise_backward(const Tensor & grad_output, const Tensor & self, const Tensor & noise, Scalar lower, Scalar upper, bool training);
 Tensor & _thnn_rrelu_with_noise_forward_(Tensor & self, const Tensor & noise, Scalar lower, Scalar upper, bool training, c10::optional<at::Generator> generator);
 std::tuple<Tensor &,Tensor &,Tensor &> _thnn_conv2d_forward_out(Tensor & output, Tensor & columns, Tensor & ones, const Tensor & self, const Tensor & weight, IntArrayRef kernel_size, const Tensor & bias, IntArrayRef stride, IntArrayRef padding);

aten/src/ATen/MemoryOverlap.cpp

Lines changed: 3 additions & 0 deletions
@@ -48,6 +48,9 @@ MemOverlapStatus get_overlap_status(TensorImpl* a, TensorImpl* b) {
   if (!a->is_contiguous() || !b->is_contiguous()) {
     return MemOverlapStatus::TOO_HARD;
   }
+  if (!a->has_storage() || !b->has_storage()) {
+    return MemOverlapStatus::NO;
+  }
   if (a->storage().data() == b->storage().data()) {
     const auto a_begin = static_cast<char*>(a->data());
     const auto a_end = a_begin + a->numel() * a->itemsize();
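The new has_storage() guard bails out early for tensors that have no storage at all; everything below it can then assume storage exists. For context, get_overlap_status() is what feeds PyTorch's partial-overlap errors on out= arguments. A small Python demonstration of that user-visible behavior, assuming current torch overlap checking (this shows the general check, not this specific fix):

```python
import torch

base = torch.arange(8.)
a = base[0:6]
b = base[2:8]  # shares storage with `a` and partially overlaps it

try:
    # Writing into memory that is simultaneously being read trips the
    # overlap check built on top of get_overlap_status().
    torch.add(a, 1, out=b)
except RuntimeError as err:
    print("partial overlap rejected:", err)
```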
aten/src/ATen/SparseTensorUtils.cpp (new file)

Lines changed: 113 additions & 0 deletions

@@ -0,0 +1,113 @@
#include <ATen/SparseTensorUtils.h>

#include <ATen/ATen.h>
#include <ATen/SparseTensorImpl.h>
#include <ATen/Parallel.h>

namespace at { namespace sparse {

// NOTE [ Flatten Sparse Indices ]
// This helper function flattens a sparse indices tensor (a Tensor) into a 1D
// indices tensor. E.g.,
//   input = [[2, 4, 0],
//            [3, 1, 10]]
//   full_size = [2, 12]
//   output = [ 2 * 12 + 3, 4 * 12 + 1, 0 * 12 + 10 ] = [27, 49, 10]
//
// In other words, assuming that each `indices[i, :]` is a valid index into a
// tensor `t` of shape `full_size`, this returns the corresponding indices into
// the flattened tensor `t.reshape( prod(full_size[:indices.size(0)]), -1 )`.
// If force_clone is true, the result will be forced to be a clone of self.
Tensor flatten_indices(const Tensor& indices, IntArrayRef full_size, bool force_clone /*= false*/) {
  int64_t sparse_dim = indices.size(0);
  if (sparse_dim == 1) {
    if (force_clone) {
      return indices.squeeze(0).clone(at::MemoryFormat::Contiguous);
    } else {
      return indices.squeeze(0);
    }
  } else {
    std::vector<int64_t> indices_mult_cpu_vec;
    // resize (not reserve) so the element writes below stay within bounds
    indices_mult_cpu_vec.resize(sparse_dim);
    int64_t mult = 1;
    for (int64_t i = sparse_dim - 1; i >= 0; i--) {
      indices_mult_cpu_vec[i] = mult;
      mult *= full_size[i];
    }
    auto indices_mult_cpu = at::from_blob(
        indices_mult_cpu_vec.data(),
        /*size=*/{sparse_dim, 1},
        indices.options().device(kCPU));
    // NB: must be blocking because this blob may be freed after this closure,
    // and non_blocking copy will see garbage.
    auto indices_mult = indices_mult_cpu.to(indices.device(), /*non_blocking=*/false);
    // Ideally we want matmul, but matmul is slow on CPU Long and not
    // implemented on CUDA Long. So mul is faster.
    return indices.mul(indices_mult).sum(0);
  }
}

// Flatten sparse tensor's indices from nD to 1D, similar to NOTE [ Flatten Sparse Indices ],
// except this one allows partial flatten: only flatten on specified dims. Note that
// the flattened indices might be uncoalesced if dims_to_flatten.size() < sparse_dim.
// Also, if the input indices are already coalesced, the flattened indices will be sorted.
//
// args:
//   indices: sparse tensor indices
//   sizes: sparse tensor sizes
//   dims_to_flatten: a list of dims to flatten
//
// Ex1:
//   indices = [[2, 4, 0],
//              [3, 1, 3]]
//   sizes = [2, 12]
//   dims_to_flatten = [0, 1]
//   new_indices = [ 2 * 12 + 3, 4 * 12 + 1, 0 * 12 + 3 ] = [27, 49, 3]
//
// Ex2:
//   dims_to_flatten = [1]
//   new_indices = [ 3, 1, 3 ]  # uncoalesced
Tensor flatten_indices_by_dims(const Tensor& indices, const IntArrayRef& sizes, const IntArrayRef& dims_to_flatten) {
  Tensor new_indices = at::zeros({indices.size(1)}, indices.options());
  for (auto d : dims_to_flatten) {
    new_indices.mul_(sizes[d]);
    new_indices.add_(indices.select(0, d));
  }
  return new_indices;
}

Tensor coo_to_csr(const int64_t* indices, int64_t dim, int64_t nnz) {
  /*
    Find the CSR representation for a row `indices` from the COO format
    Inputs:
      `indices` is the row pointer from COO indices
      `dim` is the row dimensionality
      `nnz` is the number of non-zeros

    Output:
      `csr` is a compressed row array in a CSR format
  */
  Tensor csr = at::zeros({dim + 1}, kLong);

  // TODO: eliminate this conditional when zero-size dims are supported correctly
  if (nnz > 0) {
    auto csr_accessor = csr.accessor<int64_t, 1>();
    // Convert the sparse matrix to CSR format
    at::parallel_for(0, nnz, 10000, [&](int64_t start, int64_t end) {
      int64_t h, hp0, hp1;
      for (auto i = start; i < end; i++) {
        hp0 = indices[i];
        hp1 = (i + 1 == nnz) ? dim : indices[i + 1];
        if (hp0 != hp1) {
          for (h = hp0; h < hp1; h++) {
            csr_accessor[h + 1] = i + 1;
          }
        }
      }
    });
  }
  return csr;
}

}} // namespace at::sparse
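For readers who want to experiment with the logic above outside of C++, here is a minimal Python sketch of the three helpers using plain torch ops. These re-implementations mirror the C++ code for illustration only and are not the exported ATen entry points.

```python
import torch


def flatten_indices(indices, full_size):
    # NOTE [ Flatten Sparse Indices ]: row-major linearization of nD indices.
    sparse_dim = indices.size(0)
    mult = [1] * sparse_dim
    for i in range(sparse_dim - 2, -1, -1):
        mult[i] = mult[i + 1] * full_size[i + 1]
    mult = torch.tensor(mult).unsqueeze(1)
    # mul + sum instead of matmul, matching the C++ comment about Long matmul.
    return (indices * mult).sum(0)


def flatten_indices_by_dims(indices, sizes, dims_to_flatten):
    # Partial flatten: fold only the requested dims, Horner-style.
    new_indices = torch.zeros(indices.size(1), dtype=indices.dtype)
    for d in dims_to_flatten:
        new_indices = new_indices * sizes[d] + indices[d]
    return new_indices


def coo_to_csr(row, dim, nnz):
    # csr[h + 1] ends up holding the count of nonzeros in rows 0..h,
    # assuming `row` holds sorted COO row indices.
    csr = torch.zeros(dim + 1, dtype=torch.long)
    for i in range(nnz):
        hp0 = int(row[i])
        hp1 = int(row[i + 1]) if i + 1 < nnz else dim
        for h in range(hp0, hp1):
            csr[h + 1] = i + 1
    return csr


indices = torch.tensor([[2, 4, 0], [3, 1, 10]])
print(flatten_indices(indices, [2, 12]))               # tensor([27, 49, 10])
print(flatten_indices_by_dims(indices, [2, 12], [1]))  # tensor([ 3,  1, 10])
row = torch.tensor([0, 0, 1, 3])
print(coo_to_csr(row, 4, 4))                           # tensor([0, 2, 3, 3, 4])
```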
