Commit ee6a3f1

Author: Zafar
Update on "[quant] Fix ConvTranspose mapping"

Differential Revision: [D23746466](https://our.internmc.facebook.com/intern/diff/D23746466) [ghstack-poisoned]

2 parents: 7fe42ed + 24f22b1

474 files changed: 13594 additions & 4520 deletions


.circleci/cimodel/data/windows_build_definitions.py

Lines changed: 1 addition & 2 deletions

```diff
@@ -124,8 +124,7 @@ def FalsePred(_):
 def TruePred(_):
     return True

-# MKLDNN compilation fails with VC-19.27
-_VC2019 = VcSpec(2019, ["14", "26"], hide_version=True)
+_VC2019 = VcSpec(2019)

 WORKFLOW_DATA = [
     # VS2019 CUDA-10.1
```
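The one-line change above stops pinning MSVC 14.26 and lets the build image use whatever toolset it ships with. A hypothetical sketch of the pattern (the real `VcSpec` lives elsewhere in `cimodel`; names and defaults here are purely illustrative) shows why dropping the version list also yields the empty `vc_version: ""` seen in config.yml below:

```python
# Illustrative sketch only -- not the actual cimodel implementation.
class VcSpec:
    def __init__(self, year, version_parts=None, hide_version=False):
        self.year = year                       # e.g. 2019
        self.version_parts = version_parts or []  # e.g. ["14", "26"]
        self.hide_version = hide_version

    def dotted_version(self):
        # An empty list produces "", meaning "use the image's default toolset".
        return ".".join(self.version_parts)

pinned = VcSpec(2019, ["14", "26"], hide_version=True)
floating = VcSpec(2019)
print(pinned.dotted_version())    # "14.26"
print(floating.dotted_version())  # ""
```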

.circleci/config.yml

Lines changed: 13 additions & 13 deletions

```diff
@@ -23,19 +23,19 @@ executors:
   windows-with-nvidia-gpu:
     machine:
       resource_class: windows.gpu.nvidia.medium
-      image: windows-server-2019-nvidia:canary
+      image: windows-server-2019-nvidia:stable
     shell: bash.exe

   windows-xlarge-cpu-with-nvidia-cuda:
     machine:
       resource_class: windows.xlarge
-      image: windows-server-2019-vs2019:canary
+      image: windows-server-2019-vs2019:stable
     shell: bash.exe

   windows-medium-cpu-with-nvidia-cuda:
     machine:
       resource_class: windows.medium
-      image: windows-server-2019-vs2019:canary
+      image: windows-server-2019-vs2019:stable
     shell: bash.exe
 commands:
@@ -7109,7 +7109,7 @@ workflows:
       python_version: "3.6"
       use_cuda: "1"
       vc_product: Community
-      vc_version: "14.26"
+      vc_version: ""
       vc_year: "2019"
   - pytorch_windows_test:
       build_environment: pytorch-win-vs2019-cuda10-cudnn7-py3
@@ -7122,7 +7122,7 @@ workflows:
       test_name: pytorch-windows-test1
       use_cuda: "1"
       vc_product: Community
-      vc_version: "14.26"
+      vc_version: ""
      vc_year: "2019"
   - pytorch_windows_test:
       build_environment: pytorch-win-vs2019-cuda10-cudnn7-py3
@@ -7135,7 +7135,7 @@ workflows:
       test_name: pytorch-windows-test2
       use_cuda: "1"
       vc_product: Community
-      vc_version: "14.26"
+      vc_version: ""
       vc_year: "2019"
   - pytorch_windows_build:
       build_environment: pytorch-win-vs2019-cuda11-cudnn8-py3
@@ -7144,7 +7144,7 @@ workflows:
       python_version: "3.6"
       use_cuda: "1"
       vc_product: Community
-      vc_version: "14.26"
+      vc_version: ""
       vc_year: "2019"
   - pytorch_windows_test:
       build_environment: pytorch-win-vs2019-cuda11-cudnn8-py3
@@ -7163,7 +7163,7 @@ workflows:
       test_name: pytorch-windows-test1
       use_cuda: "1"
       vc_product: Community
-      vc_version: "14.26"
+      vc_version: ""
       vc_year: "2019"
   - pytorch_windows_test:
       build_environment: pytorch-win-vs2019-cuda11-cudnn8-py3
@@ -7182,7 +7182,7 @@ workflows:
       test_name: pytorch-windows-test2
       use_cuda: "1"
       vc_product: Community
-      vc_version: "14.26"
+      vc_version: ""
       vc_year: "2019"
   - pytorch_windows_build:
       build_environment: pytorch-win-vs2019-cpu-py3
@@ -7191,7 +7191,7 @@ workflows:
       python_version: "3.6"
       use_cuda: "0"
       vc_product: Community
-      vc_version: "14.26"
+      vc_version: ""
       vc_year: "2019"
   - pytorch_windows_test:
       build_environment: pytorch-win-vs2019-cpu-py3
@@ -7209,7 +7209,7 @@ workflows:
       test_name: pytorch-windows-test1
       use_cuda: "0"
       vc_product: Community
-      vc_version: "14.26"
+      vc_version: ""
       vc_year: "2019"
   - pytorch_windows_test:
       build_environment: pytorch-win-vs2019-cpu-py3
@@ -7227,7 +7227,7 @@ workflows:
       test_name: pytorch-windows-test2
       use_cuda: "0"
       vc_product: Community
-      vc_version: "14.26"
+      vc_version: ""
       vc_year: "2019"
   - pytorch_windows_test:
       build_environment: pytorch-win-vs2019-cuda10-cudnn7-py3
@@ -7245,7 +7245,7 @@ workflows:
       test_name: pytorch-windows-test1
       use_cuda: "0"
       vc_product: Community
-      vc_version: "14.26"
+      vc_version: ""
       vc_year: "2019"
   - update_s3_htmls:
       context: org-member
```

.circleci/scripts/binary_linux_build.sh

Lines changed: 1 addition & 3 deletions

```diff
@@ -5,9 +5,7 @@ set -eux -o pipefail
 source /env

 # Defaults here so they can be changed in one place
-# This script is run inside Docker.2XLarge+ container that has 20 CPU cores
-# But ncpu will return total number of cores on the system
-export MAX_JOBS=18
+export MAX_JOBS=${MAX_JOBS:-$(( $(nproc) - 2 ))}

 # Parse the parameters
 if [[ "$PACKAGE_TYPE" == 'conda' ]]; then
```
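The replacement line relies on the POSIX `${VAR:-default}` expansion: a `MAX_JOBS` value exported by the caller wins, and otherwise the script falls back to the machine's core count minus two, leaving some CPU headroom. A small sketch of the idiom:

```shell
# Default case: MAX_JOBS is unset, so the fallback expression runs.
unset MAX_JOBS
export MAX_JOBS=${MAX_JOBS:-$(( $(nproc) - 2 ))}
echo "computed: $MAX_JOBS"

# Override case: a caller-provided value is kept untouched.
MAX_JOBS=4
export MAX_JOBS=${MAX_JOBS:-$(( $(nproc) - 2 ))}
echo "pinned: $MAX_JOBS"   # prints "pinned: 4"
```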

.circleci/verbatim-sources/header-section.yml

Lines changed: 3 additions & 3 deletions

```diff
@@ -23,17 +23,17 @@ executors:
   windows-with-nvidia-gpu:
     machine:
       resource_class: windows.gpu.nvidia.medium
-      image: windows-server-2019-nvidia:canary
+      image: windows-server-2019-nvidia:stable
     shell: bash.exe

   windows-xlarge-cpu-with-nvidia-cuda:
     machine:
       resource_class: windows.xlarge
-      image: windows-server-2019-vs2019:canary
+      image: windows-server-2019-vs2019:stable
     shell: bash.exe

   windows-medium-cpu-with-nvidia-cuda:
     machine:
       resource_class: windows.medium
-      image: windows-server-2019-vs2019:canary
+      image: windows-server-2019-vs2019:stable
     shell: bash.exe
```

.jenkins/pytorch/win-test-helpers/run_python_nn_smoketests.py

Lines changed: 0 additions & 2 deletions

```diff
@@ -1,7 +1,5 @@
 #!/usr/bin/env python

-from __future__ import print_function
-
 import subprocess
 import os
```

BUILD.bazel

Lines changed: 3 additions & 0 deletions

```diff
@@ -331,10 +331,12 @@ filegroup(
     name = "aten_cuda_srcs",
     srcs = [
         "aten/src/ATen/cuda/CUDABlas.cpp",
+        "aten/src/ATen/cuda/CUDASolver.cpp",
         "aten/src/ATen/cuda/CUDAContext.cpp",
         "aten/src/ATen/cuda/CUDAGenerator.cpp",
         "aten/src/ATen/cuda/CuSparseHandlePool.cpp",
         "aten/src/ATen/cuda/CublasHandlePool.cpp",
+        "aten/src/ATen/cuda/CusolverDnHandlePool.cpp",
         "aten/src/ATen/cuda/PinnedMemoryAllocator.cpp",
         "aten/src/ATen/cuda/detail/CUDAHooks.cpp",
         "aten/src/ATen/cudnn/AutocastRNN.cpp",
@@ -459,6 +461,7 @@ filegroup(
         "aten/src/ATen/native/cuda/AveragePool2d.cu.cc",
         "aten/src/ATen/native/cuda/AveragePool3d.cu.cc",
         "aten/src/ATen/native/cuda/BatchLinearAlgebra.cu.cc",
+        "aten/src/ATen/native/cuda/BatchLinearAlgebraLib.cu.cc",
         "aten/src/ATen/native/cuda/BinaryArithmeticKernel.cu.cc",
         "aten/src/ATen/native/cuda/BinaryCompareKernel.cu.cc",
         "aten/src/ATen/native/cuda/BinaryMiscOpsKernels.cu.cc",
```

CMakeLists.txt

Lines changed: 5 additions & 0 deletions

```diff
@@ -201,6 +201,7 @@ option(USE_TENSORRT "Using Nvidia TensorRT library" OFF)
 option(USE_VULKAN "Use Vulkan GPU backend" OFF)
 option(USE_VULKAN_WRAPPER "Use Vulkan wrapper" ON)
 option(USE_VULKAN_SHADERC_RUNTIME "Use Vulkan Shader compilation runtime(Needs shaderc lib)" OFF)
+option(USE_VULKAN_RELAXED_PRECISION "Use Vulkan relaxed precision(mediump)" OFF)
 option(USE_XNNPACK "Use XNNPACK" ON)
 option(USE_ZMQ "Use ZMQ" OFF)
 option(USE_ZSTD "Use ZSTD" OFF)
@@ -513,6 +514,10 @@ if(USE_VULKAN_SHADERC_RUNTIME)
   string(APPEND CMAKE_CXX_FLAGS " -DUSE_VULKAN_SHADERC_RUNTIME")
 endif()

+if(USE_VULKAN_RELAXED_PRECISION)
+  string(APPEND CMAKE_CXX_FLAGS " -DUSE_VULKAN_RELAXED_PRECISION")
+endif()
+
 # ---[ Allowlist file if allowlist is specified
 include(cmake/Allowlist.cmake)
```

CODEOWNERS

Lines changed: 11 additions & 13 deletions

```diff
@@ -4,10 +4,6 @@
 /docs/cpp @goldsborough @ebetica @yf225
 /torch/csrc/api/ @ebetica @goldsborough @yf225
 /test/cpp/api/ @ebetica @goldsborough @yf225
-/torch/lib/c10d/ @pietern @mrshenli @zhaojuanmao
-/torch/csrc/distributed/ @pietern @mrshenli @zhaojuanmao
-/torch/distributed/ @apaszke @pietern @mrshenli @zhaojuanmao
-/test/test_c10d.py @pietern @mrshenli @zhaojuanmao
 /torch/utils/cpp_extension.py @goldsborough @fmassa @soumith @ezyang

 # Not there to strictly require the approval, but to be tagged as a reviewer
@@ -20,17 +16,19 @@
 /torch/jit/ @apaszke
 /torch/utils/data/ @apaszke

-# Distributed RPC Framework.
-/torch/csrc/distributed/rpc @mrshenli @pritamdamania87 @zhaojuanmao
-/torch/csrc/distributed/autograd @mrshenli @pritamdamania87 @zhaojuanmao
-/torch/distributed/rpc @mrshenli @pritamdamania87 @zhaojuanmao
-/torch/distributed/autograd @mrshenli @pritamdamania87 @zhaojuanmao
-/torch/distributed/optim @mrshenli @pritamdamania87 @zhaojuanmao @aazzolini
-
 # Tensorpipe RPC Agent.
 /torch/csrc/distributed/rpc/tensorpipe_agent.cpp @jiayisuse @osalpekar @lw @beauby
 /torch/csrc/distributed/rpc/tensorpipe_agent.h @jiayisuse @osalpekar @lw @beauby

+# Distributed package
+# This list is mostly if you'd like to be tagged as reviewer, feel free to add
+# or remove yourself from it.
+/torch/lib/c10d/ @pietern @mrshenli @zhaojuanmao @pritamdamania87 @rohan-varma
+/torch/csrc/distributed/ @pietern @mrshenli @zhaojuanmao @pritamdamania87 @rohan-varma
+/torch/distributed/ @apaszke @pietern @mrshenli @zhaojuanmao @pritamdamania87 @rohan-varma
+
 # Distributed tests
-/test/distributed @mrshenli @pritamdamania87 @zhaojuanmao
-/torch/testing/_internal/distributed @mrshenli @pritamdamania87 @zhaojuanmao
+# This list is mostly if you'd like to be tagged as reviewer, feel free to add
+# or remove yourself from it.
+/test/distributed @mrshenli @pritamdamania87 @zhaojuanmao @rohan-varma
+/torch/testing/_internal/distributed @mrshenli @pritamdamania87 @zhaojuanmao @rohan-varma
```

CONTRIBUTING.md

Lines changed: 3 additions & 2 deletions

```diff
@@ -825,8 +825,9 @@ static_assert(std::is_same(A*, decltype(A::singleton()))::value, "hmm");

 [Clang-Tidy](https://clang.llvm.org/extra/clang-tidy/index.html) is a C++
 linter and static analysis tool based on the clang compiler. We run clang-tidy
-in our CI to make sure that new C++ code is safe, sane and efficient. See our
-[.travis.yml](https://github.com/pytorch/pytorch/blob/master/.travis.yml) file
+in our CI to make sure that new C++ code is safe, sane and efficient. See the
+[`clang-tidy` job in our GitHub Workflow's
+lint.yml file](https://github.com/pytorch/pytorch/blob/master/.github/workflows/lint.yml)
 for the simple commands we use for this.

 To run clang-tidy locally, follow these steps:
```

android/pytorch_android/src/main/cpp/pytorch_jni_common.cpp

Lines changed: 2 additions & 1 deletion

```diff
@@ -289,9 +289,10 @@ facebook::jni::local_ref<JIValue> JIValue::newJIValueFromAtIValue(
         JIValue::javaClassStatic()
             ->getStaticMethod<facebook::jni::local_ref<JIValue>(
                 facebook::jni::local_ref<TensorHybrid::javaobject>)>("from");
+    const auto& tensor = ivalue.toTensor();
     return jMethodTensor(
         JIValue::javaClassStatic(),
-        TensorHybrid::newJTensorFromAtTensor(ivalue.toTensor()));
+        TensorHybrid::newJTensorFromAtTensor(tensor.cpu()));
   } else if (ivalue.isBool()) {
     static auto jMethodBool =
         JIValue::javaClassStatic()
```
