
Commit de9d986

Update on "Add base forward grad logic"
RFC: pytorch/rfcs#11

This PR adds the basic logic to handle forward grads as dual Tensors. It contains the following:

- A mechanism to save dual state on a Tensor and clear it when the dual level ends
- C++ and Python user-facing APIs
- An updated view system that is able to track both forward and backward views

The current PR has the following limitations:

- Extensive tests are in the next PR in the stack, as formulas are needed to write full tests.
- Only the manual formulas have been audited; no other formula is actually implemented here (they are in the next PR in the stack).
- Only level 0 is allowed for now. This was discussed, and it was agreed that more levels are not needed for the first version of this PR.
- We could save one ViewInfo creation when both the forward and backward views have the same base, by adding a boolean flag to the DifferentiableViewMeta and extra logic in the `as_view` method. This is left out to keep this PR concise.
- We could skip tracking forward views if the base has a forward grad, by adding extra logic in the `as_view` method. This is left out to keep this PR concise.

Reading guide:

- Updated view handling in [gen_variable_type.py](https://github.com/pytorch/pytorch/pull/49097/files#diff-f6553cec68caeaea36f6c8b14ff76a6d39dfd774e0ea9ef2f76e8d81fd9af5df), [VariableTypeUtils.h](https://github.com/pytorch/pytorch/pull/49097/files#diff-ec71cfa45954dece1236c661d170e6341879c5be637f4abf52e826d61b40695a), [variable.cpp](https://github.com/pytorch/pytorch/pull/49097/files#diff-60e3bfe444e89efc7149f25b38e472710525984789934ab83f1bd5671b8ff285) (skip the code below "[Forward Grad View]" for now), [variable.h](https://github.com/pytorch/pytorch/pull/49097/files#diff-1604bcd0e4350ed99ec45e437cee7ac9ebe337392c9ea16a236247aeeb35b02bR266-R542) and [custom_function.cpp](https://github.com/pytorch/pytorch/pull/49097/files#diff-dd85f452082b5bb6612bbc12adb496f8827defa228509f7b493de1d517522d5d). This introduces the new ViewInfo that holds the view information shared by forward and backward, updates the differentiable view meta to use it, and updates the `as_view` function to handle both forward and backward views.
- New forward grad class that handles storing gradients and tracking at each level: [forward_grad.h](https://github.com/pytorch/pytorch/pull/49097/files#diff-c6c5b9ab2d7e5dde4102495faa1b6bbbfc23aa3e47deb7359c0bfe1eb004c0cb), [forward_grad.cpp](https://github.com/pytorch/pytorch/pull/49097/files#diff-de2ab54ade7312701850d71a119a4f4ee4b9fc5a9c42a467cdd4e73c033531dd) and [build_variables.bzl](https://github.com/pytorch/pytorch/pull/49097/files#diff-dfdfa2efb17beddfd9094524f95351fd197db6c8857e96b436fb599870359325). EDIT: These files also contain the new flag to globally disable forward AD, which allows us to reduce performance issues while this is in development.
- Lowest-level API and binding between Tensor and AutogradMeta in [TensorBody.h](https://github.com/pytorch/pytorch/pull/49097/files#diff-7554853205392fa743357bf845ecc350a974ec049383248c12daaf2f4de04911), [TensorImpl.cpp](https://github.com/pytorch/pytorch/pull/49097/files#diff-052bd9150ef8e09289ddf644b5a6830ede49207201cd41728f6d7cc6d9cead94), [TensorImpl.h](https://github.com/pytorch/pytorch/pull/49097/files#diff-a15aae4cf23da44970db7cece62ff981265575c798c62f7b52d87c8809dfe2e1) and the rest of [variable.cpp](https://github.com/pytorch/pytorch/pull/49097/files#diff-60e3bfe444e89efc7149f25b38e472710525984789934ab83f1bd5671b8ff285R557-R677)
- API to access the forward primal, which needs to be a differentiable function (and so lives in native_functions.yaml): [native_functions.yaml](https://github.com/pytorch/pytorch/pull/49097/files#diff-2f3dbd85efb9b5172f2264eedd3be47dd765e6ab7cc8bf3ade5e62c28ae35991), [NamedRegistrations.cpp](https://github.com/pytorch/pytorch/pull/49097/files#diff-69bd3bea510c9b64e1633fa18c3ea63d4b8348dbad3a78ad9de844ab3e43dc1d), [VariableMethodsStub.cpp](https://github.com/pytorch/pytorch/pull/49097/files#diff-23f5fcb737a2b289811fe0f4b65aef775e7c824b2e629ecd343df51405cd434f), [derivatives.yaml](https://github.com/pytorch/pytorch/pull/49097/files#diff-e4c2f99a2404e98c3586e07425da73008f36b1bada790648a7297af141d37f8c), [gen_python_functions.py](https://github.com/pytorch/pytorch/pull/49097/files#diff-e4c2f99a2404e98c3586e07425da73008f36b1bada790648a7297af141d37f8c), [gen_trace_type.py](https://github.com/pytorch/pytorch/pull/49097/files#diff-54e0b976027bf8debefb959ff360b89ae93466970c843365b1b3a03806d868ce), [TraceTypeManual.cpp](https://github.com/pytorch/pytorch/pull/49097/files#diff-f34636741ad4a23d018e0c289bc750c3bad887b45660e1d6eaf440d234a78fbf) and [part of VariableTypeManual.cpp](https://github.com/pytorch/pytorch/pull/49097/files#diff-6e19a1bce8cbdba8714b6e2c794a76bc0864b64a49cfa757cb0b5afdc937d1a4R198-R243)
- C++ API: [autograd.h](https://github.com/pytorch/pytorch/pull/49097/files#diff-349028fbe8291a965a7a263c323b208fe071c35c66179ee997ef84fa81aa4b1e), [autograd.cpp](https://github.com/pytorch/pytorch/pull/49097/files#diff-a3fe908d67dfec16a1fcde300de68b0701bf68b88db7451f29f2bee255cf30c9)
- Python binding: [init.cpp](https://github.com/pytorch/pytorch/pull/49097/files#diff-c58a67c85191c22c9b3bb439117d8053edfd9dea839fa010cf967d404c3c630d)
- Python API: [forward_ad.py](https://github.com/pytorch/pytorch/pull/49097/files#diff-a4efad4ba18fffdfb264c21e5475997a24a743089a899f8ec1a5ff962c6738d9), [autograd/__init__.py](https://github.com/pytorch/pytorch/pull/49097/files#diff-743abcafd32ad0e69f39ac5a91df4197b7e1921c135cacee7ef6dc829a8a7af8) (a usage sketch follows below)
- C++ and Python printing: [Formatting.cpp](https://github.com/pytorch/pytorch/pull/49097/files#diff-881dba501e71662e2e4818b4b016f739b344c8aed2f5edc6b871eda47a2aced0), [_tensor_str.py](https://github.com/pytorch/pytorch/pull/49097/files#diff-a7911f8d5e73adbff914d99fd7818ace2a7030b6a3748abe06ec6fc6e3df9cc3)
- Utility for formulas and updated manual functions to respect the new view system as well as forward grads: [FunctionsManual.h](https://github.com/pytorch/pytorch/pull/49097/files#diff-6378bb6dc81a64dab676d61731341fa5d1088418f32a1473a33a0ccfc2357dc1), [FunctionsManual.cpp](https://github.com/pytorch/pytorch/pull/49097/files#diff-4adbd88239afcd60e8198aab65d4f5e43b62314e34b80551e997a1ea503adea5), [rest of VariableTypeManual.cpp](https://github.com/pytorch/pytorch/pull/49097/files#diff-6e19a1bce8cbdba8714b6e2c794a76bc0864b64a49cfa757cb0b5afdc937d1a4R264-R433)
- Ensure SavedVariable saves the forward grad properly: [saved_variable.h](https://github.com/pytorch/pytorch/pull/49097/files#diff-c1b8039d776241abe177d5aa99b79dd9489a9b3e529da8ab24c2e386c1238ae2), [saved_variable.cpp](https://github.com/pytorch/pytorch/pull/49097/files#diff-cc9fba479b5beae06b2eea2e390d17796e0341c5b037a20b5bcaccbb0c341030)

Differential Revision: [D25607503](https://our.internmc.facebook.com/intern/diff/D25607503)

[ghstack-poisoned]
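For orientation, here is a minimal sketch of how the Python API introduced in forward_ad.py is meant to be used. This is a sketch under assumptions, not a confirmed API: the entry-point names (`dual_level`, `make_dual`, `unpack_dual`) are taken from this PR's intent rather than verified signatures, and since most derivative formulas only land in the next PR in the stack, tangent propagation through ops is still limited here.

```python
import torch
import torch.autograd.forward_ad as fwAD  # module added by this PR; name assumed

primal = torch.randn(3)
tangent = torch.randn(3)  # direction for the directional derivative

# Dual Tensors only exist inside a dual level; only level 0 is allowed for now.
with fwAD.dual_level():
    # Attach the tangent to the primal as its forward grad.
    dual = fwAD.make_dual(primal, tangent)
    # Recover both parts; the tangent is the forward grad stored on the Tensor.
    p, t = fwAD.unpack_dual(dual)
    assert torch.equal(t, tangent)
# When the `with` block exits, the dual level ends and the dual state saved on
# Tensors is cleared, as described above.
```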
2 parents a55e021 + ccd6466 commit de9d986

160 files changed

Lines changed: 3931 additions & 1141 deletions


.clang-tidy

Lines changed: 1 addition & 0 deletions
```diff
@@ -22,6 +22,7 @@ cppcoreguidelines-*,
 hicpp-exception-baseclass,
 hicpp-avoid-goto,
 modernize-*,
+-modernize-concat-nested-namespaces,
 -modernize-return-braced-init-list,
 -modernize-use-auto,
 -modernize-use-default-member-init,
```

.github/workflows/lint.yml

Lines changed: 10 additions & 0 deletions
```diff
@@ -17,6 +17,16 @@ jobs:
           architecture: x64
       - name: Checkout PyTorch
         uses: actions/checkout@v1
+      - name: Checkout PR tip
+        run: |
+          set -eux
+          if [[ "${{ github.event_name }}" == "pull_request" ]]; then
+            # We are on a PR, so actions/checkout leaves us on a merge commit.
+            # Check out the actual tip of the branch.
+            git checkout ${{ github.event.pull_request.head.sha }}
+          fi
+          echo ::set-output name=commit_sha::$(git rev-parse HEAD)
+        id: get_pr_tip
       - name: Ensure consistent CircleCI YAML config
         run: |
           pip install -r requirements.txt
```
Lines changed: 22 additions & 0 deletions
```diff
@@ -0,0 +1,22 @@
+name: Update S3 HTML indices for download.pytorch.org
+on:
+  schedule:
+    # Update the indices every 30 minutes
+    - cron: "*/30 * * * *"
+  # Have the ability to trigger this job manually using the API as well
+  workflow_dispatch:
+
+jobs:
+  update-html:
+    runs-on: ubuntu-latest
+    strategy:
+      matrix:
+        prefix: ["whl", "whl/test", "whl/nightly"]
+    steps:
+      - name: Run updater image
+        env:
+          AWS_ACCESS_KEY_ID: ${{ secrets.AWS_S3_UPDATE_ACCESS_KEY_ID }}
+          AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_S3_UPDATE_SECRET_ACCESS_KEY }}
+        uses: docker://pytorch/manage_s3_html
+        with:
+          args: ${{ matrix.prefix }}
```

.jenkins/pytorch/win-build.sh

Lines changed: 15 additions & 0 deletions
```diff
@@ -38,6 +38,21 @@ fi
 
 export SCRIPT_HELPERS_DIR=$SCRIPT_PARENT_DIR/win-test-helpers
 
+set +ex
+grep -E -R 'PyLong_(From|As)(Unsigned|)Long\(' --exclude=python_numbers.h torch/
+PYLONG_API_CHECK=$?
+if [[ $PYLONG_API_CHECK == 0 ]]; then
+  echo "Usage of PyLong_{From,As}{Unsigned}Long API may lead to overflow errors on Windows"
+  echo "because \`sizeof(long) == 4\` and \`sizeof(unsigned long) == 4\`."
+  echo "Please include \"torch/csrc/python_numbers.h\" and use the corresponding APIs instead."
+  echo "PyLong_FromLong -> THPUtils_packInt32 / THPUtils_packInt64"
+  echo "PyLong_AsLong -> THPUtils_unpackInt (32-bit) / THPUtils_unpackLong (64-bit)"
+  echo "PyLong_FromUnsignedLong -> THPUtils_packUInt32 / THPUtils_packUInt64"
+  echo "PyLong_AsUnsignedLong -> THPUtils_unpackUInt32 / THPUtils_unpackUInt64"
+  exit 1
+fi
+set -ex
+
 $SCRIPT_HELPERS_DIR/build_pytorch.bat
 
 assert_git_not_dirty
```

aten/src/ATen/BatchedFallback.cpp

Lines changed: 1 addition & 1 deletion
```diff
@@ -361,7 +361,7 @@ void batchedTensorForLoopFallback(const c10::OperatorHandle& op, torch::jit::Sta
       flat_output.sizes().end());
   torch::jit::push(
       stack,
-      input_physical_views.front().newLogicalFromPhysical(flat_output.view(output_sizes)));
+      input_physical_views.front().getPhysicalToLogicalMap().apply(flat_output.view(output_sizes)));
 }
 }
 
```
aten/src/ATen/BatchingRegistrations.cpp

Lines changed: 50 additions & 50 deletions
Large diffs are not rendered by default.

aten/src/ATen/OpaqueTensorImpl.h

Lines changed: 3 additions & 1 deletion
```diff
@@ -24,11 +24,13 @@ struct CAFFE2_API OpaqueTensorImpl : public TensorImpl {
       const caffe2::TypeMeta data_type,
       c10::Device device,
       OpaqueHandle opaque_handle,
-      c10::IntArrayRef sizes)
+      c10::IntArrayRef sizes,
+      bool is_non_overlapping_and_dense = true)
       : TensorImpl(key_set, data_type, device),
         opaque_handle_(std::move(opaque_handle)) {
     sizes_ = sizes.vec();
     refresh_numel();
+    is_non_overlapping_and_dense_ = is_non_overlapping_and_dense;
   }
 
   void release_resources() override {
```
aten/src/ATen/ParallelOpenMP.cpp

Lines changed: 8 additions & 0 deletions
```diff
@@ -8,6 +8,8 @@
 #include <mkl.h>
 #endif
 
+#include <caffe2/utils/threadpool/pthreadpool-cpp.h>
+
 namespace at {
 
 namespace {
@@ -49,6 +51,12 @@ void set_num_threads(int nthreads) {
   // See https://github.com/pytorch/pytorch/issues/13757
   mkl_set_dynamic(false);
 #endif
+#ifdef USE_PTHREADPOOL
+  // because PyTorch uses caffe2::pthreadpool() in QNNPACK
+  caffe2::PThreadPool* const pool = caffe2::pthreadpool();
+  TORCH_INTERNAL_ASSERT(pool, "Invalid thread pool!");
+  pool->set_thread_count(nthreads);
+#endif
 }
 
 // Explicitly calling omp_get_max_threads() as the size of the parallel
```
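For context, a short sketch of the user-visible effect of this change, assuming the usual routing of `torch.set_num_threads` to `at::set_num_threads` in OpenMP builds: the intra-op thread count should now also propagate to the caffe2 pthreadpool that QNNPACK kernels run on, keeping the two pools in sync.

```python
import torch

# Set the intra-op thread count. With the change above (USE_PTHREADPOOL
# builds), the OpenMP backend additionally propagates this count to
# caffe2::pthreadpool(), which QNNPACK uses for quantized kernels.
torch.set_num_threads(4)
print(torch.get_num_threads())  # 4
```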

aten/src/ATen/VmapTransforms.cpp

Lines changed: 14 additions & 10 deletions
```diff
@@ -91,16 +91,6 @@ static BatchDims computeFrontBatchDimsFromLevels(std::bitset<kVmapNumLevels> lev
   return bdims;
 }
 
-Tensor VmapPhysicalView::newLogicalFromPhysical(const Tensor& physical) const {
-  return makeBatched(physical, computeFrontBatchDimsFromLevels(levels_));
-}
-
-void VmapPhysicalView::makeLogicalFromPhysicalListInplace(std::vector<Tensor>& physical_tensors) const {
-  for (int64_t idx = 0; idx < physical_tensors.size(); ++idx) {
-    physical_tensors[idx] = newLogicalFromPhysical(physical_tensors[idx]);
-  }
-}
-
 // Given a Tensor or a BatchedTensor, returns the underlying physical tensor
 // with all vmapped dimensions permuted to the front, if they exist, and a
 // bitset of vmap levels that were present in the tensor.
@@ -281,4 +271,18 @@ VmapPhysicalViewVec BroadcastingVmapTransform::logicalToPhysical(TensorList logi
   return result;
 }
 
+VmapPhysicalToLogicalMap VmapPhysicalView::getPhysicalToLogicalMap() const {
+  return VmapPhysicalToLogicalMap(levels_);
+}
+
+Tensor VmapPhysicalToLogicalMap::apply(const Tensor& physical_tensor) const {
+  return makeBatched(physical_tensor, computeFrontBatchDimsFromLevels(levels_));
+}
+
+void VmapPhysicalToLogicalMap::applyInplace(std::vector<Tensor>& physical_tensors) const {
+  for (int64_t idx = 0; idx < physical_tensors.size(); ++idx) {
+    physical_tensors[idx] = apply(physical_tensors[idx]);
+  }
+}
+
 } // namespace at
```

aten/src/ATen/VmapTransforms.h

Lines changed: 34 additions & 14 deletions
```diff
@@ -79,6 +79,10 @@ struct TORCH_API BroadcastingVmapTransform {
   static VmapPhysicalViewVec logicalToPhysical(TensorList logical_tensors);
 };
 
+// Forward declared, if you're reading this file head to toe, don't worry about
+// it yet.
+struct VmapPhysicalToLogicalMap;
+
 // NOTE: [What is a VmapPhysicalView?]
 // VmapPhysicalView represents a physical view on a Tensor.
 //
@@ -115,24 +119,14 @@ struct TORCH_API VmapPhysicalView {
   VmapDimVector getPhysicalDims(IntArrayRef logical_dims) const;
   int64_t getPhysicalDim(int64_t logical_dim) const;
 
+  // Returns a VmapPhysicalToLogicalMap object. This can be used for
+  // mapping a physical tensor to a new logical tensor (BatchedTensor)
+  VmapPhysicalToLogicalMap getPhysicalToLogicalMap() const;
+
   // Maps a logical shape to a physical shape by pre-pending the batch
   // sizes to the logical shape.
   VmapDimVector getPhysicalShape(IntArrayRef logical_shape) const;
 
-  // Maps a physical tensor to a new logical tensor (BatchedTensor),
-  // using the mapping info stored in this VmapPhysicalView.
-  // Assumes that all of the "batch dimensions" are at the front
-  // of the physical tensor.
-  Tensor newLogicalFromPhysical(const Tensor& physical) const;
-
-  // Given a vector of physical tensors,
-  // 1. maps each tensor to a new logical tensor using the mapping info stored
-  //    in this VmapPhysicalView. Assumes that all of the "batch dimensions"
-  //    are at the front of the physical tensors.
-  // 2. stores the new logical tensors back into the passed-in vector. This is
-  //    to avoid additional dynamic allocations.
-  void makeLogicalFromPhysicalListInplace(std::vector<Tensor>& physical_tensors) const;
-
   int64_t numBatchDims() const;
 
  private:
@@ -142,5 +136,31 @@ struct TORCH_API VmapPhysicalView {
   Tensor tensor_;
 };
 
+// Convenience struct used for mapping a physical tensor (a non-BatchedTensor)
+// to a logical one (BatchedTensor). It holds some levels that are used to do the
+// mapping and assumes that the batch dimensions in the physical tensor all
+// occur at the front of the tensor.
+struct TORCH_API VmapPhysicalToLogicalMap {
+  VmapPhysicalToLogicalMap(std::bitset<kVmapNumLevels> levels): levels_(levels) {}
+
+  // Maps a physical tensor to a new logical tensor (BatchedTensor).
+  // Assumes that all of the "batch dimensions" are at the front
+  // of the physical tensor. For example, given:
+  // - x = rank-4 Tensor with size 2, 3, 5, 7
+  // - levels = (2, 4)
+  // Returns:
+  // - BatchedTensor(x, bdims=[(dim=0,lvl=2), (dim=1, lvl=4)])
+  Tensor apply(const Tensor& physical_tensor) const;
+
+  // Given a vector of physical tensors,
+  // 1. maps each tensor to a new logical tensor. Assumes that all of the
+  //    "batch dimensions" are at the front of the physical tensors.
+  // 2. stores the new logical tensors back into the passed-in vector. This is
+  //    to avoid additional dynamic allocations.
+  void applyInplace(std::vector<Tensor>& physical_tensors) const;
+
+  std::bitset<kVmapNumLevels> levels_;
+};
+
 
 } // namespace at
```
