1 | 1 | // Copyright © 2022 Apple Inc. |
2 | 2 |
| 3 | +#pragma once |
| 4 | + |
| 5 | +#include <ATen/mps/MPSAllocatorInterface.h> |
3 | 6 | #include <ATen/mps/MPSStream.h> |
4 | 7 | #include <cstdio> |
5 | 8 | #include <mutex> |
9 | 12 |
10 | 13 | // This implementation is based on CUDACachingAllocator.
11 | 14 | // It utilizes Metal Heaps to improve buffer allocation performance.
| 15 | +// Do not include this header. Use MPSAllocatorInterface.h instead. |
12 | 16 | // TODO: Unify the logic with CUDACachingAllocator and remove redundant code. |
13 | 17 | namespace at { |
14 | 18 | namespace mps { |
15 | | - |
16 | | -class IMpsAllocatorCallback { |
17 | | - public: |
18 | | - enum class EventType { |
19 | | - ALLOCATED, // buffer got allocated to be used immediately |
20 | | - RECYCLED, // buffer pulled from free list to be reused |
21 | | - FREED, // buffer put to free list for future recycling |
22 | | - RELEASED, // buffer memory released |
23 | | - }; |
24 | | - virtual ~IMpsAllocatorCallback() = default; |
25 | | - virtual void executeMPSAllocatorCallback(void* ptr, EventType event) = 0; |
26 | | -}; |
27 | | - |
28 | | -// MPS allocator will execute every registered callback when a block of memory is freed. |
29 | | -C10_DECLARE_REGISTRY(MPSAllocatorCallbacksRegistry, IMpsAllocatorCallback); |
30 | | -#define REGISTER_MPS_ALLOCATOR_CALLBACK(name, ...) \ |
31 | | - C10_REGISTER_CLASS(MPSAllocatorCallbacksRegistry, name, __VA_ARGS__); |
32 | | - |
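The callback interface and registry removed above now live in MPSAllocatorInterface.h, but their usage is unchanged. A minimal sketch of registering a callback against that interface (the `LoggingCallback` struct and the `logger` registry key are hypothetical; the interface, enum, and macro names come from the removed block):

```objc
#include <ATen/mps/MPSAllocatorInterface.h>
#include <cstdio>

namespace {
// Hypothetical callback: the MPS allocator executes every registered
// callback when a block of memory is freed.
struct LoggingCallback : public at::mps::IMpsAllocatorCallback {
  void executeMPSAllocatorCallback(void* ptr, EventType event) override {
    if (event == EventType::FREED) {
      std::fprintf(stderr, "MPS buffer %p moved to the free list\n", ptr);
    }
  }
};
} // anonymous namespace

// registers LoggingCallback under the key "logger" in the registry
REGISTER_MPS_ALLOCATOR_CALLBACK(logger, LoggingCallback);
```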
33 | 19 | namespace HeapAllocator { |
34 | 20 |
35 | 21 | #define MB(x) round_page(x * 1048576UL) |
@@ -263,27 +249,44 @@ class MPSHeapAllocatorImpl |
263 | 249 |
264 | 250 | // interface exposed to at::Allocator |
265 | 251 | id<MTLBuffer> malloc(size_t size, uint32_t usage); |
| 252 | + // frees a buffer and returns it to the buffer pool
266 | 253 | void free(void* ptr); |
| 254 | + // releases all the cached buffers and their associated heaps |
267 | 255 | void emptyCache(); |
268 | | - // interface exposed to internal MPS operations |
| 256 | + // returns true if buffer was allocated from the shared pool |
269 | 257 | bool isSharedBuffer(void* ptr); |
270 | | - ssize_t getRequestedBufferSize(void* ptr); |
| 258 | + // get the requested unaligned size of an MTLBuffer |
| 259 | + ssize_t getUnalignedBufferSize(void* ptr); |
| 260 | + // set the shape of a base tensor from a view tensor |
271 | 261 | void setBufferShape(void* ptr, const IntArrayRef& shape); |
| 262 | + // retrieve the shape of a base tensor from a view tensor |
272 | 263 | IntArrayRef getBufferShape(void* ptr); |
| 264 | + // allocate a buffer from a specialized pool to import CPU scalars into the GPU
273 | 265 | id<MTLBuffer> allocScalarBufferWithValue(void* value, size_t size); |
274 | 266 | // indicates how far (in megabytes) the current total allocations are from the
275 | 267 | // low watermark limit, which is used to detect whether we're under memory pressure.
276 | 268 | // This returns zero once we've reached the low watermark limit.
277 | 269 | ssize_t getLowWatermarkValue(); |
278 | | - |
279 | | - bool getDebugVerbosity() const { return m_debug_verbosity; } |
280 | | - size_t getMaxTotalAllowedSize() const { return m_max_total_allowed_size; } |
| 270 | + // (see m_low_watermark_ratio for description) |
| 271 | + void setLowWatermarkRatio(double ratio); |
| 272 | + // (see m_high_watermark_ratio for description) |
| 273 | + void setHighWatermarkRatio(double ratio); |
| 274 | + // (see m_low_watermark_limit for description) |
281 | 275 | size_t getLowWatermarkLimit() const { return m_low_watermark_limit; } |
| 276 | + // (see m_max_total_allowed_size for description) |
| 277 | + size_t getHighWatermarkLimit() const { return m_max_total_allowed_size; } |
| 278 | + // (see m_total_allocated_memory for description) |
| 279 | + size_t getTotalAllocatedMemory() const { return m_total_allocated_memory; }
| 280 | + // (see enum DebugVerbosity for description) |
| 281 | + uint32_t getDebugVerbosity() const { return m_debug_verbosity; } |
| 282 | + // returns the device that we allocate from |
282 | 283 | inline id<MTLDevice> Device() const { return m_device; } |
283 | 284 |
284 | 285 | private: |
285 | 286 | // (see m_high_watermark_ratio for description) |
286 | 287 | constexpr static double default_high_watermark_ratio = 1.7; |
| 288 | + // we set the allowed upper bound to twice the size of recommendedMaxWorkingSetSize. |
| 289 | + constexpr static double default_high_watermark_upper_bound = 2.0; |
287 | 290 | // (see m_low_watermark_ratio for description) |
288 | 291 | // on unified memory, we could allocate beyond the recommendedMaxWorkingSetSize |
289 | 292 | constexpr static double default_low_watermark_ratio_unified = 1.4; |
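For context, a minimal sketch of how these ratios could translate into byte limits (an assumption about the implementation, not the verbatim code; `recommendedMaxWorkingSetSize` is the real MTLDevice property the comments reference):

```objc
#import <Metal/Metal.h>

// Hypothetical derivation of the watermark limits from the device's
// recommended working set size, scaled by the configured ratios.
static void deriveWatermarkLimits(id<MTLDevice> device,
                                  double high_ratio,  // e.g. 1.7
                                  double low_ratio,   // e.g. 1.4 on unified memory
                                  size_t& max_total_allowed_size,
                                  size_t& low_watermark_limit) {
  const double max_ws = (double)[device recommendedMaxWorkingSetSize];
  // high watermark: upper bound on total allocations
  max_total_allowed_size = (size_t)(high_ratio * max_ws);
  // low watermark: crossing it signals memory pressure to the allocator
  low_watermark_limit = (size_t)(low_ratio * max_ws);
}
```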
@@ -375,17 +378,5 @@ class MPSHeapAllocatorImpl |
375 | 378 | }; |
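A minimal usage sketch of the `at::Allocator`-facing interface declared above (the usage-flag value and the direct MTLBuffer-to-`void*` mapping are placeholders for illustration, not the real contract):

```objc
#include <ATen/mps/MPSAllocator.h>

// Hypothetical round trip through MPSHeapAllocatorImpl's public interface.
void allocatorRoundTrip(at::mps::HeapAllocator::MPSHeapAllocatorImpl& allocator) {
  id<MTLBuffer> buffer = allocator.malloc(1024, /*usage=*/0); // request 1 KiB
  void* ptr = (void*)buffer; // illustrative mapping only

  if (allocator.isSharedBuffer(ptr)) {
    // the original, unaligned request size (here: 1024)
    ssize_t requested = allocator.getUnalignedBufferSize(ptr);
    (void)requested;
  }
  allocator.free(ptr);     // buffer returns to its pool for recycling
  allocator.emptyCache();  // release cached buffers and their heaps
}
```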
376 | 379 |
377 | 380 | } // namespace HeapAllocator |
378 | | - |
379 | | -// interface exposed to internal MPS operations |
380 | | - |
381 | | -// get the requested non-aligned size of an MTL buffer |
382 | | -ssize_t get_requested_buffer_size(void* ptr); |
383 | | -// retrieve the shape of a base tensor from a view tensor |
384 | | -IntArrayRef get_buffer_shape(void* ptr); |
385 | | -// set the shape of a base tensor from a view tensor |
386 | | -void set_buffer_shape(void* ptr, const IntArrayRef& shape); |
387 | | -// allocate a buffer from a specialized pool to import CPU scalars into GPU |
388 | | -DataPtr allocate_scalar_buffer(void* value, size_t size); |
389 | | - |
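The free functions removed here map onto the member functions added earlier in this diff; a hypothetical migration of a call site (the allocator reference and the function name are illustrative):

```objc
#include <ATen/mps/MPSAllocator.h>

// Hypothetical call site, updated from the removed free functions to the
// member functions on MPSHeapAllocatorImpl.
void queryBufferMetadata(at::mps::HeapAllocator::MPSHeapAllocatorImpl& allocator,
                         void* ptr) {
  ssize_t size = allocator.getUnalignedBufferSize(ptr);   // was get_requested_buffer_size(ptr)
  at::IntArrayRef shape = allocator.getBufferShape(ptr);  // was get_buffer_shape(ptr)
  allocator.setBufferShape(ptr, shape);                   // was set_buffer_shape(ptr, shape)
  (void)size;
}
```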
390 | 381 | } // namespace mps |
391 | 382 | } // namespace at |