Skip to content

Commit 2f85a57

Browse files
committed
Update on "Implement correction argument in torch.masked.{std,var}"
This makes the signature of `torch.masked.std` and `var` more consistent with the global namespace variant and also updates the sample inputs to repurpose the existing `sample_inputs_std_var` inputs which fully exercise the `correction` argument. [ghstack-poisoned]
2 parents (d5b9c6b + 901ab87); commit 2f85a57

299 files changed

Lines changed: 5445 additions & 2391 deletions

File tree

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

.github/ci_commit_pins/vision.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
790f1cdcea0359619adfc9ec37b91883748d1854
1+
842e178a488722720b6eb1e9cb508439e8e1ecd9

.github/requirements/conda-env-Linux-X64

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
cffi=1.15.1
2-
cmake=3.22.1
2+
cmake=3.22.*
33
mkl=2022.1.0
44
mkl-include=2022.1.0
55
ninja=1.10.2

.github/requirements/conda-env-macOS-ARM64

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
numpy=1.22.3
22
pyyaml=6.0
33
setuptools=61.2.0
4-
cmake=3.22.1
4+
cmake=3.22.*
55
cffi=1.15.1
66
typing_extensions=4.3.0
77
dataclasses=0.8

.github/requirements/conda-env-macOS-X64

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@ mkl-include=2021.2.0
33
numpy=1.18.5
44
pyyaml=5.3
55
setuptools=46.0.0
6-
cmake=3.22.1
6+
cmake=3.22.*
77
cffi=1.15.1
88
typing_extensions=4.3.0
99
dataclasses=0.8

.jenkins/pytorch/test.sh

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -252,11 +252,8 @@ test_inductor_distributed() {
252252

253253
test_inductor() {
254254
python tools/dynamo/verify_dynamo.py
255-
python test/run_test.py --include test_modules test_ops --verbose
255+
python test/run_test.py --include test_modules test_ops test_ops_gradients --verbose
256256
PYTORCH_TEST_WITH_INDUCTOR=0 python test/run_test.py --include inductor/test_torchinductor --include inductor/test_torchinductor_opinfo --verbose
257-
# TODO: investigate "RuntimeError: CUDA driver API confirmed a leak"
258-
# seen in test_ops_gradients.py
259-
# pytest test/test_ops_gradients.py --verbose -k "not _complex and not test_inplace_grad_acos_cuda_float64"
260257
}
261258

262259
test_inductor_huggingface() {

.lintrunner.toml

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -101,13 +101,22 @@ exclude_patterns = [
101101
'torch/csrc/**',
102102
'torch/_dynamo/**/*.py',
103103
'torch/_inductor/**/*.py',
104+
'torch/_functorch/aot_autograd.py',
105+
'torch/_functorch/benchmark_utils.py',
106+
'torch/_functorch/compile_utils.py',
107+
'torch/_functorch/compilers.py',
108+
'torch/_functorch/eager_transforms.py',
109+
'torch/_functorch/fx_minifier.py',
110+
'torch/_functorch/partitioners.py',
111+
'torch/_functorch/make_functional.py',
112+
'torch/_functorch/top_operators_github_usage.py',
113+
'torch/_functorch/vmap.py',
104114
'torch/distributed/elastic/agent/server/api.py',
105115
'torch/testing/_internal/**',
106116
'torch/distributed/fsdp/fully_sharded_data_parallel.py',
107117
'torch/distributed/distributed_c10d.py',
108118
# TODO(suo): these exclusions were added just to get lint clean on master.
109119
# Follow up to do more target suppressions and remove them.
110-
'torch/distributed/fsdp/flatten_params_wrapper.py',
111120
'torch/ao/quantization/fx/convert.py',
112121
'torch/ao/quantization/_dbr/function_fusion.py',
113122
'test/test_datapipe.py',

aten/src/ATen/CPUGeneratorImpl.cpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -127,8 +127,8 @@ void CPUGeneratorImpl::set_state(const c10::TensorImpl& new_state) {
127127
using detail::CPUGeneratorImplState;
128128
using detail::CPUGeneratorImplStateLegacy;
129129

130-
static_assert(std::is_pod<CPUGeneratorImplStateLegacy>::value, "CPUGeneratorImplStateLegacy is not a PODType");
131-
static_assert(std::is_pod<CPUGeneratorImplState>::value, "CPUGeneratorImplState is not a PODType");
130+
static_assert(std::is_standard_layout<CPUGeneratorImplStateLegacy>::value, "CPUGeneratorImplStateLegacy is not a PODType");
131+
static_assert(std::is_standard_layout<CPUGeneratorImplState>::value, "CPUGeneratorImplState is not a PODType");
132132

133133
static const size_t size_legacy = sizeof(CPUGeneratorImplStateLegacy);
134134
static const size_t size_current = sizeof(CPUGeneratorImplState);
@@ -207,7 +207,7 @@ c10::intrusive_ptr<c10::TensorImpl> CPUGeneratorImpl::get_state() const {
207207
using detail::CPUGeneratorImplState;
208208

209209
static const size_t size = sizeof(CPUGeneratorImplState);
210-
static_assert(std::is_pod<CPUGeneratorImplState>::value, "CPUGeneratorImplState is not a PODType");
210+
static_assert(std::is_standard_layout<CPUGeneratorImplState>::value, "CPUGeneratorImplState is not a PODType");
211211

212212
auto state_tensor = at::detail::empty_cpu({(int64_t)size}, ScalarType::Byte, c10::nullopt, c10::nullopt, c10::nullopt, c10::nullopt);
213213
auto rng_state = state_tensor.data_ptr();

aten/src/ATen/cpu/vec/vec256/vec256.h

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -256,8 +256,7 @@ inline Vectorized<int16_t> flip(const Vectorized<int16_t> & v) {
256256
return _mm256_permute2x128_si256(reversed, reversed, 1);
257257
}
258258

259-
template<>
260-
inline Vectorized<int8_t> flip(const Vectorized<int8_t> & v) {
259+
inline __m256i flip8(const __m256i & v) {
261260
const __m256i mask_int8 = _mm256_set_epi8(
262261
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
263262
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
@@ -266,6 +265,15 @@ inline Vectorized<int8_t> flip(const Vectorized<int8_t> & v) {
266265
return _mm256_permute2x128_si256(reversed, reversed, 1);
267266
}
268267

268+
template<>
269+
inline Vectorized<int8_t> flip(const Vectorized<int8_t> & v) {
270+
return flip8(v);
271+
}
272+
273+
template<>
274+
inline Vectorized<uint8_t> flip(const Vectorized<uint8_t> & v) {
275+
return flip8(v);
276+
}
269277

270278
#endif // (defined(CPU_CAPABILITY_AVX2) && !defined(_MSC_VER)
271279

aten/src/ATen/cpu/vec/vec256/vec256_complex_double.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -185,7 +185,7 @@ template <> class Vectorized<c10::complex<double>> {
185185
return _mm256_div_pd(log(), log10_);
186186
}
187187
Vectorized<c10::complex<double>> log1p() const {
188-
AT_ERROR("not supported for complex numbers");
188+
return map(std::log1p);
189189
}
190190
Vectorized<c10::complex<double>> asin() const {
191191
// asin(x)

aten/src/ATen/cpu/vec/vec256/vec256_complex_float.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -221,7 +221,7 @@ template <> class Vectorized<c10::complex<float>> {
221221
return _mm256_div_ps(log(), log10_);
222222
}
223223
Vectorized<c10::complex<float>> log1p() const {
224-
AT_ERROR("not supported for complex numbers");
224+
return map(std::log1p);
225225
}
226226
Vectorized<c10::complex<float>> asin() const {
227227
// asin(x)

0 commit comments

Comments (0)