```diff
@@ -288,7 +288,7 @@ class TORCH_API Context {
   int benchmark_limit_cudnn = 10;
   bool allow_tf32_cudnn = true;
   bool allow_fp16_reduction_cublas = true;
-  bool allow_bf16_reduction_cublas = false;
+  bool allow_bf16_reduction_cublas = true;
   bool enabled_mkldnn = true;
   at::LinalgBackend linalg_preferred_backend = at::LinalgBackend::Default;
 #ifdef C10_MOBILE
```
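At the Python level this Context default is exposed as `torch.backends.cuda.matmul.allow_bf16_reduced_precision_reduction`, the flag the documentation changes below describe. A minimal sketch of inspecting and overriding it, assuming a CUDA-enabled PyTorch build:

```python
# Sketch: read and override the BF16 reduced-precision reduction flag.
# Assumes a CUDA-enabled PyTorch build; the printed default depends on which
# side of the Context change above your build includes.
import torch

# Current process-wide setting (backed by allow_bf16_reduction_cublas above).
print(torch.backends.cuda.matmul.allow_bf16_reduced_precision_reduction)

# Opt out of reduced-precision reductions for BF16 GEMMs in this process.
torch.backends.cuda.matmul.allow_bf16_reduced_precision_reduction = False
```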
```diff
@@ -184,11 +184,12 @@ A similar flag (as above) exists for BFloat16 GEMMs. Note that this switch is
 set to `False` by default for BF16 as we have observed numerical instability in
 PyTorch CI tests (e.g., test/test_matmul_cuda.py).
 
-If reduced precision reductions are desired, users can disable reduced precision reductions in bf16 GEMMs with:
+If reduced precision reductions are not desired, users can disable reduced
+precision reductions in bf16 GEMMs with:
 
 .. code:: python
 
-    torch.backends.cuda.matmul.allow_bf16_reduced_precision_reduction = True
+    torch.backends.cuda.matmul.allow_bf16_reduced_precision_reduction = False
 
 To toggle the reduced precision reduction flags in C++, one can do
 
```
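The C++ snippet referenced by that last context line falls outside the hunk shown. As background for what the flag controls, a rough, illustrative Python check (not part of this diff) compares a BF16 GEMM against an fp32 reference with the reductions allowed and disallowed; it assumes a CUDA device, and the size of any difference depends on the GPU and the matrix shapes:

```python
# Illustrative only (not from this PR): measure how much a BF16 GEMM deviates
# from an fp32 reference with reduced-precision reductions allowed vs. not.
import torch

def bf16_matmul_max_error(allow_reduced: bool) -> float:
    torch.backends.cuda.matmul.allow_bf16_reduced_precision_reduction = allow_reduced
    torch.manual_seed(0)
    a = torch.randn(4096, 4096, device="cuda")
    b = torch.randn(4096, 4096, device="cuda")
    ref = a @ b                                   # fp32 reference GEMM
    out = (a.bfloat16() @ b.bfloat16()).float()   # BF16 GEMM under the flag
    return (out - ref).abs().max().item()

if torch.cuda.is_available():
    print("reduced-precision reductions allowed:   ", bf16_matmul_max_error(True))
    print("reduced-precision reductions disallowed:", bf16_matmul_max_error(False))
```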
```diff
@@ -104,9 +104,9 @@ Half-precision GEMM operations are typically done with intermediate accumulation
 If reduced-precision reductions are problematic, they can be turned off with
 ``torch.backends.cuda.matmul.allow_fp16_reduced_precision_reduction = False``
 
-A similar flag exists for BF16 GEMM operations and is turned off by default. If
-reduced-precision reductions are desired for BF16, they can be turned on with
-``torch.backends.cuda.matmul.allow_bf16_reduced_precision_reduction = True``
+A similar flag exists for BF16 GEMM operations and is turned on by default. If BF16
+reduced-precision reductions are problematic, they can be turned off with
+``torch.backends.cuda.matmul.allow_bf16_reduced_precision_reduction = False``
 
 For more information see :ref:`allow_fp16_reduced_precision_reduction<fp16reducedprecision>` and :ref:`allow_bf16_reduced_precision_reduction<bf16reducedprecision>`
 
```
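As a usage note (not part of the documentation being edited), code that is sensitive to accumulation precision can disable the BF16 reductions locally and restore the previous setting afterwards. A hypothetical helper, sketched under the assumption that toggling the flag mid-process is acceptable for the workload:

```python
# Hypothetical helper (not part of PyTorch): temporarily force full-precision
# reductions for BF16 GEMMs, restoring the previous setting on exit.
from contextlib import contextmanager
import torch

@contextmanager
def full_precision_bf16_reductions():
    matmul = torch.backends.cuda.matmul
    prev = matmul.allow_bf16_reduced_precision_reduction
    matmul.allow_bf16_reduced_precision_reduction = False
    try:
        yield
    finally:
        matmul.allow_bf16_reduced_precision_reduction = prev

# Usage (illustrative): run accuracy-sensitive GEMMs with full-precision reductions.
# with full_precision_bf16_reductions():
#     out = accuracy_sensitive_model(inputs)
```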