@@ -290,219 +290,5 @@ Tensor dequantize_tensor_per_channel_float_qparams(
   return rtensor;
 }
 
-#ifdef USE_FBGEMM
-// Note: quantize_val is only explicitly used in tests outside of this file
-template <typename T>
-T quantize_val(double scale, int64_t zero_point, float value) {
-  // Internally, fbgemm::Quantize uses std::nearbyint.
-  // std::nearbyint results in the nearest integer value according to the
-  // current rounding mode, and the default rounding mode rounds to even in
-  // half-way cases on most popular processor architectures like x86 and ARM.
-  // This is typically faster than alternatives like std::round, which rounds
-  // half-way cases away from zero, and can be consistent with SIMD
-  // implementations, for example on x86 using _mm512_cvtps_epi32 or
-  // _mm512_round_ps with the _MM_FROUND_CUR_DIRECTION option, which also
-  // follow the current rounding mode.
-  int32_t qvalue;
-  qvalue = fbgemm::Quantize<typename T::underlying, false /*LEGACY*/>(
-      value,
-      static_cast<int32_t>(zero_point),
-      static_cast<float>(scale),
-      /*result_precision=*/CHAR_BIT * sizeof(typename T::underlying));
-  return static_cast<T>(qvalue);
-}
-
-template <typename T, int precision>
-void quantize_vec(
-    double scale,
-    int64_t zero_point,
-    const float* src,
-    T* dst,
-    size_t count) {
-  fbgemm::Quantize<typename T::underlying, false /*LEGACY*/>(
-      src,
-      (typename T::underlying*)dst,
-      count,
-      fbgemm::TensorQuantizationParams{
-          (float)scale, (int32_t)zero_point, precision});
-}
-
-template <typename T>
-inline float dequantize_val(double scale, int64_t zero_point, T value) {
-  fbgemm::TensorQuantizationParams qparams;
-  qparams.scale = static_cast<float>(scale);
-  qparams.zero_point = static_cast<int32_t>(zero_point);
-  return fbgemm::Dequantize<typename T::underlying>(value.val_, qparams);
-}
-#else // USE_FBGEMM
-
-#if defined(__ANDROID__) && !defined(__NDK_MAJOR__)
-template <class T>
-inline float Round(const float x) {
-  return ::nearbyintf(x);
-}
-inline double Round(const double x) {
-  return ::nearbyint(x);
-}
-#else
-template <class T>
-inline T Round(const T x) {
-  return std::nearbyint(x);
-}
-#endif
-
-template <typename T>
-T quantize_val(double scale, int64_t zero_point, float value) {
-  // std::nearbyint results in the nearest integer value according to the
-  // current rounding mode, and the default rounding mode rounds to even in
-  // half-way cases on most popular processor architectures like x86 and ARM.
-  // This is typically faster than alternatives like std::round, which rounds
-  // half-way cases away from zero, and can be consistent with SIMD
-  // implementations, for example on x86 using _mm512_cvtps_epi32 or
-  // _mm512_round_ps with the _MM_FROUND_CUR_DIRECTION option, which also
-  // follow the current rounding mode.
-  int64_t qvalue;
-  constexpr int64_t qmin = std::numeric_limits<typename T::underlying>::min();
-  constexpr int64_t qmax = std::numeric_limits<typename T::underlying>::max();
-  float inv_scale = 1.0f / static_cast<float>(scale);
-  qvalue = static_cast<int64_t>(zero_point + Round(value * inv_scale));
-  qvalue = std::max<int64_t>(qvalue, qmin);
-  qvalue = std::min<int64_t>(qvalue, qmax);
-  return static_cast<T>(qvalue);
-}
-
-uint8_t quantize_val_arm(
-    const float scale,
-    const int32_t zero_point,
-    const float value) {
-  const int32_t qmin = std::numeric_limits<uint8_t>::min();
-  const int32_t qmax = std::numeric_limits<uint8_t>::max();
-  float inv_scale = 1.0f / scale;
-  auto r = zero_point + static_cast<int32_t>(Round(value * inv_scale));
-  r = std::max(r, qmin);
-  r = std::min(r, qmax);
-  return static_cast<uint8_t>(r);
-}
-
-template <typename T, int precision>
-void quantize_vec(
-    double scale,
-    int64_t zero_point,
-    const float* src,
-    T* dst,
-    size_t count) {
-  checkZeroPoint<typename T::underlying>("quantize_vec", zero_point);
-  for (int64_t i = 0; i < count; ++i) {
-    dst[i] = quantize_val<T>(scale, zero_point, src[i]);
-  }
-}
-
-template <typename T>
-TORCH_API float dequantize_val(double scale, int64_t zero_point, T value) {
-  // We need to convert the qint8 value to float to ensure the subtraction
-  // subexpression returns a float
-  return (static_cast<float>(value.val_) - zero_point) * scale;
-}
-#endif // USE_FBGEMM
-
-/*
- * Quantize value based on the following equation
- * Xq = Round(Xf * inv_scale + zero_point)
- * where zero_point is a float.
- *
- * Note: For the case of embedding quantization we will set zero_point
- * to (-Xmin/scale), where Xmin is the min value in the input tensor row.
- */
-int quantize_val_float_qparams(float scale, float zero_point, float value, int qmin, int qmax) {
-  int qvalue;
-
-  float inv_scale = scale == 0 ? 1.0f : 1.0f / scale;
-  qvalue = lrintf(value * inv_scale + zero_point);
-  qvalue = std::max(qmin, std::min(qvalue, qmax));
-  return qvalue;
-}
-
-template <typename SRC_T, typename DST_T>
-DST_T requantize_val(
-    double src_scale,
-    int64_t src_zero_point,
-    double dst_scale,
-    int64_t dst_zero_point,
-    SRC_T src) {
-  const auto dq = dequantize_val<SRC_T>(src_scale, src_zero_point, src);
-  return quantize_val<DST_T>(dst_scale, dst_zero_point, dq);
-}
-
-template <typename DST_T>
-DST_T requantize_from_int(double multiplier, int64_t zero_point, int64_t src) {
-  int64_t quantize_down =
-      zero_point + lrintf(src * static_cast<float>(multiplier));
-  int32_t min = std::numeric_limits<typename DST_T::underlying>::min();
-  int32_t max = std::numeric_limits<typename DST_T::underlying>::max();
-  return static_cast<DST_T>(
-      std::min<int64_t>(std::max<int64_t>(quantize_down, min), max));
-}
-
-template TORCH_API qint8
-quantize_val<qint8>(double scale, int64_t zero_point, float value);
-template TORCH_API quint8
-quantize_val<quint8>(double scale, int64_t zero_point, float value);
-template TORCH_API qint32
-quantize_val<qint32>(double scale, int64_t zero_point, float value);
-template TORCH_API void quantize_vec<c10::qint8>(
-    double scale,
-    int64_t zero_point,
-    const float* src,
-    c10::qint8* dst,
-    size_t count);
-template TORCH_API void quantize_vec<c10::quint8>(
-    double scale,
-    int64_t zero_point,
-    const float* src,
-    c10::quint8* dst,
-    size_t count);
-template TORCH_API void quantize_vec<c10::qint32, 32>(
-    double scale,
-    int64_t zero_point,
-    const float* src,
-    c10::qint32* dst,
-    size_t count);
-
-template TORCH_API float dequantize_val<qint8>(
-    double scale,
-    int64_t zero_point,
-    qint8 value);
-template TORCH_API float dequantize_val<quint8>(
-    double scale,
-    int64_t zero_point,
-    quint8 value);
-template TORCH_API float dequantize_val<qint32>(
-    double scale,
-    int64_t zero_point,
-    qint32 value);
-
-template TORCH_API qint8
-requantize_val<qint8, qint8>(double, int64_t, double, int64_t, qint8);
-template TORCH_API quint8
-requantize_val<qint8, quint8>(double, int64_t, double, int64_t, qint8);
-template TORCH_API qint32
-requantize_val<qint8, qint32>(double, int64_t, double, int64_t, qint8);
-template TORCH_API qint8
-requantize_val<quint8, qint8>(double, int64_t, double, int64_t, quint8);
-template TORCH_API quint8
-requantize_val<quint8, quint8>(double, int64_t, double, int64_t, quint8);
-template TORCH_API qint32
-requantize_val<quint8, qint32>(double, int64_t, double, int64_t, quint8);
-template TORCH_API qint8
-requantize_val<qint32, qint8>(double, int64_t, double, int64_t, qint32);
-template TORCH_API quint8
-requantize_val<qint32, quint8>(double, int64_t, double, int64_t, qint32);
-template TORCH_API qint32
-requantize_val<qint32, qint32>(double, int64_t, double, int64_t, qint32);
-
-template TORCH_API qint8 requantize_from_int<qint8>(double, int64_t, int64_t);
-template TORCH_API quint8
-requantize_from_int<quint8>(double, int64_t, int64_t);
-template TORCH_API qint32
-requantize_from_int<qint32>(double, int64_t, int64_t);
-
 } // namespace native
 } // namespace at
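
A side note on the rounding comments in the removed code: below is a minimal, standalone C++ sketch (not part of the diff) of the behavior they describe. Under the default FE_TONEAREST mode, std::nearbyint rounds half-way cases to the nearest even integer, while std::round always rounds them away from zero:

#include <cfenv>
#include <cmath>
#include <cstdio>

int main() {
  std::fesetround(FE_TONEAREST); // the default rounding mode on x86 and ARM
  for (float x : {0.5f, 1.5f, 2.5f, -0.5f, -1.5f}) {
    std::printf("x = %4.1f  nearbyint = %4.1f  round = %4.1f\n",
                x, std::nearbyintf(x), std::roundf(x));
  }
  // nearbyint: 0, 2, 2, -0, -2  (ties to even, matching SIMD conversions)
  // round:     1, 2, 3, -1, -2  (ties away from zero)
  return 0;
}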
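
For reference, the removed non-FBGEMM fallback is plain affine quantization: q = clamp(zero_point + nearbyint(x / scale), qmin, qmax) on the way in, x = (q - zero_point) * scale on the way out, and requantize_val is exactly that round trip. A minimal sketch assuming bare uint8 values instead of the c10::quint8 wrapper types (quantize_u8/dequantize_u8 are illustrative names, not PyTorch APIs):

#include <algorithm>
#include <cmath>
#include <cstdint>
#include <cstdio>

// q = clamp(zero_point + nearbyint(x / scale), 0, 255), the same formula
// quantize_val/quantize_val_arm use in the diff, minus the wrapper types.
uint8_t quantize_u8(float scale, int32_t zero_point, float value) {
  int32_t q = zero_point + static_cast<int32_t>(std::nearbyintf(value / scale));
  return static_cast<uint8_t>(std::min(std::max(q, 0), 255));
}

// x = (q - zero_point) * scale, as in the fallback dequantize_val.
float dequantize_u8(float scale, int32_t zero_point, uint8_t q) {
  return (static_cast<float>(q) - zero_point) * scale;
}

int main() {
  uint8_t q = quantize_u8(/*scale=*/0.01f, /*zero_point=*/128, 0.42f);
  std::printf("q = %d, back to float = %f\n", q, dequantize_u8(0.01f, 128, q));
  // requantize_val is this round trip: dequantize with the source qparams,
  // then quantize with the destination qparams.
  uint8_t q2 = quantize_u8(0.02f, 0, dequantize_u8(0.01f, 128, q));
  std::printf("requantized q2 = %d\n", q2); // q = 170, q2 = 21
  return 0;
}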
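
Likewise, the removed quantize_val_float_qparams differs only in accepting a floating-point zero_point, which row-wise embedding quantization sets to -Xmin/scale so that the row minimum maps to qmin. A sketch of that formula with a hypothetical name and made-up 4-bit example qparams:

#include <algorithm>
#include <cmath>
#include <cstdio>

// Xq = clamp(Round(Xf * inv_scale + zero_point), qmin, qmax) with a float
// zero_point; quantize_float_zp is an illustrative name, not the library's.
int quantize_float_zp(float scale, float zero_point, float value,
                      int qmin, int qmax) {
  float inv_scale = scale == 0.f ? 1.0f : 1.0f / scale;
  int qvalue = static_cast<int>(std::lrintf(value * inv_scale + zero_point));
  return std::max(qmin, std::min(qvalue, qmax));
}

int main() {
  // Example row with min -1.0 and max 3.0, quantized to 4 bits ([0, 15]):
  // scale = (3.0 - (-1.0)) / 15, zero_point = -Xmin / scale = 3.75.
  float scale = 4.0f / 15.0f;
  float zero_point = 1.0f / scale;
  for (float x : {-1.0f, 0.0f, 3.0f}) {
    std::printf("x = %4.1f -> q = %d\n", x,
                quantize_float_zp(scale, zero_point, x, 0, 15));
  }
  // Prints 0, 4, 15: the row endpoints land exactly on qmin and qmax.
  return 0;
}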