Skip to content

Commit cd4afd4

Browse files
committed
[BE] Make Vec256 header only library
Do it by removing extraneous header dependencies. None of the at::vec256 primitives depends on the notion of Tensor, therefore none of the headers that vec256 depends on should include <ATen/Tensor.h>. Implicitly test this by removing the c10 and tensor dependencies when building `vec256_test_all_types`. Split affine_quantizer into affine_quantizer_base (which contains methods operating on raw types) and affine_quantizer (which contains Tensor-specific methods).
1 parent 789f6f1 commit cd4afd4

11 files changed

Lines changed: 317 additions & 255 deletions

aten/src/ATen/cpu/vec256/vec256_base.h

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -21,8 +21,6 @@
2121
#include <bitset>
2222

2323
#include <ATen/cpu/vec256/intrinsics.h>
24-
#include <ATen/Utils.h>
25-
#include <ATen/native/Copy.h>
2624
#include <ATen/native/Math.h>
2725
#include <ATen/NumericUtils.h>
2826
#include <c10/util/C++17.h>

aten/src/ATen/cpu/vec256/vec256_qint.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55

66
#include <ATen/cpu/vec256/intrinsics.h>
77
#include <ATen/cpu/vec256/vec256_base.h>
8-
#include <ATen/native/quantized/affine_quantizer.h>
8+
#include <ATen/native/quantized/affine_quantizer_base.h>
99
#include <c10/util/qint32.h>
1010
#include <c10/util/qint8.h>
1111
#include <c10/util/quint8.h>

aten/src/ATen/native/Math.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,8 @@
66
#include <cfloat>
77
#include <limits>
88
#include <type_traits>
9+
#include <c10/util/BFloat16.h>
10+
#include <c10/util/Half.h>
911
#include <c10/util/MathConstants.h>
1012
#include <c10/util/math_compat.h>
1113

aten/src/ATen/native/cpu/RangeFactoriesKernel.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
#include <cmath>
22
#include <ATen/Config.h>
33
#include <ATen/Dispatch.h>
4+
#include <ATen/native/DispatchStub.h>
45

56
#include <ATen/AccumulateType.h>
67
#include <ATen/cpu/vec256/vec256.h>

aten/src/ATen/native/quantized/affine_quantizer.cpp

Lines changed: 0 additions & 214 deletions
Original file line numberDiff line numberDiff line change
@@ -290,219 +290,5 @@ Tensor dequantize_tensor_per_channel_float_qparams(
290290
return rtensor;
291291
}
292292

293-
#ifdef USE_FBGEMM
294-
// Note: quantize_val is only explicitly used in test outside of this file
295-
template <typename T>
296-
T quantize_val(double scale, int64_t zero_point, float value) {
297-
// Internally, fbgemm::Quantize uses std::nearbyint.
298-
// std::nearbyint results in nearest integer value according to the current
299-
// rounding mode and the default rounding mode is rounds to even in half-way
300-
// cases in most popular processor architectures like x86 and ARM. This is
301-
// typically faster than an alternatives like std::round that rounds half-way
302-
// cases away from zero, and can be consistent with SIMD implementations for
303-
// example in x86 using _mm512_cvtps_epi32 or mm512_round_ps with
304-
// _MM_FROUND_CUR_DIRECTION option that also follow the current rounding mode.
305-
int32_t qvalue;
306-
qvalue = fbgemm::Quantize<typename T::underlying, false /*LEGACY*/>(
307-
value,
308-
static_cast<int32_t>(zero_point),
309-
static_cast<float>(scale),
310-
/*result_precision=*/CHAR_BIT * sizeof(typename T::underlying));
311-
return static_cast<T>(qvalue);
312-
}
313-
314-
template <typename T, int precision>
315-
void quantize_vec(
316-
double scale,
317-
int64_t zero_point,
318-
const float* src,
319-
T* dst,
320-
size_t count) {
321-
fbgemm::Quantize<typename T::underlying, false /*LEGACY*/>(
322-
src,
323-
(typename T::underlying*)dst,
324-
count,
325-
fbgemm::TensorQuantizationParams{
326-
(float)scale, (int32_t)zero_point, precision});
327-
}
328-
329-
template <typename T>
330-
inline float dequantize_val(double scale, int64_t zero_point, T value) {
331-
fbgemm::TensorQuantizationParams qparams;
332-
qparams.scale = static_cast<float>(scale);
333-
qparams.zero_point = static_cast<int32_t>(zero_point);
334-
return fbgemm::Dequantize<typename T::underlying>(value.val_, qparams);
335-
}
336-
#else // USE_FBGEMM
337-
338-
#if defined(__ANDROID__) && !defined(__NDK_MAJOR__)
339-
template <class T>
340-
inline float Round(const float x) {
341-
return ::nearbyintf(x);
342-
}
343-
inline double Round(const double x) {
344-
return ::nearbyint(x);
345-
}
346-
#else
347-
template <class T>
348-
inline T Round(const T x) {
349-
return std::nearbyint(x);
350-
}
351-
#endif
352-
353-
template <typename T>
354-
T quantize_val(double scale, int64_t zero_point, float value) {
355-
// std::nearbyint results in nearest integer value according to the current
356-
// rounding mode and the default rounding mode is rounds to even in half-way
357-
// cases in most popular processor architectures like x86 and ARM. This is
358-
// typically faster than an alternatives like std::round that rounds half-way
359-
// cases away from zero, and can be consistent with SIMD implementations for
360-
// example in x86 using _mm512_cvtps_epi32 or mm512_round_ps with
361-
// _MM_FROUND_CUR_DIRECTION option that also follow the current rounding mode.
362-
int64_t qvalue;
363-
constexpr int64_t qmin = std::numeric_limits<typename T::underlying>::min();
364-
constexpr int64_t qmax = std::numeric_limits<typename T::underlying>::max();
365-
float inv_scale = 1.0f / static_cast<float>(scale);
366-
qvalue = static_cast<int64_t>(zero_point + Round(value * inv_scale));
367-
qvalue = std::max<int64_t>(qvalue, qmin);
368-
qvalue = std::min<int64_t>(qvalue, qmax);
369-
return static_cast<T>(qvalue);
370-
}
371-
372-
uint8_t quantize_val_arm(
373-
const float scale,
374-
const int32_t zero_point,
375-
const float value) {
376-
const int32_t qmin = std::numeric_limits<uint8_t>::min();
377-
const int32_t qmax = std::numeric_limits<uint8_t>::max();
378-
float inv_scale = 1.0f / scale;
379-
auto r = zero_point + static_cast<int32_t>(Round(value * inv_scale));
380-
r = std::max(r, qmin);
381-
r = std::min(r, qmax);
382-
return static_cast<uint8_t>(r);
383-
}
384-
385-
template <typename T, int precision>
386-
void quantize_vec(
387-
double scale,
388-
int64_t zero_point,
389-
const float* src,
390-
T* dst,
391-
size_t count) {
392-
checkZeroPoint<typename T::underlying>("quantize_vec", zero_point);
393-
for (int64_t i = 0; i < count; ++i) {
394-
dst[i] = quantize_val<T>(scale, zero_point, src[i]);
395-
}
396-
}
397-
398-
template <typename T>
399-
TORCH_API float dequantize_val(double scale, int64_t zero_point, T value) {
400-
// We need to convert the qint8 value to float to ensure the subtraction
401-
// subexpression returns a float
402-
return (static_cast<float>(value.val_) - zero_point) * scale;
403-
}
404-
#endif // USE_FBGEMM
405-
406-
/*
407-
* Quantize value based on the following equation
408-
* Xq = Round(Xf * inv_scale + zero_point)
409-
* where zero_point is in float.
410-
*
411-
* Note: For the case of embedding quantization we will set zero_point
412-
* to (-Xmin/scale), where Xmin is the min value in input tensor row.
413-
*/
414-
int quantize_val_float_qparams(float scale, float zero_point, float value, int qmin, int qmax) {
415-
int qvalue;
416-
417-
float inv_scale = scale == 0 ? 1.0f : 1.0f / scale;
418-
qvalue = lrintf(value * inv_scale + zero_point);
419-
qvalue = std::max(qmin, std::min(qvalue, qmax));
420-
return qvalue;
421-
}
422-
423-
template <typename SRC_T, typename DST_T>
424-
DST_T requantize_val(
425-
double src_scale,
426-
int64_t src_zero_point,
427-
double dst_scale,
428-
int64_t dst_zero_point,
429-
SRC_T src) {
430-
const auto dq = dequantize_val<SRC_T>(src_scale, src_zero_point, src);
431-
return quantize_val<DST_T>(dst_scale, dst_zero_point, dq);
432-
}
433-
434-
template <typename DST_T>
435-
DST_T requantize_from_int(double multiplier, int64_t zero_point, int64_t src) {
436-
int64_t quantize_down =
437-
zero_point + lrintf(src * static_cast<float>(multiplier));
438-
int32_t min = std::numeric_limits<typename DST_T::underlying>::min();
439-
int32_t max = std::numeric_limits<typename DST_T::underlying>::max();
440-
return static_cast<DST_T>(
441-
std::min<int64_t>(std::max<int64_t>(quantize_down, min), max));
442-
}
443-
444-
template TORCH_API qint8
445-
quantize_val<qint8>(double scale, int64_t zero_point, float value);
446-
template TORCH_API quint8
447-
quantize_val<quint8>(double scale, int64_t zero_point, float value);
448-
template TORCH_API qint32
449-
quantize_val<qint32>(double scale, int64_t zero_point, float value);
450-
template TORCH_API void quantize_vec<c10::qint8>(
451-
double scale,
452-
int64_t zero_point,
453-
const float* src,
454-
c10::qint8* dst,
455-
size_t count);
456-
template TORCH_API void quantize_vec<c10::quint8>(
457-
double scale,
458-
int64_t zero_point,
459-
const float* src,
460-
c10::quint8* dst,
461-
size_t count);
462-
template TORCH_API void quantize_vec<c10::qint32, 32>(
463-
double scale,
464-
int64_t zero_point,
465-
const float* src,
466-
c10::qint32* dst,
467-
size_t count);
468-
469-
template TORCH_API float dequantize_val<qint8>(
470-
double scale,
471-
int64_t zero_point,
472-
qint8 value);
473-
template TORCH_API float dequantize_val<quint8>(
474-
double scale,
475-
int64_t zero_point,
476-
quint8 value);
477-
template TORCH_API float dequantize_val<qint32>(
478-
double scale,
479-
int64_t zero_point,
480-
qint32 value);
481-
482-
template TORCH_API qint8
483-
requantize_val<qint8, qint8>(double, int64_t, double, int64_t, qint8);
484-
template TORCH_API quint8
485-
requantize_val<qint8, quint8>(double, int64_t, double, int64_t, qint8);
486-
template TORCH_API qint32
487-
requantize_val<qint8, qint32>(double, int64_t, double, int64_t, qint8);
488-
template TORCH_API qint8
489-
requantize_val<quint8, qint8>(double, int64_t, double, int64_t, quint8);
490-
template TORCH_API quint8
491-
requantize_val<quint8, quint8>(double, int64_t, double, int64_t, quint8);
492-
template TORCH_API qint32
493-
requantize_val<quint8, qint32>(double, int64_t, double, int64_t, quint8);
494-
template TORCH_API qint8
495-
requantize_val<qint32, qint8>(double, int64_t, double, int64_t, qint32);
496-
template TORCH_API quint8
497-
requantize_val<qint32, quint8>(double, int64_t, double, int64_t, qint32);
498-
template TORCH_API qint32
499-
requantize_val<qint32, qint32>(double, int64_t, double, int64_t, qint32);
500-
501-
template TORCH_API qint8 requantize_from_int<qint8>(double, int64_t, int64_t);
502-
template TORCH_API quint8
503-
requantize_from_int<quint8>(double, int64_t, int64_t);
504-
template TORCH_API qint32
505-
requantize_from_int<qint32>(double, int64_t, int64_t);
506-
507293
} // namespace native
508294
} // namespace at

aten/src/ATen/native/quantized/affine_quantizer.h

Lines changed: 1 addition & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22

33
#include <ATen/ATen.h>
44
#include <ATen/native/DispatchStub.h>
5+
#include <ATen/native/quantized/affine_quantizer_base.h>
56

67
namespace at {
78
namespace native {
@@ -111,54 +112,18 @@ DECLARE_DISPATCH(
111112
dequantize_tensor_per_tensor_affine_sub_byte_fn,
112113
dequantize_tensor_per_tensor_affine_sub_byte_stub);
113114

114-
// Quantize a float value into a uint value given scale and zero_point
115-
template <typename T>
116-
TORCH_API T quantize_val(double scale, int64_t zero_point, float value);
117-
// TODO combine this with quantize_val once the numerics for ARM are aligned
118-
// with it
119-
uint8_t quantize_val_arm(
120-
const float scale,
121-
const int32_t zero_point,
122-
const float value);
123-
template <typename T, int precision = 8>
124-
void quantize_vec(
125-
double scale,
126-
int64_t zero_point,
127-
const float* src,
128-
T* dst,
129-
size_t count = 8);
130115
template <typename T>
131116
TORCH_API Tensor quantize_tensor(
132117
Tensor rtensor,
133118
Tensor qtensor,
134119
double scale,
135120
int64_t zero_point);
136121
template <typename T>
137-
TORCH_API float dequantize_val(double scale, int64_t zero_point, T value);
138-
template <typename T>
139-
TORCH_API float dequantize_vec(
140-
double scale,
141-
int64_t zero_point,
142-
const T* src,
143-
float* dst,
144-
size_t count = 8);
145-
template <typename T>
146122
TORCH_API Tensor dequantize_tensor(
147123
Tensor qtensor,
148124
Tensor rtensor,
149125
double scale,
150126
int64_t zero_point);
151-
template <typename SRC_T, typename DST_T>
152-
TORCH_API DST_T requantize_val(double, int64_t, double, int64_t, SRC_T src);
153-
154-
// Given a multiplier and a zero_point, requantize int32_t computed values back
155-
// to quantized values. See comment above
156-
// make_per_tensor_affine_quantizer function for the usage of int64_t
157-
template <typename DST_T>
158-
TORCH_API DST_T
159-
requantize_from_int(double multiplier, int64_t zero_point, int64_t src);
160-
161-
int quantize_val_float_qparams(float scale, float zero_point, float value, int qmin, int qmax);
162127

163128
} // namespace native
164129
} // namespace at

0 commit comments

Comments
 (0)