Skip to content

Commit deb24a8

Browse files
Michael Gschwind authored and facebook-github-bot committed
Use Intel x86 intrinsics for half/float conversion (#1789)
Summary:
Pull Request resolved: #1789
Use Intel x86 intrinsics for half/float conversion with GCC and clang
Reviewed By: cpuhrsch
Differential Revision: D53283798
fbshipit-source-id: a2365485258304b6b0b67b22d766ede7d4ba6994
1 parent 0c3f617 commit deb24a8

1 file changed

Lines changed: 21 additions & 0 deletions

File tree

  • runtime/core/portable_type

runtime/core/portable_type/half.h

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,16 @@
1212
#include <cstdint>
1313
#include <cstring>
1414

15+
// Select the hardware half<->float conversion path.
//
// X86_F16 is defined (and <immintrin.h> included) only when all of the
// following hold:
//   * the compiler is GCC or clang,
//   * the target is 32- or 64-bit x86,
//   * the F16C target feature is enabled (__F16C__).
//
// The scalar conversion intrinsics used under X86_F16 (_cvtsh_ss and
// _cvtss_sh) are provided by f16cintrin.h and require F16C. AVX2 does not
// imply F16C (e.g. building with -mavx2 but without -mf16c), so gating on
// __AVX2__ could enable intrinsics that the compiler then rejects with a
// target-feature mismatch. Gate on __F16C__ directly instead.
#if defined(__GNUC__) || defined(__clang__)
#if defined(__x86_64__) || defined(_M_X64) || defined(__i386) || \
    defined(_M_IX86)
#if defined(__F16C__)
#define X86_F16 1
#include <immintrin.h> // import conversion ops from f16cintrin.h
#endif // __F16C__
#endif // __x86_64__ || _M_X64 || __i386 || _M_IX86
#endif // __GNUC__ || __clang__
24+
1525
namespace torch {
1626
namespace executor {
1727

@@ -165,6 +175,10 @@ inline uint32_t fp16_ieee_to_fp32_bits(uint16_t h) {
165175
* between integer and floating-point variables.
166176
*/
167177
inline float fp16_ieee_to_fp32_value(uint16_t h) {
178+
#ifdef X86_F16
179+
return _cvtsh_ss(h);
180+
#else
181+
168182
/*
169183
* Extend the half-precision floating-point number to 32 bits and shift to the
170184
* upper part of the 32-bit word:
@@ -287,6 +301,8 @@ inline float fp16_ieee_to_fp32_value(uint16_t h) {
287301
(two_w < denormalized_cutoff ? fp32_to_bits(denormalized_value)
288302
: fp32_to_bits(normalized_value));
289303
return fp32_from_bits(result);
304+
305+
#endif // not X86_F16
290306
}
291307

292308
/*
@@ -299,6 +315,10 @@ inline float fp16_ieee_to_fp32_value(uint16_t h) {
299315
* between integer and floating-point variables.
300316
*/
301317
inline uint16_t fp16_ieee_from_fp32_value(float f) {
318+
#ifdef X86_F16
319+
return _cvtss_sh(f, _MM_FROUND_TO_NEAREST_INT);
320+
#else
321+
302322
// const float scale_to_inf = 0x1.0p+112f;
303323
// const float scale_to_zero = 0x1.0p-110f;
304324
constexpr uint32_t scale_to_inf_bits = (uint32_t)239 << 23;
@@ -332,6 +352,7 @@ inline uint16_t fp16_ieee_from_fp32_value(float f) {
332352
return static_cast<uint16_t>(
333353
(sign >> 16) |
334354
(shl1_w > UINT32_C(0xFF000000) ? UINT16_C(0x7E00) : nonsign));
355+
#endif // not X86_F16
335356
}
336357

337358
} // namespace internal

0 commit comments

Comments
 (0)