Skip to content

Commit b696d64

Browse files
Binaries without AVX512 kernels shouldn't report CPU Capability as AVX512 on machines with AVX512 support (#66703)
Summary:
### BUG
If a PyTorch binary is built with a compiler that doesn't support all the AVX512 intrinsics used in the codebase, it won't contain ATen AVX512 kernels, but at runtime the CPU capability would still be incorrectly reported as AVX512 on a machine that supports AVX512. PyTorch Linux releases appear to be built on CentOS with `gcc 7.3`, so this bug would manifest in the 1.10 release unless a fix such as this one is added. gcc versions below 9.0 don't support all the AVX512 intrinsics in the codebase, such as `_mm512_set_epi16`.
### FIX
CPU capability is now reported as AVX512 at runtime only if the binary was built with a compiler that supports all the AVX512 intrinsics in the codebase and the hardware the binary is running on supports all the required AVX512 instruction sets.

Pull Request resolved: #66703
Reviewed By: gchanan
Differential Revision: D31732625
Pulled By: malfet
fbshipit-source-id: e52d06b87fbe2af9b303a2e9c264189c8512d5ec
1 parent 33790c4 commit b696d64

1 file changed

Lines changed: 12 additions & 0 deletions

File tree

aten/src/ATen/native/DispatchStub.cpp

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,12 +16,16 @@ static CPUCapability compute_cpu_capability() {
1616
return CPUCapability::VSX;
1717
}
1818
#else
19+
#ifdef HAVE_AVX512_CPU_DEFINITION
1920
if (strcmp(envar, "avx512") == 0) {
2021
return CPUCapability::AVX512;
2122
}
23+
#endif
24+
#ifdef HAVE_AVX2_CPU_DEFINITION
2225
if (strcmp(envar, "avx2") == 0) {
2326
return CPUCapability::AVX2;
2427
}
28+
#endif
2529
#endif
2630
if (strcmp(envar, "default") == 0) {
2731
return CPUCapability::DEFAULT;
@@ -31,13 +35,21 @@ static CPUCapability compute_cpu_capability() {
3135

3236
#if !defined(__powerpc__) && !defined(__s390x__)
3337
if (cpuinfo_initialize()) {
38+
#ifdef HAVE_AVX512_CPU_DEFINITION
39+
// GCC supports some AVX512 intrinsics such as _mm512_set_epi16 only in
40+
// versions 9 & beyond. So, only binaries built with a compiler that supports
41+
// all the required AVX512 intrinsics may report CPU Capability AVX512, and
42+
// only when the hardware PyTorch is running on supports it as well.
3443
if (cpuinfo_has_x86_avx512vl() && cpuinfo_has_x86_avx512bw() && \
3544
cpuinfo_has_x86_avx512dq() && cpuinfo_has_x86_fma3()) {
3645
return CPUCapability::AVX512;
3746
}
47+
#endif
48+
#ifdef HAVE_AVX2_CPU_DEFINITION
3849
if (cpuinfo_has_x86_avx2() && cpuinfo_has_x86_fma3()) {
3950
return CPUCapability::AVX2;
4051
}
52+
#endif
4153
}
4254
#endif
4355
#ifdef HAVE_VSX_CPU_DEFINITION

0 commit comments

Comments
 (0)