Skip to content

Commit bbb4e33

Browse files
committed
Remove force-sse2 config option from x86 builds.
Due to major refactoring done long ago, this option no longer avoids a branch in a hot path; it currently only removes a single if check during init.
1 parent a9ef3eb commit bbb4e33

5 files changed

Lines changed: 2 additions & 19 deletions

File tree

CMakeLists.txt

Lines changed: 0 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1042,20 +1042,12 @@ if(WITH_OPTIM)
10421042
endif()
10431043
if(WITH_SSE2)
10441044
check_sse2_intrinsics()
1045-
# FORCE_SSE2 option will only be shown if HAVE_SSE2_INTRIN is true
1046-
if("${ARCH}" MATCHES "i[3-6]86")
1047-
cmake_dependent_option(FORCE_SSE2 "Always assume CPU is SSE2 capable" OFF "HAVE_SSE2_INTRIN" OFF)
1048-
endif()
10491045
if(HAVE_SSE2_INTRIN)
10501046
add_definitions(-DX86_SSE2)
10511047
set(SSE2_SRCS ${ARCHDIR}/chunkset_sse2.c ${ARCHDIR}/chorba_sse2.c ${ARCHDIR}/compare256_sse2.c ${ARCHDIR}/slide_hash_sse2.c)
10521048
list(APPEND ZLIB_ARCH_SRCS ${SSE2_SRCS})
10531049
if(NOT ${ARCH} MATCHES "x86_64")
10541050
set_property(SOURCE ${SSE2_SRCS} PROPERTY COMPILE_FLAGS "${SSE2FLAG} ${NOLTOFLAG}")
1055-
add_feature_info(FORCE_SSE2 FORCE_SSE2 "Assume CPU is SSE2 capable")
1056-
if(FORCE_SSE2)
1057-
add_definitions(-DX86_NOCHECK_SSE2)
1058-
endif()
10591051
endif()
10601052
else()
10611053
set(WITH_SSE2 OFF)

README.md

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -195,7 +195,6 @@ Advanced Build Options
195195

196196
| CMake | configure | Description | Default |
197197
|:--------------------------------|:----------------------|:--------------------------------------------------------------------|------------------------|
198-
| FORCE_SSE2 | --force-sse2 | Skip runtime check for SSE2 instructions (Always on for x86_64) | OFF (x86) |
199198
| WITH_AVX2 | | Build with AVX2 intrinsics | ON |
200199
| WITH_AVX512 | | Build with AVX512 intrinsics | ON |
201200
| WITH_AVX512VNNI | | Build with AVX512VNNI intrinsics | ON |

arch/x86/x86_functions.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -91,7 +91,7 @@ uint32_t crc32_vpclmulqdq(uint32_t crc32, const uint8_t *buf, size_t len);
9191

9292
#ifdef DISABLE_RUNTIME_CPU_DETECTION
9393
// X86 - SSE2
94-
# if (defined(X86_SSE2) && defined(__SSE2__)) || defined(__x86_64__) || defined(_M_X64) || defined(X86_NOCHECK_SSE2)
94+
# if (defined(X86_SSE2) && defined(__SSE2__)) || defined(__x86_64__) || defined(_M_X64)
9595
# undef native_chunkmemset_safe
9696
# define native_chunkmemset_safe chunkmemset_safe_sse2
9797
# undef native_inflate_fast

configure

Lines changed: 0 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -106,7 +106,6 @@ builddfltccinflate=0
106106
buildcrc32vx=1
107107
buildcrc32la=1
108108
floatabi=
109-
forcesse2=0
110109
# For CPUs that can benefit from AVX512, it seems GCC generates suboptimal
111110
# instruction scheduling unless you specify a reasonable -mtune= target
112111
avx512flag="-mavx512f -mavx512dq -mavx512bw -mavx512vl -mbmi2"
@@ -190,7 +189,6 @@ case "$1" in
190189
echo ' [--with-dfltcc-inflate] Use DEFLATE CONVERSION CALL instruction for decompression on IBM Z' | tee -a configure.log
191190
echo ' [--without-crc32-vx] Build without vectorized CRC32 on IBM Z' | tee -a configure.log
192191
echo ' [--with-reduced-mem] Reduced memory usage for special cases (reduces performance)' | tee -a configure.log
193-
echo ' [--force-sse2] Assume SSE2 instructions are always available (disabled by default on x86, enabled on x86_64)' | tee -a configure.log
194192
exit 0 ;;
195193
-p*=* | --prefix=*) prefix=$(echo $1 | sed 's/.*=//'); shift ;;
196194
-e*=* | --eprefix=*) exec_prefix=$(echo $1 | sed 's/.*=//'); shift ;;
@@ -229,7 +227,6 @@ case "$1" in
229227
--without-crc32-vx) buildcrc32vx=0; shift ;;
230228
--without-crc32-la) buildcrc32la=0; shift ;;
231229
--with-reduced-mem) reducedmem=1; shift ;;
232-
--force-sse2) forcesse2=1; shift ;;
233230
-a*=* | --archs=*) ARCHS=$(echo $1 | sed 's/.*=//'); shift ;;
234231
--sysconfdir=*) echo "ignored option: --sysconfdir" | tee -a configure.log; shift ;;
235232
--localstatedir=*) echo "ignored option: --localstatedir" | tee -a configure.log; shift ;;
@@ -1811,11 +1808,6 @@ case "${ARCH}" in
18111808
SFLAGS="${SFLAGS} -DX86_SSE2"
18121809
ARCH_STATIC_OBJS="${ARCH_STATIC_OBJS} chunkset_sse2.o chorba_sse2.o compare256_sse2.o slide_hash_sse2.o"
18131810
ARCH_SHARED_OBJS="${ARCH_SHARED_OBJS} chunkset_sse2.lo chorba_sse2.lo compare256_sse2.lo slide_hash_sse2.lo"
1814-
1815-
if test $forcesse2 -eq 1; then
1816-
CFLAGS="${CFLAGS} -DX86_NOCHECK_SSE2"
1817-
SFLAGS="${SFLAGS} -DX86_NOCHECK_SSE2"
1818-
fi
18191811
fi
18201812

18211813
check_ssse3_intrinsics

functable.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -68,7 +68,7 @@ static void init_functable(void) {
6868

6969
// X86 - SSE2
7070
#ifdef X86_SSE2
71-
# if !defined(__x86_64__) && !defined(_M_X64) && !defined(X86_NOCHECK_SSE2)
71+
# if !defined(__x86_64__) && !defined(_M_X64)
7272
if (cf.x86.has_sse2)
7373
# endif
7474
{

0 commit comments

Comments
 (0)