Skip to content

Commit bdf1473

Browse files
committed
Don't build C-fallback functions that never get used on x86_64
1 parent a9ef3eb commit bdf1473

6 files changed

Lines changed: 73 additions & 14 deletions

File tree

.github/workflows/pkgcheck.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -137,7 +137,7 @@ jobs:
137137
CFLAGS: ${{ matrix.cflags }}
138138
CXXFLAGS: ${{ matrix.cxxflags }}
139139
CHOST: ${{ matrix.chost }}
140-
CMAKE_ARGS: ${{ matrix.cmake-args }}
140+
CMAKE_ARGS: ${{ matrix.cmake-args }} -DWITH_ALL_FALLBACKS=ON
141141
CONFIGURE_ARGS: ${{ matrix.configure-args }}
142142
LDFLAGS: ${{ matrix.ldflags }}
143143

@@ -147,7 +147,7 @@ jobs:
147147
CC: ${{ matrix.compiler }}
148148
CFLAGS: ${{ matrix.cflags }}
149149
CHOST: ${{ matrix.chost }}
150-
CMAKE_ARGS: ${{ matrix.cmake-args }}
150+
CMAKE_ARGS: ${{ matrix.cmake-args }} -DWITH_ALL_FALLBACKS=ON
151151
CONFIGURE_ARGS: ${{ matrix.configure-args }}
152152
LDFLAGS: ${{ matrix.ldflags }}
153153

CMakeLists.txt

Lines changed: 40 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,7 @@ cmake_dependent_option(WITH_GTEST "Build gtest_zlib" ON "ZLIB_ENABLE_TESTS" OFF)
5454
cmake_dependent_option(WITH_FUZZERS "Build test/fuzz" OFF "ZLIB_ENABLE_TESTS" OFF)
5555
cmake_dependent_option(WITH_BENCHMARKS "Build test/benchmarks" OFF "ZLIB_ENABLE_TESTS" OFF)
5656
cmake_dependent_option(WITH_BENCHMARK_APPS "Build application benchmarks" OFF "WITH_BENCHMARKS" OFF)
57+
cmake_dependent_option(WITH_ALL_FALLBACKS "Build all generic fallback functions (Useful for Gbench)" OFF "WITH_BENCHMARKS" OFF)
5758

5859
if(WITH_GTEST OR WITH_FUZZERS OR WITH_BENCHMARKS)
5960
enable_language(CXX)
@@ -1168,6 +1169,13 @@ if(WITH_OPTIM)
11681169
endif()
11691170
endif()
11701171
endif()
1172+
else()
1173+
# If WITH_OPTIM is disabled, we need all the fallbacks.
1174+
set(WITH_ALL_FALLBACKS ON)
1175+
endif()
1176+
1177+
if(WITH_ALL_FALLBACKS)
1178+
add_definitions(-DWITH_ALL_FALLBACKS)
11711179
endif()
11721180

11731181
message(STATUS "Architecture-specific source files: ${ZLIB_ARCH_SRCS}")
@@ -1275,14 +1283,6 @@ set(ZLIB_PRIVATE_HDRS
12751283
zutil_p.h
12761284
)
12771285
set(ZLIB_SRCS
1278-
arch/generic/adler32_c.c
1279-
arch/generic/adler32_fold_c.c
1280-
arch/generic/chunkset_c.c
1281-
arch/generic/compare256_c.c
1282-
arch/generic/crc32_braid_c.c
1283-
arch/generic/crc32_c.c
1284-
arch/generic/crc32_fold_c.c
1285-
arch/generic/slide_hash_c.c
12861286
adler32.c
12871287
compress.c
12881288
crc32.c
@@ -1306,6 +1306,37 @@ set(ZLIB_SRCS
13061306
zutil.c
13071307
)
13081308

1309+
set(ZLIB_ALL_FALLBACK_SRCS
1310+
arch/generic/adler32_c.c
1311+
arch/generic/adler32_fold_c.c
1312+
arch/generic/crc32_braid_c.c
1313+
arch/generic/crc32_c.c
1314+
arch/generic/crc32_fold_c.c
1315+
arch/generic/slide_hash_c.c
1316+
arch/generic/chunkset_c.c
1317+
arch/generic/compare256_c.c
1318+
)
1319+
1320+
if(WITH_ALL_FALLBACKS)
1321+
list(APPEND ZLIB_GENERIC_SRCS ${ZLIB_ALL_FALLBACK_SRCS})
1322+
elseif(${ARCH} STREQUAL "x86_64" AND WITH_SSE2)
1323+
# x86_64 always has SSE2, so let the SSE2 functions act as fallbacks.
1324+
list(APPEND ZLIB_GENERIC_SRCS
1325+
arch/generic/adler32_c.c
1326+
arch/generic/adler32_fold_c.c
1327+
arch/generic/crc32_braid_c.c
1328+
arch/generic/crc32_c.c
1329+
arch/generic/crc32_fold_c.c
1330+
)
1331+
1332+
# x86_64 does not need the compare256 fallback when HAVE_BUILTIN_CTZ is set
1333+
if(NOT HAVE_BUILTIN_CTZ)
1334+
list(APPEND ZLIB_GENERIC_SRCS arch/generic/compare256_c.c)
1335+
endif()
1336+
else()
1337+
list(APPEND ZLIB_GENERIC_SRCS ${ZLIB_ALL_FALLBACK_SRCS})
1338+
endif()
1339+
13091340
if(WITH_CRC32_CHORBA)
13101341
list(APPEND ZLIB_SRCS arch/generic/crc32_chorba_c.c)
13111342
endif()
@@ -1324,7 +1355,7 @@ set(ZLIB_GZFILE_SRCS
13241355
gzwrite.c
13251356
)
13261357

1327-
set(ZLIB_ALL_SRCS ${ZLIB_SRCS} ${ZLIB_ARCH_HDRS} ${ZLIB_ARCH_SRCS} ${ZLIB_PUBLIC_HDRS} ${ZLIB_PRIVATE_HDRS})
1358+
set(ZLIB_ALL_SRCS ${ZLIB_GENERIC_SRCS} ${ZLIB_SRCS} ${ZLIB_ARCH_HDRS} ${ZLIB_ARCH_SRCS} ${ZLIB_PUBLIC_HDRS} ${ZLIB_PRIVATE_HDRS})
13281359
if(WITH_GZFILEOP)
13291360
list(APPEND ZLIB_ALL_SRCS ${ZLIB_GZFILE_PRIVATE_HDRS} ${ZLIB_GZFILE_SRCS})
13301361
endif()

README.md

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -204,7 +204,7 @@ Advanced Build Options
204204
| WITH_SSE42 | | Build with SSE42 intrinsics | ON |
205205
| WITH_PCLMULQDQ | | Build with PCLMULQDQ intrinsics | ON |
206206
| WITH_VPCLMULQDQ | --without-vpclmulqdq | Build with VPCLMULQDQ intrinsics | ON |
207-
| WITH_ARMV8 | --without-armv8 | Build with ARMv8 intrinsics | ON |
207+
| WITH_ARMV8 | --without-armv8 | Build with ARMv8 intrinsics | ON |
208208
| WITH_NEON | --without-neon | Build with NEON intrinsics | ON |
209209
| WITH_ARMV6 | --without-armv6 | Build with ARMv6 intrinsics | ON |
210210
| WITH_ALTIVEC | --without-altivec | Build with AltiVec (VMX) intrinsics | ON |
@@ -217,6 +217,7 @@ Advanced Build Options
217217
| WITH_INFLATE_ALLOW_INVALID_DIST | | Build with zero fill for inflate invalid distances | OFF |
218218
| INSTALL_UTILS | | Copy minigzip and minideflate during install | OFF |
219219
| ZLIBNG_ENABLE_TESTS | | Test zlib-ng specific API | ON |
220+
| WITH_ALL_FALLBACKS | | Build all generic C fallback functions (useful for Gbench comparisons) | OFF |
220221

221222

222223
Related Projects

configure

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1777,6 +1777,11 @@ if test $without_new_strategies -eq 1; then
17771777
SFLAGS="${SFLAGS} -DNO_QUICK_STRATEGY -DNO_MEDIUM_STRATEGY"
17781778
fi
17791779

1780+
# CMake can exclude building some of the generic fallback functions,
1781+
# but configure does not have the detection code to do so, so always define WITH_ALL_FALLBACKS.
1782+
CFLAGS="${CFLAGS} -DWITH_ALL_FALLBACKS"
1783+
SFLAGS="${SFLAGS} -DWITH_ALL_FALLBACKS"
1784+
17801785
ARCHDIR='arch/generic'
17811786
ARCH_STATIC_OBJS=''
17821787
ARCH_SHARED_OBJS=''

functable.c

Lines changed: 22 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -47,9 +47,25 @@ static void init_functable(void) {
4747
struct cpu_features cf;
4848

4949
cpu_check_features(&cf);
50-
51-
// Generic code
5250
ft.force_init = &force_init_empty;
51+
52+
// Set up generic C code fallbacks
53+
#ifndef WITH_ALL_FALLBACKS
54+
# ifdef __x86_64__
55+
ft.adler32 = &adler32_c;
56+
ft.adler32_fold_copy = &adler32_fold_copy_c;
57+
ft.crc32 = &crc32_c;
58+
ft.crc32_fold = &crc32_fold_c;
59+
ft.crc32_fold_copy = &crc32_fold_copy_c;
60+
ft.crc32_fold_final = &crc32_fold_final_c;
61+
ft.crc32_fold_reset = &crc32_fold_reset_c;
62+
# ifndef HAVE_BUILTIN_CTZ
63+
ft.longest_match = &longest_match_c;
64+
ft.longest_match_slow = &longest_match_slow_c;
65+
ft.compare256 = &compare256_c;
66+
# endif
67+
# endif
68+
#else
5369
ft.adler32 = &adler32_c;
5470
ft.adler32_fold_copy = &adler32_fold_copy_c;
5571
ft.chunkmemset_safe = &chunkmemset_safe_c;
@@ -63,8 +79,10 @@ static void init_functable(void) {
6379
ft.longest_match = &longest_match_c;
6480
ft.longest_match_slow = &longest_match_slow_c;
6581
ft.compare256 = &compare256_c;
82+
#endif
6683

6784
// Select arch-optimized functions
85+
#ifdef WITH_OPTIM
6886

6987
// X86 - SSE2
7088
#ifdef X86_SSE2
@@ -301,6 +319,8 @@ static void init_functable(void) {
301319
}
302320
#endif
303321

322+
#endif // WITH_OPTIM
323+
304324
// Assign function pointers individually for atomic operation
305325
FUNCTABLE_ASSIGN(ft, force_init);
306326
FUNCTABLE_ASSIGN(ft, adler32);

test/benchmarks/benchmark_slidehash.cc

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -68,7 +68,9 @@ class slide_hash: public benchmark::Fixture {
6868
} \
6969
BENCHMARK_REGISTER_F(slide_hash, name)->RangeMultiplier(2)->Range(512, MAX_RANDOM_INTS);
7070

71+
#if defined(WITH_ALL_FALLBACKS) || !defined(__x86_64__)
7172
BENCHMARK_SLIDEHASH(c, slide_hash_c, 1);
73+
#endif
7274

7375
#ifdef DISABLE_RUNTIME_CPU_DETECTION
7476
BENCHMARK_SLIDEHASH(native, native_slide_hash, 1);

0 commit comments

Comments
 (0)