Skip to content

Commit aa8069a

Browse files
committed
- Unify crc32_chorba, chorba_sse2 and chorba_sse41 dispatch functions.
- Fixed the alignment diff calculation in crc32_chorba.
- Moved the length check so that it happens early, avoiding extra branches for lengths that are too short; this also allows removing the one call to crc32_braid_internal that used to handle those lengths. Gbench shows ~0.15-0.25ns saved per call for lengths shorter than CHORBA_SMALL_THRESHOLD.
- Avoid calculating the aligned length if the buffer is already aligned.
1 parent 0929dc8 commit aa8069a

4 files changed

Lines changed: 42 additions & 48 deletions

File tree

arch/generic/crc32_chorba_c.c

Lines changed: 16 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -1444,32 +1444,31 @@ Z_INTERNAL uint32_t crc32_chorba_small_nondestructive_32bit (uint32_t crc, const
14441444
}
14451445

14461446
Z_INTERNAL uint32_t crc32_chorba(uint32_t crc, const uint8_t *buf, size_t len) {
1447+
uint64_t* aligned_buf;
14471448
uint32_t c = (~crc) & 0xffffffff;
1449+
uintptr_t algn_diff = ((uintptr_t)8 - ((uintptr_t)buf & 7)) & 7;
14481450

1449-
uint64_t* aligned_buf;
1450-
size_t aligned_len;
1451-
unsigned long algn_diff = ((uintptr_t)8 - ((uintptr_t)buf & 0xF)) & 0xF;
1452-
if (algn_diff < len) {
1451+
if (len >= algn_diff + CHORBA_SMALL_THRESHOLD) {
14531452
if (algn_diff) {
14541453
c = crc32_braid_internal(c, buf, algn_diff);
1454+
len -= algn_diff;
14551455
}
14561456
aligned_buf = (uint64_t*) (buf + algn_diff);
1457-
aligned_len = len - algn_diff;
1458-
if(aligned_len > CHORBA_LARGE_THRESHOLD)
1459-
c = crc32_chorba_118960_nondestructive(c, (z_word_t*) aligned_buf, aligned_len);
1457+
if(len > CHORBA_LARGE_THRESHOLD) {
1458+
c = crc32_chorba_118960_nondestructive(c, (z_word_t*) aligned_buf, len);
1459+
# if OPTIMAL_CMP == 64
1460+
} else if (len > CHORBA_MEDIUM_LOWER_THRESHOLD && len <= CHORBA_MEDIUM_UPPER_THRESHOLD) {
1461+
c = crc32_chorba_32768_nondestructive(c, (uint64_t*) aligned_buf, len);
1462+
# endif
1463+
} else {
14601464
# if OPTIMAL_CMP == 64
1461-
else if (aligned_len > CHORBA_MEDIUM_LOWER_THRESHOLD && aligned_len <= CHORBA_MEDIUM_UPPER_THRESHOLD)
1462-
c = crc32_chorba_32768_nondestructive(c, (uint64_t*) aligned_buf, aligned_len);
1463-
else if (aligned_len > CHORBA_SMALL_THRESHOLD_64BIT)
1464-
c = crc32_chorba_small_nondestructive(c, (uint64_t*) aligned_buf, aligned_len);
1465+
c = crc32_chorba_small_nondestructive(c, (uint64_t*) aligned_buf, len);
14651466
# else
1466-
else if (aligned_len > CHORBA_SMALL_THRESHOLD_32BIT)
1467-
c = crc32_chorba_small_nondestructive_32bit(c, (uint32_t*) aligned_buf, aligned_len);
1467+
c = crc32_chorba_small_nondestructive_32bit(c, (uint32_t*) aligned_buf, len);
14681468
# endif
1469-
else
1470-
c = crc32_braid_internal(c, (uint8_t*) aligned_buf, aligned_len);
1471-
}
1472-
else {
1469+
}
1470+
} else {
1471+
// Process too short lengths using crc32_braid
14731472
c = crc32_braid_internal(c, buf, len);
14741473
}
14751474

arch/x86/chorba_sse2.c

Lines changed: 10 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -850,30 +850,26 @@ Z_INTERNAL uint32_t chorba_small_nondestructive_sse2(uint32_t crc, const uint64_
850850
}
851851

852852
Z_INTERNAL uint32_t crc32_chorba_sse2(uint32_t crc, const uint8_t *buf, size_t len) {
853-
uint32_t c;
854853
uint64_t* aligned_buf;
855-
size_t aligned_len;
854+
uint32_t c = (~crc) & 0xffffffff;
855+
uintptr_t algn_diff = ((uintptr_t)16 - ((uintptr_t)buf & 15)) & 15;
856856

857-
c = (~crc) & 0xffffffff;
858-
unsigned long algn_diff = ((uintptr_t)16 - ((uintptr_t)buf & 15)) & 15;
859-
if (algn_diff < len) {
857+
if (len > algn_diff + CHORBA_SMALL_THRESHOLD_64BIT) {
860858
if (algn_diff) {
861859
c = crc32_braid_internal(c, buf, algn_diff);
860+
len -= algn_diff;
862861
}
863862
aligned_buf = (uint64_t*) (buf + algn_diff);
864-
aligned_len = len - algn_diff;
865863
#if !defined(WITHOUT_CHORBA)
866-
if(aligned_len > CHORBA_LARGE_THRESHOLD) {
867-
c = crc32_chorba_118960_nondestructive(c, (z_word_t*) aligned_buf, aligned_len);
864+
if(len > CHORBA_LARGE_THRESHOLD) {
865+
c = crc32_chorba_118960_nondestructive(c, (z_word_t*) aligned_buf, len);
868866
} else
869867
#endif
870-
if (aligned_len > CHORBA_SMALL_THRESHOLD_64BIT) {
871-
c = chorba_small_nondestructive_sse2(c, aligned_buf, aligned_len);
872-
} else {
873-
c = crc32_braid_internal(c, (uint8_t*) aligned_buf, aligned_len);
868+
{
869+
c = chorba_small_nondestructive_sse2(c, aligned_buf, len);
874870
}
875-
}
876-
else {
871+
} else {
872+
// Process too short lengths using crc32_braid
877873
c = crc32_braid_internal(c, buf, len);
878874
}
879875

arch/x86/chorba_sse41.c

Lines changed: 11 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -308,33 +308,28 @@ static Z_FORCEINLINE uint32_t crc32_chorba_32768_nondestructive_sse41(uint32_t c
308308
}
309309

310310
Z_INTERNAL uint32_t crc32_chorba_sse41(uint32_t crc, const uint8_t *buf, size_t len) {
311-
uint32_t c;
312311
uint64_t* aligned_buf;
313-
size_t aligned_len;
314-
315-
c = (~crc) & 0xffffffff;
312+
uint32_t c = (~crc) & 0xffffffff;
316313
uintptr_t algn_diff = ((uintptr_t)16 - ((uintptr_t)buf & 15)) & 15;
317-
if (algn_diff < len) {
314+
315+
if (len > algn_diff + CHORBA_SMALL_THRESHOLD_64BIT) {
318316
if (algn_diff) {
319317
c = crc32_braid_internal(c, buf, algn_diff);
318+
len -= algn_diff;
320319
}
321320
aligned_buf = (uint64_t*) (buf + algn_diff);
322-
aligned_len = len - algn_diff;
323321
#if !defined(WITHOUT_CHORBA)
324-
if(aligned_len > CHORBA_LARGE_THRESHOLD) {
325-
c = crc32_chorba_118960_nondestructive(c, (z_word_t*) aligned_buf, aligned_len);
322+
if(len > CHORBA_LARGE_THRESHOLD) {
323+
c = crc32_chorba_118960_nondestructive(c, (z_word_t*) aligned_buf, len);
326324
} else
327325
#endif
328-
if (aligned_len > CHORBA_MEDIUM_LOWER_THRESHOLD &&
329-
aligned_len <= CHORBA_MEDIUM_UPPER_THRESHOLD) {
330-
c = crc32_chorba_32768_nondestructive_sse41(c, aligned_buf, aligned_len);
331-
} else if (aligned_len > CHORBA_SMALL_THRESHOLD_64BIT) {
332-
c = chorba_small_nondestructive_sse2(c, aligned_buf, aligned_len);
326+
if (len > CHORBA_MEDIUM_LOWER_THRESHOLD && len <= CHORBA_MEDIUM_UPPER_THRESHOLD) {
327+
c = crc32_chorba_32768_nondestructive_sse41(c, aligned_buf, len);
333328
} else {
334-
c = crc32_braid_internal(c, (uint8_t*) aligned_buf, aligned_len);
329+
c = chorba_small_nondestructive_sse2(c, aligned_buf, len);
335330
}
336-
}
337-
else {
331+
} else {
332+
// Process too short lengths using crc32_braid
338333
c = crc32_braid_internal(c, buf, len);
339334
}
340335

crc32.h

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,11 @@
1313
#define CHORBA_MEDIUM_UPPER_THRESHOLD 32768
1414
#define CHORBA_MEDIUM_LOWER_THRESHOLD 8192
1515
#define CHORBA_SMALL_THRESHOLD_64BIT 72
16-
#define CHORBA_SMALL_THRESHOLD_32BIT 80
16+
#if OPTIMAL_CMP == 64
17+
# define CHORBA_SMALL_THRESHOLD 72
18+
#else
19+
# define CHORBA_SMALL_THRESHOLD 80
20+
#endif
1721

1822
typedef struct crc32_fold_s {
1923
uint8_t fold[CRC32_FOLD_BUFFER_SIZE];

0 commit comments

Comments
 (0)