Skip to content

Commit b36e16b

Browse files
committed
- Unify crc32_chorba, chorba_sse2 and chorba_sse41 dispatch functions.
- Fixed alignment diff calculation in crc32_chorba.
- Fixed length check to happen early, avoiding extra branches for too-short lengths. This also allows removing one function call to crc32_braid_internal to handle those. Gbench shows ~0.15-0.25ns saved per call for lengths shorter than CHORBA_SMALL_THRESHOLD.
- Avoid calculating aligned len if buffer is already aligned.
1 parent 8003f57 commit b36e16b

4 files changed

Lines changed: 42 additions & 48 deletions

File tree

arch/generic/crc32_chorba_c.c

Lines changed: 16 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -1448,32 +1448,31 @@ Z_INTERNAL uint32_t crc32_chorba_small_nondestructive_32bit (uint32_t crc, const
14481448
#endif // OPTIMAL_CMP == 64
14491449

14501450
Z_INTERNAL uint32_t crc32_chorba(uint32_t crc, const uint8_t *buf, size_t len) {
1451+
uint64_t* aligned_buf;
14511452
uint32_t c = (~crc) & 0xffffffff;
1453+
uintptr_t algn_diff = ((uintptr_t)8 - ((uintptr_t)buf & 7)) & 7;
14521454

1453-
uint64_t* aligned_buf;
1454-
size_t aligned_len;
1455-
unsigned long algn_diff = ((uintptr_t)8 - ((uintptr_t)buf & 0xF)) & 0xF;
1456-
if (algn_diff < len) {
1455+
if (len > algn_diff + CHORBA_SMALL_THRESHOLD) {
14571456
if (algn_diff) {
14581457
c = crc32_braid_internal(c, buf, algn_diff);
1458+
len -= algn_diff;
14591459
}
14601460
aligned_buf = (uint64_t*) (buf + algn_diff);
1461-
aligned_len = len - algn_diff;
1462-
if(aligned_len > CHORBA_LARGE_THRESHOLD)
1463-
c = crc32_chorba_118960_nondestructive(c, (z_word_t*) aligned_buf, aligned_len);
1461+
if(len > CHORBA_LARGE_THRESHOLD) {
1462+
c = crc32_chorba_118960_nondestructive(c, (z_word_t*) aligned_buf, len);
1463+
# if OPTIMAL_CMP == 64
1464+
} else if (len > CHORBA_MEDIUM_LOWER_THRESHOLD && len <= CHORBA_MEDIUM_UPPER_THRESHOLD) {
1465+
c = crc32_chorba_32768_nondestructive(c, (uint64_t*) aligned_buf, len);
1466+
# endif
1467+
} else {
14641468
# if OPTIMAL_CMP == 64
1465-
else if (aligned_len > CHORBA_MEDIUM_LOWER_THRESHOLD && aligned_len <= CHORBA_MEDIUM_UPPER_THRESHOLD)
1466-
c = crc32_chorba_32768_nondestructive(c, (uint64_t*) aligned_buf, aligned_len);
1467-
else if (aligned_len > CHORBA_SMALL_THRESHOLD_64BIT)
1468-
c = crc32_chorba_small_nondestructive(c, (uint64_t*) aligned_buf, aligned_len);
1469+
c = crc32_chorba_small_nondestructive(c, (uint64_t*) aligned_buf, len);
14691470
# else
1470-
else if (aligned_len > CHORBA_SMALL_THRESHOLD_32BIT)
1471-
c = crc32_chorba_small_nondestructive_32bit(c, (uint32_t*) aligned_buf, aligned_len);
1471+
c = crc32_chorba_small_nondestructive_32bit(c, (uint32_t*) aligned_buf, len);
14721472
# endif
1473-
else
1474-
c = crc32_braid_internal(c, (uint8_t*) aligned_buf, aligned_len);
1475-
}
1476-
else {
1473+
}
1474+
} else {
1475+
// Process too short lengths using crc32_braid
14771476
c = crc32_braid_internal(c, buf, len);
14781477
}
14791478

arch/x86/chorba_sse2.c

Lines changed: 10 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -847,30 +847,26 @@ Z_INTERNAL uint32_t chorba_small_nondestructive_sse2(uint32_t crc, const uint64_
847847
}
848848

849849
Z_INTERNAL uint32_t crc32_chorba_sse2(uint32_t crc, const uint8_t *buf, size_t len) {
850-
uint32_t c;
851850
uint64_t* aligned_buf;
852-
size_t aligned_len;
851+
uint32_t c = (~crc) & 0xffffffff;
852+
uintptr_t algn_diff = ((uintptr_t)16 - ((uintptr_t)buf & 15)) & 15;
853853

854-
c = (~crc) & 0xffffffff;
855-
unsigned long algn_diff = ((uintptr_t)16 - ((uintptr_t)buf & 15)) & 15;
856-
if (algn_diff < len) {
854+
if (len > algn_diff + CHORBA_SMALL_THRESHOLD_64BIT) {
857855
if (algn_diff) {
858856
c = crc32_braid_internal(c, buf, algn_diff);
857+
len -= algn_diff;
859858
}
860859
aligned_buf = (uint64_t*) (buf + algn_diff);
861-
aligned_len = len - algn_diff;
862860
#if !defined(WITHOUT_CHORBA)
863-
if(aligned_len > CHORBA_LARGE_THRESHOLD) {
864-
c = crc32_chorba_118960_nondestructive(c, (z_word_t*) aligned_buf, aligned_len);
861+
if(len > CHORBA_LARGE_THRESHOLD) {
862+
c = crc32_chorba_118960_nondestructive(c, (z_word_t*) aligned_buf, len);
865863
} else
866864
#endif
867-
if (aligned_len > CHORBA_SMALL_THRESHOLD_64BIT) {
868-
c = chorba_small_nondestructive_sse2(c, aligned_buf, aligned_len);
869-
} else {
870-
c = crc32_braid_internal(c, (uint8_t*) aligned_buf, aligned_len);
865+
{
866+
c = chorba_small_nondestructive_sse2(c, aligned_buf, len);
871867
}
872-
}
873-
else {
868+
} else {
869+
// Process too short lengths using crc32_braid
874870
c = crc32_braid_internal(c, buf, len);
875871
}
876872

arch/x86/chorba_sse41.c

Lines changed: 11 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -305,33 +305,28 @@ static Z_FORCEINLINE uint32_t crc32_chorba_32768_nondestructive_sse41(uint32_t c
305305
}
306306

307307
Z_INTERNAL uint32_t crc32_chorba_sse41(uint32_t crc, const uint8_t *buf, size_t len) {
308-
uint32_t c;
309308
uint64_t* aligned_buf;
310-
size_t aligned_len;
311-
312-
c = (~crc) & 0xffffffff;
309+
uint32_t c = (~crc) & 0xffffffff;
313310
uintptr_t algn_diff = ((uintptr_t)16 - ((uintptr_t)buf & 15)) & 15;
314-
if (algn_diff < len) {
311+
312+
if (len > algn_diff + CHORBA_SMALL_THRESHOLD_64BIT) {
315313
if (algn_diff) {
316314
c = crc32_braid_internal(c, buf, algn_diff);
315+
len -= algn_diff;
317316
}
318317
aligned_buf = (uint64_t*) (buf + algn_diff);
319-
aligned_len = len - algn_diff;
320318
#if !defined(WITHOUT_CHORBA)
321-
if(aligned_len > CHORBA_LARGE_THRESHOLD) {
322-
c = crc32_chorba_118960_nondestructive(c, (z_word_t*) aligned_buf, aligned_len);
319+
if(len > CHORBA_LARGE_THRESHOLD) {
320+
c = crc32_chorba_118960_nondestructive(c, (z_word_t*) aligned_buf, len);
323321
} else
324322
#endif
325-
if (aligned_len > CHORBA_MEDIUM_LOWER_THRESHOLD &&
326-
aligned_len <= CHORBA_MEDIUM_UPPER_THRESHOLD) {
327-
c = crc32_chorba_32768_nondestructive_sse41(c, aligned_buf, aligned_len);
328-
} else if (aligned_len > CHORBA_SMALL_THRESHOLD_64BIT) {
329-
c = chorba_small_nondestructive_sse2(c, aligned_buf, aligned_len);
323+
if (len > CHORBA_MEDIUM_LOWER_THRESHOLD && len <= CHORBA_MEDIUM_UPPER_THRESHOLD) {
324+
c = crc32_chorba_32768_nondestructive_sse41(c, aligned_buf, len);
330325
} else {
331-
c = crc32_braid_internal(c, (uint8_t*) aligned_buf, aligned_len);
326+
c = chorba_small_nondestructive_sse2(c, aligned_buf, len);
332327
}
333-
}
334-
else {
328+
} else {
329+
// Process too short lengths using crc32_braid
335330
c = crc32_braid_internal(c, buf, len);
336331
}
337332

crc32.h

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,11 @@
1313
#define CHORBA_MEDIUM_UPPER_THRESHOLD 32768
1414
#define CHORBA_MEDIUM_LOWER_THRESHOLD 8192
1515
#define CHORBA_SMALL_THRESHOLD_64BIT 72
16-
#define CHORBA_SMALL_THRESHOLD_32BIT 80
16+
#if OPTIMAL_CMP == 64
17+
# define CHORBA_SMALL_THRESHOLD 72
18+
#else
19+
# define CHORBA_SMALL_THRESHOLD 80
20+
#endif
1721

1822
typedef struct crc32_fold_s {
1923
uint8_t fold[CRC32_FOLD_BUFFER_SIZE];

0 commit comments

Comments
 (0)