Skip to content

Commit 2b254c6

Browse files
committed
optimize wildcopy
The initial check in the 16-byte wild copy is unnecessary, since it is already done before calling the method.
1 parent a75f532 commit 2b254c6

1 file changed

Lines changed: 5 additions & 2 deletions

File tree

src/block/decompress.rs

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -38,10 +38,13 @@ fn wild_copy_from_src_16(mut source: *const u8, mut dst_ptr: *mut u8, num_items:
3838
// It's not the case for 16 bytes stepsize, but for 8 bytes.
3939
unsafe {
4040
let dst_ptr_end = dst_ptr.add(num_items);
41-
while (dst_ptr as usize) < dst_ptr_end as usize {
41+
loop {
4242
core::ptr::copy_nonoverlapping(source, dst_ptr, 16);
4343
source = source.add(16);
4444
dst_ptr = dst_ptr.add(16);
45+
if dst_ptr >= dst_ptr_end {
46+
break;
47+
}
4548
}
4649
}
4750
}
@@ -60,7 +63,7 @@ unsafe fn duplicate_overlapping(
6063
// This is the same strategy used by the reference C implementation https://github.com/lz4/lz4/pull/772
6164
output_ptr.write(0u8);
6265
let dst_ptr_end = output_ptr.add(match_length);
63-
while (*output_ptr as usize) < dst_ptr_end as usize {
66+
while *output_ptr < dst_ptr_end {
6467
// Note that we copy 4 bytes, instead of one.
6568
// Without that the compiler will unroll/auto-vectorize the copy with a lot of branches.
6669
// This is not what we want, as large overlapping copies are not that common.

0 commit comments

Comments
 (0)