Skip to content

Commit 20bb1ab

Browse files
committed
switch to use only 3 kinds of hashtable
use only hashtables with fixed sizes and bit shifts, which allows the removal of bounds checks.
1 parent 8032df4 commit 20bb1ab

3 files changed

Lines changed: 64 additions & 149 deletions

File tree

src/block/compress.rs

Lines changed: 6 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -4,9 +4,7 @@
44
//! high performance. It has fixed memory usage, which contrary to other approaches, makes it less
55
//! memory hungry.
66
7-
use crate::block::hashtable::get_table_size;
87
use crate::block::hashtable::HashTable;
9-
use crate::block::hashtable::{HashTableU16, HashTableU32, HashTableUsize};
108
use crate::block::END_OFFSET;
119
use crate::block::LZ4_MIN_LENGTH;
1210
use crate::block::MAX_DISTANCE;
@@ -18,6 +16,8 @@ use alloc::vec::Vec;
1816
#[cfg(feature = "safe-encode")]
1917
use core::convert::TryInto;
2018

19+
use super::hashtable::HashTable4KU16;
20+
use super::hashtable::HashTable8K;
2121
use super::{CompressError, WINDOW_SIZE};
2222

2323
pub(crate) fn get_vec_with_size(size: usize) -> Vec<u8> {
@@ -346,7 +346,7 @@ fn backtrack_match(
346346
/// show significant improvement though.
347347
// Intentionally avoid inlining.
348348
// Empirical tests revealed it to be rarely better but often significantly detrimental.
349-
#[inline(never)]
349+
#[inline]
350350
pub(crate) fn compress_internal<T: HashTable, const USE_DICT: bool>(
351351
input: &[u8],
352352
input_pos: usize,
@@ -596,17 +596,13 @@ pub(crate) fn compress_into_sink_with_dict<const USE_DICT: bool>(
596596
output: &mut SliceSink,
597597
mut dict_data: &[u8],
598598
) -> Result<usize, CompressError> {
599-
let (dict_size, dict_bitshift) = get_table_size(input.len());
600599
if dict_data.len() + input.len() < u16::MAX as usize {
601-
let mut dict = HashTableU16::new(dict_size, dict_bitshift);
602-
init_dict(&mut dict, &mut dict_data);
603-
compress_internal::<_, USE_DICT>(input, 0, output, &mut dict, dict_data, dict_data.len())
604-
} else if dict_data.len() + input.len() < u32::MAX as usize {
605-
let mut dict = HashTableU32::new(dict_size, dict_bitshift);
600+
let mut dict = HashTable4KU16::new();
606601
init_dict(&mut dict, &mut dict_data);
607602
compress_internal::<_, USE_DICT>(input, 0, output, &mut dict, dict_data, dict_data.len())
608603
} else {
609-
let mut dict = HashTableUsize::new(dict_size, dict_bitshift);
604+
// For some reason using a 4K hashtable causes a performance regression (memory layout?)
605+
let mut dict = HashTable8K::new();
610606
init_dict(&mut dict, &mut dict_data);
611607
compress_internal::<_, USE_DICT>(input, 0, output, &mut dict, dict_data, dict_data.len())
612608
}

src/block/hashtable.rs

Lines changed: 41 additions & 137 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,4 @@
11
use alloc::boxed::Box;
2-
use alloc::vec::Vec;
3-
#[cfg(feature = "frame")]
42
use core::convert::TryInto;
53

64
/// The Hashtable trait used by the compression to store hashed bytes to their position.
@@ -51,64 +49,55 @@ pub trait HashTable {
5149
}
5250
}
5351

52+
const HASHTABLE_SIZE_4K: usize = 4 * 1024;
53+
const HASHTABLE_BIT_SHIFT_4K: usize = 4;
54+
5455
#[derive(Debug)]
55-
pub struct HashTableUsize {
56-
dict: Vec<usize>,
57-
/// Shift the hash value for the dictionary to the right, to match the dictionary size.
58-
dict_bitshift: usize,
56+
#[repr(align(64))]
57+
pub struct HashTable4KU16 {
58+
dict: Box<[u16; HASHTABLE_SIZE_4K]>,
5959
}
60-
61-
impl HashTableUsize {
60+
impl HashTable4KU16 {
6261
#[inline]
63-
pub fn new(dict_size: usize, dict_bitshift: usize) -> Self {
64-
let dict = alloc::vec![0; dict_size];
65-
Self {
66-
dict,
67-
dict_bitshift,
68-
}
62+
pub fn new() -> Self {
63+
// This generates more efficient assembly in contrast to Box::new(slice), because of an
64+
// optimized call to alloc_zeroed, vs. alloc + memset
65+
// try_into is optimized away
66+
let dict = alloc::vec![0; HASHTABLE_SIZE_4K]
67+
.into_boxed_slice()
68+
.try_into()
69+
.unwrap();
70+
Self { dict }
6971
}
7072
}
71-
72-
impl HashTable for HashTableUsize {
73-
#[inline]
74-
#[cfg(feature = "safe-encode")]
75-
fn get_at(&self, hash: usize) -> usize {
76-
self.dict[hash >> self.dict_bitshift] as usize
77-
}
73+
impl HashTable for HashTable4KU16 {
7874
#[inline]
79-
#[cfg(not(feature = "safe-encode"))]
8075
fn get_at(&self, hash: usize) -> usize {
81-
unsafe { *self.dict.get_unchecked(hash >> self.dict_bitshift) as usize }
82-
}
83-
84-
#[inline]
85-
#[cfg(feature = "safe-encode")]
86-
fn put_at(&mut self, hash: usize, val: usize) {
87-
self.dict[hash >> self.dict_bitshift] = val;
76+
self.dict[hash >> HASHTABLE_BIT_SHIFT_4K] as usize
8877
}
8978
#[inline]
90-
#[cfg(not(feature = "safe-encode"))]
9179
fn put_at(&mut self, hash: usize, val: usize) {
92-
(*unsafe { self.dict.get_unchecked_mut(hash >> self.dict_bitshift) }) = val;
80+
self.dict[hash >> HASHTABLE_BIT_SHIFT_4K] = val as u16;
9381
}
94-
9582
#[inline]
9683
fn clear(&mut self) {
9784
self.dict.fill(0);
9885
}
86+
#[inline]
87+
fn get_hash_at(input: &[u8], pos: usize) -> usize {
88+
hash(super::get_batch(input, pos)) as usize
89+
}
9990
}
10091

101-
const HASHTABLE_SIZE_4K: usize = 4 * 1024;
102-
const HASHTABLE_BIT_SHIFT_4K: usize = 4;
103-
10492
#[derive(Debug)]
10593
#[repr(align(64))]
94+
#[cfg(feature = "frame")]
10695
pub struct HashTable4K {
10796
dict: Box<[u32; HASHTABLE_SIZE_4K]>,
10897
}
98+
#[cfg(feature = "frame")]
10999
impl HashTable4K {
110100
#[inline]
111-
#[cfg(feature = "frame")]
112101
pub fn new() -> Self {
113102
let dict = alloc::vec![0; HASHTABLE_SIZE_4K]
114103
.into_boxed_slice()
@@ -125,6 +114,7 @@ impl HashTable4K {
125114
}
126115
}
127116
}
117+
#[cfg(feature = "frame")]
128118
impl HashTable for HashTable4K {
129119
#[inline]
130120
fn get_at(&self, hash: usize) -> usize {
@@ -140,122 +130,36 @@ impl HashTable for HashTable4K {
140130
}
141131
}
142132

143-
#[derive(Debug)]
144-
#[repr(align(64))]
145-
pub struct HashTableU32 {
146-
dict: Vec<u32>,
147-
/// Shift the hash value for the dictionary to the right, to match the dictionary size.
148-
dict_bitshift: usize,
149-
}
150-
impl HashTableU32 {
151-
#[inline]
152-
pub fn new(dict_size: usize, dict_bitshift: usize) -> Self {
153-
let dict = alloc::vec![0; dict_size];
154-
Self {
155-
dict,
156-
dict_bitshift,
157-
}
158-
}
159-
}
160-
impl HashTable for HashTableU32 {
161-
#[inline]
162-
#[cfg(feature = "safe-encode")]
163-
fn get_at(&self, hash: usize) -> usize {
164-
self.dict[hash >> self.dict_bitshift] as usize
165-
}
166-
#[inline]
167-
#[cfg(not(feature = "safe-encode"))]
168-
fn get_at(&self, hash: usize) -> usize {
169-
unsafe { *self.dict.get_unchecked(hash >> self.dict_bitshift) as usize }
170-
}
171-
#[inline]
172-
#[cfg(feature = "safe-encode")]
173-
fn put_at(&mut self, hash: usize, val: usize) {
174-
self.dict[hash >> self.dict_bitshift] = val as u32;
175-
}
176-
#[inline]
177-
#[cfg(not(feature = "safe-encode"))]
178-
fn put_at(&mut self, hash: usize, val: usize) {
179-
(*unsafe { self.dict.get_unchecked_mut(hash >> self.dict_bitshift) }) = val as u32;
180-
}
181-
#[inline]
182-
fn clear(&mut self) {
183-
self.dict.fill(0);
184-
}
185-
}
133+
const HASHTABLE_SIZE_8K: usize = 8 * 1024;
134+
const HASH_TABLE_BIT_SHIFT_8K: usize = 3;
186135

187136
#[derive(Debug)]
188137
#[repr(align(64))]
189-
pub struct HashTableU16 {
190-
dict: Vec<u16>,
191-
/// Shift the hash value for the dictionary to the right, to match the dictionary size.
192-
dict_bitshift: usize,
138+
pub struct HashTable8K {
139+
dict: Box<[u32; HASHTABLE_SIZE_8K]>,
193140
}
194-
impl HashTableU16 {
141+
impl HashTable8K {
195142
#[inline]
196-
pub fn new(dict_size: usize, dict_bitshift: usize) -> Self {
197-
let dict = alloc::vec![0; dict_size];
198-
Self {
199-
dict,
200-
dict_bitshift,
201-
}
143+
pub fn new() -> Self {
144+
let dict = alloc::vec![0; HASHTABLE_SIZE_8K]
145+
.into_boxed_slice()
146+
.try_into()
147+
.unwrap();
148+
149+
Self { dict }
202150
}
203151
}
204-
impl HashTable for HashTableU16 {
205-
#[inline]
206-
#[cfg(feature = "safe-encode")]
207-
fn get_at(&self, hash: usize) -> usize {
208-
self.dict[hash >> self.dict_bitshift] as usize
209-
}
152+
impl HashTable for HashTable8K {
210153
#[inline]
211-
#[cfg(not(feature = "safe-encode"))]
212154
fn get_at(&self, hash: usize) -> usize {
213-
unsafe { *self.dict.get_unchecked(hash >> self.dict_bitshift) as usize }
155+
self.dict[hash >> HASH_TABLE_BIT_SHIFT_8K] as usize
214156
}
215157
#[inline]
216-
#[cfg(feature = "safe-encode")]
217158
fn put_at(&mut self, hash: usize, val: usize) {
218-
self.dict[hash >> self.dict_bitshift] = val as u16;
219-
}
220-
#[inline]
221-
#[cfg(not(feature = "safe-encode"))]
222-
fn put_at(&mut self, hash: usize, val: usize) {
223-
(*unsafe { self.dict.get_unchecked_mut(hash >> self.dict_bitshift) }) = val as u16;
159+
self.dict[hash >> HASH_TABLE_BIT_SHIFT_8K] = val as u32;
224160
}
225161
#[inline]
226162
fn clear(&mut self) {
227163
self.dict.fill(0);
228164
}
229-
#[inline]
230-
fn get_hash_at(input: &[u8], pos: usize) -> usize {
231-
hash(super::get_batch(input, pos)) as usize
232-
}
233-
}
234-
235-
#[inline]
236-
pub fn get_table_size(input_len: usize) -> (usize, usize) {
237-
let (dict_size, dict_bitshift) = match input_len {
238-
// U16 Positions
239-
0..=65535 => {
240-
// Considering we want a table with up to 16K bytes and each slot takes 2 bytes.
241-
// Calculate size the matching table size according to the input size,
242-
// so the overhead of "zeroing" the table is not too large for small inputs.
243-
let size = input_len.next_power_of_two().clamp(256, 16 * 1024) / 2;
244-
(size, 16 - size.trailing_zeros() as usize)
245-
}
246-
// U32 positions => 16KB table
247-
// Usize (U64) positions => 32KB table
248-
_ => (4096, 4),
249-
};
250-
(dict_size, dict_bitshift)
251-
}
252-
253-
#[test]
254-
fn test_get_table_size() {
255-
const MAX_HASH: usize = u16::MAX as usize;
256-
for i in 0..32 {
257-
let input_len = 2usize.pow(i);
258-
let (size, shift) = get_table_size(input_len);
259-
assert_eq!(size, (MAX_HASH >> shift) + 1);
260-
}
261165
}

tests/tests.rs

Lines changed: 17 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,8 @@
33
#[macro_use]
44
extern crate more_asserts;
55

6+
use std::iter;
7+
68
use lz4_compress::compress as lz4_rust_compress;
79
#[cfg(feature = "frame")]
810
use lz4_flex::frame::BlockMode;
@@ -158,6 +160,13 @@ fn test_minimum_compression_ratio() {
158160
let ratio = compressed.len() as f64 / COMPRESSION34K.len() as f64;
159161
assert_lt!(ratio, 0.585); // TODO check why compression is not deterministic (fails in ci for
160162
// 0.58)
163+
let compressed = compress(COMPRESSION65);
164+
let ratio = compressed.len() as f64 / COMPRESSION65.len() as f64;
165+
assert_lt!(ratio, 0.574);
166+
167+
let compressed = compress(COMPRESSION66JSON);
168+
let ratio = compressed.len() as f64 / COMPRESSION66JSON.len() as f64;
169+
assert_lt!(ratio, 0.229);
161170
}
162171

163172
use lz_fear::raw::compress2;
@@ -407,6 +416,12 @@ fn buf_fuzz_5() {
407416
test_roundtrip(data);
408417
}
409418

419+
#[test]
420+
fn test_so_many_zeros() {
421+
let data: Vec<u8> = iter::repeat(0).take(30_000).collect();
422+
test_roundtrip(data);
423+
}
424+
410425
#[test]
411426
fn compression_works() {
412427
let s = r#"An iterator that knows its exact length.
@@ -432,9 +447,9 @@ fn compression_works() {
432447
#[ignore]
433448
#[test]
434449
fn big_compression() {
435-
let mut s = Vec::with_capacity(80_000000);
450+
let mut s = Vec::with_capacity(80_000_000);
436451

437-
for n in 0..80_000000 {
452+
for n in 0..80_000_000 {
438453
s.push((n as u8).wrapping_mul(0xA).wrapping_add(33) ^ 0xA2);
439454
}
440455

0 commit comments

Comments
 (0)