11use alloc:: boxed:: Box ;
2- use alloc:: vec:: Vec ;
3- #[ cfg( feature = "frame" ) ]
42use core:: convert:: TryInto ;
53
64/// The Hashtable trait used by the compression to store hashed bytes to their position.
@@ -51,64 +49,55 @@ pub trait HashTable {
5149 }
5250}
5351
52+ const HASHTABLE_SIZE_4K : usize = 4 * 1024 ;
53+ const HASHTABLE_BIT_SHIFT_4K : usize = 4 ;
54+
5455#[ derive( Debug ) ]
55- pub struct HashTableUsize {
56- dict : Vec < usize > ,
57- /// Shift the hash value for the dictionary to the right, to match the dictionary size.
58- dict_bitshift : usize ,
56+ #[ repr( align( 64 ) ) ]
57+ pub struct HashTable4KU16 {
58+ dict : Box < [ u16 ; HASHTABLE_SIZE_4K ] > ,
5959}
60-
61- impl HashTableUsize {
60+ impl HashTable4KU16 {
6261 #[ inline]
63- pub fn new ( dict_size : usize , dict_bitshift : usize ) -> Self {
64- let dict = alloc:: vec![ 0 ; dict_size] ;
65- Self {
66- dict,
67- dict_bitshift,
68- }
62+ pub fn new ( ) -> Self {
63+ // This generates more efficient assembly in contrast to Box::new(slice), because of an
64+ // optmized call alloc_zeroed, vs. alloc + memset
65+ // try_into is optimized away
66+ let dict = alloc:: vec![ 0 ; HASHTABLE_SIZE_4K ]
67+ . into_boxed_slice ( )
68+ . try_into ( )
69+ . unwrap ( ) ;
70+ Self { dict }
6971 }
7072}
71-
72- impl HashTable for HashTableUsize {
73- #[ inline]
74- #[ cfg( feature = "safe-encode" ) ]
75- fn get_at ( & self , hash : usize ) -> usize {
76- self . dict [ hash >> self . dict_bitshift ] as usize
77- }
73+ impl HashTable for HashTable4KU16 {
7874 #[ inline]
79- #[ cfg( not( feature = "safe-encode" ) ) ]
8075 fn get_at ( & self , hash : usize ) -> usize {
81- unsafe { * self . dict . get_unchecked ( hash >> self . dict_bitshift ) as usize }
82- }
83-
84- #[ inline]
85- #[ cfg( feature = "safe-encode" ) ]
86- fn put_at ( & mut self , hash : usize , val : usize ) {
87- self . dict [ hash >> self . dict_bitshift ] = val;
76+ self . dict [ hash >> HASHTABLE_BIT_SHIFT_4K ] as usize
8877 }
8978 #[ inline]
90- #[ cfg( not( feature = "safe-encode" ) ) ]
9179 fn put_at ( & mut self , hash : usize , val : usize ) {
92- ( * unsafe { self . dict . get_unchecked_mut ( hash >> self . dict_bitshift ) } ) = val;
80+ self . dict [ hash >> HASHTABLE_BIT_SHIFT_4K ] = val as u16 ;
9381 }
94-
9582 #[ inline]
9683 fn clear ( & mut self ) {
9784 self . dict . fill ( 0 ) ;
9885 }
86+ #[ inline]
87+ fn get_hash_at ( input : & [ u8 ] , pos : usize ) -> usize {
88+ hash ( super :: get_batch ( input, pos) ) as usize
89+ }
9990}
10091
101- const HASHTABLE_SIZE_4K : usize = 4 * 1024 ;
102- const HASHTABLE_BIT_SHIFT_4K : usize = 4 ;
103-
10492#[ derive( Debug ) ]
10593#[ repr( align( 64 ) ) ]
94+ #[ cfg( feature = "frame" ) ]
10695pub struct HashTable4K {
10796 dict : Box < [ u32 ; HASHTABLE_SIZE_4K ] > ,
10897}
98+ #[ cfg( feature = "frame" ) ]
10999impl HashTable4K {
110100 #[ inline]
111- #[ cfg( feature = "frame" ) ]
112101 pub fn new ( ) -> Self {
113102 let dict = alloc:: vec![ 0 ; HASHTABLE_SIZE_4K ]
114103 . into_boxed_slice ( )
@@ -125,6 +114,7 @@ impl HashTable4K {
125114 }
126115 }
127116}
117+ #[ cfg( feature = "frame" ) ]
128118impl HashTable for HashTable4K {
129119 #[ inline]
130120 fn get_at ( & self , hash : usize ) -> usize {
@@ -140,122 +130,36 @@ impl HashTable for HashTable4K {
140130 }
141131}
142132
143- #[ derive( Debug ) ]
144- #[ repr( align( 64 ) ) ]
145- pub struct HashTableU32 {
146- dict : Vec < u32 > ,
147- /// Shift the hash value for the dictionary to the right, to match the dictionary size.
148- dict_bitshift : usize ,
149- }
150- impl HashTableU32 {
151- #[ inline]
152- pub fn new ( dict_size : usize , dict_bitshift : usize ) -> Self {
153- let dict = alloc:: vec![ 0 ; dict_size] ;
154- Self {
155- dict,
156- dict_bitshift,
157- }
158- }
159- }
160- impl HashTable for HashTableU32 {
161- #[ inline]
162- #[ cfg( feature = "safe-encode" ) ]
163- fn get_at ( & self , hash : usize ) -> usize {
164- self . dict [ hash >> self . dict_bitshift ] as usize
165- }
166- #[ inline]
167- #[ cfg( not( feature = "safe-encode" ) ) ]
168- fn get_at ( & self , hash : usize ) -> usize {
169- unsafe { * self . dict . get_unchecked ( hash >> self . dict_bitshift ) as usize }
170- }
171- #[ inline]
172- #[ cfg( feature = "safe-encode" ) ]
173- fn put_at ( & mut self , hash : usize , val : usize ) {
174- self . dict [ hash >> self . dict_bitshift ] = val as u32 ;
175- }
176- #[ inline]
177- #[ cfg( not( feature = "safe-encode" ) ) ]
178- fn put_at ( & mut self , hash : usize , val : usize ) {
179- ( * unsafe { self . dict . get_unchecked_mut ( hash >> self . dict_bitshift ) } ) = val as u32 ;
180- }
181- #[ inline]
182- fn clear ( & mut self ) {
183- self . dict . fill ( 0 ) ;
184- }
185- }
133+ const HASHTABLE_SIZE_8K : usize = 8 * 1024 ;
134+ const HASH_TABLE_BIT_SHIFT_8K : usize = 3 ;
186135
187136#[ derive( Debug ) ]
188137#[ repr( align( 64 ) ) ]
189- pub struct HashTableU16 {
190- dict : Vec < u16 > ,
191- /// Shift the hash value for the dictionary to the right, to match the dictionary size.
192- dict_bitshift : usize ,
138+ pub struct HashTable8K {
139+ dict : Box < [ u32 ; HASHTABLE_SIZE_8K ] > ,
193140}
194- impl HashTableU16 {
141+ impl HashTable8K {
195142 #[ inline]
196- pub fn new ( dict_size : usize , dict_bitshift : usize ) -> Self {
197- let dict = alloc:: vec![ 0 ; dict_size] ;
198- Self {
199- dict,
200- dict_bitshift,
201- }
143+ pub fn new ( ) -> Self {
144+ let dict = alloc:: vec![ 0 ; HASHTABLE_SIZE_8K ]
145+ . into_boxed_slice ( )
146+ . try_into ( )
147+ . unwrap ( ) ;
148+
149+ Self { dict }
202150 }
203151}
204- impl HashTable for HashTableU16 {
205- #[ inline]
206- #[ cfg( feature = "safe-encode" ) ]
207- fn get_at ( & self , hash : usize ) -> usize {
208- self . dict [ hash >> self . dict_bitshift ] as usize
209- }
152+ impl HashTable for HashTable8K {
210153 #[ inline]
211- #[ cfg( not( feature = "safe-encode" ) ) ]
212154 fn get_at ( & self , hash : usize ) -> usize {
213- unsafe { * self . dict . get_unchecked ( hash >> self . dict_bitshift ) as usize }
155+ self . dict [ hash >> HASH_TABLE_BIT_SHIFT_8K ] as usize
214156 }
215157 #[ inline]
216- #[ cfg( feature = "safe-encode" ) ]
217158 fn put_at ( & mut self , hash : usize , val : usize ) {
218- self . dict [ hash >> self . dict_bitshift ] = val as u16 ;
219- }
220- #[ inline]
221- #[ cfg( not( feature = "safe-encode" ) ) ]
222- fn put_at ( & mut self , hash : usize , val : usize ) {
223- ( * unsafe { self . dict . get_unchecked_mut ( hash >> self . dict_bitshift ) } ) = val as u16 ;
159+ self . dict [ hash >> HASH_TABLE_BIT_SHIFT_8K ] = val as u32 ;
224160 }
225161 #[ inline]
226162 fn clear ( & mut self ) {
227163 self . dict . fill ( 0 ) ;
228164 }
229- #[ inline]
230- fn get_hash_at ( input : & [ u8 ] , pos : usize ) -> usize {
231- hash ( super :: get_batch ( input, pos) ) as usize
232- }
233- }
234-
235- #[ inline]
236- pub fn get_table_size ( input_len : usize ) -> ( usize , usize ) {
237- let ( dict_size, dict_bitshift) = match input_len {
238- // U16 Positions
239- 0 ..=65535 => {
240- // Considering we want a table with up to 16K bytes and each slot takes 2 bytes.
241- // Calculate size the matching table size according to the input size,
242- // so the overhead of "zeroing" the table is not too large for small inputs.
243- let size = input_len. next_power_of_two ( ) . clamp ( 256 , 16 * 1024 ) / 2 ;
244- ( size, 16 - size. trailing_zeros ( ) as usize )
245- }
246- // U32 positions => 16KB table
247- // Usize (U64) positions => 32KB table
248- _ => ( 4096 , 4 ) ,
249- } ;
250- ( dict_size, dict_bitshift)
251- }
252-
253- #[ test]
254- fn test_get_table_size ( ) {
255- const MAX_HASH : usize = u16:: MAX as usize ;
256- for i in 0 ..32 {
257- let input_len = 2usize . pow ( i) ;
258- let ( size, shift) = get_table_size ( input_len) ;
259- assert_eq ! ( size, ( MAX_HASH >> shift) + 1 ) ;
260- }
261165}
0 commit comments