Speed up compression by 2100% (#18)

* Speed up compression by 2100% * Fix typo * Restore comment * Avoid computing hash value more than once * Optimize longest_common_prefix
decompals · Jun 3, 2024 · 4f0d435 · 4f0d435
1 parent 5c72565
commit 4f0d435
Show file tree

Hide file tree

Showing 4 changed files with 144 additions and 73 deletions.
diff --git a/lib/src/mio0.rs b/lib/src/mio0.rs
@@ -89,6 +89,8 @@ fn size_for_compressed_buffer(input_size: usize) -> Result<usize, Crunch64Error>
 pub fn compress(bytes: &[u8]) -> Result<Box<[u8]>, Crunch64Error> {
     let input_size = bytes.len();
 
+    let mut window = utils::Window::new(bytes);
+
     let mut pp: usize = 0;
     let mut index_cur_layout_byte: usize = 0;
 
@@ -100,7 +102,7 @@ pub fn compress(bytes: &[u8]) -> Result<Box<[u8]>, Crunch64Error> {
     let mut cur_layout_bit: u32 = 0x80000000;
 
     while input_pos < input_size {
-        let (mut group_pos, mut group_size) = utils::search(input_pos, bytes, 18);
+        let (mut group_pos, mut group_size) = window.search(input_pos, 18);
 
         // If the group isn't larger than 2 bytes, copying the input without compression is smaller
         if group_size <= 2 {
@@ -110,7 +112,7 @@ pub fn compress(bytes: &[u8]) -> Result<Box<[u8]>, Crunch64Error> {
             input_pos += 1;
         } else {
             // Search for a new group after one position after the current one
-            let (new_position, new_size) = utils::search(input_pos + 1, bytes, 18);
+            let (new_position, new_size) = window.search(input_pos + 1, 18);
 
             // If the new group is better than the current group by at least 2 bytes, use it instead
             if new_size >= group_size + 2 {

diff --git a/lib/src/utils.rs b/lib/src/utils.rs
@@ -78,92 +78,158 @@ pub(crate) fn set_pointer_array_from_u8_array(
     Ok(())
 }
 
-pub(crate) fn search(input_pos: usize, data_in: &[u8], max_match_length: usize) -> (u32, u32) {
-    let mut cur_size = 3;
-    let mut found_pos = 0;
-    let mut search_pos = cmp::max(input_pos as isize - 0x1000, 0) as usize;
-    let search_size = cmp::min(data_in.len() - input_pos, max_match_length);
-
-    if search_size < 3 {
-        return (0, 0);
-    }
+pub(crate) fn longest_common_prefix(a: &[u8], b: &[u8]) -> usize {
+    a.iter().zip(b.iter()).take_while(|&(a, b)| a == b).count()
+}
+
+const HASH_SIZE: usize = 1 << 15;
+const HASH_MASK: usize = HASH_SIZE - 1;
+
+const WINDOW_SIZE: usize = 0x1000;
+const WINDOW_MASK: usize = WINDOW_SIZE - 1;
 
-    while search_pos < input_pos {
-        let found_offset = mischarsearch(
-            &data_in[input_pos..],
-            cur_size,
-            &data_in[search_pos..],
-            cur_size + input_pos - search_pos,
-        );
+const MIN_MATCH: usize = 3;
+const NULL: u16 = 0xFFFF;
+
+// Updates a running hash value with a new byte. The shift ensures that only the
+// last 3 bytes of the input can affect the hash value.
+fn update_hash(hash: usize, byte: u8) -> usize {
+    ((hash << 5) ^ (byte as usize)) & HASH_MASK
+}
+
+// Finds the longest match in a 0x1000-byte sliding window, searching
+// front-to-back with a minimum match size of 3 bytes. The algorithm is similar
+// to the one described in section 4 of RFC 1951
+// (https://www.rfc-editor.org/rfc/rfc1951.html#section-4), using a chained hash
+// table of 3-byte sequences to find matches. Each character in the window is
+// identified by its position & 0xFFF (like in a circular buffer).
+pub(crate) struct Window<'a> {
+    // Compression input
+    input: &'a [u8],
+    // Current position in the input
+    input_pos: usize,
+    // Hash value at the window start
+    hash_start: usize,
+    // Hash value at the current input position
+    hash_end: usize,
+    // Head of hash chain for each hash value, or NULL
+    head: [u16; HASH_SIZE],
+    // Tail of hash chain for each hash value, or NULL
+    tail: [u16; HASH_SIZE],
+    // Next index in the hash chain, or NULL
+    next: [u16; WINDOW_SIZE],
+}
 
-        if found_offset >= input_pos - search_pos {
-            break;
+impl Window<'_> {
+    pub(crate) fn new(input: &[u8]) -> Window {
+        let mut hash = 0;
+        for &b in input.iter().take(MIN_MATCH) {
+            hash = update_hash(hash, b);
         }
 
-        while cur_size < search_size {
-            if data_in[cur_size + search_pos + found_offset] != data_in[cur_size + input_pos] {
-                break;
-            }
-            cur_size += 1;
+        Window {
+            input,
+            input_pos: 0,
+            hash_start: hash,
+            hash_end: hash,
+            head: [NULL; HASH_SIZE],
+            tail: [NULL; HASH_SIZE],
+            next: [NULL; WINDOW_SIZE],
         }
+    }
 
-        if search_size == cur_size {
-            return ((found_offset + search_pos) as u32, cur_size as u32);
+    // Advances the window by one byte, updating the hash chains.
+    pub(crate) fn advance(&mut self) {
+        if self.input_pos >= self.input.len() {
+            return;
         }
 
-        found_pos = (search_pos + found_offset) as isize;
-        search_pos = (found_pos + 1) as usize;
-        cur_size += 1;
-    }
+        // Remove the oldest byte from the hash chain
+        if self.input_pos >= WINDOW_SIZE {
+            let head = self.head[self.hash_start];
+            let next = self.next[head as usize];
 
-    (found_pos as u32, cmp::max(cur_size as isize - 1, 0) as u32)
-}
+            self.head[self.hash_start] = next;
+            if next == NULL {
+                self.tail[self.hash_start] = NULL;
+            }
+
+            self.hash_start = update_hash(
+                self.hash_start,
+                self.input[self.input_pos - WINDOW_SIZE + MIN_MATCH],
+            );
+        }
+
+        // Add the current byte to the hash chain
+        if self.input_pos + MIN_MATCH < self.input.len() {
+            let tail = self.tail[self.hash_end];
+            let pos = (self.input_pos & WINDOW_MASK) as u16;
 
-fn mischarsearch(pattern: &[u8], pattern_len: usize, data: &[u8], data_len: usize) -> usize {
-    let mut skip_table = [0u16; 256];
-    let mut i: isize;
+            self.next[pos as usize] = NULL;
+            self.tail[self.hash_end] = pos;
+            if tail == NULL {
+                self.head[self.hash_end] = pos;
+            } else {
+                self.next[tail as usize] = pos;
+            }
 
-    let mut v6: isize;
-    let mut j: isize;
+            self.hash_end = update_hash(self.hash_end, self.input[self.input_pos + MIN_MATCH]);
+        }
 
-    if pattern_len <= data_len {
-        initskip(pattern, pattern_len, &mut skip_table);
+        self.input_pos += 1;
+    }
 
-        i = pattern_len as isize - 1;
-        loop {
-            if pattern[pattern_len - 1] == data[i as usize] {
-                i -= 1;
-                j = pattern_len as isize - 2;
-                if j < 0 {
-                    return (i + 1) as usize;
-                }
+    // Move the window forward to the input position, and search the window front-to-back for a match
+    // at most `max_match_length` bytes long, returning the offset and length of the longest match found.
+    // Successive searches can only be performed at increasing input positions.
+    pub(crate) fn search(&mut self, input_pos: usize, max_match_length: usize) -> (u32, u32) {
+        if input_pos < self.input_pos {
+            panic!("window moved backwards");
+        } else if input_pos >= self.input.len() {
+            return (0, 0);
+        }
 
-                while pattern[j as usize] == data[i as usize] {
-                    i -= 1;
-                    j -= 1;
-                    if j < 0 {
-                        return (i + 1) as usize;
-                    }
-                }
+        let max_match_length = cmp::min(max_match_length, self.input.len() - input_pos);
+        if max_match_length < MIN_MATCH {
+            return (0, 0);
+        }
 
-                v6 = pattern_len as isize - j;
+        while self.input_pos < input_pos {
+            self.advance();
+        }
 
-                if skip_table[data[i as usize] as usize] as isize > v6 {
-                    v6 = skip_table[data[i as usize] as usize] as isize;
+        let mut pos = self.head[self.hash_end];
+        let mut best_len = MIN_MATCH - 1;
+        let mut best_offset = 0;
+
+        while pos != NULL {
+            // Figure out the current match offset from `pos` (which is equal to `match_offset & WINDOW_MASK`)
+            // using the fact that `1 <= input_pos - match_offset <= WINDOW_SIZE`
+            let match_offset =
+                input_pos - 1 - (input_pos.wrapping_sub(pos as usize + 1) & WINDOW_MASK);
+
+            if self.input[input_pos] == self.input[match_offset]
+                && self.input[input_pos + 1] == self.input[match_offset + 1]
+                && self.input[match_offset + best_len] == self.input[input_pos + best_len]
+            {
+                // The hash function guarantees that if the hashes are equal and
+                // the first two bytes match, the third byte will too
+                let candidate_len = 3 + longest_common_prefix(
+                    &self.input[input_pos + 3..input_pos + max_match_length],
+                    &self.input[match_offset + 3..match_offset + max_match_length],
+                );
+                if candidate_len > best_len {
+                    best_len = candidate_len;
+                    best_offset = match_offset;
+                    if best_len == max_match_length {
+                        break;
+                    }
                 }
-            } else {
-                v6 = skip_table[data[i as usize] as usize] as isize;
             }
-            i += v6;
-        }
-    }
-    data_len
-}
 
-fn initskip(pattern: &[u8], len: usize, skip: &mut [u16; 256]) {
-    skip.fill(len as u16);
+            pos = self.next[pos as usize];
+        }
 
-    for i in 0..len {
-        skip[pattern[i] as usize] = (len - i - 1) as u16;
+        (best_offset as u32, best_len as u32)
     }
 }
diff --git a/lib/src/yay0.rs b/lib/src/yay0.rs
@@ -97,6 +97,8 @@ fn size_for_compressed_buffer(input_size: usize) -> Result<usize, Crunch64Error>
 pub fn compress(bytes: &[u8]) -> Result<Box<[u8]>, Crunch64Error> {
     let input_size = bytes.len();
 
+    let mut window = utils::Window::new(bytes);
+
     let mut pp: usize = 0;
     let mut index_cur_layout_byte: usize = 0;
 
@@ -108,7 +110,7 @@ pub fn compress(bytes: &[u8]) -> Result<Box<[u8]>, Crunch64Error> {
     let mut cur_layout_bit: u32 = 0x80000000;
 
     while input_pos < input_size {
-        let (mut group_pos, mut group_size) = utils::search(input_pos, bytes, 0x111);
+        let (mut group_pos, mut group_size) = window.search(input_pos, 0x111);
 
         // If the group isn't larger than 2 bytes, copying the input without compression is smaller
         if group_size <= 2 {
@@ -118,7 +120,7 @@ pub fn compress(bytes: &[u8]) -> Result<Box<[u8]>, Crunch64Error> {
             input_pos += 1;
         } else {
             // Search for a new group after one position after the current one
-            let (new_position, new_size) = utils::search(input_pos + 1, bytes, 0x111);
+            let (new_position, new_size) = window.search(input_pos + 1, 0x111);
 
             // If the new group is better than the current group by at least 2 bytes, use it instead
             if new_size >= group_size + 2 {

diff --git a/lib/src/yaz0.rs b/lib/src/yaz0.rs
@@ -97,6 +97,7 @@ pub fn compress(bytes: &[u8]) -> Result<Box<[u8]>, Crunch64Error> {
     let input_size = bytes.len();
 
     let mut output: Vec<u8> = Vec::with_capacity(size_for_compressed_buffer(input_size)?);
+    let mut window = utils::Window::new(bytes);
 
     write_header(&mut output, input_size)?;
 
@@ -116,7 +117,7 @@ pub fn compress(bytes: &[u8]) -> Result<Box<[u8]>, Crunch64Error> {
             index_out_ptr += 1;
         }
 
-        let (mut group_pos, mut group_size) = utils::search(input_pos, bytes, 0x111);
+        let (mut group_pos, mut group_size) = window.search(input_pos, 0x111);
 
         // If the group isn't larger than 2 bytes, copying the input without compression is smaller
         if group_size <= 2 {
@@ -127,7 +128,7 @@ pub fn compress(bytes: &[u8]) -> Result<Box<[u8]>, Crunch64Error> {
             index_out_ptr += 1;
         } else {
             // Search for a new group after one position after the current one
-            let (new_position, new_size) = utils::search(input_pos + 1, bytes, 0x111);
+            let (new_position, new_size) = window.search(input_pos + 1, 0x111);
 
             // If the new group is better than the current group by at least 2 bytes, use it instead
             if new_size >= group_size + 2 {