diff --git a/CHANGELOG.md b/CHANGELOG.md index 8019efa..edc22fc 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,11 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] +### Fixed + +- Fix a bug where the gzip compressor may output incorrect data when emitting + "fixed blocks" (which are emitted when compressing high-entropy data). + ## [0.5.0] - 2024-06-04 ### Added diff --git a/lib/src/gzip.rs b/lib/src/gzip.rs index 628f1ef..ac34a09 100644 --- a/lib/src/gzip.rs +++ b/lib/src/gzip.rs @@ -1022,16 +1022,34 @@ pub fn compress(bytes: &[u8], level: usize, small_mem: bool) -> Result pos += prev_match_len - 1; lookahead -= prev_match_len - 1; + if should_flush { + if pos >= block_length { + writer.flush_block(&mut output, Some(&window[pos - block_length..pos]), false); + } else { + writer.flush_block(&mut output, None, false); + } + block_length = 0; + } + has_prev_char = false; prev_match_len = MIN_MATCH - 1; prev_match_dist = 0; } else { - // Remember current match and emit previous character as literal if it exists + // Emit previous character as literal (if it exists) and remember current match if has_prev_char { writer.add_literal(window[pos - 1]); should_flush = writer.should_flush_block(block_length); } + if should_flush { + if pos >= block_length { + writer.flush_block(&mut output, Some(&window[pos - block_length..pos]), false); + } else { + writer.flush_block(&mut output, None, false); + } + block_length = 0; + } + block_length += 1; pos += 1; lookahead -= 1; @@ -1041,15 +1059,6 @@ pub fn compress(bytes: &[u8], level: usize, small_mem: bool) -> Result prev_match_dist = pos - 1 - best_pos; } - if should_flush { - if pos >= block_length { - writer.flush_block(&mut output, Some(&window[pos - block_length..pos]), false); - } else { - writer.flush_block(&mut output, None, false); - } - block_length = 0; - } - // Refill window if lookahead < MIN_LOOKAHEAD && !eof && pos >= WINDOW_SIZE + MAX_DIST { window.copy_within(WINDOW_SIZE..2 * WINDOW_SIZE, 0); diff --git a/test_data/dirt.png.gzip-6-small-mem b/test_data/dirt.png.gzip-6-small-mem index 96e67dc..9c548aa 100644 Binary files a/test_data/dirt.png.gzip-6-small-mem and b/test_data/dirt.png.gzip-6-small-mem differ diff --git a/test_data/dirt.png.gzip-9 b/test_data/dirt.png.gzip-9 index 7ac995c..2283bd4 100644 Binary files a/test_data/dirt.png.gzip-9 and b/test_data/dirt.png.gzip-9 differ diff --git a/test_data/dirt.png.gzip-9-small-mem b/test_data/dirt.png.gzip-9-small-mem index 96e67dc..9c548aa 100644 Binary files a/test_data/dirt.png.gzip-9-small-mem and b/test_data/dirt.png.gzip-9-small-mem differ diff --git a/test_data/ground.png.gzip-6-small-mem b/test_data/ground.png.gzip-6-small-mem index 30b61cb..8c94729 100644 Binary files a/test_data/ground.png.gzip-6-small-mem and b/test_data/ground.png.gzip-6-small-mem differ diff --git a/test_data/ground.png.gzip-9 b/test_data/ground.png.gzip-9 index bea320f..9a97b2c 100644 Binary files a/test_data/ground.png.gzip-9 and b/test_data/ground.png.gzip-9 differ diff --git a/test_data/ground.png.gzip-9-small-mem b/test_data/ground.png.gzip-9-small-mem index 30b61cb..8c94729 100644 Binary files a/test_data/ground.png.gzip-9-small-mem and b/test_data/ground.png.gzip-9-small-mem differ diff --git a/test_data/stones.png.gzip-6-small-mem b/test_data/stones.png.gzip-6-small-mem index d5f4d43..513851b 100644 Binary files a/test_data/stones.png.gzip-6-small-mem and b/test_data/stones.png.gzip-6-small-mem differ diff --git a/test_data/stones.png.gzip-9 b/test_data/stones.png.gzip-9 index 8cb149c..527868f 100644 Binary files a/test_data/stones.png.gzip-9 and b/test_data/stones.png.gzip-9 differ diff --git a/test_data/stones.png.gzip-9-small-mem b/test_data/stones.png.gzip-9-small-mem index d5f4d43..513851b 100644 Binary files a/test_data/stones.png.gzip-9-small-mem and b/test_data/stones.png.gzip-9-small-mem differ diff --git a/test_data/tile.png.gzip-6-small-mem b/test_data/tile.png.gzip-6-small-mem index dc03913..e48aac5 100644 Binary files a/test_data/tile.png.gzip-6-small-mem and b/test_data/tile.png.gzip-6-small-mem differ diff --git a/test_data/tile.png.gzip-9 b/test_data/tile.png.gzip-9 index 3b7c483..1dc1d69 100644 Binary files a/test_data/tile.png.gzip-9 and b/test_data/tile.png.gzip-9 differ diff --git a/test_data/tile.png.gzip-9-small-mem b/test_data/tile.png.gzip-9-small-mem index dc03913..e48aac5 100644 Binary files a/test_data/tile.png.gzip-9-small-mem and b/test_data/tile.png.gzip-9-small-mem differ