From 3d3064185b79f41448bf36d08a1915acbdd9ca29 Mon Sep 17 00:00:00 2001
From: Gustavo Noronha Silva <gustavo@noronha.dev.br>
Date: Sun, 21 Dec 2025 11:28:30 -0300
Subject: [PATCH] cmp: stop allocating for byte printing
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

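This makes verbose comparison (cmp -lb) of two completely different 37MB
files 2.34x faster than our own baseline, making our cmp almost 6x
faster than GNU cmp (/opt/homebrew/bin/cmp) on my M4 Pro Mac. Instead of
building a temporary String for every printed byte, the verbose path now
writes the visible form directly into the output buffer. The output
remains identical to that of GNU cmp. Mostly equal and smaller files do
not regress.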

Benchmark 1: ./bin/baseline/diffutils cmp -lb t/huge t/eguh
  Time (mean ± σ):      1.669 s ±  0.011 s    [User: 1.594 s, System: 0.073 s]
  Range (min … max):    1.654 s …  1.689 s    10 runs

  Warning: Ignoring non-zero exit code.

Benchmark 2: ./target/release/diffutils cmp -lb t/huge t/eguh
  Time (mean ± σ):     714.2 ms ±   4.1 ms    [User: 629.3 ms, System: 82.7 ms]
  Range (min … max):   707.2 ms … 721.5 ms    10 runs

  Warning: Ignoring non-zero exit code.

Benchmark 3: /opt/homebrew/bin/cmp -lb t/huge t/eguh
  Time (mean ± σ):      4.213 s ±  0.050 s    [User: 4.128 s, System: 0.081 s]
  Range (min … max):    4.160 s …  4.316 s    10 runs

  Warning: Ignoring non-zero exit code.

Benchmark 4: /usr/bin/cmp -lb t/huge t/eguh
  Time (mean ± σ):      3.892 s ±  0.048 s    [User: 3.819 s, System: 0.070 s]
  Range (min … max):    3.808 s …  3.976 s    10 runs

  Warning: Ignoring non-zero exit code.

Summary
  ./target/release/diffutils cmp -lb t/huge t/eguh ran
    2.34 ± 0.02 times faster than ./bin/baseline/diffutils cmp -lb t/huge t/eguh
    5.45 ± 0.07 times faster than /usr/bin/cmp -lb t/huge t/eguh
    5.90 ± 0.08 times faster than /opt/homebrew/bin/cmp -lb t/huge t/eguh
---
 src/cmp.rs | 96 +++++++++++++++++++++++++++++++++---------------------
 1 file changed, 59 insertions(+), 37 deletions(-)
diff --git a/src/cmp.rs b/src/cmp.rs
index d53df4f..9d20b19 100644
--- a/src/cmp.rs
+++ b/src/cmp.rs
@@ -500,12 +500,6 @@ pub fn main(opts: Peekable<ArgsOs>) -> ExitCode {
     }
 }
 
-#[inline]
-fn is_ascii_printable(byte: u8) -> bool {
-    let c = byte as char;
-    c.is_ascii() && !c.is_ascii_control()
-}
-
 #[inline]
 fn format_octal(byte: u8, buf: &mut [u8; 3]) -> &str {
     *buf = [b' ', b' ', b'0'];
@@ -525,32 +519,67 @@ fn format_octal(byte: u8, buf: &mut [u8; 3]) -> &str {
 }
 
 #[inline]
-fn format_byte(byte: u8) -> String {
-    let mut byte = byte;
-    let mut quoted = vec![];
-
-    if !is_ascii_printable(byte) {
-        if byte >= 128 {
-            quoted.push(b'M');
-            quoted.push(b'-');
-            byte -= 128;
+fn write_visible_byte(output: &mut Vec<u8>, byte: u8) -> usize {
+    match byte {
+        // Control characters: ^@, ^A, ..., ^_
+        0..=31 => {
+            output.push(b'^');
+            output.push(byte + 64);
+            2
         }
-
-        if byte < 32 {
-            quoted.push(b'^');
-            byte += 64;
-        } else if byte == 127 {
-            quoted.push(b'^');
-            byte = b'?';
+        // Printable ASCII (space through ~)
+        32..=126 => {
+            output.push(byte);
+            1
+        }
+        // DEL: ^?
+        127 => {
+            output.extend_from_slice(b"^?");
+            2
+        }
+        // High bytes with control equivalents: M-^@, M-^A, ..., M-^_
+        128..=159 => {
+            output.push(b'M');
+            output.push(b'-');
+            output.push(b'^');
+            output.push(byte - 64);
+            4
+        }
+        // High bytes: M-<space>, M-!, ..., M-~
+        160..=254 => {
+            output.push(b'M');
+            output.push(b'-');
+            output.push(byte - 128);
+            3
+        }
+        // Byte 255: M-^?
+        255 => {
+            output.extend_from_slice(b"M-^?");
+            4
         }
-        assert!((byte as char).is_ascii());
     }
+}
 
-    quoted.push(byte);
+/// Writes a byte in visible form with right-padding to the specified width
+#[inline]
+fn write_visible_byte_padded(output: &mut Vec<u8>, byte: u8, width: usize) {
+    let display_width = write_visible_byte(output, byte);
 
-    // SAFETY: the checks and shifts we do above match what cat and GNU
+    // Add right-padding spaces
+    let padding = width.saturating_sub(display_width);
+    for _ in 0..padding {
+        output.push(b' ');
+    }
+}
+
+/// Formats a byte as a visible string (for the non-performance-critical path)
+#[inline]
+fn format_visible_byte(byte: u8) -> String {
+    let mut result = Vec::with_capacity(4);
+    write_visible_byte(&mut result, byte);
+    // SAFETY: the checks and shifts in write_visible_byte match what cat and GNU
     // cmp do to ensure characters fall inside the ascii range.
-    unsafe { String::from_utf8_unchecked(quoted) }
+    unsafe { String::from_utf8_unchecked(result) }
 }
 
 // This function has been optimized to not use the Rust fmt system, which
@@ -588,14 +617,7 @@ fn format_verbose_difference(
 
         output.push(b' ');
 
-        let from_byte_str = format_byte(from_byte);
-        let from_byte_padding = 4 - from_byte_str.len();
-
-        output.extend_from_slice(from_byte_str.as_bytes());
-
-        for _ in 0..from_byte_padding {
-            output.push(b' ')
-        }
+        write_visible_byte_padded(output, from_byte, 4);
 
         output.push(b' ');
 
@@ -603,7 +625,7 @@ fn format_verbose_difference(
 
         output.push(b' ');
 
-        output.extend_from_slice(format_byte(to_byte).as_bytes());
+        write_visible_byte(output, to_byte);
 
         output.push(b'\n');
     } else {
@@ -706,9 +728,9 @@ fn report_difference(from_byte: u8, to_byte: u8, at_byte: usize, at_line: usize,
         print!(
             " is {:>3o} {:char_width$} {:>3o} {:char_width$}",
             from_byte,
-            format_byte(from_byte),
+            format_visible_byte(from_byte),
             to_byte,
-            format_byte(to_byte)
+            format_visible_byte(to_byte)
         );
     }
     println!();