Skip to content

Commit 66c38da

Browse files
committed
docs: investigation notes + reproducible benchmark
The investigation doc records the full arc: the Wuffs audit, the structural rewrite, experiments tried and failed, and results on real-world corpora. The strategy_compare bench (zenbench) compares Classic vs Streaming across 8 workloads (random, palette, RLE, solid, photo-predicted; LSB + MSB TIFF). It is self-contained; no external files are needed. Run: cargo bench --bench strategy_compare
1 parent 95d20be commit 66c38da

3 files changed

Lines changed: 976 additions & 0 deletions

File tree

Cargo.toml

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ features = ["std"]
2424

2525
[dev-dependencies]
2626
criterion = "0.3.1"
27+
zenbench = "0.1.3"
2728
[dev-dependencies.tokio]
2829
version = "1"
2930
default-features = false
@@ -60,6 +61,11 @@ name = "msb8"
6061
harness = false
6162
required-features = ["std"]
6263

64+
[[bench]]
65+
name = "strategy_compare"
66+
harness = false
67+
required-features = ["alloc"]
68+
6369
[[example]]
6470
name = "lzw-compress"
6571
required-features = ["std"]
@@ -88,5 +94,9 @@ required-features = ["std"]
8894
name = "end_of_buffer"
8995
required-features = ["alloc"]
9096

97+
[[test]]
98+
name = "streaming_parity"
99+
required-features = ["alloc"]
100+
91101
[package.metadata.docs.rs]
92102
all-features = true

benches/strategy_compare.rs

Lines changed: 179 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,179 @@
1+
//! Benchmark comparing Classic vs Streaming decode strategies across
2+
//! workload types. Run with: `cargo bench --bench strategy_compare`
3+
//!
4+
//! Generates LZW data inline — no external corpus needed.
5+
6+
use std::sync::Arc;
7+
use weezl::{
8+
decode::{Configuration, TableStrategy},
9+
encode::Encoder,
10+
BitOrder, LzwStatus,
11+
};
12+
use zenbench::prelude::*;
13+
14+
fn decode_all(
15+
encoded: &[u8],
16+
out: &mut [u8],
17+
order: BitOrder,
18+
tiff: bool,
19+
strategy: TableStrategy,
20+
) -> usize {
21+
let config = if tiff {
22+
Configuration::with_tiff_size_switch(order, 8)
23+
} else {
24+
Configuration::new(order, 8)
25+
};
26+
let mut dec = config.with_table_strategy(strategy).build();
27+
let mut inp = encoded;
28+
let mut cursor = out;
29+
let mut written = 0;
30+
loop {
31+
let r = dec.decode_bytes(inp, cursor);
32+
inp = &inp[r.consumed_in..];
33+
written += r.consumed_out;
34+
cursor = &mut std::mem::take(&mut cursor)[r.consumed_out..];
35+
match r.status {
36+
Ok(LzwStatus::Done | LzwStatus::NoProgress) => return written,
37+
Ok(LzwStatus::Ok) => {
38+
if inp.is_empty() && cursor.is_empty() {
39+
return written;
40+
}
41+
}
42+
Err(_) => return written,
43+
}
44+
}
45+
}
46+
47+
// --- Data generators ---
48+
49+
/// Returns `len` pseudo-random bytes from a 32-bit xorshift sequence
/// (shifts 13, 17, 5) started from `seed`.
///
/// The lowest bit of the seed is forced on so the state is never zero; as a
/// consequence seeds that differ only in bit 0 yield the same output. Each
/// output byte is bits 8..16 of the updated state.
fn gen_random(len: usize, seed: u32) -> Vec<u8> {
    let mut state = seed | 1;
    (0..len)
        .map(|_| {
            state ^= state << 13;
            state ^= state >> 17;
            state ^= state << 5;
            (state >> 8) as u8
        })
        .collect()
}
60+
61+
/// Returns `len` bytes cycling through the values `0..16` in order.
fn gen_palette16(len: usize) -> Vec<u8> {
    (0..len).map(|i| (i % 16) as u8).collect()
}
64+
65+
/// Returns `len` bytes made of constant-value runs.
///
/// The run value starts at 0 and advances by 7 (wrapping) after every run;
/// a run of value `v` is `32 + v % 33` bytes long. The final run is cut
/// short so the result is exactly `len` bytes.
fn gen_rle(len: usize) -> Vec<u8> {
    let mut out = Vec::with_capacity(len);
    let mut val = 0u8;
    while out.len() < len {
        let run = 32 + (val as usize % 33);
        // Clamp the last run to the remaining space and append it in one go.
        let take = run.min(len - out.len());
        out.resize(out.len() + take, val);
        val = val.wrapping_add(7);
    }
    out
}
77+
78+
/// Returns `len` bytes that all hold the value 42.
fn gen_solid(len: usize) -> Vec<u8> {
    vec![42u8; len]
}
81+
82+
/// Returns `len` bytes where most values are small and a minority are large.
///
/// A 32-bit xorshift sequence (shifts 13, 17, 5) with the fixed seed
/// `0x12345678` produces a raw byte `r` per position. Raw bytes below 200
/// are reduced to their low nibble (`0..16`); raw bytes of 200 and above
/// are emitted unchanged.
fn gen_photo_predicted(len: usize) -> Vec<u8> {
    let mut state = 0x12345678u32;
    (0..len)
        .map(|_| {
            state ^= state << 13;
            state ^= state >> 17;
            state ^= state << 5;
            let r = (state >> 8) as u8;
            if r < 200 {
                r & 0x0F
            } else {
                r
            }
        })
        .collect()
}
98+
99+
struct Workload {
100+
name: &'static str,
101+
encoded: Arc<Vec<u8>>,
102+
decoded_size: usize,
103+
order: BitOrder,
104+
tiff: bool,
105+
}
106+
107+
fn make_workload(name: &'static str, data: &[u8], order: BitOrder, tiff: bool) -> Workload {
108+
let encoded = if tiff {
109+
Encoder::with_tiff_size_switch(order, 8)
110+
.encode(data)
111+
.unwrap()
112+
} else {
113+
Encoder::new(order, 8).encode(data).unwrap()
114+
};
115+
// Determine actual decoded size
116+
let mut scratch = vec![0u8; data.len() + 4096];
117+
let decoded_size = decode_all(&encoded, &mut scratch, order, tiff, TableStrategy::Classic);
118+
Workload {
119+
name,
120+
encoded: Arc::new(encoded),
121+
decoded_size,
122+
order,
123+
tiff,
124+
}
125+
}
126+
127+
fn bench_workload(g: &mut BenchGroup, w: &Workload) {
128+
g.throughput(Throughput::Bytes(w.decoded_size as u64));
129+
let out_cap = w.decoded_size + 4096;
130+
131+
for &(label, strategy) in &[
132+
("classic", TableStrategy::Classic),
133+
("streaming", TableStrategy::Streaming),
134+
] {
135+
let enc = Arc::clone(&w.encoded);
136+
let order = w.order;
137+
let tiff = w.tiff;
138+
g.bench(label, move |b| {
139+
let enc = Arc::clone(&enc);
140+
let mut out = vec![0u8; out_cap];
141+
b.iter(move || {
142+
let n = decode_all(&enc, &mut out, order, tiff, strategy);
143+
black_box(&out[..n]);
144+
n
145+
});
146+
});
147+
}
148+
}
149+
150+
fn bench_strategies(suite: &mut Suite) {
151+
let size = 256 * 1024; // 256 KiB
152+
153+
let random = gen_random(size, 0xDEADBEEF);
154+
let palette = gen_palette16(size);
155+
let rle = gen_rle(size);
156+
let solid = gen_solid(size);
157+
let photo = gen_photo_predicted(size);
158+
159+
let workloads = vec![
160+
// LSB (GIF-style)
161+
make_workload("random", &random, BitOrder::Lsb, false),
162+
make_workload("palette16", &palette, BitOrder::Lsb, false),
163+
make_workload("rle", &rle, BitOrder::Lsb, false),
164+
make_workload("solid", &solid, BitOrder::Lsb, false),
165+
// MSB + TIFF (image-tiff style)
166+
make_workload("random", &random, BitOrder::Msb, true),
167+
make_workload("photo-pred", &photo, BitOrder::Msb, true),
168+
make_workload("rle", &rle, BitOrder::Msb, true),
169+
make_workload("solid", &solid, BitOrder::Msb, true),
170+
];
171+
172+
for w in &workloads {
173+
let mode = if w.tiff { "msb-tiff" } else { "lsb" };
174+
let group_name = format!("{}/{}", mode, w.name);
175+
suite.group(group_name, |g| bench_workload(g, w));
176+
}
177+
}
178+
179+
zenbench::main!(bench_strategies);

0 commit comments

Comments
 (0)