Skip to main content

max / makenotwork

33.8 KB · 881 lines History Blame Raw
1 //! Layer 3: Archive / compression-bomb safety checks.
2 //!
3 //! Two families of check:
4 //! 1. **ZIP archives** — inspected for excessive compression ratios, deeply
5 //! nested archives, path-traversal entry names, and unreasonable
6 //! uncompressed sizes. ZIPs are detected both by the offset-0 local-file
7 //! header AND by an end-of-central-directory scan, so a prefixed / self-
8 //! extracting ZIP (a stub prepended to the archive) can't slip past.
9 //! 2. **Single-stream compressors** — gzip, bzip2, xz, and zstd. These are
10 //! the common standalone decompression-bomb vectors (`.gz`, `.tar.gz`,
11 //! `.bz2`, `.xz`, `.zst`). The stream is decompressed and the produced
12 //! bytes are counted against the same size + ratio caps as ZIP, with an
13 //! early exit so a bomb is rejected after ~`MAX_RATIO`× its input rather
14 //! than fully expanded.
15 //!
16 //! Formats we cannot introspect in-process (7z, RAR — container formats with
17 //! no lightweight pure-checker) are NOT bomb-checked here; they fall through to
18 //! ClamAV. A raw (uncompressed) tar carries no decompression amplification, so
19 //! a tar bomb only matters as `.tar.gz`, which the gzip path already covers.
20
21 use std::io::{Cursor, Read};
22
23 use crate::constants;
24 use crate::storage::FileType;
25
26 use super::{ErrorPolicy, LayerResult, LayerVerdict};
27
/// Error policy for this in-process, deterministic layer: fail closed.
///
/// A parser or decompression error indicates either a corrupt archive or an
/// evasion attempt; both warrant a human look rather than an automatic pass,
/// so such errors are surfaced instead of being treated as a clean result.
pub const ERROR_POLICY: ErrorPolicy = ErrorPolicy::FailClosed;
32
/// Compressed / archive container formats this layer knows how to inspect.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum ArchiveKind {
    /// ZIP archive (multi-entry container).
    Zip,
    /// gzip single-stream compressor.
    Gzip,
    /// bzip2 single-stream compressor.
    Bzip2,
    /// xz single-stream compressor.
    Xz,
    /// Zstandard single-stream compressor.
    Zstd,
}

impl ArchiveKind {
    /// Short format name as it appears in verdict detail messages.
    fn label(self) -> &'static str {
        match self {
            Self::Zip => "ZIP",
            Self::Gzip => "gzip",
            Self::Bzip2 => "bzip2",
            Self::Xz => "xz",
            Self::Zstd => "zstd",
        }
    }
}
54
55 /// Classify by leading magic bytes. ZIP is handled separately (it can be
56 /// detected by a trailing end-of-central-directory record too), so this only
57 /// reports the single-stream compressors plus the offset-0 ZIP fast path.
58 fn detect_kind(magic: &[u8]) -> Option<ArchiveKind> {
59 match magic {
60 [0x50, 0x4B, 0x03, 0x04, ..] => Some(ArchiveKind::Zip),
61 [0x1F, 0x8B, ..] => Some(ArchiveKind::Gzip),
62 [0x42, 0x5A, 0x68, ..] => Some(ArchiveKind::Bzip2), // "BZh"
63 [0xFD, 0x37, 0x7A, 0x58, 0x5A, 0x00, ..] => Some(ArchiveKind::Xz),
64 [0x28, 0xB5, 0x2F, 0xFD, ..] => Some(ArchiveKind::Zstd),
65 _ => None,
66 }
67 }
68
/// Report whether the tail of `data` contains a ZIP end-of-central-directory
/// signature (`PK\x05\x06`).
///
/// A valid ZIP ends with this record (optionally followed by a comment) even
/// when unrelated bytes precede the first local header, as in self-extracting
/// archives. Looking at the tail therefore finds prefixed ZIPs that the
/// leading-magic check cannot see.
fn has_zip_eocd(data: &[u8]) -> bool {
    const SIGNATURE: &[u8] = b"PK\x05\x06";
    // Fixed 22-byte EOCD record plus the largest possible (u16) comment.
    const MAX_TAIL: usize = 22 + u16::MAX as usize;

    let tail = match data.len().checked_sub(MAX_TAIL) {
        Some(offset) => &data[offset..],
        None => data,
    };
    tail.windows(SIGNATURE.len())
        .any(|candidate| candidate == SIGNATURE)
}
82
83 /// Check a file for archive / decompression-bomb safety issues.
84 /// Runs regardless of claimed type so a disguised archive is still inspected.
85 pub fn check_archive_safety(data: &[u8], file_type: FileType) -> LayerResult {
86 // If an archive is disguised as a cover image, layer 1 (content_type)
87 // handles the type mismatch; skip the archive walk for covers.
88 if file_type == FileType::Cover {
89 return skip("Archive check skipped for cover images");
90 }
91
92 match detect_kind(data) {
93 Some(ArchiveKind::Zip) => inspect_zip(Cursor::new(data)),
94 Some(stream) => inspect_compressed_stream(stream, data.len() as u64, Cursor::new(data)),
95 None if has_zip_eocd(data) => {
96 // Prefixed / self-extracting ZIP: no offset-0 magic, but a real
97 // central directory at the tail. ZipArchive locates it from the end.
98 inspect_zip(Cursor::new(data))
99 }
100 None => skip("Not a recognized archive"),
101 }
102 }
103
104 /// Path-based entry. Opens the spooled file directly so we never have to
105 /// buffer the whole archive. File-type gating happens at the call site (same
106 /// shape as the buffered variant — caller already checked `file_type`).
107 pub fn check_archive_safety_path(path: &std::path::Path, file_type: FileType) -> LayerResult {
108 use std::io::{Seek, SeekFrom};
109
110 if file_type == FileType::Cover {
111 return skip("Archive check skipped for cover images");
112 }
113
114 let mut file = match std::fs::File::open(path) {
115 Ok(f) => f,
116 Err(e) => return error(format!("open spool {}: {e}", path.display())),
117 };
118
119 let mut magic = [0u8; 6];
120 let read = file.read(&mut magic).unwrap_or(0);
121 let kind = detect_kind(&magic[..read]);
122
123 if file.seek(SeekFrom::Start(0)).is_err() {
124 return error(format!("seek spool {}", path.display()));
125 }
126
127 match kind {
128 Some(ArchiveKind::Zip) => inspect_zip(file),
129 Some(stream) => {
130 let compressed_size = std::fs::metadata(path).map(|m| m.len()).unwrap_or(0);
131 inspect_compressed_stream(stream, compressed_size, file)
132 }
133 None => {
134 // Tail-scan for a prefixed-ZIP central directory. Read up to the
135 // last 64 KiB + 22 bytes rather than the whole (possibly huge) file.
136 let len = std::fs::metadata(path).map(|m| m.len()).unwrap_or(0);
137 let window = 22 + u16::MAX as u64;
138 let start = len.saturating_sub(window);
139 let mut tail = Vec::new();
140 let is_zip = file.seek(SeekFrom::Start(start)).is_ok()
141 && file.read_to_end(&mut tail).is_ok()
142 && has_zip_eocd(&tail);
143 if is_zip {
144 if file.seek(SeekFrom::Start(0)).is_err() {
145 return error(format!("seek spool {}", path.display()));
146 }
147 inspect_zip(file)
148 } else {
149 skip("Not a recognized archive")
150 }
151 }
152 }
153 }
154
155 fn skip(detail: &str) -> LayerResult {
156 LayerResult { layer: "archive", verdict: LayerVerdict::Skip, detail: Some(detail.to_string()) }
157 }
158
159 fn error(detail: String) -> LayerResult {
160 LayerResult { layer: "archive", verdict: LayerVerdict::Error, detail: Some(detail) }
161 }
162
163 /// Decompress a single-stream compressor (gzip/bzip2/xz/zstd) and check the
164 /// produced byte count against the same absolute-size and ratio caps used for
165 /// ZIP entries. Decompression stops early: as soon as the output exceeds the
166 /// ratio cap (`compressed × MAX_RATIO`) or the absolute cap, it's a bomb — so
167 /// a 1 KiB bomb is rejected after expanding ~100 KiB, not gigabytes.
168 fn inspect_compressed_stream<R: Read>(kind: ArchiveKind, compressed_size: u64, reader: R) -> LayerResult {
169 let mut decoder: Box<dyn Read> = match kind {
170 // MultiGzDecoder/MultiBzDecoder so a multi-member stream can't hide
171 // additional expansion past the first member.
172 ArchiveKind::Gzip => Box::new(flate2::read::MultiGzDecoder::new(reader)),
173 ArchiveKind::Bzip2 => Box::new(bzip2::read::BzDecoder::new(reader)),
174 ArchiveKind::Xz => Box::new(xz2::read::XzDecoder::new(reader)),
175 ArchiveKind::Zstd => match zstd::stream::read::Decoder::new(reader) {
176 Ok(d) => Box::new(d),
177 Err(e) => return error(format!("zstd init failed: {e}")),
178 },
179 ArchiveKind::Zip => unreachable!("zip is handled by inspect_zip"),
180 };
181
182 let abs_limit = constants::SCAN_ZIP_MAX_UNCOMPRESSED;
183 // Ratio early-exit threshold. For a 0-byte compressed input fall back to
184 // the absolute cap only (ratio is undefined).
185 let ratio_limit = compressed_size.saturating_mul(constants::SCAN_ZIP_MAX_RATIO as u64);
186
187 let mut counted: u64 = 0;
188 let mut buf = [0u8; 8192];
189 loop {
190 match decoder.read(&mut buf) {
191 Ok(0) => break,
192 Ok(n) => {
193 counted += n as u64;
194 if counted > abs_limit {
195 return LayerResult {
196 layer: "archive",
197 verdict: LayerVerdict::Fail,
198 detail: Some(format!(
199 "{} stream decompresses past {} bytes (possible decompression bomb)",
200 kind.label(),
201 abs_limit
202 )),
203 };
204 }
205 if compressed_size > 0 && counted > ratio_limit {
206 return LayerResult {
207 layer: "archive",
208 verdict: LayerVerdict::Fail,
209 detail: Some(format!(
210 "{} compression ratio exceeds {:.0}x (possible decompression bomb)",
211 kind.label(),
212 constants::SCAN_ZIP_MAX_RATIO
213 )),
214 };
215 }
216 }
217 Err(e) => {
218 // A decode error mid-stream is suspicious (corrupt or crafted
219 // to truncate). Fail closed for admin review per ERROR_POLICY.
220 return error(format!("{} decode error: {e}", kind.label()));
221 }
222 }
223 }
224
225 LayerResult {
226 layer: "archive",
227 verdict: LayerVerdict::Pass,
228 detail: Some(format!(
229 "{} stream, {} bytes uncompressed ({:.1}x)",
230 kind.label(),
231 counted,
232 if compressed_size > 0 { counted as f64 / compressed_size as f64 } else { 0.0 }
233 )),
234 }
235 }
236
237 fn inspect_zip<R: std::io::Read + std::io::Seek>(reader: R) -> LayerResult {
238 let mut archive = match zip::ZipArchive::new(reader) {
239 Ok(a) => a,
240 Err(e) => {
241 return LayerResult {
242 layer: "archive",
243 verdict: LayerVerdict::Error,
244 detail: Some(format!("Failed to parse ZIP: {}", e)),
245 };
246 }
247 };
248
249 let mut total_compressed: u64 = 0;
250 let mut total_uncompressed: u64 = 0;
251 let mut nested_archives: u32 = 0;
252
253 for i in 0..archive.len() {
254 let entry = match archive.by_index_raw(i) {
255 Ok(e) => e,
256 Err(e) => {
257 return LayerResult {
258 layer: "archive",
259 verdict: LayerVerdict::Error,
260 detail: Some(format!("Failed to read ZIP entry {}: {}", i, e)),
261 };
262 }
263 };
264
265 let name = entry.name().to_string();
266
267 // Check for path traversal (literal, URL-encoded, and absolute paths)
268 let name_lower = name.to_ascii_lowercase();
269 if name.contains("../") || name.contains("..\\")
270 || name_lower.contains("%2e%2e")
271 || name.starts_with('/') || name.contains('\0')
272 {
273 return LayerResult {
274 layer: "archive",
275 verdict: LayerVerdict::Fail,
276 detail: Some(format!("Path traversal in entry: {}", name)),
277 };
278 }
279
280 total_compressed += entry.compressed_size();
281 let claimed_size = entry.size();
282 drop(entry);
283
284 // Use actual decompressed byte count instead of trusting the claimed size
285 // from the ZIP central directory (which is attacker-controlled).
286 // Also capture the first 8 bytes for nested archive magic detection,
287 // avoiding a second decompression pass.
288 let mut magic_bytes = [0u8; 8];
289 let mut magic_len = 0usize;
290 let actual_size = match archive.by_index(i) {
291 Ok(mut reader) => {
292 let mut counted: u64 = 0;
293 let mut buf = [0u8; 8192];
294 let limit = constants::SCAN_ZIP_MAX_UNCOMPRESSED;
295 loop {
296 match std::io::Read::read(&mut reader, &mut buf) {
297 Ok(0) => break,
298 Ok(n) => {
299 // Capture first 8 bytes for magic detection
300 if magic_len < 8 {
301 let copy = n.min(8 - magic_len);
302 magic_bytes[magic_len..magic_len + copy].copy_from_slice(&buf[..copy]);
303 magic_len += copy;
304 }
305 counted += n as u64;
306 if counted > limit {
307 return LayerResult {
308 layer: "archive",
309 verdict: LayerVerdict::Fail,
310 detail: Some(format!(
311 "Actual decompressed size exceeds {} bytes (possible ZIP bomb)",
312 limit
313 )),
314 };
315 }
316 }
317 Err(_) => {
318 // Decompression errors are suspicious — a crafted deflate
319 // stream can break here to underreport size and bypass the
320 // ratio check. Use a conservative multiplier instead of
321 // trusting the attacker-controlled claimed_size.
322 counted = claimed_size.saturating_mul(10).max(1024 * 1024);
323 break;
324 }
325 }
326 }
327 counted
328 }
329 // If we can't open the entry at all, use conservative estimate
330 Err(_) => claimed_size.saturating_mul(10).max(1024 * 1024),
331 };
332 total_uncompressed += actual_size;
333
334 // Check for nested archives — extension check first, then magic bytes
335 // from the first decompression pass (no re-read needed).
336 let lower_name = name.to_lowercase();
337 let ext_match = lower_name.ends_with(".zip")
338 || lower_name.ends_with(".tar.gz")
339 || lower_name.ends_with(".tgz")
340 || lower_name.ends_with(".7z")
341 || lower_name.ends_with(".rar")
342 || lower_name.ends_with(".tar");
343 if ext_match {
344 nested_archives += 1;
345 } else if actual_size > 0 && magic_len >= 4 {
346 let is_nested = matches!(
347 magic_bytes,
348 [0x50, 0x4B, 0x03, 0x04, ..] // ZIP
349 | [0x1F, 0x8B, ..] // gzip (tar.gz)
350 | [0x37, 0x7A, 0xBC, 0xAF, 0x27, 0x1C, ..] // 7z
351 | [0x52, 0x61, 0x72, 0x21, ..] // RAR
352 );
353 if is_nested {
354 nested_archives += 1;
355 }
356 }
357 }
358
359 // Check total uncompressed size
360 if total_uncompressed > constants::SCAN_ZIP_MAX_UNCOMPRESSED {
361 return LayerResult {
362 layer: "archive",
363 verdict: LayerVerdict::Fail,
364 detail: Some(format!(
365 "Total uncompressed size {} bytes exceeds limit of {} bytes",
366 total_uncompressed,
367 constants::SCAN_ZIP_MAX_UNCOMPRESSED
368 )),
369 };
370 }
371
372 // Check compression ratio (ZIP bomb detection)
373 if total_compressed > 0 {
374 let ratio = total_uncompressed as f64 / total_compressed as f64;
375 if ratio > constants::SCAN_ZIP_MAX_RATIO {
376 return LayerResult {
377 layer: "archive",
378 verdict: LayerVerdict::Fail,
379 detail: Some(format!(
380 "Compression ratio {:.1}x exceeds limit of {:.0}x (possible ZIP bomb)",
381 ratio,
382 constants::SCAN_ZIP_MAX_RATIO
383 )),
384 };
385 }
386 }
387
388 // Check nesting depth
389 if nested_archives > constants::SCAN_ZIP_MAX_DEPTH {
390 return LayerResult {
391 layer: "archive",
392 verdict: LayerVerdict::Fail,
393 detail: Some(format!(
394 "Contains {} nested archives (limit: {})",
395 nested_archives,
396 constants::SCAN_ZIP_MAX_DEPTH
397 )),
398 };
399 }
400
401 LayerResult {
402 layer: "archive",
403 verdict: LayerVerdict::Pass,
404 detail: Some(format!(
405 "{} entries, {:.1}x ratio",
406 archive.len(),
407 if total_compressed > 0 {
408 total_uncompressed as f64 / total_compressed as f64
409 } else {
410 0.0
411 }
412 )),
413 }
414 }
415
#[cfg(test)]
mod tests {
    use super::*;
    use std::io::Write;
    use zip::write::SimpleFileOptions;

    // -- Fixtures and helpers --

    /// Build an in-memory ZIP holding `entries`, all written with `method`.
    fn build_zip(entries: &[(&str, &[u8])], method: zip::CompressionMethod) -> Vec<u8> {
        let mut writer = zip::ZipWriter::new(Cursor::new(Vec::new()));
        let options = SimpleFileOptions::default().compression_method(method);
        for (name, data) in entries {
            writer.start_file(*name, options).unwrap();
            writer.write_all(data).unwrap();
        }
        writer.finish().unwrap().into_inner()
    }

    /// ZIP with stored (uncompressed) entries.
    fn make_zip(entries: &[(&str, &[u8])]) -> Vec<u8> {
        build_zip(entries, zip::CompressionMethod::Stored)
    }

    /// ZIP with deflate-compressed entries.
    fn make_compressed_zip(entries: &[(&str, &[u8])]) -> Vec<u8> {
        build_zip(entries, zip::CompressionMethod::Deflated)
    }

    /// Run the buffered check with the `Download` file type.
    fn scan(data: &[u8]) -> LayerResult {
        check_archive_safety(data, FileType::Download)
    }

    /// Spool `data` to a temp file and run the path-based check on it.
    fn scan_path(data: &[u8], file_type: FileType) -> LayerResult {
        let tmp = tempfile::NamedTempFile::new().unwrap();
        std::fs::write(tmp.path(), data).unwrap();
        check_archive_safety_path(tmp.path(), file_type)
    }

    /// Assert a `Fail` verdict whose detail text contains `needle`.
    #[track_caller]
    fn assert_fails_mentioning(result: LayerResult, needle: &str) {
        assert_eq!(result.verdict, LayerVerdict::Fail, "detail: {:?}", result.detail);
        assert!(result.detail.unwrap().contains(needle));
    }

    // -- Skip behavior --

    #[test]
    fn non_zip_skipped() {
        assert_eq!(scan(b"not a zip file").verdict, LayerVerdict::Skip);
    }

    #[test]
    fn audio_non_zip_skipped() {
        let result = check_archive_safety(b"audio data", FileType::Audio);
        assert_eq!(result.verdict, LayerVerdict::Skip);
    }

    #[test]
    fn cover_zip_skipped() {
        // A ZIP file claimed as cover should be skipped (layer 1 handles type mismatch)
        let data = make_zip(&[("test.txt", b"hello")]);
        let result = check_archive_safety(&data, FileType::Cover);
        assert_eq!(result.verdict, LayerVerdict::Skip);
    }

    // -- Valid archives --

    #[test]
    fn valid_zip_passes() {
        let data = make_zip(&[("test.txt", b"hello world")]);
        assert_eq!(scan(&data).verdict, LayerVerdict::Pass);
    }

    #[test]
    fn empty_zip_passes() {
        let data = zip::ZipWriter::new(Cursor::new(Vec::new()))
            .finish()
            .unwrap()
            .into_inner();
        // An empty ZIP has no local-file header at offset 0; it is just an
        // end-of-central-directory record. The tail EOCD scan still routes it
        // to inspect_zip, so it can never be Skipped and must parse as a
        // valid archive with zero entries.
        let result = scan(&data);
        assert_eq!(
            result.verdict,
            LayerVerdict::Pass,
            "unexpected verdict, detail: {:?}",
            result.detail
        );
    }

    #[test]
    fn multi_entry_zip_passes() {
        let data = make_zip(&[
            ("file1.txt", b"content one"),
            ("subdir/file2.txt", b"content two"),
            ("readme.md", b"# hello"),
        ]);
        let result = scan(&data);
        assert_eq!(result.verdict, LayerVerdict::Pass);
        assert!(result.detail.unwrap().contains("3 entries"));
    }

    // -- Path traversal --

    #[test]
    fn zip_with_forward_slash_traversal_fails() {
        let data = make_zip(&[("../../../etc/passwd", b"pwned")]);
        assert_fails_mentioning(scan(&data), "Path traversal");
    }

    #[test]
    fn zip_with_backslash_traversal_fails() {
        let data = make_zip(&[("..\\..\\Windows\\System32\\config", b"pwned")]);
        assert_fails_mentioning(scan(&data), "Path traversal");
    }

    #[test]
    fn zip_with_mid_path_traversal_fails() {
        let data = make_zip(&[("safe/../../etc/passwd", b"pwned")]);
        assert_eq!(scan(&data).verdict, LayerVerdict::Fail);
    }

    #[test]
    fn zip_with_url_encoded_traversal_fails() {
        // %2e%2e is URL-encoded "..". The check is case-insensitive on the encoding.
        let data = make_zip(&[("%2E%2E/secrets", b"pwned")]);
        assert_fails_mentioning(scan(&data), "Path traversal");
    }

    #[test]
    fn zip_with_absolute_path_fails() {
        let data = make_zip(&[("/etc/passwd", b"pwned")]);
        assert_fails_mentioning(scan(&data), "Path traversal");
    }

    #[test]
    fn zip_with_null_byte_in_name_fails() {
        let data = make_zip(&[("legit.txt\0../escape", b"pwned")]);
        assert_fails_mentioning(scan(&data), "Path traversal");
    }

    // -- Nesting detection --

    #[test]
    fn zip_within_nesting_limit_passes() {
        // SCAN_ZIP_MAX_DEPTH = 2; the check is `nested > limit`, so 2 entries
        // with archive extensions sit exactly at the limit and must pass.
        let data = make_zip(&[
            ("data.txt", b"content"),
            ("inner1.zip", b"fake zip content"),
            ("inner2.zip", b"fake zip content"),
        ]);
        assert_eq!(scan(&data).verdict, LayerVerdict::Pass);
    }

    #[test]
    fn zip_exceeding_nesting_limit_fails() {
        // SCAN_ZIP_MAX_DEPTH = 2; 3 nested archives trips the limit.
        let data = make_zip(&[
            ("inner1.zip", b"fake"),
            ("inner2.zip", b"fake"),
            ("inner3.zip", b"fake"),
        ]);
        assert_fails_mentioning(scan(&data), "nested archives");
    }

    #[test]
    fn nested_tar_gz_counts() {
        let data = make_zip(&[
            ("archive.tar.gz", b"fake"),
            ("another.tar.gz", b"fake"),
            ("third.tar.gz", b"fake"),
            ("fourth.tar.gz", b"fake"),
        ]);
        assert_eq!(scan(&data).verdict, LayerVerdict::Fail);
    }

    #[test]
    fn nested_7z_and_rar_count() {
        let data = make_zip(&[
            ("a.7z", b"fake"),
            ("b.rar", b"fake"),
            ("c.zip", b"fake"),
            ("d.7z", b"fake"),
        ]);
        assert_eq!(scan(&data).verdict, LayerVerdict::Fail);
    }

    #[test]
    fn nested_zip_detected_by_magic_without_extension() {
        // Inner file has innocent name but contains real ZIP magic bytes.
        // The extension check misses it; the magic-byte fallback must catch it.
        let inner_zip = make_zip(&[("payload.txt", b"hi")]);
        let data = make_zip(&[
            ("a.bin", &inner_zip),
            ("b.bin", &inner_zip),
            ("c.bin", &inner_zip),
        ]);
        assert_fails_mentioning(scan(&data), "nested archives");
    }

    #[test]
    fn nested_gzip_detected_by_magic_without_extension() {
        // 1F 8B is gzip magic. No extension hint, must be caught by magic check.
        let gzip_bytes: &[u8] = &[0x1F, 0x8B, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00];
        let data = make_zip(&[
            ("one.dat", gzip_bytes),
            ("two.dat", gzip_bytes),
            ("three.dat", gzip_bytes),
        ]);
        assert_fails_mentioning(scan(&data), "nested archives");
    }

    #[test]
    fn nested_7z_detected_by_magic_without_extension() {
        // 7z magic: 37 7A BC AF 27 1C
        let sevenz_bytes: &[u8] = &[0x37, 0x7A, 0xBC, 0xAF, 0x27, 0x1C, 0x00, 0x00];
        let data = make_zip(&[
            ("alpha.bin", sevenz_bytes),
            ("beta.bin", sevenz_bytes),
            ("gamma.bin", sevenz_bytes),
        ]);
        assert_eq!(scan(&data).verdict, LayerVerdict::Fail);
    }

    #[test]
    fn nested_rar_detected_by_magic_without_extension() {
        // RAR magic: 52 61 72 21 ("Rar!")
        let rar_bytes: &[u8] = &[0x52, 0x61, 0x72, 0x21, 0x1A, 0x07, 0x00, 0x00];
        let data = make_zip(&[
            ("x.bin", rar_bytes),
            ("y.bin", rar_bytes),
            ("z.bin", rar_bytes),
        ]);
        assert_eq!(scan(&data).verdict, LayerVerdict::Fail);
    }

    #[test]
    fn non_archive_extensions_ignored() {
        let data = make_zip(&[
            ("app.exe", b"binary"),
            ("readme.txt", b"hello"),
            ("image.png", b"pixels"),
        ]);
        assert_eq!(scan(&data).verdict, LayerVerdict::Pass);
    }

    // -- Compression ratio (ZIP bomb detection) --

    #[test]
    fn high_compression_ratio_fails() {
        // Create highly compressible data: repeating zeros compress extremely well
        // 1MB of zeros should compress to ~1KB with deflate, giving ratio ~1000x
        let zeros = vec![0u8; 1024 * 1024];
        let data = make_compressed_zip(&[("bomb.bin", &zeros)]);
        let result = scan(&data);
        assert_eq!(
            result.verdict,
            LayerVerdict::Fail,
            "Expected Fail for high compression ratio, got: {:?}",
            result.detail
        );
        assert!(result.detail.unwrap().contains("ZIP bomb"));
    }

    #[test]
    fn normal_compression_ratio_passes() {
        // Random-ish data doesn't compress well, ratio should be ~1x
        let data_bytes: Vec<u8> = (0..10000).map(|i| (i * 37 + 13) as u8).collect();
        let data = make_compressed_zip(&[("normal.bin", &data_bytes)]);
        assert_eq!(scan(&data).verdict, LayerVerdict::Pass);
    }

    // -- Audio file with ZIP magic (disguised archive) --

    #[test]
    fn zip_disguised_as_audio_checked() {
        // A ZIP file claimed as Audio should still be checked (not skipped)
        let data = make_zip(&[("test.txt", b"hello")]);
        let result = check_archive_safety(&data, FileType::Audio);
        assert_eq!(result.verdict, LayerVerdict::Pass);
    }

    #[test]
    fn zip_disguised_as_audio_with_traversal_fails() {
        let data = make_zip(&[("../../../etc/passwd", b"pwned")]);
        let result = check_archive_safety(&data, FileType::Audio);
        assert_eq!(result.verdict, LayerVerdict::Fail);
    }

    // -- Corrupted ZIP --

    #[test]
    fn corrupted_zip_magic_returns_error() {
        // Valid ZIP magic bytes but garbage after
        let mut data = vec![0x50, 0x4B, 0x03, 0x04];
        data.extend_from_slice(&[0xFF; 100]);
        let result = scan(&data);
        assert_eq!(result.verdict, LayerVerdict::Error);
        assert!(result.detail.unwrap().contains("Failed to parse ZIP"));
    }

    // -- Path-based entry point agrees with the buffered one --

    #[test]
    fn path_entry_matches_buffered_for_non_zip() {
        let data = b"not a zip at all";
        let buffered = check_archive_safety(data, FileType::Download);
        let path_based = scan_path(data, FileType::Download);
        assert_eq!(buffered.verdict, path_based.verdict);
        assert_eq!(buffered.verdict, LayerVerdict::Skip);
    }

    #[test]
    fn path_entry_matches_buffered_for_cover_skip() {
        let mut data = vec![0x50, 0x4B, 0x03, 0x04];
        data.extend_from_slice(&[0xFF; 100]);
        let buffered = check_archive_safety(&data, FileType::Cover);
        let path_based = scan_path(&data, FileType::Cover);
        assert_eq!(buffered.verdict, path_based.verdict);
        assert_eq!(buffered.verdict, LayerVerdict::Skip);
    }

    // -- Single-stream decompression bombs (gzip / bzip2 / xz / zstd) --

    fn gzip(data: &[u8]) -> Vec<u8> {
        let mut e = flate2::write::GzEncoder::new(Vec::new(), flate2::Compression::best());
        e.write_all(data).unwrap();
        e.finish().unwrap()
    }

    fn bzip2_compress(data: &[u8]) -> Vec<u8> {
        let mut e = bzip2::write::BzEncoder::new(Vec::new(), bzip2::Compression::new(9));
        e.write_all(data).unwrap();
        e.finish().unwrap()
    }

    fn xz(data: &[u8]) -> Vec<u8> {
        let mut e = xz2::write::XzEncoder::new(Vec::new(), 9);
        e.write_all(data).unwrap();
        e.finish().unwrap()
    }

    fn zstd_compress(data: &[u8]) -> Vec<u8> {
        zstd::encode_all(data, 19).unwrap()
    }

    /// 8 MiB of zeros — compresses to a tiny stream at a ratio far above the
    /// 100x cap, the canonical decompression-bomb shape.
    fn bomb_payload() -> Vec<u8> {
        vec![0u8; 8 * 1024 * 1024]
    }

    /// Moderately-incompressible data: stays well under the ratio cap, so a
    /// legitimate compressed download passes.
    fn benign_payload() -> Vec<u8> {
        (0..200_000u32).map(|i| (i.wrapping_mul(2654435761) >> 13) as u8).collect()
    }

    #[test]
    fn gzip_bomb_fails() {
        let result = scan(&gzip(&bomb_payload()));
        assert_eq!(result.verdict, LayerVerdict::Fail, "detail: {:?}", result.detail);
        assert!(result.detail.unwrap().to_lowercase().contains("bomb"));
    }

    #[test]
    fn benign_gzip_passes() {
        let result = scan(&gzip(&benign_payload()));
        assert_eq!(result.verdict, LayerVerdict::Pass, "detail: {:?}", result.detail);
    }

    #[test]
    fn bzip2_bomb_fails() {
        let result = scan(&bzip2_compress(&bomb_payload()));
        assert_eq!(result.verdict, LayerVerdict::Fail, "detail: {:?}", result.detail);
    }

    #[test]
    fn xz_bomb_fails() {
        let result = scan(&xz(&bomb_payload()));
        assert_eq!(result.verdict, LayerVerdict::Fail, "detail: {:?}", result.detail);
    }

    #[test]
    fn zstd_bomb_fails() {
        let result = scan(&zstd_compress(&bomb_payload()));
        assert_eq!(result.verdict, LayerVerdict::Fail, "detail: {:?}", result.detail);
    }

    #[test]
    fn gzip_bomb_caught_on_path_variant_too() {
        let result = scan_path(&gzip(&bomb_payload()), FileType::Download);
        assert_eq!(result.verdict, LayerVerdict::Fail, "detail: {:?}", result.detail);
    }

    #[test]
    fn gzip_bomb_skipped_for_cover() {
        // Type mismatch is layer 1's job; the archive layer skips covers.
        let data = gzip(&bomb_payload());
        let result = check_archive_safety(&data, FileType::Cover);
        assert_eq!(result.verdict, LayerVerdict::Skip);
    }

    // -- Prefixed / self-extracting ZIP (no offset-0 magic) --

    #[test]
    fn prefixed_zip_is_not_silently_skipped() {
        // A real ZIP with arbitrary bytes prepended (the self-extracting-stub
        // shape). It lacks the offset-0 PK\x03\x04 magic, so an offset-0-only
        // gate would Skip it. The tail EOCD scan must catch it and hand it to
        // inspect_zip — the security property is that it is NOT Skipped.
        let zip = make_zip(&[("readme.txt", b"hello")]);
        let mut data = b"MZ\x90\x00 this is a self-extracting stub padding ".to_vec();
        data.extend_from_slice(&zip);

        let result = scan(&data);
        assert_ne!(
            result.verdict,
            LayerVerdict::Skip,
            "prefixed ZIP must be inspected, not skipped; got {:?}",
            result.detail
        );

        // And on the path variant.
        let path_based = scan_path(&data, FileType::Download);
        assert_ne!(path_based.verdict, LayerVerdict::Skip, "detail: {:?}", path_based.detail);
    }

    #[test]
    fn prefixed_zip_bomb_fails() {
        // Prepend a stub to a high-ratio ZIP; it must still be caught.
        let zeros = vec![0u8; 1024 * 1024];
        let zip = make_compressed_zip(&[("bomb.bin", &zeros)]);
        let mut data = b"self-extracting stub ".to_vec();
        data.extend_from_slice(&zip);
        let result = scan(&data);
        assert_eq!(result.verdict, LayerVerdict::Fail, "detail: {:?}", result.detail);
    }
}
881