//! Layer 3: Archive / compression-bomb safety checks. //! //! Two families of check: //! 1. **ZIP archives** — inspected for excessive compression ratios, deeply //! nested archives, path-traversal entry names, and unreasonable //! uncompressed sizes. ZIPs are detected both by the offset-0 local-file //! header AND by an end-of-central-directory scan, so a prefixed / self- //! extracting ZIP (a stub prepended to the archive) can't slip past. //! 2. **Single-stream compressors** — gzip, bzip2, xz, and zstd. These are //! the common standalone decompression-bomb vectors (`.gz`, `.tar.gz`, //! `.bz2`, `.xz`, `.zst`). The stream is decompressed and the produced //! bytes are counted against the same size + ratio caps as ZIP, with an //! early exit so a bomb is rejected after ~`MAX_RATIO`× its input rather //! than fully expanded. //! //! Formats we cannot introspect in-process (7z, RAR — container formats with //! no lightweight pure-checker) are NOT bomb-checked here; they fall through to //! ClamAV. A raw (uncompressed) tar carries no decompression amplification, so //! a tar bomb only matters as `.tar.gz`, which the gzip path already covers. use std::io::{Cursor, Read}; use crate::constants; use crate::storage::FileType; use super::{ErrorPolicy, LayerResult, LayerVerdict}; /// In-process deterministic layer. Parser / decompression errors fail closed /// because they indicate either a corrupt archive or an evasion attempt — both /// warrant a human look rather than an automatic pass. pub const ERROR_POLICY: ErrorPolicy = ErrorPolicy::FailClosed; /// A recognized compressed/archive container we know how to inspect. #[derive(Debug, Clone, Copy, PartialEq, Eq)] enum ArchiveKind { Zip, Gzip, Bzip2, Xz, Zstd, } impl ArchiveKind { fn label(self) -> &'static str { match self { ArchiveKind::Zip => "ZIP", ArchiveKind::Gzip => "gzip", ArchiveKind::Bzip2 => "bzip2", ArchiveKind::Xz => "xz", ArchiveKind::Zstd => "zstd", } } } /// Classify by leading magic bytes. ZIP is handled separately (it can be /// detected by a trailing end-of-central-directory record too), so this only /// reports the single-stream compressors plus the offset-0 ZIP fast path. fn detect_kind(magic: &[u8]) -> Option { match magic { [0x50, 0x4B, 0x03, 0x04, ..] => Some(ArchiveKind::Zip), [0x1F, 0x8B, ..] => Some(ArchiveKind::Gzip), [0x42, 0x5A, 0x68, ..] => Some(ArchiveKind::Bzip2), // "BZh" [0xFD, 0x37, 0x7A, 0x58, 0x5A, 0x00, ..] => Some(ArchiveKind::Xz), [0x28, 0xB5, 0x2F, 0xFD, ..] => Some(ArchiveKind::Zstd), _ => None, } } /// ZIP end-of-central-directory signature (`PK\x05\x06`). A valid ZIP always /// ends with this record (plus an optional trailing comment), even when bytes /// are prepended ahead of the first local header (self-extracting archives). /// Scanning the tail catches those prefixed ZIPs that `detect_kind` misses. fn has_zip_eocd(data: &[u8]) -> bool { const EOCD: [u8; 4] = [0x50, 0x4B, 0x05, 0x06]; // The EOCD sits within the last 22 bytes + up to 64 KiB of comment. let window = 22 + u16::MAX as usize; let start = data.len().saturating_sub(window); data[start..] .windows(EOCD.len()) .any(|w| w == EOCD) } /// Check a file for archive / decompression-bomb safety issues. /// Runs regardless of claimed type so a disguised archive is still inspected. pub fn check_archive_safety(data: &[u8], file_type: FileType) -> LayerResult { // If an archive is disguised as a cover image, layer 1 (content_type) // handles the type mismatch; skip the archive walk for covers. if file_type == FileType::Cover { return skip("Archive check skipped for cover images"); } match detect_kind(data) { Some(ArchiveKind::Zip) => inspect_zip(Cursor::new(data)), Some(stream) => inspect_compressed_stream(stream, data.len() as u64, Cursor::new(data)), None if has_zip_eocd(data) => { // Prefixed / self-extracting ZIP: no offset-0 magic, but a real // central directory at the tail. ZipArchive locates it from the end. inspect_zip(Cursor::new(data)) } None => skip("Not a recognized archive"), } } /// Path-based entry. Opens the spooled file directly so we never have to /// buffer the whole archive. File-type gating happens at the call site (same /// shape as the buffered variant — caller already checked `file_type`). pub fn check_archive_safety_path(path: &std::path::Path, file_type: FileType) -> LayerResult { use std::io::{Seek, SeekFrom}; if file_type == FileType::Cover { return skip("Archive check skipped for cover images"); } let mut file = match std::fs::File::open(path) { Ok(f) => f, Err(e) => return error(format!("open spool {}: {e}", path.display())), }; let mut magic = [0u8; 6]; let read = file.read(&mut magic).unwrap_or(0); let kind = detect_kind(&magic[..read]); if file.seek(SeekFrom::Start(0)).is_err() { return error(format!("seek spool {}", path.display())); } match kind { Some(ArchiveKind::Zip) => inspect_zip(file), Some(stream) => { let compressed_size = std::fs::metadata(path).map(|m| m.len()).unwrap_or(0); inspect_compressed_stream(stream, compressed_size, file) } None => { // Tail-scan for a prefixed-ZIP central directory. Read up to the // last 64 KiB + 22 bytes rather than the whole (possibly huge) file. let len = std::fs::metadata(path).map(|m| m.len()).unwrap_or(0); let window = 22 + u16::MAX as u64; let start = len.saturating_sub(window); let mut tail = Vec::new(); let is_zip = file.seek(SeekFrom::Start(start)).is_ok() && file.read_to_end(&mut tail).is_ok() && has_zip_eocd(&tail); if is_zip { if file.seek(SeekFrom::Start(0)).is_err() { return error(format!("seek spool {}", path.display())); } inspect_zip(file) } else { skip("Not a recognized archive") } } } } fn skip(detail: &str) -> LayerResult { LayerResult { layer: "archive", verdict: LayerVerdict::Skip, detail: Some(detail.to_string()) } } fn error(detail: String) -> LayerResult { LayerResult { layer: "archive", verdict: LayerVerdict::Error, detail: Some(detail) } } /// Decompress a single-stream compressor (gzip/bzip2/xz/zstd) and check the /// produced byte count against the same absolute-size and ratio caps used for /// ZIP entries. Decompression stops early: as soon as the output exceeds the /// ratio cap (`compressed × MAX_RATIO`) or the absolute cap, it's a bomb — so /// a 1 KiB bomb is rejected after expanding ~100 KiB, not gigabytes. fn inspect_compressed_stream(kind: ArchiveKind, compressed_size: u64, reader: R) -> LayerResult { let mut decoder: Box = match kind { // MultiGzDecoder/MultiBzDecoder so a multi-member stream can't hide // additional expansion past the first member. ArchiveKind::Gzip => Box::new(flate2::read::MultiGzDecoder::new(reader)), ArchiveKind::Bzip2 => Box::new(bzip2::read::BzDecoder::new(reader)), ArchiveKind::Xz => Box::new(xz2::read::XzDecoder::new(reader)), ArchiveKind::Zstd => match zstd::stream::read::Decoder::new(reader) { Ok(d) => Box::new(d), Err(e) => return error(format!("zstd init failed: {e}")), }, ArchiveKind::Zip => unreachable!("zip is handled by inspect_zip"), }; let abs_limit = constants::SCAN_ZIP_MAX_UNCOMPRESSED; // Ratio early-exit threshold. For a 0-byte compressed input fall back to // the absolute cap only (ratio is undefined). let ratio_limit = compressed_size.saturating_mul(constants::SCAN_ZIP_MAX_RATIO as u64); let mut counted: u64 = 0; let mut buf = [0u8; 8192]; loop { match decoder.read(&mut buf) { Ok(0) => break, Ok(n) => { counted += n as u64; if counted > abs_limit { return LayerResult { layer: "archive", verdict: LayerVerdict::Fail, detail: Some(format!( "{} stream decompresses past {} bytes (possible decompression bomb)", kind.label(), abs_limit )), }; } if compressed_size > 0 && counted > ratio_limit { return LayerResult { layer: "archive", verdict: LayerVerdict::Fail, detail: Some(format!( "{} compression ratio exceeds {:.0}x (possible decompression bomb)", kind.label(), constants::SCAN_ZIP_MAX_RATIO )), }; } } Err(e) => { // A decode error mid-stream is suspicious (corrupt or crafted // to truncate). Fail closed for admin review per ERROR_POLICY. return error(format!("{} decode error: {e}", kind.label())); } } } LayerResult { layer: "archive", verdict: LayerVerdict::Pass, detail: Some(format!( "{} stream, {} bytes uncompressed ({:.1}x)", kind.label(), counted, if compressed_size > 0 { counted as f64 / compressed_size as f64 } else { 0.0 } )), } } fn inspect_zip(reader: R) -> LayerResult { let mut archive = match zip::ZipArchive::new(reader) { Ok(a) => a, Err(e) => { return LayerResult { layer: "archive", verdict: LayerVerdict::Error, detail: Some(format!("Failed to parse ZIP: {}", e)), }; } }; let mut total_compressed: u64 = 0; let mut total_uncompressed: u64 = 0; let mut nested_archives: u32 = 0; for i in 0..archive.len() { let entry = match archive.by_index_raw(i) { Ok(e) => e, Err(e) => { return LayerResult { layer: "archive", verdict: LayerVerdict::Error, detail: Some(format!("Failed to read ZIP entry {}: {}", i, e)), }; } }; let name = entry.name().to_string(); // Check for path traversal (literal, URL-encoded, and absolute paths) let name_lower = name.to_ascii_lowercase(); if name.contains("../") || name.contains("..\\") || name_lower.contains("%2e%2e") || name.starts_with('/') || name.contains('\0') { return LayerResult { layer: "archive", verdict: LayerVerdict::Fail, detail: Some(format!("Path traversal in entry: {}", name)), }; } total_compressed += entry.compressed_size(); let claimed_size = entry.size(); drop(entry); // Use actual decompressed byte count instead of trusting the claimed size // from the ZIP central directory (which is attacker-controlled). // Also capture the first 8 bytes for nested archive magic detection, // avoiding a second decompression pass. let mut magic_bytes = [0u8; 8]; let mut magic_len = 0usize; let actual_size = match archive.by_index(i) { Ok(mut reader) => { let mut counted: u64 = 0; let mut buf = [0u8; 8192]; let limit = constants::SCAN_ZIP_MAX_UNCOMPRESSED; loop { match std::io::Read::read(&mut reader, &mut buf) { Ok(0) => break, Ok(n) => { // Capture first 8 bytes for magic detection if magic_len < 8 { let copy = n.min(8 - magic_len); magic_bytes[magic_len..magic_len + copy].copy_from_slice(&buf[..copy]); magic_len += copy; } counted += n as u64; if counted > limit { return LayerResult { layer: "archive", verdict: LayerVerdict::Fail, detail: Some(format!( "Actual decompressed size exceeds {} bytes (possible ZIP bomb)", limit )), }; } } Err(_) => { // Decompression errors are suspicious — a crafted deflate // stream can break here to underreport size and bypass the // ratio check. Use a conservative multiplier instead of // trusting the attacker-controlled claimed_size. counted = claimed_size.saturating_mul(10).max(1024 * 1024); break; } } } counted } // If we can't open the entry at all, use conservative estimate Err(_) => claimed_size.saturating_mul(10).max(1024 * 1024), }; total_uncompressed += actual_size; // Check for nested archives — extension check first, then magic bytes // from the first decompression pass (no re-read needed). let lower_name = name.to_lowercase(); let ext_match = lower_name.ends_with(".zip") || lower_name.ends_with(".tar.gz") || lower_name.ends_with(".tgz") || lower_name.ends_with(".7z") || lower_name.ends_with(".rar") || lower_name.ends_with(".tar"); if ext_match { nested_archives += 1; } else if actual_size > 0 && magic_len >= 4 { let is_nested = matches!( magic_bytes, [0x50, 0x4B, 0x03, 0x04, ..] // ZIP | [0x1F, 0x8B, ..] // gzip (tar.gz) | [0x37, 0x7A, 0xBC, 0xAF, 0x27, 0x1C, ..] // 7z | [0x52, 0x61, 0x72, 0x21, ..] // RAR ); if is_nested { nested_archives += 1; } } } // Check total uncompressed size if total_uncompressed > constants::SCAN_ZIP_MAX_UNCOMPRESSED { return LayerResult { layer: "archive", verdict: LayerVerdict::Fail, detail: Some(format!( "Total uncompressed size {} bytes exceeds limit of {} bytes", total_uncompressed, constants::SCAN_ZIP_MAX_UNCOMPRESSED )), }; } // Check compression ratio (ZIP bomb detection) if total_compressed > 0 { let ratio = total_uncompressed as f64 / total_compressed as f64; if ratio > constants::SCAN_ZIP_MAX_RATIO { return LayerResult { layer: "archive", verdict: LayerVerdict::Fail, detail: Some(format!( "Compression ratio {:.1}x exceeds limit of {:.0}x (possible ZIP bomb)", ratio, constants::SCAN_ZIP_MAX_RATIO )), }; } } // Check nesting depth if nested_archives > constants::SCAN_ZIP_MAX_DEPTH { return LayerResult { layer: "archive", verdict: LayerVerdict::Fail, detail: Some(format!( "Contains {} nested archives (limit: {})", nested_archives, constants::SCAN_ZIP_MAX_DEPTH )), }; } LayerResult { layer: "archive", verdict: LayerVerdict::Pass, detail: Some(format!( "{} entries, {:.1}x ratio", archive.len(), if total_compressed > 0 { total_uncompressed as f64 / total_compressed as f64 } else { 0.0 } )), } } #[cfg(test)] mod tests { use super::*; use zip::write::SimpleFileOptions; fn make_zip(entries: &[(&str, &[u8])]) -> Vec { let buf = Vec::new(); let cursor = Cursor::new(buf); let mut writer = zip::ZipWriter::new(cursor); let options = SimpleFileOptions::default().compression_method(zip::CompressionMethod::Stored); for (name, data) in entries { writer.start_file(*name, options).unwrap(); std::io::Write::write_all(&mut writer, data).unwrap(); } writer.finish().unwrap().into_inner() } fn make_compressed_zip(entries: &[(&str, &[u8])]) -> Vec { let buf = Vec::new(); let cursor = Cursor::new(buf); let mut writer = zip::ZipWriter::new(cursor); let options = SimpleFileOptions::default().compression_method(zip::CompressionMethod::Deflated); for (name, data) in entries { writer.start_file(*name, options).unwrap(); std::io::Write::write_all(&mut writer, data).unwrap(); } writer.finish().unwrap().into_inner() } // -- Skip behavior -- #[test] fn non_zip_skipped() { let result = check_archive_safety(b"not a zip file", FileType::Download); assert_eq!(result.verdict, LayerVerdict::Skip); } #[test] fn audio_non_zip_skipped() { let result = check_archive_safety(b"audio data", FileType::Audio); assert_eq!(result.verdict, LayerVerdict::Skip); } #[test] fn cover_zip_skipped() { // A ZIP file claimed as cover should be skipped (layer 1 handles type mismatch) let data = make_zip(&[("test.txt", b"hello")]); let result = check_archive_safety(&data, FileType::Cover); assert_eq!(result.verdict, LayerVerdict::Skip); } // -- Valid archives -- #[test] fn valid_zip_passes() { let data = make_zip(&[("test.txt", b"hello world")]); let result = check_archive_safety(&data, FileType::Download); assert_eq!(result.verdict, LayerVerdict::Pass); } #[test] fn empty_zip_passes() { let buf = Vec::new(); let cursor = Cursor::new(buf); let writer = zip::ZipWriter::new(cursor); let data = writer.finish().unwrap().into_inner(); // Empty ZIPs may not have the PK magic at offset 0, they'd just be // an end-of-central-directory record. If it doesn't start with PK 03 04, // we'll skip it. That's fine. let result = check_archive_safety(&data, FileType::Download); // Either Skip (no local file header) or Pass (valid empty ZIP) assert!( result.verdict == LayerVerdict::Skip || result.verdict == LayerVerdict::Pass, "unexpected verdict: {:?}", result.verdict ); } #[test] fn multi_entry_zip_passes() { let data = make_zip(&[ ("file1.txt", b"content one"), ("subdir/file2.txt", b"content two"), ("readme.md", b"# hello"), ]); let result = check_archive_safety(&data, FileType::Download); assert_eq!(result.verdict, LayerVerdict::Pass); assert!(result.detail.unwrap().contains("3 entries")); } // -- Path traversal -- #[test] fn zip_with_forward_slash_traversal_fails() { let data = make_zip(&[("../../../etc/passwd", b"pwned")]); let result = check_archive_safety(&data, FileType::Download); assert_eq!(result.verdict, LayerVerdict::Fail); assert!(result.detail.unwrap().contains("Path traversal")); } #[test] fn zip_with_backslash_traversal_fails() { let data = make_zip(&[("..\\..\\Windows\\System32\\config", b"pwned")]); let result = check_archive_safety(&data, FileType::Download); assert_eq!(result.verdict, LayerVerdict::Fail); assert!(result.detail.unwrap().contains("Path traversal")); } #[test] fn zip_with_mid_path_traversal_fails() { let data = make_zip(&[("safe/../../etc/passwd", b"pwned")]); let result = check_archive_safety(&data, FileType::Download); assert_eq!(result.verdict, LayerVerdict::Fail); } #[test] fn zip_with_url_encoded_traversal_fails() { // %2e%2e is URL-encoded "..". The check is case-insensitive on the encoding. let data = make_zip(&[("%2E%2E/secrets", b"pwned")]); let result = check_archive_safety(&data, FileType::Download); assert_eq!(result.verdict, LayerVerdict::Fail); assert!(result.detail.unwrap().contains("Path traversal")); } #[test] fn zip_with_absolute_path_fails() { let data = make_zip(&[("/etc/passwd", b"pwned")]); let result = check_archive_safety(&data, FileType::Download); assert_eq!(result.verdict, LayerVerdict::Fail); assert!(result.detail.unwrap().contains("Path traversal")); } #[test] fn zip_with_null_byte_in_name_fails() { let data = make_zip(&[("legit.txt\0../escape", b"pwned")]); let result = check_archive_safety(&data, FileType::Download); assert_eq!(result.verdict, LayerVerdict::Fail); assert!(result.detail.unwrap().contains("Path traversal")); } // -- Nesting detection -- #[test] fn zip_within_nesting_limit_passes() { // SCAN_ZIP_MAX_DEPTH = 2; the check is `nested > limit`, so 2 entries // with archive extensions sit exactly at the limit and must pass. let data = make_zip(&[ ("data.txt", b"content"), ("inner1.zip", b"fake zip content"), ("inner2.zip", b"fake zip content"), ]); let result = check_archive_safety(&data, FileType::Download); assert_eq!(result.verdict, LayerVerdict::Pass); } #[test] fn zip_exceeding_nesting_limit_fails() { // SCAN_ZIP_MAX_DEPTH = 2; 3 nested archives trips the limit. let data = make_zip(&[ ("inner1.zip", b"fake"), ("inner2.zip", b"fake"), ("inner3.zip", b"fake"), ]); let result = check_archive_safety(&data, FileType::Download); assert_eq!(result.verdict, LayerVerdict::Fail); assert!(result.detail.unwrap().contains("nested archives")); } #[test] fn nested_tar_gz_counts() { let data = make_zip(&[ ("archive.tar.gz", b"fake"), ("another.tar.gz", b"fake"), ("third.tar.gz", b"fake"), ("fourth.tar.gz", b"fake"), ]); let result = check_archive_safety(&data, FileType::Download); assert_eq!(result.verdict, LayerVerdict::Fail); } #[test] fn nested_7z_and_rar_count() { let data = make_zip(&[ ("a.7z", b"fake"), ("b.rar", b"fake"), ("c.zip", b"fake"), ("d.7z", b"fake"), ]); let result = check_archive_safety(&data, FileType::Download); assert_eq!(result.verdict, LayerVerdict::Fail); } #[test] fn nested_zip_detected_by_magic_without_extension() { // Inner file has innocent name but contains real ZIP magic bytes. // The extension check misses it; the magic-byte fallback must catch it. let inner_zip = make_zip(&[("payload.txt", b"hi")]); let data = make_zip(&[ ("a.bin", &inner_zip), ("b.bin", &inner_zip), ("c.bin", &inner_zip), ]); let result = check_archive_safety(&data, FileType::Download); assert_eq!(result.verdict, LayerVerdict::Fail); assert!(result.detail.unwrap().contains("nested archives")); } #[test] fn nested_gzip_detected_by_magic_without_extension() { // 1F 8B is gzip magic. No extension hint, must be caught by magic check. let gzip_bytes: &[u8] = &[0x1F, 0x8B, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00]; let data = make_zip(&[ ("one.dat", gzip_bytes), ("two.dat", gzip_bytes), ("three.dat", gzip_bytes), ]); let result = check_archive_safety(&data, FileType::Download); assert_eq!(result.verdict, LayerVerdict::Fail); assert!(result.detail.unwrap().contains("nested archives")); } #[test] fn nested_7z_detected_by_magic_without_extension() { // 7z magic: 37 7A BC AF 27 1C let sevenz_bytes: &[u8] = &[0x37, 0x7A, 0xBC, 0xAF, 0x27, 0x1C, 0x00, 0x00]; let data = make_zip(&[ ("alpha.bin", sevenz_bytes), ("beta.bin", sevenz_bytes), ("gamma.bin", sevenz_bytes), ]); let result = check_archive_safety(&data, FileType::Download); assert_eq!(result.verdict, LayerVerdict::Fail); } #[test] fn nested_rar_detected_by_magic_without_extension() { // RAR magic: 52 61 72 21 ("Rar!") let rar_bytes: &[u8] = &[0x52, 0x61, 0x72, 0x21, 0x1A, 0x07, 0x00, 0x00]; let data = make_zip(&[ ("x.bin", rar_bytes), ("y.bin", rar_bytes), ("z.bin", rar_bytes), ]); let result = check_archive_safety(&data, FileType::Download); assert_eq!(result.verdict, LayerVerdict::Fail); } #[test] fn non_archive_extensions_ignored() { let data = make_zip(&[ ("app.exe", b"binary"), ("readme.txt", b"hello"), ("image.png", b"pixels"), ]); let result = check_archive_safety(&data, FileType::Download); assert_eq!(result.verdict, LayerVerdict::Pass); } // -- Compression ratio (ZIP bomb detection) -- #[test] fn high_compression_ratio_fails() { // Create highly compressible data: repeating zeros compress extremely well // 1MB of zeros should compress to ~1KB with deflate, giving ratio ~1000x let zeros = vec![0u8; 1024 * 1024]; let data = make_compressed_zip(&[("bomb.bin", &zeros)]); let result = check_archive_safety(&data, FileType::Download); assert_eq!( result.verdict, LayerVerdict::Fail, "Expected Fail for high compression ratio, got: {:?}", result.detail ); assert!(result.detail.unwrap().contains("ZIP bomb")); } #[test] fn normal_compression_ratio_passes() { // Random-ish data doesn't compress well, ratio should be ~1x let data_bytes: Vec = (0..10000).map(|i| (i * 37 + 13) as u8).collect(); let data = make_compressed_zip(&[("normal.bin", &data_bytes)]); let result = check_archive_safety(&data, FileType::Download); assert_eq!(result.verdict, LayerVerdict::Pass); } // -- Audio file with ZIP magic (disguised archive) -- #[test] fn zip_disguised_as_audio_checked() { // A ZIP file claimed as Audio should still be checked (not skipped) let data = make_zip(&[("test.txt", b"hello")]); let result = check_archive_safety(&data, FileType::Audio); assert_eq!(result.verdict, LayerVerdict::Pass); } #[test] fn zip_disguised_as_audio_with_traversal_fails() { let data = make_zip(&[("../../../etc/passwd", b"pwned")]); let result = check_archive_safety(&data, FileType::Audio); assert_eq!(result.verdict, LayerVerdict::Fail); } // -- Corrupted ZIP -- #[test] fn corrupted_zip_magic_returns_error() { // Valid ZIP magic bytes but garbage after let mut data = vec![0x50, 0x4B, 0x03, 0x04]; data.extend_from_slice(&[0xFF; 100]); let result = check_archive_safety(&data, FileType::Download); assert_eq!(result.verdict, LayerVerdict::Error); assert!(result.detail.unwrap().contains("Failed to parse ZIP")); } #[test] fn path_entry_matches_buffered_for_non_zip() { let data = b"not a zip at all"; let buffered = check_archive_safety(data, FileType::Download); let tmp = tempfile::NamedTempFile::new().unwrap(); std::fs::write(tmp.path(), data).unwrap(); let path_based = check_archive_safety_path(tmp.path(), FileType::Download); assert_eq!(buffered.verdict, path_based.verdict); assert_eq!(buffered.verdict, LayerVerdict::Skip); } #[test] fn path_entry_matches_buffered_for_cover_skip() { let mut data = vec![0x50, 0x4B, 0x03, 0x04]; data.extend_from_slice(&[0xFF; 100]); let buffered = check_archive_safety(&data, FileType::Cover); let tmp = tempfile::NamedTempFile::new().unwrap(); std::fs::write(tmp.path(), &data).unwrap(); let path_based = check_archive_safety_path(tmp.path(), FileType::Cover); assert_eq!(buffered.verdict, path_based.verdict); assert_eq!(buffered.verdict, LayerVerdict::Skip); } // -- Single-stream decompression bombs (gzip / bzip2 / xz / zstd) -- use std::io::Write; fn gzip(data: &[u8]) -> Vec { let mut e = flate2::write::GzEncoder::new(Vec::new(), flate2::Compression::best()); e.write_all(data).unwrap(); e.finish().unwrap() } fn bzip2_compress(data: &[u8]) -> Vec { let mut e = bzip2::write::BzEncoder::new(Vec::new(), bzip2::Compression::new(9)); e.write_all(data).unwrap(); e.finish().unwrap() } fn xz(data: &[u8]) -> Vec { let mut e = xz2::write::XzEncoder::new(Vec::new(), 9); e.write_all(data).unwrap(); e.finish().unwrap() } fn zstd_compress(data: &[u8]) -> Vec { zstd::encode_all(data, 19).unwrap() } /// 8 MiB of zeros — compresses to a tiny stream at a ratio far above the /// 100x cap, the canonical decompression-bomb shape. fn bomb_payload() -> Vec { vec![0u8; 8 * 1024 * 1024] } /// Moderately-incompressible data: stays well under the ratio cap, so a /// legitimate compressed download passes. fn benign_payload() -> Vec { (0..200_000u32).map(|i| (i.wrapping_mul(2654435761) >> 13) as u8).collect() } #[test] fn gzip_bomb_fails() { let data = gzip(&bomb_payload()); let result = check_archive_safety(&data, FileType::Download); assert_eq!(result.verdict, LayerVerdict::Fail, "detail: {:?}", result.detail); assert!(result.detail.unwrap().to_lowercase().contains("bomb")); } #[test] fn benign_gzip_passes() { let data = gzip(&benign_payload()); let result = check_archive_safety(&data, FileType::Download); assert_eq!(result.verdict, LayerVerdict::Pass, "detail: {:?}", result.detail); } #[test] fn bzip2_bomb_fails() { let data = bzip2_compress(&bomb_payload()); let result = check_archive_safety(&data, FileType::Download); assert_eq!(result.verdict, LayerVerdict::Fail, "detail: {:?}", result.detail); } #[test] fn xz_bomb_fails() { let data = xz(&bomb_payload()); let result = check_archive_safety(&data, FileType::Download); assert_eq!(result.verdict, LayerVerdict::Fail, "detail: {:?}", result.detail); } #[test] fn zstd_bomb_fails() { let data = zstd_compress(&bomb_payload()); let result = check_archive_safety(&data, FileType::Download); assert_eq!(result.verdict, LayerVerdict::Fail, "detail: {:?}", result.detail); } #[test] fn gzip_bomb_caught_on_path_variant_too() { let data = gzip(&bomb_payload()); let tmp = tempfile::NamedTempFile::new().unwrap(); std::fs::write(tmp.path(), &data).unwrap(); let result = check_archive_safety_path(tmp.path(), FileType::Download); assert_eq!(result.verdict, LayerVerdict::Fail, "detail: {:?}", result.detail); } #[test] fn gzip_bomb_skipped_for_cover() { // Type mismatch is layer 1's job; the archive layer skips covers. let data = gzip(&bomb_payload()); let result = check_archive_safety(&data, FileType::Cover); assert_eq!(result.verdict, LayerVerdict::Skip); } // -- Prefixed / self-extracting ZIP (no offset-0 magic) -- #[test] fn prefixed_zip_is_not_silently_skipped() { // A real ZIP with arbitrary bytes prepended (the self-extracting-stub // shape). It lacks the offset-0 PK\x03\x04 magic, so the old offset-0 // gate would Skip it. The tail EOCD scan must catch it and hand it to // inspect_zip — the security property is that it is NOT Skipped. let zip = make_zip(&[("readme.txt", b"hello")]); let mut data = b"MZ\x90\x00 this is a self-extracting stub padding ".to_vec(); data.extend_from_slice(&zip); let result = check_archive_safety(&data, FileType::Download); assert_ne!( result.verdict, LayerVerdict::Skip, "prefixed ZIP must be inspected, not skipped; got {:?}", result.detail ); // And on the path variant. let tmp = tempfile::NamedTempFile::new().unwrap(); std::fs::write(tmp.path(), &data).unwrap(); let path_based = check_archive_safety_path(tmp.path(), FileType::Download); assert_ne!(path_based.verdict, LayerVerdict::Skip, "detail: {:?}", path_based.detail); } #[test] fn prefixed_zip_bomb_fails() { // Prepend a stub to a high-ratio ZIP; it must still be caught. let zeros = vec![0u8; 1024 * 1024]; let zip = make_compressed_zip(&[("bomb.bin", &zeros)]); let mut data = b"self-extracting stub ".to_vec(); data.extend_from_slice(&zip); let result = check_archive_safety(&data, FileType::Download); assert_eq!(result.verdict, LayerVerdict::Fail, "detail: {:?}", result.detail); } }