Skip to main content

max / makenotwork

Scan archive interiors recursively; close OAuth/admin gaps Nested-archive recursion chronic (#20 INFO -> #21 MODERATE -> #22 MED): inner archive bytes were counted, never scanned, so a payload in a zip-in-a-zip reached Clean (only FailOpen ClamAV ever saw the outer stream). Delete the count-only branch in inspect_zip and add scan_nested_contents: it decompresses each entry and re-feeds the bytes through content-type/structural/YARA, descending into nested archives up to SCAN_ZIP_MAX_DEPTH. A new FailClosed "archive_nested" layer holds anything that can't be fully scanned (un-openable, over budget, or nested past the depth) for review, never Clean. The count-but-don't-scan shape is gone. Tests: EICAR in zip-in-zip is caught; EICAR in a gzip is caught; a nest past the scan depth is held; benign nesting passes. OAuth code->token: re-check account liveness at redemption (the refresh grant already did) so a user suspended between authorize and redeem can no longer mint a ~1h sync token. Cap `state` length on the prompt=none GET authorize path (the POST consent path already capped at 1024). Reports/moderation: resolve_report and resolve_action now surface NotFound on a zero-row update instead of a false success; a reporter can no longer stack a second still-open report against one target (per-target dedup under the existing per-reporter advisory lock). Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
Author: Max Johnson <me@maxj.phd> · 2026-06-16 22:54 UTC
Commit: abb31f16d2ce305a536b941857abf67c0643ee8d
Parent: a3387a4
6 files changed, +407 insertions, -159 deletions
@@ -6,7 +6,7 @@ use chrono::{DateTime, Utc};
6 6 use sqlx::PgPool;
7 7
8 8 use super::{ModerationActionId, ModerationActionType, UserId};
9 - use crate::error::Result;
9 + use crate::error::{AppError, Result};
10 10
11 11 /// A moderation action record.
12 12 #[derive(Debug, sqlx::FromRow)]
@@ -97,13 +97,19 @@ pub async fn get_history(
97 97 #[allow(dead_code)]
98 98 #[tracing::instrument(skip_all)]
99 99 pub async fn resolve_action(pool: &PgPool, action_id: ModerationActionId) -> Result<()> {
100 - sqlx::query(
100 + let result = sqlx::query(
101 101 "UPDATE moderation_actions SET resolved_at = NOW() WHERE id = $1",
102 102 )
103 103 .bind(action_id)
104 104 .execute(pool)
105 105 .await?;
106 106
107 + // A zero-row update means the action id was stale; report it rather than
108 + // returning a false success to the caller.
109 + if result.rows_affected() == 0 {
110 + return Err(AppError::NotFound);
111 + }
112 +
107 113 Ok(())
108 114 }
109 115
@@ -6,7 +6,7 @@ use uuid::Uuid;
6 6 use super::id_types::*;
7 7 use super::models::*;
8 8 use super::enums::*;
9 - use crate::error::Result;
9 + use crate::error::{AppError, Result};
10 10
11 11 /// Get reports for the admin queue, joined with reporter, target, and owner info.
12 12 ///
@@ -100,7 +100,7 @@ pub async fn resolve_report(
100 100 admin_notes: &str,
101 101 resolved_by: UserId,
102 102 ) -> Result<()> {
103 - sqlx::query(
103 + let result = sqlx::query(
104 104 r#"
105 105 UPDATE reports
106 106 SET status = $2, admin_notes = $3, resolved_by = $4, resolved_at = NOW()
@@ -114,6 +114,14 @@ pub async fn resolve_report(
114 114 .execute(pool)
115 115 .await?;
116 116
117 + // No matching row means the report id was stale (already resolved away, or
118 + // never existed). Surface it as NotFound so the admin sees a real failure
119 + // instead of a false "resolved" — the UPDATE silently affecting zero rows
120 + // otherwise reports success.
121 + if result.rows_affected() == 0 {
122 + return Err(AppError::NotFound);
123 + }
124 +
117 125 Ok(())
118 126 }
119 127
@@ -155,6 +163,26 @@ pub async fn create_report_within_daily_limit(
155 163 return Ok(None);
156 164 }
157 165
166 + // Per-(reporter, target) dedup: a reporter cannot stack a second still-open
167 + // report on the same target. Without this, one user could spend their whole
168 + // daily quota re-reporting one item and flood the moderation queue with
169 + // duplicates of a single complaint. Re-reporting is allowed once the prior
170 + // report is resolved (`resolved_at` set). Runs under the same per-reporter
171 + // advisory lock as the cap, so concurrent submits can't both slip past.
172 + let already_open: bool = sqlx::query_scalar(
173 + "SELECT EXISTS(SELECT 1 FROM reports \
174 + WHERE reporter_user_id = $1 AND target_type = $2 AND target_id = $3 \
175 + AND resolved_at IS NULL)",
176 + )
177 + .bind(reporter_id)
178 + .bind(target_type)
179 + .bind(target_id)
180 + .fetch_one(&mut *tx)
181 + .await?;
182 + if already_open {
183 + return Ok(None);
184 + }
185 +
158 186 let report = sqlx::query_as::<_, DbReport>(
159 187 r#"
160 188 INSERT INTO reports (reporter_user_id, target_type, target_id, report_type, reason)
@@ -269,6 +269,13 @@ async fn authorize_get(
269 269 .ok_or_else(|| AppError::BadRequest("redirect_uri is required".to_string()))?;
270 270 let state_param = params.state.as_deref()
271 271 .ok_or_else(|| AppError::BadRequest("state is required".to_string()))?;
272 + // Cap state length on the GET authorize path too (the POST consent path caps
273 + // at 1024). state is echoed into the auth_codes row and the redirect URL, so
274 + // an unbounded value on the prompt=none branch would otherwise flow through
275 + // unchecked. Mirror the POST limit.
276 + if state_param.len() > 1024 {
277 + return Err(AppError::BadRequest("state is too long".to_string()));
278 + }
272 279 let code_challenge = params.code_challenge.as_deref()
273 280 .ok_or_else(|| AppError::BadRequest("code_challenge is required".to_string()))?;
274 281 let code_challenge_method = params.code_challenge_method.as_deref().unwrap_or("S256");
@@ -655,6 +662,16 @@ async fn token_authorization_code(
655 662 return Err(AppError::BadRequest("PKCE verification failed".to_string()));
656 663 }
657 664
665 + // Re-check account liveness at redemption. A user suspended or deactivated
666 + // between authorize and code->token must not receive a token; the refresh
667 + // grant already applies this gate, but the code->token path skipped it,
668 + // leaving a window (up to the code TTL) where a just-suspended user could
669 + // still mint a ~1h sync token. Mirror the refresh path's liveness check.
670 + match db::users::get_user_by_id(&state.db, oauth_code.user_id).await? {
671 + Some(u) if !(u.is_suspended() || u.is_deactivated()) => {}
672 + _ => return Ok(oauth_error("invalid_grant")),
673 + }
674 +
658 675 let scope = GrantedScopes::parse(&oauth_code.scope);
659 676
660 677 // Empty scope = a legacy sync client (desktop apps via synckit-client): mint
@@ -265,7 +265,6 @@ fn inspect_zip<R: std::io::Read + std::io::Seek>(reader: R) -> LayerResult {
265 265
266 266 let mut total_compressed: u64 = 0;
267 267 let mut total_uncompressed: u64 = 0;
268 - let mut nested_archives: u32 = 0;
269 268
270 269 for i in 0..archive.len() {
271 270 let entry = match archive.by_index_raw(i) {
@@ -300,11 +299,11 @@ fn inspect_zip<R: std::io::Read + std::io::Seek>(reader: R) -> LayerResult {
300 299 drop(entry);
301 300
302 301 // Use actual decompressed byte count instead of trusting the claimed size
303 - // from the ZIP central directory (which is attacker-controlled).
304 - // Also capture the first 8 bytes for nested archive magic detection,
305 - // avoiding a second decompression pass.
306 - let mut magic_bytes = [0u8; 8];
307 - let mut magic_len = 0usize;
302 + // from the ZIP central directory (which is attacker-controlled). Inner
303 + // content (including nested archives) is inspected separately by
304 + // `scan_nested_contents`, which re-feeds each entry's decompressed bytes
305 + // through the FailClosed scan layers — this loop is purely the
306 + // compression-bomb / size accounting.
308 307 let actual_size = match archive.by_index(i) {
309 308 Ok(mut reader) => {
310 309 let mut counted: u64 = 0;
@@ -314,12 +313,6 @@ fn inspect_zip<R: std::io::Read + std::io::Seek>(reader: R) -> LayerResult {
314 313 match std::io::Read::read(&mut reader, &mut buf) {
315 314 Ok(0) => break,
316 315 Ok(n) => {
317 - // Capture first 8 bytes for magic detection
318 - if magic_len < 8 {
319 - let copy = n.min(8 - magic_len);
320 - magic_bytes[magic_len..magic_len + copy].copy_from_slice(&buf[..copy]);
321 - magic_len += copy;
322 - }
323 316 counted += n as u64;
324 317 if counted > limit {
325 318 return LayerResult {
@@ -369,30 +362,6 @@ fn inspect_zip<R: std::io::Read + std::io::Seek>(reader: R) -> LayerResult {
369 362 };
370 363 }
371 364 }
372 -
373 - // Check for nested archives — extension check first, then magic bytes
374 - // from the first decompression pass (no re-read needed).
375 - let lower_name = name.to_lowercase();
376 - let ext_match = lower_name.ends_with(".zip")
377 - || lower_name.ends_with(".tar.gz")
378 - || lower_name.ends_with(".tgz")
379 - || lower_name.ends_with(".7z")
380 - || lower_name.ends_with(".rar")
381 - || lower_name.ends_with(".tar");
382 - if ext_match {
383 - nested_archives += 1;
384 - } else if actual_size > 0 && magic_len >= 4 {
385 - let is_nested = matches!(
386 - magic_bytes,
387 - [0x50, 0x4B, 0x03, 0x04, ..] // ZIP
388 - | [0x1F, 0x8B, ..] // gzip (tar.gz)
389 - | [0x37, 0x7A, 0xBC, 0xAF, 0x27, 0x1C, ..] // 7z
390 - | [0x52, 0x61, 0x72, 0x21, ..] // RAR
391 - );
392 - if is_nested {
393 - nested_archives += 1;
394 - }
395 - }
396 365 }
397 366
398 367 // Check total uncompressed size
@@ -424,19 +393,6 @@ fn inspect_zip<R: std::io::Read + std::io::Seek>(reader: R) -> LayerResult {
424 393 }
425 394 }
426 395
427 - // Check nesting depth
428 - if nested_archives > constants::SCAN_ZIP_MAX_DEPTH {
429 - return LayerResult {
430 - layer: "archive",
431 - verdict: LayerVerdict::Fail,
432 - detail: Some(format!(
433 - "Contains {} nested archives (limit: {})",
434 - nested_archives,
435 - constants::SCAN_ZIP_MAX_DEPTH
436 - )),
437 - };
438 - }
439 -
440 396 LayerResult {
441 397 layer: "archive",
442 398 verdict: LayerVerdict::Pass,
@@ -452,6 +408,218 @@ fn inspect_zip<R: std::io::Read + std::io::Seek>(reader: R) -> LayerResult {
452 408 }
453 409 }
454 410
411 + /// True if `data` is a container we know how to descend into (offset-0 magic or
412 + /// a prefixed-ZIP central directory). Gate for the recursive interior scan.
413 + pub fn is_archive(data: &[u8]) -> bool {
414 + detect_kind(data).is_some() || has_zip_eocd(data)
415 + }
416 +
417 + /// Per-entry ceiling for interior scanning. An entry whose decompressed size
418 + /// exceeds this cannot be fully buffered for the in-process layers; rather than
419 + /// blow up memory it is reported "not fully scanned" and held for review (the
420 + /// bomb-defense walk in `check_archive_safety` independently bounds total size).
421 + const INTERIOR_ENTRY_MAX: usize = constants::SCAN_MAX_MEMORY_BYTES;
422 +
423 + fn nested(verdict: LayerVerdict, detail: String) -> LayerResult {
424 + LayerResult { layer: "archive_nested", verdict, detail: Some(detail) }
425 + }
426 +
427 + /// Recursively inspect the *interior* of an archive: decompress each entry and
428 + /// re-feed its bytes through the FailClosed in-process layers (content-type,
429 + /// structural, YARA), descending into nested archives up to `SCAN_ZIP_MAX_DEPTH`.
430 + ///
431 + /// This is the coverage `check_archive_safety` deliberately does NOT provide:
432 + /// that walk counts entries and bounds decompression-bomb size; this one scans
433 + /// their *content*. The result is the `"archive_nested"` layer:
434 + /// - `Pass` — every entry at every depth cleared the scan layers.
435 + /// - `Fail` — an entry tripped a content layer (malware / disguised HTML/exe).
436 + /// - `Error` — the interior could not be fully scanned (un-openable, over the
437 + /// per-entry or total budget, OR nesting deeper than `SCAN_ZIP_MAX_DEPTH`).
438 + /// `"archive_nested"` is FailClosed, so a not-fully-scanned interior is held
439 + /// for review, never passed Clean. There is no path that descends into an
440 + /// archive without either scanning the bytes or emitting this verdict — the
441 + /// old "count nested archives but never scan them" branch is gone.
442 + pub fn scan_nested_contents(data: &[u8], yara_rules: Option<&yara_x::Rules>) -> LayerResult {
443 + if !is_archive(data) {
444 + return nested(LayerVerdict::Skip, "Not an archive; no interior to scan".to_string());
445 + }
446 + let mut budget: u64 = constants::SCAN_ZIP_MAX_UNCOMPRESSED;
447 + match scan_interior(data, yara_rules, constants::SCAN_ZIP_MAX_DEPTH, &mut budget) {
448 + Some(result) => result,
449 + None => nested(
450 + LayerVerdict::Pass,
451 + "Archive interior fully scanned; no threats found".to_string(),
452 + ),
453 + }
454 + }
455 +
456 + /// Descend one archive level. `Some(verdict)` short-circuits with a non-clean
457 + /// result; `None` means everything at and below this level was clean.
458 + fn scan_interior(
459 + data: &[u8],
460 + yara_rules: Option<&yara_x::Rules>,
461 + depth: u32,
462 + budget: &mut u64,
463 + ) -> Option<LayerResult> {
464 + match detect_kind(data) {
465 + Some(ArchiveKind::Zip) => scan_zip_interior(Cursor::new(data), yara_rules, depth, budget),
466 + Some(stream) => match decompress_one_bounded(stream, Cursor::new(data), budget) {
467 + Ok(bytes) => scan_entry(&bytes, yara_rules, depth, budget),
468 + Err(why) => Some(nested(LayerVerdict::Error, why)),
469 + },
470 + None if has_zip_eocd(data) => {
471 + scan_zip_interior(Cursor::new(data), yara_rules, depth, budget)
472 + }
473 + None => None,
474 + }
475 + }
476 +
477 + fn scan_zip_interior<R: Read + std::io::Seek>(
478 + reader: R,
479 + yara_rules: Option<&yara_x::Rules>,
480 + depth: u32,
481 + budget: &mut u64,
482 + ) -> Option<LayerResult> {
483 + let mut archive = match zip::ZipArchive::new(reader) {
484 + Ok(a) => a,
485 + Err(e) => return Some(nested(LayerVerdict::Error, format!("cannot open nested ZIP: {e}"))),
486 + };
487 + if archive.len() > constants::SCAN_ZIP_MAX_ENTRIES {
488 + return Some(nested(
489 + LayerVerdict::Error,
490 + format!(
491 + "nested ZIP entry count {} exceeds limit {}",
492 + archive.len(),
493 + constants::SCAN_ZIP_MAX_ENTRIES
494 + ),
495 + ));
496 + }
497 + for i in 0..archive.len() {
498 + let bytes = match read_zip_entry_bounded(&mut archive, i, budget) {
499 + Ok(b) => b,
500 + Err(why) => return Some(nested(LayerVerdict::Error, why)),
501 + };
502 + if let Some(v) = scan_entry(&bytes, yara_rules, depth, budget) {
503 + return Some(v);
504 + }
505 + }
506 + None
507 + }
508 +
509 + /// Run an entry's decompressed bytes through the FailClosed in-process layers,
510 + /// then recurse if the entry is itself an archive.
511 + fn scan_entry(
512 + bytes: &[u8],
513 + yara_rules: Option<&yara_x::Rules>,
514 + depth: u32,
515 + budget: &mut u64,
516 + ) -> Option<LayerResult> {
517 + let yara_result = match yara_rules {
518 + Some(rules) => super::yara::scan_with_yara(rules, bytes),
519 + None => LayerResult { layer: "yara", verdict: LayerVerdict::Skip, detail: None },
520 + };
521 + // Inner bundle content is downloadable-by-a-buyer, so it is checked as a
522 + // `Download`: content-type catches disguised HTML/SVG, structural catches
523 + // executables, YARA catches signatures (EICAR, script-in-binary).
524 + let checks = [
525 + super::content_type::verify_content_type(bytes, FileType::Download),
526 + super::structural::analyze_binary(bytes, FileType::Download),
527 + yara_result,
528 + ];
529 + for r in checks {
530 + match r.verdict {
531 + LayerVerdict::Fail => {
532 + return Some(nested(
533 + LayerVerdict::Fail,
534 + format!(
535 + "nested archive entry flagged by {}: {}",
536 + r.layer,
537 + r.detail.unwrap_or_default()
538 + ),
539 + ));
540 + }
541 + LayerVerdict::Error
542 + if super::error_policy_for(r.layer) == ErrorPolicy::FailClosed =>
543 + {
544 + return Some(nested(
545 + LayerVerdict::Error,
546 + format!(
547 + "nested archive entry: {} layer errored: {}",
548 + r.layer,
549 + r.detail.unwrap_or_default()
550 + ),
551 + ));
552 + }
553 + _ => {}
554 + }
555 + }
556 + if is_archive(bytes) {
557 + if depth == 0 {
558 + return Some(nested(
559 + LayerVerdict::Error,
560 + "nested archive exceeds maximum scan depth; held for review".to_string(),
561 + ));
562 + }
563 + return scan_interior(bytes, yara_rules, depth - 1, budget);
564 + }
565 + None
566 + }
567 +
568 + fn read_zip_entry_bounded<R: Read + std::io::Seek>(
569 + archive: &mut zip::ZipArchive<R>,
570 + index: usize,
571 + budget: &mut u64,
572 + ) -> Result<Vec<u8>, String> {
573 + let mut entry = archive
574 + .by_index(index)
575 + .map_err(|e| format!("read nested ZIP entry {index}: {e}"))?;
576 + read_bounded(&mut entry, budget)
577 + }
578 +
579 + fn decompress_one_bounded<R: Read>(
580 + kind: ArchiveKind,
581 + reader: R,
582 + budget: &mut u64,
583 + ) -> Result<Vec<u8>, String> {
584 + let mut decoder: Box<dyn Read> = match kind {
585 + ArchiveKind::Gzip => Box::new(flate2::read::MultiGzDecoder::new(reader)),
586 + ArchiveKind::Bzip2 => Box::new(bzip2::read::BzDecoder::new(reader)),
587 + ArchiveKind::Xz => Box::new(xz2::read::XzDecoder::new(reader)),
588 + ArchiveKind::Zstd => zstd::stream::read::Decoder::new(reader)
589 + .map(|d| Box::new(d) as Box<dyn Read>)
590 + .map_err(|e| format!("zstd init failed: {e}"))?,
591 + ArchiveKind::Zip => return Err("zip handled separately".to_string()),
592 + };
593 + read_bounded(decoder.as_mut(), budget)
594 + }
595 +
596 + /// Read a decompressed entry into a Vec, enforcing the per-entry ceiling and the
597 + /// shared total budget. Either overflow, or a mid-stream decode error, is a
598 + /// "not fully scanned" condition (the caller fails closed).
599 + fn read_bounded(reader: &mut dyn Read, budget: &mut u64) -> Result<Vec<u8>, String> {
600 + let mut out: Vec<u8> = Vec::new();
601 + let mut buf = [0u8; 8192];
602 + loop {
603 + match reader.read(&mut buf) {
604 + Ok(0) => break,
605 + Ok(n) => {
606 + if out.len() + n > INTERIOR_ENTRY_MAX {
607 + return Err(format!(
608 + "nested entry exceeds {INTERIOR_ENTRY_MAX}-byte interior scan ceiling"
609 + ));
610 + }
611 + if n as u64 > *budget {
612 + return Err("nested archive content exceeds total interior scan budget".to_string());
613 + }
614 + *budget -= n as u64;
615 + out.extend_from_slice(&buf[..n]);
616 + }
617 + Err(e) => return Err(format!("decode nested entry: {e}")),
618 + }
619 + }
620 + Ok(out)
621 + }
622 +
455 623 #[cfg(test)]
456 624 mod tests {
457 625 use super::*;
@@ -596,109 +764,82 @@ mod tests {
596 764
597 765 // -- Nesting detection --
598 766
599 - #[test]
600 - fn zip_within_nesting_limit_passes() {
601 - // SCAN_ZIP_MAX_DEPTH = 2; the check is `nested > limit`, so 2 entries
602 - // with archive extensions sit exactly at the limit and must pass.
603 - let data = make_zip(&[
604 - ("data.txt", b"content"),
605 - ("inner1.zip", b"fake zip content"),
606 - ("inner2.zip", b"fake zip content"),
607 - ]);
608 - let result = check_archive_safety(&data, FileType::Download);
609 - assert_eq!(result.verdict, LayerVerdict::Pass);
767 + // -- Nested-archive interior scanning (`scan_nested_contents`) --
768 + //
769 + // The old behavior here merely *counted* nested-archive entries and failed a
770 + // ZIP with more than `SCAN_ZIP_MAX_DEPTH` of them, never inspecting their
771 + // contents — counting is not scanning, so a payload in a zip-in-a-zip passed
772 + // Clean (the run #20→#22 chronic). `check_archive_safety` no longer counts;
773 + // interior coverage is `scan_nested_contents`, exercised below with the real
774 + // rule set so an actual signature in a nested archive is caught or held.
775 +
776 + /// The compiled production YARA rules (includes the EICAR test signature).
777 + fn test_yara_rules() -> yara_x::Rules {
778 + super::super::yara::compile_rules_from_dir("yara-rules")
779 + .expect("compile yara-rules")
780 + .0
781 + .expect("yara-rules dir has rules")
610 782 }
611 783
612 - #[test]
613 - fn zip_exceeding_nesting_limit_fails() {
614 - // SCAN_ZIP_MAX_DEPTH = 2; 3 nested archives trips the limit.
615 - let data = make_zip(&[
616 - ("inner1.zip", b"fake"),
617 - ("inner2.zip", b"fake"),
618 - ("inner3.zip", b"fake"),
619 - ]);
620 - let result = check_archive_safety(&data, FileType::Download);
621 - assert_eq!(result.verdict, LayerVerdict::Fail);
622 - assert!(result.detail.unwrap().contains("nested archives"));
623 - }
784 + /// EICAR antivirus test string — matched by `yara-rules/mnw_custom.yar`.
785 + const EICAR: &[u8] =
786 + br#"X5O!P%@AP[4\PZX54(P^)7CC)7}$EICAR-STANDARD-ANTIVIRUS-TEST-FILE!$H+H*"#;
624 787
625 788 #[test]
626 - fn nested_tar_gz_counts() {
627 - let data = make_zip(&[
628 - ("archive.tar.gz", b"fake"),
629 - ("another.tar.gz", b"fake"),
630 - ("third.tar.gz", b"fake"),
631 - ("fourth.tar.gz", b"fake"),
632 - ]);
633 - let result = check_archive_safety(&data, FileType::Download);
634 - assert_eq!(result.verdict, LayerVerdict::Fail);
789 + fn benign_nested_zip_passes() {
790 + let inner = make_zip(&[("hello.txt", b"hello world")]);
791 + let outer = make_zip(&[("inner.zip", &inner), ("notes.txt", b"readme")]);
792 + let result = scan_nested_contents(&outer, Some(&test_yara_rules()));
793 + assert_eq!(result.verdict, LayerVerdict::Pass, "{:?}", result.detail);
635 794 }
636 795
637 796 #[test]
638 - fn nested_7z_and_rar_count() {
639 - let data = make_zip(&[
640 - ("a.7z", b"fake"),
641 - ("b.rar", b"fake"),
642 - ("c.zip", b"fake"),
643 - ("d.7z", b"fake"),
644 - ]);
645 - let result = check_archive_safety(&data, FileType::Download);
646 - assert_eq!(result.verdict, LayerVerdict::Fail);
647 - }
648 -
649 - #[test]
650 - fn nested_zip_detected_by_magic_without_extension() {
651 - // Inner file has innocent name but contains real ZIP magic bytes.
652 - // The extension check misses it; the magic-byte fallback must catch it.
653 - let inner_zip = make_zip(&[("payload.txt", b"hi")]);
654 - let data = make_zip(&[
655 - ("a.bin", &inner_zip),
656 - ("b.bin", &inner_zip),
657 - ("c.bin", &inner_zip),
658 - ]);
659 - let result = check_archive_safety(&data, FileType::Download);
660 - assert_eq!(result.verdict, LayerVerdict::Fail);
661 - assert!(result.detail.unwrap().contains("nested archives"));
797 + fn eicar_in_zip_in_zip_is_caught() {
798 + // outer.zip -> inner.zip -> evil.txt(EICAR). The interior bytes must
799 + // traverse YARA exactly as a top-level file would: not Clean.
800 + let inner = make_zip(&[("evil.txt", EICAR)]);
801 + let outer = make_zip(&[("inner.zip", &inner)]);
802 + let result = scan_nested_contents(&outer, Some(&test_yara_rules()));
803 + assert_eq!(
804 + result.verdict,
805 + LayerVerdict::Fail,
806 + "EICAR nested two zips deep must be caught, got {:?}",
807 + result.detail
808 + );
662 809 }
663 810
664 811 #[test]
665 - fn nested_gzip_detected_by_magic_without_extension() {
666 - // 1F 8B is gzip magic. No extension hint, must be caught by magic check.
667 - let gzip_bytes: &[u8] = &[0x1F, 0x8B, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00];
668 - let data = make_zip(&[
669 - ("one.dat", gzip_bytes),
670 - ("two.dat", gzip_bytes),
671 - ("three.dat", gzip_bytes),
672 - ]);
673 - let result = check_archive_safety(&data, FileType::Download);
674 - assert_eq!(result.verdict, LayerVerdict::Fail);
675 - assert!(result.detail.unwrap().contains("nested archives"));
812 + fn eicar_in_single_gzip_is_caught() {
813 + // A standalone gzip member is one logical entry; its decompressed bytes
814 + // must be scanned.
815 + let gz = gzip(EICAR);
816 + let result = scan_nested_contents(&gz, Some(&test_yara_rules()));
817 + assert_eq!(result.verdict, LayerVerdict::Fail, "{:?}", result.detail);
676 818 }
677 819
678 820 #[test]
679 - fn nested_7z_detected_by_magic_without_extension() {
680 - // 7z magic: 37 7A BC AF 27 1C
681 - let sevenz_bytes: &[u8] = &[0x37, 0x7A, 0xBC, 0xAF, 0x27, 0x1C, 0x00, 0x00];
682 - let data = make_zip(&[
683 - ("alpha.bin", sevenz_bytes),
684 - ("beta.bin", sevenz_bytes),
685 - ("gamma.bin", sevenz_bytes),
686 - ]);
687 - let result = check_archive_safety(&data, FileType::Download);
688 - assert_eq!(result.verdict, LayerVerdict::Fail);
821 + fn nesting_beyond_scan_depth_is_held_not_passed() {
822 + // SCAN_ZIP_MAX_DEPTH = 2. Wrap a benign file in enough ZIP layers that
823 + // the innermost archive sits past the descent budget; the interior is
824 + // not fully scanned, so it must fail closed (Error -> held), never Clean.
825 + let mut nested = make_zip(&[("leaf.txt", b"benign")]);
826 + for _ in 0..4 {
827 + nested = make_zip(&[("inner.zip", &nested)]);
828 + }
829 + let result = scan_nested_contents(&nested, Some(&test_yara_rules()));
830 + assert_eq!(
831 + result.verdict,
832 + LayerVerdict::Error,
833 + "a nest deeper than the scan depth must be held, got {:?}",
834 + result.detail
835 + );
836 + assert_eq!(super::super::error_policy_for(result.layer), ErrorPolicy::FailClosed);
689 837 }
690 838
691 839 #[test]
692 - fn nested_rar_detected_by_magic_without_extension() {
693 - // RAR magic: 52 61 72 21 ("Rar!")
694 - let rar_bytes: &[u8] = &[0x52, 0x61, 0x72, 0x21, 0x1A, 0x07, 0x00, 0x00];
695 - let data = make_zip(&[
696 - ("x.bin", rar_bytes),
697 - ("y.bin", rar_bytes),
698 - ("z.bin", rar_bytes),
699 - ]);
700 - let result = check_archive_safety(&data, FileType::Download);
701 - assert_eq!(result.verdict, LayerVerdict::Fail);
840 + fn non_archive_has_no_interior() {
841 + let result = scan_nested_contents(b"just some plain bytes", Some(&test_yara_rules()));
842 + assert_eq!(result.verdict, LayerVerdict::Skip);
702 843 }
703 844
704 845 #[test]
@@ -77,6 +77,12 @@ fn error_policy_for(layer: &str) -> ErrorPolicy {
77 77 "content_type" => content_type::ERROR_POLICY,
78 78 "structural" => structural::ERROR_POLICY,
79 79 "archive" => archive::ERROR_POLICY,
80 + // Recursive interior scan of an archive's entries. A decompressed entry
81 + // we could not fully inspect (un-openable, over budget, or nested deeper
82 + // than the scan depth) errors here and must fail closed — a payload
83 + // hidden in a zip-in-a-zip is held for review, never passed Clean. This
84 + // is the structural close of the nested-archive recursion chronic.
85 + "archive_nested" => ErrorPolicy::FailClosed,
80 86 "yara" => yara::ERROR_POLICY,
81 87 "clamav" => clamav::ERROR_POLICY,
82 88 // A *reachable* clamd that couldn't fully scan (size/scan limit,
@@ -517,6 +523,11 @@ impl ScanPipeline {
517 523 layers.push(content_type::verify_content_type(data, file_type));
518 524 layers.push(structural::analyze_binary(data, file_type));
519 525 layers.push(archive::check_archive_safety(data, file_type));
526 + // Recursively scan archive interiors: re-feed each entry's decompressed
527 + // bytes back through content-type/structural/YARA, descending into
528 + // nested archives. Covers the zip-in-a-zip seam the bomb-defense walk
529 + // above only counted. No-op (Skip) for non-archives.
530 + layers.push(archive::scan_nested_contents(data, self.yara_rules.as_ref()));
520 531 layers.push(match self.yara_rules {
521 532 Some(ref rules) => yara::scan_with_yara(rules, data),
522 533 None => LayerResult {
@@ -640,7 +651,7 @@ mod tests {
640 651 assert_eq!(result.status, FileScanStatus::Clean);
641 652 assert_eq!(result.file_size, 22);
642 653 assert!(!result.sha256.is_empty());
643 - assert_eq!(result.layers.len(), 11);
654 + assert_eq!(result.layers.len(), 12);
644 655 }
645 656
646 657 #[tokio::test]
@@ -708,11 +719,12 @@ mod tests {
708 719 }
709 720
710 721 #[tokio::test]
711 - async fn pipeline_always_produces_11_layers() {
722 + async fn pipeline_always_produces_12_layers() {
712 723 let pipeline = make_pipeline();
713 724 for file_type in [FileType::Audio, FileType::Cover, FileType::Download] {
714 725 let result = pipeline.clone().scan(b"data".to_vec(), file_type).await;
715 - assert_eq!(result.layers.len(), 11, "Expected 11 layers for {:?}", file_type);
726 + // 11 base layers + the recursive archive-interior layer (archive_nested).
727 + assert_eq!(result.layers.len(), 12, "Expected 12 layers for {:?}", file_type);
716 728 }
717 729 }
718 730
@@ -385,11 +385,10 @@ async fn non_admin_cannot_resolve_report() {
385 385 // ---------------------------------------------------------------------------
386 386
387 387 #[tokio::test]
388 - async fn report_spam_rate_limited_at_ten_per_day() {
388 + async fn report_daily_cap_enforced_across_distinct_targets() {
389 389 let mut h = TestHarness::new().await;
390 390
391 - // A creator with a public item to report. (create_report has no per-target
392 - // dedup, so repeatedly reporting the same item is what a spammer would do.)
391 + // A creator with one real, public item — the target for the over-cap report.
393 392 let setup = h.create_creator_with_item("spamtarget", "text", 0).await;
394 393 h.publish_project_and_item(&setup.project_id, &setup.item_id).await;
395 394
@@ -397,24 +396,29 @@ async fn report_spam_rate_limited_at_ten_per_day() {
397 396 let reporter_id = h.signup("spamreporter", "spamreporter@test.com", "password123").await;
398 397 h.login("spamreporter", "password123").await;
399 398
400 - let body = format!(
401 - "target_type=item&target_id={}&report_type=spam&reason=spam",
402 - setup.item_id
403 - );
404 -
405 - // First 10 are accepted (the cap is `>= 10` checked BEFORE insert, so the
406 - // 10th sees a count of 9 and goes through).
407 - for n in 1..=10 {
408 - let resp = h.client.post_form("/api/reports", &body).await;
409 - assert!(resp.status.is_success(), "report #{} should be accepted: {} {}", n, resp.status, resp.text);
399 + // Seed the reporter's day with 10 prior reports against distinct targets,
400 + // directly (the HTTP write-rate limiter — burst 10/IP — would otherwise trip
401 + // long before the per-day report cap). Distinct random target_ids so the
402 + // per-target dedup does not interfere with the over-cap HTTP report below.
403 + for _ in 0..10 {
404 + sqlx::query(
405 + "INSERT INTO reports (reporter_user_id, target_type, target_id, report_type, reason) \
406 + VALUES ($1, 'item', $2, 'spam', 'seed')",
407 + )
408 + .bind(reporter_id)
409 + .bind(uuid::Uuid::new_v4())
410 + .execute(&h.db)
411 + .await
412 + .unwrap();
410 413 }
411 414
412 - // The 11th sees a count of 10 and is rejected.
415 + // The 11th report (against the real item) sees a count of 10 and is rejected.
416 + let body = format!("target_type=item&target_id={}&report_type=spam&reason=spam", setup.item_id);
413 417 let resp = h.client.post_form("/api/reports", &body).await;
414 418 assert!(!resp.status.is_success() || resp.text.contains("limit reached"),
415 - "the 11th report must be rate-limited, got: {} {}", resp.status, resp.text);
419 + "the over-cap report must be rate-limited, got: {} {}", resp.status, resp.text);
416 420
417 - // Exactly 10 rows persisted — the rejected one wrote nothing.
421 + // Still exactly 10 rows — the rejected one wrote nothing.
418 422 let count: i64 = sqlx::query_scalar("SELECT COUNT(*) FROM reports WHERE reporter_user_id = $1")
419 423 .bind(reporter_id)
420 424 .fetch_one(&h.db)
@@ -422,3 +426,43 @@ async fn report_spam_rate_limited_at_ten_per_day() {
422 426 .unwrap();
423 427 assert_eq!(count, 10, "rate limit must cap stored reports at 10, found {count}");
424 428 }
429 +
430 + #[tokio::test]
431 + async fn report_same_target_deduped_while_open() {
432 + // Run #22 LOW: a reporter must not be able to stack multiple still-open
433 + // reports against one target (queue-flooding). The first lands; a second
434 + // against the same target while the first is unresolved is rejected and
435 + // writes nothing.
436 + let mut h = TestHarness::new().await;
437 + let setup = h.create_creator_with_item("dedupcreator", "text", 0).await;
438 + h.publish_project_and_item(&setup.project_id, &setup.item_id).await;
439 +
440 + h.client.post_form("/logout", "").await;
441 + let reporter_id = h.signup("dedupreporter", "dedupreporter@test.com", "password123").await;
442 + h.login("dedupreporter", "password123").await;
443 +
444 + let body = format!(
445 + "target_type=item&target_id={}&report_type=spam&reason=spam",
446 + setup.item_id
447 + );
448 +
449 + let first = h.client.post_form("/api/reports", &body).await;
450 + assert!(first.status.is_success(), "first report should land: {} {}", first.status, first.text);
451 +
452 + let second = h.client.post_form("/api/reports", &body).await;
453 + assert!(
454 + !second.status.is_success() || second.text.contains("limit reached"),
455 + "a second open report on the same target must be rejected, got: {} {}",
456 + second.status, second.text
457 + );
458 +
459 + let count: i64 = sqlx::query_scalar(
460 + "SELECT COUNT(*) FROM reports WHERE reporter_user_id = $1 AND target_id = $2",
461 + )
462 + .bind(reporter_id)
463 + .bind(setup.item_id.parse::<uuid::Uuid>().unwrap())
464 + .fetch_one(&h.db)
465 + .await
466 + .unwrap();
467 + assert_eq!(count, 1, "duplicate open report must not be stored, found {count}");
468 + }