Skip to main content

max / makenotwork

29.5 KB · 750 lines History Blame Raw
//! Multi-layer malware scanning pipeline for file uploads.
//!
//! The in-process layers (content type, structural, archive, YARA, and the
//! per-platform signing checks) always run and are deterministic; YARA reports
//! `Skip` when no rules are loaded. The external layers (ClamAV, URLhaus,
//! MalwareBazaar, MetaDefender) are optional and depend on configuration.
//!
//! **Error policy is per-layer**, declared at each layer's source file as
//! `pub const ERROR_POLICY`. The aggregator `final_status` consults each
//! layer's policy via `error_policy_for`. The core in-process layers (content
//! type, structural, archive, YARA) are `FailClosed` (an error is a structural
//! defect); external layers are `FailOpen` (an error is an outage that must
//! not block the platform), as are the signing checks. See
//! `docs/scan-pipeline-audit.md` for the rationale.
//!
//! See also: `/docs/tech/content-protection`
14
15 pub mod archive;
16 pub mod clamav;
17 pub mod content_type;
18 pub mod hash_lookup;
19 pub mod metadefender;
20 pub mod signing_linux;
21 pub mod signing_macos;
22 pub mod signing_windows;
23 pub mod spool;
24 pub mod structural;
25 pub mod urlhaus;
26 pub mod worker;
27 pub mod yara;
28
29 use serde::Serialize;
30 use sha2::{Digest, Sha256};
31
32 use crate::config::ScanConfig;
33 use crate::db::FileScanStatus;
34 use crate::storage::FileType;
35
36 /// Per-layer scan verdict
37 #[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize)]
38 #[serde(rename_all = "lowercase")]
39 pub enum LayerVerdict {
40 Pass,
41 Fail,
42 Skip,
43 Error,
44 }
45
46 /// Policy for how a layer's `Error` verdict feeds into the pipeline's final status.
47 ///
48 /// - `FailClosed` — an `Error` from this layer holds the upload for admin review.
49 /// Appropriate for deterministic in-process layers where an `Error` indicates a
50 /// real bug or a structurally suspicious file.
51 /// - `FailOpen` — an `Error` from this layer is treated as `Skip` for aggregation.
52 /// Appropriate for external services (network, daemons) where an outage on a
53 /// third party must not take down the platform's upload pipeline.
54 ///
55 /// Each layer declares its own `ERROR_POLICY` const; the aggregator in
56 /// `ScanPipeline::final_status` consults the declaration via `error_policy_for`.
57 #[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize)]
58 #[serde(rename_all = "snake_case")]
59 pub enum ErrorPolicy {
60 FailClosed,
61 FailOpen,
62 }
63
64 /// Result from a single scanning layer
65 #[derive(Debug, Clone, Serialize)]
66 pub struct LayerResult {
67 pub layer: &'static str,
68 pub verdict: LayerVerdict,
69 pub detail: Option<String>,
70 }
71
72 /// Look up a layer's declared error policy by name. Defaults to `FailClosed`
73 /// for unknown layers — a defensive choice that surfaces uninstrumented
74 /// additions during testing rather than silently fail-opening them.
75 fn error_policy_for(layer: &str) -> ErrorPolicy {
76 match layer {
77 "content_type" => content_type::ERROR_POLICY,
78 "structural" => structural::ERROR_POLICY,
79 "archive" => archive::ERROR_POLICY,
80 "yara" => yara::ERROR_POLICY,
81 "clamav" => clamav::ERROR_POLICY,
82 "malwarebazaar" => hash_lookup::ERROR_POLICY,
83 "urlhaus" => urlhaus::ERROR_POLICY,
84 "signing_macos" => signing_macos::ERROR_POLICY,
85 "signing_windows" => signing_windows::ERROR_POLICY,
86 "signing_linux" => signing_linux::ERROR_POLICY,
87 "metadefender" => metadefender::ERROR_POLICY,
88 other => {
89 tracing::error!(layer = other, "unknown scan layer; defaulting to FailClosed");
90 ErrorPolicy::FailClosed
91 }
92 }
93 }
94
95 /// Decide whether the second-opinion (MetaDefender) layer should run, based
96 /// on the verdicts of layers that have already completed. Any `Fail`, or any
97 /// `Error` from a fail-closed in-process layer, counts as "suspicious enough
98 /// to escalate". Pure `Error`s from fail-open external layers do not — those
99 /// are operational noise, not malware signals.
100 fn suspicion_present(layers: &[LayerResult]) -> bool {
101 layers.iter().any(|l| {
102 match l.verdict {
103 LayerVerdict::Fail => true,
104 LayerVerdict::Error => error_policy_for(l.layer) == ErrorPolicy::FailClosed,
105 _ => false,
106 }
107 })
108 }
109
110 /// Aggregate per-layer results into a final scan status.
111 ///
112 /// - Any layer `Fail` → `Quarantined` (terminal).
113 /// - Any layer `Error` whose policy is `FailClosed` → `HeldForReview`.
114 /// - `FailOpen` errors are treated as Skip-equivalent for aggregation; they're
115 /// still surfaced in the per-layer detail so admins and PoM can see degraded
116 /// layers in the dashboard.
117 /// - Otherwise → `Clean`.
118 fn final_status(layers: &[LayerResult]) -> FileScanStatus {
119 if layers.iter().any(|l| l.verdict == LayerVerdict::Fail) {
120 return FileScanStatus::Quarantined;
121 }
122 let has_fail_closed_error = layers.iter().any(|l| {
123 l.verdict == LayerVerdict::Error && error_policy_for(l.layer) == ErrorPolicy::FailClosed
124 });
125 if has_fail_closed_error {
126 FileScanStatus::HeldForReview
127 } else {
128 FileScanStatus::Clean
129 }
130 }
131
132 /// Aggregate scan result across all layers
133 #[derive(Debug, Clone)]
134 pub struct ScanResult {
135 pub status: FileScanStatus,
136 pub layers: Vec<LayerResult>,
137 pub sha256: String,
138 pub file_size: u64,
139 }
140
141 /// Pre-compiled scanning pipeline. Initialized once at startup and shared via Arc.
142 pub struct ScanPipeline {
143 yara_rules: Option<yara_x::Rules>,
144 /// Number of YARA rule files that compiled, and the configured health floor.
145 yara_rule_count: usize,
146 yara_min_rule_files: usize,
147 clamav_socket: Option<String>,
148 malwarebazaar_enabled: bool,
149 urlhaus_enabled: bool,
150 abuse_ch_auth_key: Option<String>,
151 metadefender_api_key: Option<String>,
152 }
153
154 impl ScanPipeline {
155 /// Create a new pipeline, compiling YARA rules from the configured directory.
156 pub fn new(config: &ScanConfig) -> Result<Self, String> {
157 let (yara_rules, yara_rule_count) = yara::compile_rules_from_dir(&config.yara_rules_dir)?;
158
159 Ok(ScanPipeline {
160 yara_rules,
161 yara_rule_count,
162 yara_min_rule_files: config.yara_min_rule_files,
163 clamav_socket: config.clamav_socket.clone(),
164 malwarebazaar_enabled: config.malwarebazaar_enabled,
165 urlhaus_enabled: config.urlhaus_enabled,
166 abuse_ch_auth_key: config.abuse_ch_auth_key.clone(),
167 metadefender_api_key: config.metadefender_api_key.clone(),
168 })
169 }
170
171 /// Assert at startup that at least one real AV layer is live. Refuse to
172 /// boot otherwise — ClamAV's FailOpen policy means a dead clamd
173 /// silently passes every upload as Clean, and a YARA-rules-empty deploy
174 /// gives the same false sense of coverage. If the operator configured
175 /// scanning, a misconfiguration must be loud at boot, not silent at runtime.
176 pub async fn assert_live(&self) -> Result<(), String> {
177 let mut live_layers: Vec<&str> = Vec::new();
178 if let Some(ref socket) = self.clamav_socket {
179 match clamav::ping(socket).await {
180 Ok(()) => live_layers.push("clamav"),
181 Err(e) => {
182 return Err(format!("ClamAV socket {socket} unreachable: {e}"));
183 }
184 }
185 }
186 if self.yara_rules.is_some() {
187 // Expected-rule-count floor: a corpus that quietly dropped below the
188 // operator-declared size (e.g. a yara-x upgrade made N rules
189 // uncompilable) is degraded coverage masquerading as a live layer.
190 // Fail boot loudly when a floor is set and we're under it.
191 if self.yara_min_rule_files > 0 && self.yara_rule_count < self.yara_min_rule_files {
192 return Err(format!(
193 "YARA corpus degraded: {} rule files compiled, below the configured \
194 floor of {} (YARA_MIN_RULE_FILES). Refusing to boot — a silently \
195 shrunken rule set is false coverage.",
196 self.yara_rule_count, self.yara_min_rule_files,
197 ));
198 }
199 live_layers.push("yara");
200 }
201 if self.malwarebazaar_enabled {
202 live_layers.push("malwarebazaar");
203 }
204 if self.urlhaus_enabled {
205 live_layers.push("urlhaus");
206 }
207 if self.metadefender_api_key.is_some() {
208 live_layers.push("metadefender");
209 }
210 if live_layers.is_empty() {
211 return Err(
212 "Scanning configured but no AV layer is live (no ClamAV socket, \
213 no YARA rules, no remote API keys). Refusing to boot — the \
214 FailOpen policy would pass every upload as Clean."
215 .to_string(),
216 );
217 }
218 tracing::info!(layers = ?live_layers, "scan pipeline live layers asserted");
219 Ok(())
220 }
221
222 /// Run all applicable scanning layers against file data.
223 ///
224 /// CPU-bound layers (sha256, content-type, structural, archive, yara) run
225 /// on a blocking-pool thread via `spawn_blocking` so they don't stall the
226 /// tokio runtime — the 4 in-process layers can each take seconds on a
227 /// 100 MB file. Network-bound layers (ClamAV, MalwareBazaar) run
228 /// concurrently with the sync block via `tokio::join!`.
229 pub async fn scan(self: std::sync::Arc<Self>, data: Vec<u8>, file_type: FileType) -> ScanResult {
230 let file_size = data.len() as u64;
231 let data = std::sync::Arc::<[u8]>::from(data);
232
233 // Sync layers + hash, off the runtime
234 let sync_data = std::sync::Arc::clone(&data);
235 let sync_self = std::sync::Arc::clone(&self);
236 let sync_fut = tokio::task::spawn_blocking(move || sync_self.run_sync_layers(&sync_data, file_type));
237
238 // Async layers — ClamAV needs the bytes, MalwareBazaar needs only the hash
239 // but the hash is computed in the sync block, so we run MalwareBazaar
240 // after the sync block returns (its endpoint is fast). ClamAV can run
241 // concurrently with the sync block.
242 let clamav_data = std::sync::Arc::clone(&data);
243 let clamav_socket = self.clamav_socket.clone();
244 let clamav_fut = async move {
245 match clamav_socket {
246 Some(socket) => clamav::scan_with_clamav(&socket, &clamav_data).await,
247 None => LayerResult {
248 layer: "clamav",
249 verdict: LayerVerdict::Skip,
250 detail: Some("ClamAV not configured".to_string()),
251 },
252 }
253 };
254
255 // Layer 7: URLhaus — extract URLs from the bytes, query hosts.
256 // Runs concurrently with the sync block (independent of the hash).
257 let urlhaus_data = std::sync::Arc::clone(&data);
258 let urlhaus_enabled = self.urlhaus_enabled;
259 let urlhaus_key = self.abuse_ch_auth_key.clone();
260 let urlhaus_fut = async move {
261 if urlhaus_enabled {
262 urlhaus::check_urlhaus(&urlhaus_data, urlhaus_key.as_deref()).await
263 } else {
264 LayerResult {
265 layer: "urlhaus",
266 verdict: LayerVerdict::Skip,
267 detail: Some("URLhaus lookups disabled".to_string()),
268 }
269 }
270 };
271
272 let (sync_result, clamav_result, urlhaus_result) =
273 tokio::join!(sync_fut, clamav_fut, urlhaus_fut);
274 let (mut layers, sha256) = sync_result
275 .expect("scan_sync spawn_blocking panicked");
276 layers.push(clamav_result);
277 layers.push(urlhaus_result);
278
279 // Layer 6: MalwareBazaar — needs the hash from the sync block
280 layers.push(if self.malwarebazaar_enabled {
281 hash_lookup::check_malwarebazaar(&sha256, self.abuse_ch_auth_key.as_deref()).await
282 } else {
283 LayerResult {
284 layer: "malwarebazaar",
285 verdict: LayerVerdict::Skip,
286 detail: Some("MalwareBazaar lookups disabled".to_string()),
287 }
288 });
289
290 // Layer 9: MetaDefender (second-opinion). Only invoked when a prior
291 // layer flagged the file as suspicious — keeps us within the free-tier
292 // quota and avoids spending budget on uncontroversial uploads.
293 layers.push(if suspicion_present(&layers) {
294 metadefender::check_metadefender(&sha256, self.metadefender_api_key.as_deref()).await
295 } else {
296 LayerResult {
297 layer: "metadefender",
298 verdict: LayerVerdict::Skip,
299 detail: Some("No prior suspicion; second-opinion not invoked".to_string()),
300 }
301 });
302
303 let status = final_status(&layers);
304
305 ScanResult {
306 status,
307 layers,
308 sha256,
309 file_size,
310 }
311 }
312
313 /// Streaming counterpart to `scan`. Runs against a spooled tempfile so
314 /// the >100 MB case doesn't hold the whole object in RAM. The CPU
315 /// layers operate on a memory mapping (`spool::mmap_read`) — pages are
316 /// demand-paged by the kernel as goblin / yara-x / archive walk them —
317 /// and ClamAV streams the file via INSTREAM frames.
318 pub async fn scan_stream(
319 self: std::sync::Arc<Self>,
320 spool: spool::SpoolHandle,
321 file_type: FileType,
322 ) -> ScanResult {
323 let file_size = std::fs::metadata(spool.path())
324 .map(|m| m.len())
325 .unwrap_or(0);
326
327 let map = match spool::mmap_read(spool.path()) {
328 Ok(m) => std::sync::Arc::new(m),
329 Err(e) => {
330 let layer = LayerResult {
331 layer: "spool",
332 verdict: LayerVerdict::Error,
333 detail: Some(e),
334 };
335 return ScanResult {
336 status: final_status(std::slice::from_ref(&layer)),
337 layers: vec![layer],
338 sha256: String::new(),
339 file_size,
340 };
341 }
342 };
343
344 let sync_map = std::sync::Arc::clone(&map);
345 let sync_self = std::sync::Arc::clone(&self);
346 let sync_fut = tokio::task::spawn_blocking(move || sync_self.run_sync_layers(&sync_map, file_type));
347
348 let clamav_socket = self.clamav_socket.clone();
349 let clamav_path = spool.path().to_path_buf();
350 let clamav_fut = async move {
351 match clamav_socket {
352 Some(socket) => match tokio::fs::File::open(&clamav_path).await {
353 Ok(file) => clamav::scan_with_clamav_stream(&socket, file).await,
354 Err(e) => LayerResult {
355 layer: "clamav",
356 verdict: LayerVerdict::Error,
357 detail: Some(format!("open spool for clamav: {e}")),
358 },
359 },
360 None => LayerResult {
361 layer: "clamav",
362 verdict: LayerVerdict::Skip,
363 detail: Some("ClamAV not configured".to_string()),
364 },
365 }
366 };
367
368 let urlhaus_map = std::sync::Arc::clone(&map);
369 let urlhaus_enabled = self.urlhaus_enabled;
370 let urlhaus_key = self.abuse_ch_auth_key.clone();
371 let urlhaus_fut = async move {
372 if urlhaus_enabled {
373 urlhaus::check_urlhaus(&urlhaus_map, urlhaus_key.as_deref()).await
374 } else {
375 LayerResult {
376 layer: "urlhaus",
377 verdict: LayerVerdict::Skip,
378 detail: Some("URLhaus lookups disabled".to_string()),
379 }
380 }
381 };
382
383 let (sync_result, clamav_result, urlhaus_result) =
384 tokio::join!(sync_fut, clamav_fut, urlhaus_fut);
385 let (mut layers, sha256) = sync_result
386 .expect("scan_stream sync spawn_blocking panicked");
387 layers.push(clamav_result);
388 layers.push(urlhaus_result);
389
390 layers.push(if self.malwarebazaar_enabled {
391 hash_lookup::check_malwarebazaar(&sha256, self.abuse_ch_auth_key.as_deref()).await
392 } else {
393 LayerResult {
394 layer: "malwarebazaar",
395 verdict: LayerVerdict::Skip,
396 detail: Some("MalwareBazaar lookups disabled".to_string()),
397 }
398 });
399
400 layers.push(if suspicion_present(&layers) {
401 metadefender::check_metadefender(&sha256, self.metadefender_api_key.as_deref()).await
402 } else {
403 LayerResult {
404 layer: "metadefender",
405 verdict: LayerVerdict::Skip,
406 detail: Some("No prior suspicion; second-opinion not invoked".to_string()),
407 }
408 });
409
410 let status = final_status(&layers);
411 drop(map);
412 drop(spool);
413
414 ScanResult {
415 status,
416 layers,
417 sha256,
418 file_size,
419 }
420 }
421
422 /// CPU-bound layers + SHA-256. Pure sync; safe to call from `spawn_blocking`.
423 fn run_sync_layers(&self, data: &[u8], file_type: FileType) -> (Vec<LayerResult>, String) {
424 let mut layers = Vec::with_capacity(5);
425
426 // SHA-256 hash for audit + MalwareBazaar lookup
427 let sha256 = {
428 let mut hasher = Sha256::new();
429 hasher.update(data);
430 hex::encode(hasher.finalize())
431 };
432
433 layers.push(content_type::verify_content_type(data, file_type));
434 layers.push(structural::analyze_binary(data, file_type));
435 layers.push(archive::check_archive_safety(data, file_type));
436 layers.push(match self.yara_rules {
437 Some(ref rules) => yara::scan_with_yara(rules, data),
438 None => LayerResult {
439 layer: "yara",
440 verdict: LayerVerdict::Skip,
441 detail: Some("No YARA rules loaded".to_string()),
442 },
443 });
444 layers.push(signing_macos::verify_apple_signature(data, file_type));
445 layers.push(signing_windows::verify_authenticode(data, file_type));
446 layers.push(signing_linux::verify_appimage_signature(data, file_type));
447
448 (layers, sha256)
449 }
450 }
451
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn layer_verdict_serializes_lowercase() {
        assert_eq!(
            serde_json::to_string(&LayerVerdict::Pass).unwrap(),
            "\"pass\""
        );
        assert_eq!(
            serde_json::to_string(&LayerVerdict::Fail).unwrap(),
            "\"fail\""
        );
        assert_eq!(
            serde_json::to_string(&LayerVerdict::Skip).unwrap(),
            "\"skip\""
        );
        assert_eq!(
            serde_json::to_string(&LayerVerdict::Error).unwrap(),
            "\"error\""
        );
    }

    #[test]
    fn scan_result_quarantined_on_any_fail() {
        let layers = [
            LayerResult {
                layer: "test1",
                verdict: LayerVerdict::Pass,
                detail: None,
            },
            LayerResult {
                layer: "test2",
                verdict: LayerVerdict::Fail,
                detail: Some("bad".to_string()),
            },
        ];
        // Exercise the real aggregator rather than a local re-implementation
        // of its predicate.
        assert_eq!(final_status(&layers), FileScanStatus::Quarantined);
    }

    #[test]
    fn sha256_computation() {
        let mut hasher = Sha256::new();
        hasher.update(b"hello");
        let hash = hex::encode(hasher.finalize());
        assert_eq!(
            hash,
            "2cf24dba5fb0a30e26e83b2ac5b9e29e1b161e5c1fa7425e73043362938b9824"
        );
    }

    // -- Pipeline integration tests --

    /// Create a minimal ScanPipeline with no external deps (no YARA, no ClamAV, no MalwareBazaar).
    /// Wrapped in `Arc` because `scan` consumes `Arc<Self>` (see `pub async fn scan`).
    fn make_pipeline() -> std::sync::Arc<ScanPipeline> {
        std::sync::Arc::new(ScanPipeline {
            yara_rules: None,
            yara_rule_count: 0,
            yara_min_rule_files: 0,
            clamav_socket: None,
            malwarebazaar_enabled: false,
            urlhaus_enabled: false,
            abuse_ch_auth_key: None,
            metadefender_api_key: None,
        })
    }

    #[tokio::test]
    async fn pipeline_clean_download_passes() {
        let pipeline = make_pipeline();
        let result = pipeline
            .clone()
            .scan(b"just some file content".to_vec(), FileType::Download)
            .await;
        assert_eq!(result.status, FileScanStatus::Clean);
        assert_eq!(result.file_size, 22);
        assert!(!result.sha256.is_empty());
        assert_eq!(result.layers.len(), 11);
    }

    #[tokio::test]
    async fn pipeline_unrecognized_audio_quarantined() {
        let pipeline = make_pipeline();
        // Unrecognized data claimed as audio should be rejected by content_type layer
        let result = pipeline
            .clone()
            .scan(b"audio data here".to_vec(), FileType::Audio)
            .await;
        assert_eq!(result.status, FileScanStatus::Quarantined);
    }

    #[tokio::test]
    async fn pipeline_clean_cover_passes() {
        let pipeline = make_pipeline();
        // PNG magic bytes
        let png = [0x89, 0x50, 0x4E, 0x47, 0x0D, 0x0A, 0x1A, 0x0A];
        let result = pipeline.clone().scan(png.to_vec(), FileType::Cover).await;
        assert_eq!(result.status, FileScanStatus::Clean);
    }

    #[tokio::test]
    async fn pipeline_pe_as_audio_quarantined() {
        let pipeline = make_pipeline();
        // PE magic bytes — content-type layer should detect application/* and fail
        let pe_header = b"MZ\x90\x00\x03\x00\x00\x00";
        let result = pipeline
            .clone()
            .scan(pe_header.to_vec(), FileType::Audio)
            .await;
        assert_eq!(result.status, FileScanStatus::Quarantined);
        // Verify content_type layer produced the fail
        let content_type_layer = result
            .layers
            .iter()
            .find(|l| l.layer == "content_type")
            .unwrap();
        assert_eq!(content_type_layer.verdict, LayerVerdict::Fail);
    }

    #[tokio::test]
    async fn pipeline_pe_as_cover_quarantined() {
        let pipeline = make_pipeline();
        let pe_header = b"MZ\x90\x00\x03\x00\x00\x00";
        let result = pipeline
            .clone()
            .scan(pe_header.to_vec(), FileType::Cover)
            .await;
        assert_eq!(result.status, FileScanStatus::Quarantined);
    }

    #[tokio::test]
    async fn pipeline_sha256_is_deterministic() {
        let pipeline = make_pipeline();
        let data = b"deterministic hash test";
        let r1 = pipeline.clone().scan(data.to_vec(), FileType::Download).await;
        let r2 = pipeline.clone().scan(data.to_vec(), FileType::Download).await;
        assert_eq!(r1.sha256, r2.sha256);
    }

    #[tokio::test]
    async fn pipeline_skips_optional_layers_when_unconfigured() {
        let pipeline = make_pipeline();
        let result = pipeline
            .clone()
            .scan(b"test".to_vec(), FileType::Download)
            .await;

        let yara = result.layers.iter().find(|l| l.layer == "yara").unwrap();
        assert_eq!(yara.verdict, LayerVerdict::Skip);

        let clamav = result.layers.iter().find(|l| l.layer == "clamav").unwrap();
        assert_eq!(clamav.verdict, LayerVerdict::Skip);

        let mb = result
            .layers
            .iter()
            .find(|l| l.layer == "malwarebazaar")
            .unwrap();
        assert_eq!(mb.verdict, LayerVerdict::Skip);

        let uh = result.layers.iter().find(|l| l.layer == "urlhaus").unwrap();
        assert_eq!(uh.verdict, LayerVerdict::Skip);
    }

    #[tokio::test]
    async fn pipeline_always_produces_11_layers() {
        let pipeline = make_pipeline();
        for file_type in [FileType::Audio, FileType::Cover, FileType::Download] {
            let result = pipeline.clone().scan(b"data".to_vec(), file_type).await;
            assert_eq!(result.layers.len(), 11, "Expected 11 layers for {:?}", file_type);
        }
    }

    #[test]
    fn suspicion_present_on_fail() {
        let layers = vec![pass("content_type"), fail("yara")];
        assert!(suspicion_present(&layers));
    }

    #[test]
    fn suspicion_present_on_fail_closed_error() {
        let layers = vec![pass("content_type"), err("archive")];
        assert!(suspicion_present(&layers));
    }

    #[test]
    fn no_suspicion_when_fail_open_error_only() {
        // External-layer errors are operational noise, not malware signals;
        // they must not invoke MetaDefender.
        let layers = vec![pass("content_type"), err("malwarebazaar"), err("urlhaus")];
        assert!(!suspicion_present(&layers));
    }

    #[test]
    fn no_suspicion_when_all_clean() {
        let layers = vec![pass("content_type"), skip("yara"), pass("structural")];
        assert!(!suspicion_present(&layers));
    }

    #[tokio::test]
    async fn pipeline_errors_held_for_review() {
        // Errors from fail-closed layers (archive is in-process deterministic)
        // should hold the file for admin review.
        let pipeline = make_pipeline();
        // Corrupted ZIP magic bytes — archive layer returns Error
        let mut data = vec![0x50, 0x4B, 0x03, 0x04];
        data.extend_from_slice(&[0xFF; 100]);
        let result = pipeline.clone().scan(data, FileType::Download).await;
        let archive = result.layers.iter().find(|l| l.layer == "archive").unwrap();
        assert_eq!(archive.verdict, LayerVerdict::Error);
        assert_eq!(result.status, FileScanStatus::HeldForReview);
    }

    // -- Per-layer fail policy tests --

    fn err(layer: &'static str) -> LayerResult {
        LayerResult { layer, verdict: LayerVerdict::Error, detail: None }
    }
    fn pass(layer: &'static str) -> LayerResult {
        LayerResult { layer, verdict: LayerVerdict::Pass, detail: None }
    }
    fn skip(layer: &'static str) -> LayerResult {
        LayerResult { layer, verdict: LayerVerdict::Skip, detail: None }
    }
    fn fail(layer: &'static str) -> LayerResult {
        LayerResult { layer, verdict: LayerVerdict::Fail, detail: None }
    }

    #[test]
    fn final_status_clean_when_all_pass() {
        let layers = vec![
            pass("content_type"),
            pass("structural"),
            pass("archive"),
            skip("yara"),
            skip("clamav"),
            skip("malwarebazaar"),
        ];
        assert_eq!(final_status(&layers), FileScanStatus::Clean);
    }

    #[test]
    fn final_status_quarantined_on_any_fail() {
        let layers = vec![pass("content_type"), fail("yara"), skip("clamav")];
        assert_eq!(final_status(&layers), FileScanStatus::Quarantined);
    }

    #[test]
    fn final_status_fail_beats_error() {
        // A Fail anywhere supersedes any Error, regardless of policy.
        let layers = vec![err("malwarebazaar"), fail("yara")];
        assert_eq!(final_status(&layers), FileScanStatus::Quarantined);
    }

    #[test]
    fn final_status_held_on_fail_closed_error() {
        // archive is FailClosed — its Error must hold the file.
        let layers = vec![pass("content_type"), err("archive"), skip("clamav")];
        assert_eq!(final_status(&layers), FileScanStatus::HeldForReview);
    }

    #[test]
    fn final_status_clean_on_fail_open_error_only() {
        // malwarebazaar is FailOpen — its Error must NOT hold the file.
        // This is the regression of 2026-05-10 that motivated the audit.
        let layers = vec![
            pass("content_type"),
            pass("structural"),
            pass("archive"),
            skip("yara"),
            skip("clamav"),
            err("malwarebazaar"),
        ];
        assert_eq!(final_status(&layers), FileScanStatus::Clean);
    }

    #[test]
    fn final_status_clean_when_all_external_layers_error() {
        // Worst-case external-services outage: every network/daemon layer
        // erroring at once. As long as the in-process layers pass, the file
        // is Clean. Health is surfaced separately via per-layer monitoring.
        let layers = vec![
            pass("content_type"),
            pass("structural"),
            pass("archive"),
            skip("yara"),
            err("clamav"),
            err("malwarebazaar"),
        ];
        assert_eq!(final_status(&layers), FileScanStatus::Clean);
    }

    #[test]
    fn final_status_held_on_unknown_layer_error() {
        // Defensive default: an unknown layer name that errors falls through
        // to FailClosed. This is what catches a new layer added without
        // wiring its policy into `error_policy_for`.
        let layers = vec![
            pass("content_type"),
            err("brand_new_layer_someone_forgot_to_register"),
        ];
        assert_eq!(final_status(&layers), FileScanStatus::HeldForReview);
    }

    #[test]
    fn error_policy_for_all_known_layers() {
        // Table of every layer name registered in `error_policy_for`, with
        // the policy it is expected to resolve to. A FailOpen layer that gets
        // renamed or unregistered falls through to the FailClosed default and
        // fails here. For FailClosed layers the default is indistinguishable
        // from a registration, so this only pins the resulting value.
        let expected = [
            ("content_type", ErrorPolicy::FailClosed),
            ("structural", ErrorPolicy::FailClosed),
            ("archive", ErrorPolicy::FailClosed),
            ("yara", ErrorPolicy::FailClosed),
            ("clamav", ErrorPolicy::FailOpen),
            ("malwarebazaar", ErrorPolicy::FailOpen),
            ("urlhaus", ErrorPolicy::FailOpen),
            ("signing_macos", ErrorPolicy::FailOpen),
            ("signing_windows", ErrorPolicy::FailOpen),
            ("signing_linux", ErrorPolicy::FailOpen),
            ("metadefender", ErrorPolicy::FailOpen),
        ];
        for (name, policy) in expected {
            assert_eq!(
                error_policy_for(name),
                policy,
                "unexpected error policy for layer {name}"
            );
        }
    }

    #[test]
    fn content_type_is_fail_closed() {
        assert_eq!(error_policy_for("content_type"), ErrorPolicy::FailClosed);
    }
    #[test]
    fn structural_is_fail_closed() {
        assert_eq!(error_policy_for("structural"), ErrorPolicy::FailClosed);
    }
    #[test]
    fn archive_is_fail_closed() {
        assert_eq!(error_policy_for("archive"), ErrorPolicy::FailClosed);
    }
    #[test]
    fn yara_is_fail_closed() {
        assert_eq!(error_policy_for("yara"), ErrorPolicy::FailClosed);
    }
    #[test]
    fn clamav_is_fail_open() {
        assert_eq!(error_policy_for("clamav"), ErrorPolicy::FailOpen);
    }
    #[test]
    fn malwarebazaar_is_fail_open() {
        assert_eq!(error_policy_for("malwarebazaar"), ErrorPolicy::FailOpen);
    }
    #[test]
    fn urlhaus_is_fail_open() {
        assert_eq!(error_policy_for("urlhaus"), ErrorPolicy::FailOpen);
    }
    #[test]
    fn signing_macos_is_fail_open() {
        assert_eq!(error_policy_for("signing_macos"), ErrorPolicy::FailOpen);
    }
    #[test]
    fn metadefender_is_fail_open() {
        assert_eq!(error_policy_for("metadefender"), ErrorPolicy::FailOpen);
    }
    #[test]
    fn signing_windows_is_fail_open() {
        assert_eq!(error_policy_for("signing_windows"), ErrorPolicy::FailOpen);
    }
    #[test]
    fn signing_linux_is_fail_open() {
        assert_eq!(error_policy_for("signing_linux"), ErrorPolicy::FailOpen);
    }
}
750