//! Layer 4: YARA rule matching via yara-x. //! //! Rules are compiled once at startup from all `.yar` files in the configured //! rules directory. The compiled rules are stored in ScanPipeline and reused. use std::path::Path; use std::time::Duration; use super::{ErrorPolicy, LayerResult, LayerVerdict}; /// In-process deterministic layer. A YARA scan error (timeout, rule /// compilation failure at scan time) is structurally suspicious — fail closed. pub const ERROR_POLICY: ErrorPolicy = ErrorPolicy::FailClosed; /// Maximum time for a single YARA scan before it is aborted. const YARA_SCAN_TIMEOUT: Duration = Duration::from_secs(30); /// Compile all YARA rules from `.yar` files in a directory. /// Returns None if the directory doesn't exist or contains no rules. /// Compile every `.yar`/`.yara` file under `dir`. Returns the compiled `Rules` /// (or `None` when the directory is absent / empty) alongside the count of rule /// files that compiled successfully — the caller uses that count to enforce an /// optional health floor (see `ScanPipeline::assert_live`). pub fn compile_rules_from_dir(dir: &str) -> Result<(Option, usize), String> { let path = Path::new(dir); if !path.exists() { tracing::info!(dir = %dir, "YARA rules directory not found, skipping"); return Ok((None, 0)); } let mut compiler = yara_x::Compiler::new(); let mut rule_count = 0; let mut skipped_count = 0; let entries = std::fs::read_dir(path) .map_err(|e| format!("Failed to read YARA rules directory: {}", e))?; for entry in entries { let entry = entry.map_err(|e| format!("Failed to read directory entry: {}", e))?; let file_path = entry.path(); if file_path.extension().is_some_and(|ext| ext == "yar" || ext == "yara") { let source = match std::fs::read_to_string(&file_path) { Ok(s) => s, Err(e) => { tracing::warn!(file = %file_path.display(), error = %e, "skipping unreadable YARA rule file"); skipped_count += 1; continue; } }; // Per-file fail-open. Third-party rule corpora (e.g. Florian Roth's // signature-base) include rules that exercise built-in identifiers // (`filename`, `filepath`, `extension`, ...) which yara-x's pure-Rust // engine does not yet implement. A single such rule must not take // down the whole scanner — skip the file and log the rule path so // operators can audit coverage gaps. match compiler.add_source(source.as_str()) { Ok(_) => { rule_count += 1; tracing::debug!(file = %file_path.display(), "Loaded YARA rule file"); } Err(e) => { tracing::warn!( file = %file_path.display(), error = %e, "skipping YARA rule file that yara-x cannot compile" ); skipped_count += 1; } } } } if skipped_count > 0 { tracing::info!(skipped_count, "YARA rule files skipped due to unsupported features"); } if rule_count == 0 { tracing::info!(dir = %dir, "No YARA rule files found"); return Ok((None, 0)); } let rules = compiler .build(); tracing::info!(rule_count, skipped_count, dir = %dir, "YARA rules compiled"); Ok((Some(rules), rule_count)) } /// Scan file data against compiled YARA rules. pub fn scan_with_yara(rules: &yara_x::Rules, data: &[u8]) -> LayerResult { let mut scanner = yara_x::Scanner::new(rules); scanner.set_timeout(YARA_SCAN_TIMEOUT); let scan_results = match scanner.scan(data) { Ok(results) => results, Err(e) => { return LayerResult { layer: "yara", verdict: LayerVerdict::Error, detail: Some(format!("YARA scan failed: {}", e)), }; } }; let matching_rules: Vec = scan_results .matching_rules() .map(|rule| { let ns = rule.namespace(); let name = rule.identifier(); if ns == "default" { name.to_string() } else { format!("{}:{}", ns, name) } }) .collect(); if matching_rules.is_empty() { LayerResult { layer: "yara", verdict: LayerVerdict::Pass, detail: None, } } else { LayerResult { layer: "yara", verdict: LayerVerdict::Fail, detail: Some(format!("Matched rules: {}", matching_rules.join(", "))), } } } /// Scan a spooled file against YARA rules. Reads the file into memory /// (yara-x's `Scanner::scan` takes a byte slice). Path-based entry exists /// so the streaming code path has a clean call site even though it does /// not yet save on memory; the win comes when yara-x exposes mmap input. pub fn scan_with_yara_path(rules: &yara_x::Rules, path: &std::path::Path) -> LayerResult { match std::fs::read(path) { Ok(data) => scan_with_yara(rules, &data), Err(e) => LayerResult { layer: "yara", verdict: LayerVerdict::Error, detail: Some(format!("read spool {}: {e}", path.display())), }, } } #[cfg(test)] mod tests { use super::*; #[test] fn no_rules_dir_returns_none() { let result = compile_rules_from_dir("/nonexistent/path/to/rules"); assert!(result.is_ok()); assert!(result.unwrap().0.is_none()); } #[test] fn clean_data_passes() { // Compile a simple test rule that looks for "MALWARE_SIGNATURE" let mut compiler = yara_x::Compiler::new(); compiler .add_source( r#" rule test_malware { strings: $sig = "MALWARE_SIGNATURE" condition: $sig } "#, ) .unwrap(); let rules = compiler.build(); let result = scan_with_yara(&rules, b"this is clean data"); assert_eq!(result.verdict, LayerVerdict::Pass); } #[test] fn malicious_data_fails() { let mut compiler = yara_x::Compiler::new(); compiler .add_source( r#" rule test_malware { strings: $sig = "MALWARE_SIGNATURE" condition: $sig } "#, ) .unwrap(); let rules = compiler.build(); let result = scan_with_yara(&rules, b"contains MALWARE_SIGNATURE inside"); assert_eq!(result.verdict, LayerVerdict::Fail); assert!(result.detail.unwrap().contains("test_malware")); } #[test] fn path_entry_matches_buffered() { let mut compiler = yara_x::Compiler::new(); compiler .add_source( r#" rule test_malware { strings: $sig = "MALWARE_SIGNATURE" condition: $sig } "#, ) .unwrap(); let rules = compiler.build(); for sample in [&b"clean bytes"[..], &b"hit MALWARE_SIGNATURE here"[..]] { let buffered = scan_with_yara(&rules, sample); let tmp = tempfile::NamedTempFile::new().unwrap(); std::fs::write(tmp.path(), sample).unwrap(); let path_based = scan_with_yara_path(&rules, tmp.path()); assert_eq!(buffered.verdict, path_based.verdict); } } // ── Adversarial tests (test-fuzz) ── #[test] fn empty_data_passes() { let mut compiler = yara_x::Compiler::new(); compiler .add_source( r#" rule test_malware { strings: $sig = "MALWARE_SIGNATURE" condition: $sig } "#, ) .unwrap(); let rules = compiler.build(); let result = scan_with_yara(&rules, b""); assert_eq!(result.verdict, LayerVerdict::Pass); } #[test] fn multiple_rules_all_reported() { let mut compiler = yara_x::Compiler::new(); compiler .add_source( r#" rule rule_alpha { strings: $a = "ALPHA" condition: $a } rule rule_beta { strings: $b = "BETA" condition: $b } "#, ) .unwrap(); let rules = compiler.build(); let result = scan_with_yara(&rules, b"ALPHA and BETA are both here"); assert_eq!(result.verdict, LayerVerdict::Fail); let detail = result.detail.unwrap(); assert!(detail.contains("rule_alpha"), "Missing rule_alpha in: {}", detail); assert!(detail.contains("rule_beta"), "Missing rule_beta in: {}", detail); } #[test] fn partial_match_does_not_trigger() { let mut compiler = yara_x::Compiler::new(); compiler .add_source( r#" rule test_exact { strings: $sig = "EXACT_MATCH" condition: $sig } "#, ) .unwrap(); let rules = compiler.build(); // Partial overlap should not match let result = scan_with_yara(&rules, b"EXACT_MATC"); assert_eq!(result.verdict, LayerVerdict::Pass); } #[test] fn large_clean_data_passes() { let mut compiler = yara_x::Compiler::new(); compiler .add_source( r#" rule test_sig { strings: $sig = "DANGEROUS" condition: $sig } "#, ) .unwrap(); let rules = compiler.build(); // 1MB of clean data let data = vec![b'A'; 1_000_000]; let result = scan_with_yara(&rules, &data); assert_eq!(result.verdict, LayerVerdict::Pass); } #[test] fn signature_at_end_of_data() { let mut compiler = yara_x::Compiler::new(); compiler .add_source( r#" rule end_sig { strings: $sig = "TAIL" condition: $sig } "#, ) .unwrap(); let rules = compiler.build(); let mut data = vec![b'X'; 10000]; data.extend_from_slice(b"TAIL"); let result = scan_with_yara(&rules, &data); assert_eq!(result.verdict, LayerVerdict::Fail); assert!(result.detail.unwrap().contains("end_sig")); } #[test] fn compile_rules_from_real_temp_dir() { let dir = tempfile::tempdir().unwrap(); // Write a valid YARA rule file std::fs::write( dir.path().join("test.yar"), r#" rule hello_world { strings: $hw = "Hello, World!" condition: $hw } "#, ) .unwrap(); let result = compile_rules_from_dir(dir.path().to_str().unwrap()); assert!(result.is_ok()); let (rules, count) = result.unwrap(); assert!(rules.is_some(), "Should have compiled one rule"); assert_eq!(count, 1, "exactly one rule file compiled"); // Verify the rules work let rules = rules.unwrap(); let scan = scan_with_yara(&rules, b"Hello, World!"); assert_eq!(scan.verdict, LayerVerdict::Fail); assert!(scan.detail.unwrap().contains("hello_world")); } #[test] fn empty_dir_returns_none() { let dir = tempfile::tempdir().unwrap(); let result = compile_rules_from_dir(dir.path().to_str().unwrap()); assert!(result.is_ok()); assert!(result.unwrap().0.is_none()); } #[test] fn non_yar_files_ignored() { let dir = tempfile::tempdir().unwrap(); std::fs::write(dir.path().join("readme.txt"), "not a rule").unwrap(); std::fs::write(dir.path().join("rules.json"), "{}").unwrap(); let result = compile_rules_from_dir(dir.path().to_str().unwrap()); assert!(result.is_ok()); assert!(result.unwrap().0.is_none()); } #[test] fn invalid_yara_rule_is_skipped_not_fatal() { // Per-file fail-open: a single uncompilable rule (e.g. one using a // yara-x-unsupported built-in identifier from a third-party corpus) // must not abort the entire scanner. The file is logged and skipped. let dir = tempfile::tempdir().unwrap(); std::fs::write(dir.path().join("bad.yar"), "this is not valid YARA syntax").unwrap(); let result = compile_rules_from_dir(dir.path().to_str().unwrap()); assert!(result.is_ok(), "skipped-on-error, not aborted"); // No valid rules in the dir, so the function returns Ok(None) — the // pipeline interprets None as "yara not configured" → Skip verdict. assert!(result.unwrap().0.is_none()); } #[test] fn mixed_valid_and_invalid_rules_keeps_valid() { let dir = tempfile::tempdir().unwrap(); std::fs::write( dir.path().join("good.yar"), r#"rule clean_test { strings: $s = "marker" condition: $s }"#, ).unwrap(); std::fs::write(dir.path().join("bad.yar"), "not yara at all").unwrap(); let result = compile_rules_from_dir(dir.path().to_str().unwrap()); assert!(result.is_ok()); let (rules, count) = result.unwrap(); assert!(rules.is_some(), "valid rules still compile when bad files are present"); assert_eq!(count, 1, "the one valid file compiled; the bad one was skipped"); } #[test] fn default_namespace_rule_is_unprefixed() { // Catches the L79 `==` → `!=` mutation. The function emits "rule" for // default-namespace rules but "ns:rule" otherwise. Under the mutant the // prefix logic inverts. Existing tests check `detail.contains("rule_x")` // which is also true for "default:rule_x", so they don't catch the flip. // Pin the exact format. let mut compiler = yara_x::Compiler::new(); compiler .add_source( r#" rule plain { strings: $a = "TARGET" condition: $a } "#, ) .unwrap(); let rules = compiler.build(); let result = scan_with_yara(&rules, b"TARGET in data"); let detail = result.detail.unwrap(); // Default-namespace rule must appear unprefixed. assert!(detail.contains("plain"), "missing rule name: {detail}"); assert!( !detail.contains("default:"), "default-namespace rules must not be prefixed; got {detail}" ); } }