Skip to main content

max / makenotwork

15.4 KB · 465 lines History Blame Raw
//! Layer 4: YARA rule matching via yara-x.
//!
//! Rules are compiled once at startup from all `.yar`/`.yara` files in the
//! configured rules directory. The compiled rules are stored in ScanPipeline
//! and reused.
5
6 use std::path::Path;
7 use std::time::Duration;
8
9 use super::{ErrorPolicy, LayerResult, LayerVerdict};
10
11 /// In-process deterministic layer. A YARA scan error (timeout, rule
12 /// compilation failure at scan time) is structurally suspicious — fail closed.
13 pub const ERROR_POLICY: ErrorPolicy = ErrorPolicy::FailClosed;
14
15 /// Maximum time for a single YARA scan before it is aborted.
16 const YARA_SCAN_TIMEOUT: Duration = Duration::from_secs(30);
17
18 /// Compile all YARA rules from `.yar` files in a directory.
19 /// Returns None if the directory doesn't exist or contains no rules.
20 /// Compile every `.yar`/`.yara` file under `dir`. Returns the compiled `Rules`
21 /// (or `None` when the directory is absent / empty) alongside the count of rule
22 /// files that compiled successfully — the caller uses that count to enforce an
23 /// optional health floor (see `ScanPipeline::assert_live`).
24 pub fn compile_rules_from_dir(dir: &str) -> Result<(Option<yara_x::Rules>, usize), String> {
25 let path = Path::new(dir);
26 if !path.exists() {
27 tracing::info!(dir = %dir, "YARA rules directory not found, skipping");
28 return Ok((None, 0));
29 }
30
31 let mut compiler = yara_x::Compiler::new();
32 let mut rule_count = 0;
33 let mut skipped_count = 0;
34
35 let entries = std::fs::read_dir(path)
36 .map_err(|e| format!("Failed to read YARA rules directory: {}", e))?;
37
38 for entry in entries {
39 let entry = entry.map_err(|e| format!("Failed to read directory entry: {}", e))?;
40 let file_path = entry.path();
41
42 if file_path.extension().is_some_and(|ext| ext == "yar" || ext == "yara") {
43 let source = match std::fs::read_to_string(&file_path) {
44 Ok(s) => s,
45 Err(e) => {
46 tracing::warn!(file = %file_path.display(), error = %e, "skipping unreadable YARA rule file");
47 skipped_count += 1;
48 continue;
49 }
50 };
51
52 // Per-file fail-open. Third-party rule corpora (e.g. Florian Roth's
53 // signature-base) include rules that exercise built-in identifiers
54 // (`filename`, `filepath`, `extension`, ...) which yara-x's pure-Rust
55 // engine does not yet implement. A single such rule must not take
56 // down the whole scanner — skip the file and log the rule path so
57 // operators can audit coverage gaps.
58 match compiler.add_source(source.as_str()) {
59 Ok(_) => {
60 rule_count += 1;
61 tracing::debug!(file = %file_path.display(), "Loaded YARA rule file");
62 }
63 Err(e) => {
64 tracing::warn!(
65 file = %file_path.display(),
66 error = %e,
67 "skipping YARA rule file that yara-x cannot compile"
68 );
69 skipped_count += 1;
70 }
71 }
72 }
73 }
74
75 if skipped_count > 0 {
76 tracing::info!(skipped_count, "YARA rule files skipped due to unsupported features");
77 }
78
79 if rule_count == 0 {
80 tracing::info!(dir = %dir, "No YARA rule files found");
81 return Ok((None, 0));
82 }
83
84 let rules = compiler
85 .build();
86
87 tracing::info!(rule_count, skipped_count, dir = %dir, "YARA rules compiled");
88 Ok((Some(rules), rule_count))
89 }
90
91 /// Scan file data against compiled YARA rules.
92 pub fn scan_with_yara(rules: &yara_x::Rules, data: &[u8]) -> LayerResult {
93 let mut scanner = yara_x::Scanner::new(rules);
94 scanner.set_timeout(YARA_SCAN_TIMEOUT);
95
96 let scan_results = match scanner.scan(data) {
97 Ok(results) => results,
98 Err(e) => {
99 return LayerResult {
100 layer: "yara",
101 verdict: LayerVerdict::Error,
102 detail: Some(format!("YARA scan failed: {}", e)),
103 };
104 }
105 };
106
107 let matching_rules: Vec<String> = scan_results
108 .matching_rules()
109 .map(|rule| {
110 let ns = rule.namespace();
111 let name = rule.identifier();
112 if ns == "default" {
113 name.to_string()
114 } else {
115 format!("{}:{}", ns, name)
116 }
117 })
118 .collect();
119
120 if matching_rules.is_empty() {
121 LayerResult {
122 layer: "yara",
123 verdict: LayerVerdict::Pass,
124 detail: None,
125 }
126 } else {
127 LayerResult {
128 layer: "yara",
129 verdict: LayerVerdict::Fail,
130 detail: Some(format!("Matched rules: {}", matching_rules.join(", "))),
131 }
132 }
133 }
134
135 /// Scan a spooled file against YARA rules. Reads the file into memory
136 /// (yara-x's `Scanner::scan` takes a byte slice). Path-based entry exists
137 /// so the streaming code path has a clean call site even though it does
138 /// not yet save on memory; the win comes when yara-x exposes mmap input.
139 pub fn scan_with_yara_path(rules: &yara_x::Rules, path: &std::path::Path) -> LayerResult {
140 match std::fs::read(path) {
141 Ok(data) => scan_with_yara(rules, &data),
142 Err(e) => LayerResult {
143 layer: "yara",
144 verdict: LayerVerdict::Error,
145 detail: Some(format!("read spool {}: {e}", path.display())),
146 },
147 }
148 }
149
#[cfg(test)]
mod tests {
    use super::*;

    /// Rule shared by the basic tests: fires on the literal `MALWARE_SIGNATURE`.
    const MALWARE_RULE: &str = r#"
        rule test_malware {
            strings:
                $sig = "MALWARE_SIGNATURE"
            condition:
                $sig
        }
    "#;

    /// Compile one in-memory rule source, panicking if yara-x rejects it.
    fn compile(source: &str) -> yara_x::Rules {
        let mut compiler = yara_x::Compiler::new();
        compiler.add_source(source).unwrap();
        compiler.build()
    }

    /// Run `compile_rules_from_dir` on a temporary directory.
    fn compile_dir(dir: &tempfile::TempDir) -> Result<(Option<yara_x::Rules>, usize), String> {
        compile_rules_from_dir(dir.path().to_str().unwrap())
    }

    #[test]
    fn no_rules_dir_returns_none() {
        let outcome = compile_rules_from_dir("/nonexistent/path/to/rules");
        assert!(outcome.is_ok());
        assert!(outcome.unwrap().0.is_none());
    }

    #[test]
    fn clean_data_passes() {
        // The shared rule looks for "MALWARE_SIGNATURE", which is absent here.
        let rules = compile(MALWARE_RULE);

        let result = scan_with_yara(&rules, b"this is clean data");
        assert_eq!(result.verdict, LayerVerdict::Pass);
    }

    #[test]
    fn malicious_data_fails() {
        let rules = compile(MALWARE_RULE);

        let result = scan_with_yara(&rules, b"contains MALWARE_SIGNATURE inside");
        assert_eq!(result.verdict, LayerVerdict::Fail);
        assert!(result.detail.unwrap().contains("test_malware"));
    }

    #[test]
    fn path_entry_matches_buffered() {
        let rules = compile(MALWARE_RULE);

        // The path-based and in-memory entry points must agree on both a clean
        // and a matching sample.
        for sample in [&b"clean bytes"[..], &b"hit MALWARE_SIGNATURE here"[..]] {
            let buffered = scan_with_yara(&rules, sample);
            let tmp = tempfile::NamedTempFile::new().unwrap();
            std::fs::write(tmp.path(), sample).unwrap();
            let path_based = scan_with_yara_path(&rules, tmp.path());
            assert_eq!(buffered.verdict, path_based.verdict);
        }
    }

    // ── Adversarial tests (test-fuzz) ──

    #[test]
    fn empty_data_passes() {
        let rules = compile(MALWARE_RULE);

        let result = scan_with_yara(&rules, b"");
        assert_eq!(result.verdict, LayerVerdict::Pass);
    }

    #[test]
    fn multiple_rules_all_reported() {
        let rules = compile(
            r#"
            rule rule_alpha {
                strings:
                    $a = "ALPHA"
                condition:
                    $a
            }
            rule rule_beta {
                strings:
                    $b = "BETA"
                condition:
                    $b
            }
            "#,
        );

        let result = scan_with_yara(&rules, b"ALPHA and BETA are both here");
        assert_eq!(result.verdict, LayerVerdict::Fail);
        let detail = result.detail.unwrap();
        assert!(detail.contains("rule_alpha"), "Missing rule_alpha in: {}", detail);
        assert!(detail.contains("rule_beta"), "Missing rule_beta in: {}", detail);
    }

    #[test]
    fn partial_match_does_not_trigger() {
        let rules = compile(
            r#"
            rule test_exact {
                strings:
                    $sig = "EXACT_MATCH"
                condition:
                    $sig
            }
            "#,
        );

        // Partial overlap should not match
        let result = scan_with_yara(&rules, b"EXACT_MATC");
        assert_eq!(result.verdict, LayerVerdict::Pass);
    }

    #[test]
    fn large_clean_data_passes() {
        let rules = compile(
            r#"
            rule test_sig {
                strings:
                    $sig = "DANGEROUS"
                condition:
                    $sig
            }
            "#,
        );

        // 1MB of clean data
        let data = vec![b'A'; 1_000_000];
        let result = scan_with_yara(&rules, &data);
        assert_eq!(result.verdict, LayerVerdict::Pass);
    }

    #[test]
    fn signature_at_end_of_data() {
        let rules = compile(
            r#"
            rule end_sig {
                strings:
                    $sig = "TAIL"
                condition:
                    $sig
            }
            "#,
        );

        // The signature sits in the last four bytes of the buffer.
        let mut data = vec![b'X'; 10000];
        data.extend_from_slice(b"TAIL");
        let result = scan_with_yara(&rules, &data);
        assert_eq!(result.verdict, LayerVerdict::Fail);
        assert!(result.detail.unwrap().contains("end_sig"));
    }

    #[test]
    fn compile_rules_from_real_temp_dir() {
        let dir = tempfile::tempdir().unwrap();

        // Write a valid YARA rule file
        std::fs::write(
            dir.path().join("test.yar"),
            r#"
            rule hello_world {
                strings:
                    $hw = "Hello, World!"
                condition:
                    $hw
            }
            "#,
        )
        .unwrap();

        let outcome = compile_dir(&dir);
        assert!(outcome.is_ok());
        let (rules, count) = outcome.unwrap();
        assert!(rules.is_some(), "Should have compiled one rule");
        assert_eq!(count, 1, "exactly one rule file compiled");

        // Verify the rules work
        let rules = rules.unwrap();
        let scan = scan_with_yara(&rules, b"Hello, World!");
        assert_eq!(scan.verdict, LayerVerdict::Fail);
        assert!(scan.detail.unwrap().contains("hello_world"));
    }

    #[test]
    fn empty_dir_returns_none() {
        let dir = tempfile::tempdir().unwrap();
        let outcome = compile_dir(&dir);
        assert!(outcome.is_ok());
        assert!(outcome.unwrap().0.is_none());
    }

    #[test]
    fn non_yar_files_ignored() {
        let dir = tempfile::tempdir().unwrap();
        std::fs::write(dir.path().join("readme.txt"), "not a rule").unwrap();
        std::fs::write(dir.path().join("rules.json"), "{}").unwrap();

        let outcome = compile_dir(&dir);
        assert!(outcome.is_ok());
        assert!(outcome.unwrap().0.is_none());
    }

    #[test]
    fn invalid_yara_rule_is_skipped_not_fatal() {
        // Per-file fail-open: a single uncompilable rule (e.g. one using a
        // yara-x-unsupported built-in identifier from a third-party corpus)
        // must not abort the entire scanner. The file is logged and skipped.
        let dir = tempfile::tempdir().unwrap();
        std::fs::write(dir.path().join("bad.yar"), "this is not valid YARA syntax").unwrap();

        let outcome = compile_dir(&dir);
        assert!(outcome.is_ok(), "skipped-on-error, not aborted");
        // No valid rules in the dir, so the function returns Ok(None) — the
        // pipeline interprets None as "yara not configured" → Skip verdict.
        assert!(outcome.unwrap().0.is_none());
    }

    #[test]
    fn mixed_valid_and_invalid_rules_keeps_valid() {
        let dir = tempfile::tempdir().unwrap();
        std::fs::write(
            dir.path().join("good.yar"),
            r#"rule clean_test { strings: $s = "marker" condition: $s }"#,
        )
        .unwrap();
        std::fs::write(dir.path().join("bad.yar"), "not yara at all").unwrap();

        let outcome = compile_dir(&dir);
        assert!(outcome.is_ok());
        let (rules, count) = outcome.unwrap();
        assert!(rules.is_some(), "valid rules still compile when bad files are present");
        assert_eq!(count, 1, "the one valid file compiled; the bad one was skipped");
    }

    #[test]
    fn default_namespace_rule_is_unprefixed() {
        // Guards the `ns == "default"` comparison in `scan_with_yara` against
        // an `==` → `!=` mutation. The function emits "rule" for
        // default-namespace rules but "ns:rule" otherwise; under the mutant the
        // prefix logic inverts. Other tests check `detail.contains("rule_x")`,
        // which is also true for "default:rule_x", so they don't catch the
        // flip. Pin the exact format.
        let rules = compile(
            r#"
            rule plain {
                strings:
                    $a = "TARGET"
                condition:
                    $a
            }
            "#,
        );
        let result = scan_with_yara(&rules, b"TARGET in data");
        let detail = result.detail.unwrap();
        // Default-namespace rule must appear unprefixed.
        assert!(detail.contains("plain"), "missing rule name: {detail}");
        assert!(
            !detail.contains("default:"),
            "default-namespace rules must not be prefixed; got {detail}"
        );
    }
}
465