Skip to main content

max / makenotwork

16.0 KB · 524 lines History Blame Raw
1 //! Layer 2: Structural analysis of PE/ELF/Mach-O binaries.
2 //!
3 //! Uses the `goblin` crate to inspect binary imports and sections for
4 //! suspicious patterns commonly found in malware.
5
6 use crate::storage::FileType;
7
8 use super::{ErrorPolicy, LayerResult, LayerVerdict};
9
/// In-process deterministic layer. An `Error` here typically means the binary
/// parser refused the input — fail closed so admins can inspect the file.
///
/// NOTE(review): within this file the only path that produces
/// `LayerVerdict::Error` is a failed `mmap_read` in `analyze_binary_path`;
/// a `goblin` parse failure in `analyze_binary` is reported as `Skip`, not
/// `Error`. Confirm that this matches the intent described above.
pub const ERROR_POLICY: ErrorPolicy = ErrorPolicy::FailClosed;
13
/// Suspicious Windows API imports commonly used in malware
///
/// Entries are compared against PE import names with an exact,
/// case-sensitive match (see `check_pe_warnings`), so e.g.
/// `VirtualAllocExNuma` is not flagged by the `VirtualAllocEx` entry.
const SUSPICIOUS_PE_IMPORTS: &[&str] = &[
    // Memory allocation (local and in another process).
    "VirtualAlloc",
    "VirtualAllocEx",
    // Cross-process thread creation and memory writes.
    "CreateRemoteThread",
    "WriteProcessMemory",
    // Native / thread-manipulation APIs.
    "NtUnmapViewOfSection",
    "QueueUserAPC",
    "SetThreadContext",
    "NtCreateThreadEx",
];
25
/// Suspicious ELF syscall-related symbols
///
/// Entries are compared against ELF dynamic symbol names with an exact,
/// case-sensitive match (see `check_elf_warnings`).
const SUSPICIOUS_ELF_SYMBOLS: &[&str] = &[
    "ptrace",
    "process_vm_writev",
    "memfd_create",
];
32
/// Suspicious Mach-O symbols (macOS equivalents of PE/ELF patterns).
/// Note: dlopen and posix_spawn are excluded — they are ubiquitous in
/// legitimate macOS apps (audio plugins, plugin hosts, runtime loaders).
///
/// Names are listed WITHOUT the leading underscore; `check_mach_warnings`
/// strips one leading `_` from each import before the exact comparison.
const SUSPICIOUS_MACHO_SYMBOLS: &[&str] = &[
    "ptrace",
    "task_for_pid",
    "mach_vm_write",
    "mach_vm_protect",
    "thread_create_running",
];
43
44 /// Path-based entry. Mmaps the spooled file so goblin's parsers walk OS
45 /// page cache rather than a heap buffer; suitable for files larger than
46 /// `SCAN_MAX_MEMORY_BYTES`. Delegates to `analyze_binary` for the actual
47 /// analysis.
48 pub fn analyze_binary_path(path: &std::path::Path, file_type: FileType) -> LayerResult {
49 if file_type != FileType::Download {
50 return LayerResult {
51 layer: "structural",
52 verdict: LayerVerdict::Skip,
53 detail: Some("Not a download file".to_string()),
54 };
55 }
56 match crate::scanning::spool::mmap_read(path) {
57 Ok(map) => analyze_binary(&map, file_type),
58 Err(e) => LayerResult {
59 layer: "structural",
60 verdict: LayerVerdict::Error,
61 detail: Some(e),
62 },
63 }
64 }
65
66 /// Analyze a binary for suspicious structural patterns.
67 /// Only runs for Download file types; returns Skip for Audio/Cover.
68 pub fn analyze_binary(data: &[u8], file_type: FileType) -> LayerResult {
69 if file_type != FileType::Download {
70 return LayerResult {
71 layer: "structural",
72 verdict: LayerVerdict::Skip,
73 detail: Some("Not a download file".to_string()),
74 };
75 }
76
77 // Try to parse as a known binary format
78 match goblin::Object::parse(data) {
79 Ok(goblin::Object::PE(pe)) => analyze_pe(&pe),
80 Ok(goblin::Object::Elf(elf)) => analyze_elf(&elf),
81 Ok(goblin::Object::Mach(mach)) => analyze_mach(&mach),
82 Ok(_) => {
83 // Archive, unknown, or other format — skip structural analysis
84 LayerResult {
85 layer: "structural",
86 verdict: LayerVerdict::Skip,
87 detail: Some("Not an executable binary".to_string()),
88 }
89 }
90 Err(_) => {
91 // Not a recognized binary format — skip (not an error, just not applicable)
92 LayerResult {
93 layer: "structural",
94 verdict: LayerVerdict::Skip,
95 detail: Some("Not a recognized binary format".to_string()),
96 }
97 }
98 }
99 }
100
101 fn analyze_pe(pe: &goblin::pe::PE) -> LayerResult {
102 let import_names: Vec<&str> = pe.imports.iter().map(|i| i.name.as_ref()).collect();
103 let section_names: Vec<String> = pe
104 .sections
105 .iter()
106 .map(|s| {
107 String::from_utf8_lossy(&s.name)
108 .trim_end_matches('\0')
109 .to_string()
110 })
111 .collect();
112
113 let warnings = check_pe_warnings(&import_names, &section_names);
114
115 if warnings.is_empty() {
116 LayerResult {
117 layer: "structural",
118 verdict: LayerVerdict::Pass,
119 detail: Some("PE binary".to_string()),
120 }
121 } else {
122 LayerResult {
123 layer: "structural",
124 verdict: LayerVerdict::Fail,
125 detail: Some(warnings.join("; ")),
126 }
127 }
128 }
129
130 /// Check PE import names and section names for suspicious patterns.
131 /// Extracted for testability — the goblin parsing layer just feeds names in.
132 fn check_pe_warnings(import_names: &[&str], section_names: &[String]) -> Vec<String> {
133 let mut warnings = Vec::new();
134
135 for name in import_names {
136 if SUSPICIOUS_PE_IMPORTS.contains(name) {
137 warnings.push(format!("Suspicious import: {}", name));
138 }
139 }
140
141 for name in section_names {
142 if name == "UPX0" || name == "UPX1" || name == "UPX2" {
143 warnings.push(format!("Packed section detected: {}", name));
144 }
145 if name == ".vmp0" || name == ".vmp1" {
146 warnings.push(format!("VMProtect section detected: {}", name));
147 }
148 }
149
150 warnings
151 }
152
153 fn analyze_elf(elf: &goblin::elf::Elf) -> LayerResult {
154 let symbol_names: Vec<&str> = elf
155 .dynsyms
156 .iter()
157 .filter_map(|sym| elf.dynstrtab.get_at(sym.st_name))
158 .collect();
159
160 let warnings = check_elf_warnings(&symbol_names);
161
162 if warnings.is_empty() {
163 LayerResult {
164 layer: "structural",
165 verdict: LayerVerdict::Pass,
166 detail: Some("ELF binary".to_string()),
167 }
168 } else {
169 LayerResult {
170 layer: "structural",
171 verdict: LayerVerdict::Fail,
172 detail: Some(warnings.join("; ")),
173 }
174 }
175 }
176
177 /// Check ELF dynamic symbol names for suspicious patterns.
178 /// Extracted for testability.
179 fn check_elf_warnings(symbol_names: &[&str]) -> Vec<String> {
180 let mut warnings = Vec::new();
181
182 for name in symbol_names {
183 if SUSPICIOUS_ELF_SYMBOLS.contains(name) {
184 warnings.push(format!("Suspicious symbol: {}", name));
185 }
186 }
187
188 warnings
189 }
190
191 fn analyze_mach(mach: &goblin::mach::Mach) -> LayerResult {
192 let symbols: Vec<String> = match mach {
193 goblin::mach::Mach::Binary(macho) => collect_macho_symbols(macho),
194 goblin::mach::Mach::Fat(fat) => {
195 // Check first Mach-O arch in a fat binary
196 fat.into_iter()
197 .filter_map(|res| res.ok())
198 .find_map(|arch| match arch {
199 goblin::mach::SingleArch::MachO(macho) => Some(collect_macho_symbols(&macho)),
200 _ => None,
201 })
202 .unwrap_or_default()
203 }
204 };
205
206 let symbol_refs: Vec<&str> = symbols.iter().map(|s| s.as_str()).collect();
207 let warnings = check_mach_warnings(&symbol_refs);
208
209 if warnings.is_empty() {
210 LayerResult {
211 layer: "structural",
212 verdict: LayerVerdict::Pass,
213 detail: Some("Mach-O binary".to_string()),
214 }
215 } else {
216 LayerResult {
217 layer: "structural",
218 verdict: LayerVerdict::Fail,
219 detail: Some(warnings.join("; ")),
220 }
221 }
222 }
223
224 fn collect_macho_symbols(macho: &goblin::mach::MachO) -> Vec<String> {
225 macho
226 .imports()
227 .unwrap_or_default()
228 .iter()
229 .map(|imp| imp.name.to_string())
230 .collect()
231 }
232
233 /// Check Mach-O import names for suspicious patterns.
234 /// Extracted for testability.
235 fn check_mach_warnings(symbol_names: &[&str]) -> Vec<String> {
236 let mut warnings = Vec::new();
237
238 for name in symbol_names {
239 // Mach-O imports are prefixed with underscore
240 let stripped = name.strip_prefix('_').unwrap_or(name);
241 if SUSPICIOUS_MACHO_SYMBOLS.contains(&stripped) {
242 warnings.push(format!("Suspicious import: {}", stripped));
243 }
244 }
245
246 warnings
247 }
248
#[cfg(test)]
mod tests {
    use super::*;

    // -- Shared helpers --

    /// Runs the PE checks with borrowed section names, converting them to the
    /// owned `String`s that `check_pe_warnings` takes.
    fn pe_warnings(imports: &[&str], sections: &[&str]) -> Vec<String> {
        let owned: Vec<String> = sections.iter().map(|s| (*s).to_string()).collect();
        check_pe_warnings(imports, &owned)
    }

    /// Asserts that exactly one warning was produced and that it mentions
    /// `needle`.
    fn assert_single_warning(warnings: &[String], needle: &str) {
        assert_eq!(warnings.len(), 1);
        assert!(warnings[0].contains(needle));
    }

    /// Asserts that a layer result carries the `Skip` verdict.
    fn assert_skipped(result: LayerResult) {
        assert_eq!(result.verdict, LayerVerdict::Skip);
    }

    // -- Skip behavior --

    #[test]
    fn audio_file_skipped() {
        assert_skipped(analyze_binary(b"not a binary", FileType::Audio));
    }

    #[test]
    fn cover_file_skipped() {
        assert_skipped(analyze_binary(b"not a binary", FileType::Cover));
    }

    #[test]
    fn random_data_skipped() {
        assert_skipped(analyze_binary(
            b"this is just random text data",
            FileType::Download,
        ));
    }

    // -- PE import detection --

    #[test]
    fn pe_clean_imports_pass() {
        let found = pe_warnings(&["GetLastError", "CreateFileW", "ReadFile"], &[]);
        assert!(found.is_empty());
    }

    #[test]
    fn pe_virtual_alloc_detected() {
        let found = pe_warnings(&["GetLastError", "VirtualAlloc", "ReadFile"], &[]);
        assert_single_warning(&found, "VirtualAlloc");
    }

    #[test]
    fn pe_virtual_alloc_ex_detected() {
        let found = pe_warnings(&["VirtualAllocEx"], &[]);
        assert_single_warning(&found, "VirtualAllocEx");
    }

    #[test]
    fn pe_create_remote_thread_detected() {
        let found = pe_warnings(&["CreateRemoteThread"], &[]);
        assert_single_warning(&found, "CreateRemoteThread");
    }

    #[test]
    fn pe_write_process_memory_detected() {
        let found = pe_warnings(&["WriteProcessMemory"], &[]);
        assert_single_warning(&found, "WriteProcessMemory");
    }

    #[test]
    fn pe_multiple_suspicious_imports() {
        let found = pe_warnings(
            &[
                "VirtualAlloc",
                "CreateRemoteThread",
                "WriteProcessMemory",
                "GetLastError",
            ],
            &[],
        );
        assert_eq!(found.len(), 3);
    }

    #[test]
    fn pe_nt_apis_detected() {
        let found = pe_warnings(&["NtUnmapViewOfSection", "NtCreateThreadEx"], &[]);
        assert_eq!(found.len(), 2);
    }

    #[test]
    fn pe_queue_user_apc_detected() {
        let found = pe_warnings(&["QueueUserAPC"], &[]);
        assert_eq!(found.len(), 1);
    }

    #[test]
    fn pe_set_thread_context_detected() {
        let found = pe_warnings(&["SetThreadContext"], &[]);
        assert_eq!(found.len(), 1);
    }

    // -- PE section detection --

    #[test]
    fn pe_normal_sections_pass() {
        let found = pe_warnings(&[], &[".text", ".data", ".rsrc"]);
        assert!(found.is_empty());
    }

    #[test]
    fn pe_upx_sections_detected() {
        let found = pe_warnings(&[], &["UPX0", "UPX1"]);
        assert_eq!(found.len(), 2);
        assert!(found[0].contains("Packed section"));
        assert!(found[1].contains("Packed section"));
    }

    #[test]
    fn pe_upx2_section_detected() {
        let found = pe_warnings(&[], &["UPX2"]);
        assert_eq!(found.len(), 1);
    }

    #[test]
    fn pe_vmprotect_sections_detected() {
        let found = pe_warnings(&[], &[".vmp0", ".vmp1"]);
        assert_eq!(found.len(), 2);
        assert!(found[0].contains("VMProtect"));
    }

    #[test]
    fn pe_mixed_suspicious_imports_and_sections() {
        let found = pe_warnings(&["VirtualAlloc", "CreateRemoteThread"], &["UPX0", ".text"]);
        // 2 imports + 1 section
        assert_eq!(found.len(), 3);
    }

    // -- ELF symbol detection --

    #[test]
    fn elf_clean_symbols_pass() {
        let found = check_elf_warnings(&["printf", "malloc", "free", "exit"]);
        assert!(found.is_empty());
    }

    #[test]
    fn elf_ptrace_detected() {
        let found = check_elf_warnings(&["printf", "ptrace", "exit"]);
        assert_single_warning(&found, "ptrace");
    }

    #[test]
    fn elf_process_vm_writev_detected() {
        let found = check_elf_warnings(&["process_vm_writev"]);
        assert_single_warning(&found, "process_vm_writev");
    }

    #[test]
    fn elf_memfd_create_detected() {
        let found = check_elf_warnings(&["memfd_create"]);
        assert_single_warning(&found, "memfd_create");
    }

    #[test]
    fn elf_multiple_suspicious_symbols() {
        let found =
            check_elf_warnings(&["ptrace", "memfd_create", "process_vm_writev", "printf"]);
        assert_eq!(found.len(), 3);
    }

    #[test]
    fn elf_empty_symbols_pass() {
        let none: &[&str] = &[];
        assert!(check_elf_warnings(none).is_empty());
    }

    // -- Exact matching (no false positives from substrings) --

    #[test]
    fn pe_import_no_false_positive_on_substring() {
        // VirtualAllocExNuma should NOT match (not in suspicious list)
        let found = pe_warnings(&["VirtualAllocExNuma"], &[]);
        assert!(found.is_empty());
    }

    #[test]
    fn pe_import_exact_match() {
        let found = pe_warnings(&["VirtualAlloc"], &[]);
        assert_eq!(found.len(), 1);
    }

    #[test]
    fn elf_symbol_no_false_positive_on_substring() {
        // "ptrace_scope" should NOT match (only "ptrace" is suspicious)
        let found = check_elf_warnings(&["ptrace_scope"]);
        assert!(found.is_empty());
    }

    #[test]
    fn elf_symbol_exact_match() {
        let found = check_elf_warnings(&["ptrace"]);
        assert_eq!(found.len(), 1);
    }

    // -- Mach-O symbol detection --

    #[test]
    fn mach_clean_symbols_pass() {
        let found = check_mach_warnings(&["_printf", "_malloc", "_free", "_exit"]);
        assert!(found.is_empty());
    }

    #[test]
    fn mach_task_for_pid_detected() {
        let found = check_mach_warnings(&["_printf", "_task_for_pid", "_exit"]);
        assert_single_warning(&found, "task_for_pid");
    }

    #[test]
    fn mach_vm_write_detected() {
        let found = check_mach_warnings(&["_mach_vm_write"]);
        assert_single_warning(&found, "mach_vm_write");
    }

    #[test]
    fn mach_multiple_suspicious_symbols() {
        let found =
            check_mach_warnings(&["_ptrace", "_task_for_pid", "_mach_vm_write", "_printf"]);
        assert_eq!(found.len(), 3);
    }

    #[test]
    fn mach_no_underscore_prefix_still_matches() {
        let found = check_mach_warnings(&["ptrace"]);
        assert_eq!(found.len(), 1);
    }

    #[test]
    fn mach_no_false_positive_on_substring() {
        let found = check_mach_warnings(&["_task_for_pid_extra"]);
        assert!(found.is_empty());
    }

    // -- Path-based entry --

    #[test]
    fn path_entry_matches_buffered_on_plain_text() {
        let data = b"this is not a binary";

        // Spool the same bytes to disk so both entry points see identical input.
        let tmp = tempfile::NamedTempFile::new().unwrap();
        std::fs::write(tmp.path(), data).unwrap();

        let buffered = analyze_binary(data, FileType::Download);
        let path_based = analyze_binary_path(tmp.path(), FileType::Download);
        assert_eq!(buffered.verdict, path_based.verdict);
    }
}
524