//! Layer 2: Structural analysis of PE/ELF/Mach-O binaries. //! //! Uses the `goblin` crate to inspect binary imports and sections for //! suspicious patterns commonly found in malware. use crate::storage::FileType; use super::{ErrorPolicy, LayerResult, LayerVerdict}; /// In-process deterministic layer. An `Error` here typically means the binary /// parser refused the input — fail closed so admins can inspect the file. pub const ERROR_POLICY: ErrorPolicy = ErrorPolicy::FailClosed; /// Suspicious Windows API imports commonly used in malware const SUSPICIOUS_PE_IMPORTS: &[&str] = &[ "VirtualAlloc", "VirtualAllocEx", "CreateRemoteThread", "WriteProcessMemory", "NtUnmapViewOfSection", "QueueUserAPC", "SetThreadContext", "NtCreateThreadEx", ]; /// Suspicious ELF syscall-related symbols const SUSPICIOUS_ELF_SYMBOLS: &[&str] = &[ "ptrace", "process_vm_writev", "memfd_create", ]; /// Suspicious Mach-O symbols (macOS equivalents of PE/ELF patterns). /// Note: dlopen and posix_spawn are excluded — they are ubiquitous in /// legitimate macOS apps (audio plugins, plugin hosts, runtime loaders). const SUSPICIOUS_MACHO_SYMBOLS: &[&str] = &[ "ptrace", "task_for_pid", "mach_vm_write", "mach_vm_protect", "thread_create_running", ]; /// Path-based entry. Mmaps the spooled file so goblin's parsers walk OS /// page cache rather than a heap buffer; suitable for files larger than /// `SCAN_MAX_MEMORY_BYTES`. Delegates to `analyze_binary` for the actual /// analysis. pub fn analyze_binary_path(path: &std::path::Path, file_type: FileType) -> LayerResult { if file_type != FileType::Download { return LayerResult { layer: "structural", verdict: LayerVerdict::Skip, detail: Some("Not a download file".to_string()), }; } match crate::scanning::spool::mmap_read(path) { Ok(map) => analyze_binary(&map, file_type), Err(e) => LayerResult { layer: "structural", verdict: LayerVerdict::Error, detail: Some(e), }, } } /// Analyze a binary for suspicious structural patterns. 
/// Only runs for Download file types; returns Skip for Audio/Cover.
///
/// The input is handed to `goblin::Object::parse` and dispatched on the
/// detected container format: PE, ELF and Mach-O go to their format-specific
/// analyzers. Any other successfully parsed object, and any parse failure,
/// yields `Skip` — the layer simply does not apply to that input.
pub fn analyze_binary(data: &[u8], file_type: FileType) -> LayerResult {
    if file_type != FileType::Download {
        return LayerResult {
            layer: "structural",
            verdict: LayerVerdict::Skip,
            detail: Some("Not a download file".to_string()),
        };
    }

    // Try to parse as a known binary format
    match goblin::Object::parse(data) {
        Ok(goblin::Object::PE(pe)) => analyze_pe(&pe),
        Ok(goblin::Object::Elf(elf)) => analyze_elf(&elf),
        Ok(goblin::Object::Mach(mach)) => analyze_mach(&mach),
        Ok(_) => {
            // Archive, unknown, or other format — skip structural analysis
            LayerResult {
                layer: "structural",
                verdict: LayerVerdict::Skip,
                detail: Some("Not an executable binary".to_string()),
            }
        }
        Err(_) => {
            // Not a recognized binary format — skip (not an error, just not applicable)
            LayerResult {
                layer: "structural",
                verdict: LayerVerdict::Skip,
                detail: Some("Not a recognized binary format".to_string()),
            }
        }
    }
}

/// Inspect a parsed PE image.
///
/// Gathers the import names and the section names (with trailing NUL padding
/// removed) and feeds them to `check_pe_warnings`. Returns `Pass` when no
/// warning is produced, otherwise `Fail` with the warnings joined by `"; "`.
fn analyze_pe(pe: &goblin::pe::PE) -> LayerResult {
    let import_names: Vec<&str> = pe.imports.iter().map(|i| i.name.as_ref()).collect();

    let section_names: Vec<String> = pe
        .sections
        .iter()
        .map(|s| {
            // Section names are fixed-width byte arrays padded with NULs;
            // decode lossily and drop the padding before comparing.
            String::from_utf8_lossy(&s.name)
                .trim_end_matches('\0')
                .to_string()
        })
        .collect();

    let warnings = check_pe_warnings(&import_names, &section_names);

    if warnings.is_empty() {
        LayerResult {
            layer: "structural",
            verdict: LayerVerdict::Pass,
            detail: Some("PE binary".to_string()),
        }
    } else {
        LayerResult {
            layer: "structural",
            verdict: LayerVerdict::Fail,
            detail: Some(warnings.join("; ")),
        }
    }
}

/// Check PE import names and section names for suspicious patterns.
/// Extracted for testability — the goblin parsing layer just feeds names in.
fn check_pe_warnings(import_names: &[&str], section_names: &[String]) -> Vec<String> {
    let mut warnings = Vec::new();

    // Imports are matched exactly against the deny-list, so e.g.
    // `VirtualAllocExNuma` does not trip the `VirtualAllocEx` entry.
    for name in import_names {
        if SUSPICIOUS_PE_IMPORTS.contains(name) {
            warnings.push(format!("Suspicious import: {}", name));
        }
    }

    // Section names left behind by the UPX packer and by VMProtect.
    for name in section_names {
        if matches!(name.as_str(), "UPX0" | "UPX1" | "UPX2") {
            warnings.push(format!("Packed section detected: {}", name));
        }
        if matches!(name.as_str(), ".vmp0" | ".vmp1") {
            warnings.push(format!("VMProtect section detected: {}", name));
        }
    }

    warnings
}

/// Inspect a parsed ELF image.
///
/// Resolves every dynamic symbol's name through the dynamic string table
/// (symbols whose name cannot be resolved are ignored) and feeds the names to
/// `check_elf_warnings`. Returns `Pass` when no warning is produced,
/// otherwise `Fail` with the warnings joined by `"; "`.
fn analyze_elf(elf: &goblin::elf::Elf) -> LayerResult {
    let symbol_names: Vec<&str> = elf
        .dynsyms
        .iter()
        .filter_map(|sym| elf.dynstrtab.get_at(sym.st_name))
        .collect();

    let warnings = check_elf_warnings(&symbol_names);

    if warnings.is_empty() {
        LayerResult {
            layer: "structural",
            verdict: LayerVerdict::Pass,
            detail: Some("ELF binary".to_string()),
        }
    } else {
        LayerResult {
            layer: "structural",
            verdict: LayerVerdict::Fail,
            detail: Some(warnings.join("; ")),
        }
    }
}

/// Check ELF dynamic symbol names for suspicious patterns.
/// Extracted for testability.
fn check_elf_warnings(symbol_names: &[&str]) -> Vec { let mut warnings = Vec::new(); for name in symbol_names { if SUSPICIOUS_ELF_SYMBOLS.contains(name) { warnings.push(format!("Suspicious symbol: {}", name)); } } warnings } fn analyze_mach(mach: &goblin::mach::Mach) -> LayerResult { let symbols: Vec = match mach { goblin::mach::Mach::Binary(macho) => collect_macho_symbols(macho), goblin::mach::Mach::Fat(fat) => { // Check first Mach-O arch in a fat binary fat.into_iter() .filter_map(|res| res.ok()) .find_map(|arch| match arch { goblin::mach::SingleArch::MachO(macho) => Some(collect_macho_symbols(&macho)), _ => None, }) .unwrap_or_default() } }; let symbol_refs: Vec<&str> = symbols.iter().map(|s| s.as_str()).collect(); let warnings = check_mach_warnings(&symbol_refs); if warnings.is_empty() { LayerResult { layer: "structural", verdict: LayerVerdict::Pass, detail: Some("Mach-O binary".to_string()), } } else { LayerResult { layer: "structural", verdict: LayerVerdict::Fail, detail: Some(warnings.join("; ")), } } } fn collect_macho_symbols(macho: &goblin::mach::MachO) -> Vec { macho .imports() .unwrap_or_default() .iter() .map(|imp| imp.name.to_string()) .collect() } /// Check Mach-O import names for suspicious patterns. /// Extracted for testability. 
fn check_mach_warnings(symbol_names: &[&str]) -> Vec { let mut warnings = Vec::new(); for name in symbol_names { // Mach-O imports are prefixed with underscore let stripped = name.strip_prefix('_').unwrap_or(name); if SUSPICIOUS_MACHO_SYMBOLS.contains(&stripped) { warnings.push(format!("Suspicious import: {}", stripped)); } } warnings } #[cfg(test)] mod tests { use super::*; // -- Skip behavior -- #[test] fn audio_file_skipped() { let result = analyze_binary(b"not a binary", FileType::Audio); assert_eq!(result.verdict, LayerVerdict::Skip); } #[test] fn cover_file_skipped() { let result = analyze_binary(b"not a binary", FileType::Cover); assert_eq!(result.verdict, LayerVerdict::Skip); } #[test] fn random_data_skipped() { let result = analyze_binary(b"this is just random text data", FileType::Download); assert_eq!(result.verdict, LayerVerdict::Skip); } // -- PE import detection -- #[test] fn pe_clean_imports_pass() { let imports = &["GetLastError", "CreateFileW", "ReadFile"]; let sections = &[]; let warnings = check_pe_warnings(imports, sections); assert!(warnings.is_empty()); } #[test] fn pe_virtual_alloc_detected() { let imports = &["GetLastError", "VirtualAlloc", "ReadFile"]; let warnings = check_pe_warnings(imports, &[]); assert_eq!(warnings.len(), 1); assert!(warnings[0].contains("VirtualAlloc")); } #[test] fn pe_virtual_alloc_ex_detected() { let imports = &["VirtualAllocEx"]; let warnings = check_pe_warnings(imports, &[]); assert_eq!(warnings.len(), 1); assert!(warnings[0].contains("VirtualAllocEx")); } #[test] fn pe_create_remote_thread_detected() { let imports = &["CreateRemoteThread"]; let warnings = check_pe_warnings(imports, &[]); assert_eq!(warnings.len(), 1); assert!(warnings[0].contains("CreateRemoteThread")); } #[test] fn pe_write_process_memory_detected() { let imports = &["WriteProcessMemory"]; let warnings = check_pe_warnings(imports, &[]); assert_eq!(warnings.len(), 1); assert!(warnings[0].contains("WriteProcessMemory")); } #[test] fn 
pe_multiple_suspicious_imports() { let imports = &[ "VirtualAlloc", "CreateRemoteThread", "WriteProcessMemory", "GetLastError", ]; let warnings = check_pe_warnings(imports, &[]); assert_eq!(warnings.len(), 3); } #[test] fn pe_nt_apis_detected() { let imports = &["NtUnmapViewOfSection", "NtCreateThreadEx"]; let warnings = check_pe_warnings(imports, &[]); assert_eq!(warnings.len(), 2); } #[test] fn pe_queue_user_apc_detected() { let imports = &["QueueUserAPC"]; let warnings = check_pe_warnings(imports, &[]); assert_eq!(warnings.len(), 1); } #[test] fn pe_set_thread_context_detected() { let imports = &["SetThreadContext"]; let warnings = check_pe_warnings(imports, &[]); assert_eq!(warnings.len(), 1); } // -- PE section detection -- #[test] fn pe_normal_sections_pass() { let sections = vec![".text".to_string(), ".data".to_string(), ".rsrc".to_string()]; let warnings = check_pe_warnings(&[], §ions); assert!(warnings.is_empty()); } #[test] fn pe_upx_sections_detected() { let sections = vec!["UPX0".to_string(), "UPX1".to_string()]; let warnings = check_pe_warnings(&[], §ions); assert_eq!(warnings.len(), 2); assert!(warnings[0].contains("Packed section")); assert!(warnings[1].contains("Packed section")); } #[test] fn pe_upx2_section_detected() { let sections = vec!["UPX2".to_string()]; let warnings = check_pe_warnings(&[], §ions); assert_eq!(warnings.len(), 1); } #[test] fn pe_vmprotect_sections_detected() { let sections = vec![".vmp0".to_string(), ".vmp1".to_string()]; let warnings = check_pe_warnings(&[], §ions); assert_eq!(warnings.len(), 2); assert!(warnings[0].contains("VMProtect")); } #[test] fn pe_mixed_suspicious_imports_and_sections() { let imports = &["VirtualAlloc", "CreateRemoteThread"]; let sections = vec!["UPX0".to_string(), ".text".to_string()]; let warnings = check_pe_warnings(imports, §ions); assert_eq!(warnings.len(), 3); // 2 imports + 1 section } // -- ELF symbol detection -- #[test] fn elf_clean_symbols_pass() { let symbols = &["printf", "malloc", 
"free", "exit"]; let warnings = check_elf_warnings(symbols); assert!(warnings.is_empty()); } #[test] fn elf_ptrace_detected() { let symbols = &["printf", "ptrace", "exit"]; let warnings = check_elf_warnings(symbols); assert_eq!(warnings.len(), 1); assert!(warnings[0].contains("ptrace")); } #[test] fn elf_process_vm_writev_detected() { let symbols = &["process_vm_writev"]; let warnings = check_elf_warnings(symbols); assert_eq!(warnings.len(), 1); assert!(warnings[0].contains("process_vm_writev")); } #[test] fn elf_memfd_create_detected() { let symbols = &["memfd_create"]; let warnings = check_elf_warnings(symbols); assert_eq!(warnings.len(), 1); assert!(warnings[0].contains("memfd_create")); } #[test] fn elf_multiple_suspicious_symbols() { let symbols = &["ptrace", "memfd_create", "process_vm_writev", "printf"]; let warnings = check_elf_warnings(symbols); assert_eq!(warnings.len(), 3); } #[test] fn elf_empty_symbols_pass() { let symbols: &[&str] = &[]; let warnings = check_elf_warnings(symbols); assert!(warnings.is_empty()); } // -- Exact matching (no false positives from substrings) -- #[test] fn pe_import_no_false_positive_on_substring() { // VirtualAllocExNuma should NOT match (not in suspicious list) let imports = &["VirtualAllocExNuma"]; let warnings = check_pe_warnings(imports, &[]); assert!(warnings.is_empty()); } #[test] fn pe_import_exact_match() { let imports = &["VirtualAlloc"]; let warnings = check_pe_warnings(imports, &[]); assert_eq!(warnings.len(), 1); } #[test] fn elf_symbol_no_false_positive_on_substring() { // "ptrace_scope" should NOT match (only "ptrace" is suspicious) let symbols = &["ptrace_scope"]; let warnings = check_elf_warnings(symbols); assert!(warnings.is_empty()); } #[test] fn elf_symbol_exact_match() { let symbols = &["ptrace"]; let warnings = check_elf_warnings(symbols); assert_eq!(warnings.len(), 1); } // -- Mach-O symbol detection -- #[test] fn mach_clean_symbols_pass() { let symbols = &["_printf", "_malloc", "_free", "_exit"]; let 
warnings = check_mach_warnings(symbols); assert!(warnings.is_empty()); } #[test] fn mach_task_for_pid_detected() { let symbols = &["_printf", "_task_for_pid", "_exit"]; let warnings = check_mach_warnings(symbols); assert_eq!(warnings.len(), 1); assert!(warnings[0].contains("task_for_pid")); } #[test] fn mach_vm_write_detected() { let symbols = &["_mach_vm_write"]; let warnings = check_mach_warnings(symbols); assert_eq!(warnings.len(), 1); assert!(warnings[0].contains("mach_vm_write")); } #[test] fn mach_multiple_suspicious_symbols() { let symbols = &["_ptrace", "_task_for_pid", "_mach_vm_write", "_printf"]; let warnings = check_mach_warnings(symbols); assert_eq!(warnings.len(), 3); } #[test] fn mach_no_underscore_prefix_still_matches() { let symbols = &["ptrace"]; let warnings = check_mach_warnings(symbols); assert_eq!(warnings.len(), 1); } #[test] fn mach_no_false_positive_on_substring() { let symbols = &["_task_for_pid_extra"]; let warnings = check_mach_warnings(symbols); assert!(warnings.is_empty()); } #[test] fn path_entry_matches_buffered_on_plain_text() { let data = b"this is not a binary"; let buffered = analyze_binary(data, FileType::Download); let tmp = tempfile::NamedTempFile::new().unwrap(); std::fs::write(tmp.path(), data).unwrap(); let path_based = analyze_binary_path(tmp.path(), FileType::Download); assert_eq!(buffered.verdict, path_based.verdict); } }