//! Email alerting via Postmark API. //! //! Sends alerts on health status transitions and peer disappearance/recovery. //! If no `postmark_token` is configured, alerts are logged to stdout instead. use sqlx::SqlitePool; use tracing::{info, instrument, warn}; use crate::config::AlertConfig; use crate::db; use crate::types::AlertCategory; /// WAM ticket priority for a transition into a non-operational health status. fn health_status_priority(to_status: &str) -> &'static str { match to_status { "error" | "unreachable" => "critical", "degraded" => "high", _ => "medium", } } /// WAM ticket priority for a TLS certificate that expires in `days`. fn tls_expiry_priority(days: i64) -> &'static str { if days <= 3 { "critical" } else if days <= 7 { "high" } else { "medium" } } /// WAM ticket priority for a domain registration that expires in `days`. fn whois_expiry_priority(days: i64) -> &'static str { if days <= 7 { "critical" } else if days <= 14 { "high" } else { "medium" } } /// WAM ticket priority for a stale/missing/error backup status. fn backup_status_priority(status: &str) -> &'static str { if status == "missing" { "critical" } else { "high" } } /// Human-readable detail text for a backup status alert. 
fn backup_status_detail(status: &str, age_hours: Option) -> String { match (status, age_hours) { ("stale", Some(hours)) => format!("last backup is {hours}h old"), ("missing", _) => "no backup files found".to_string(), ("error", _) => "backup check failed".to_string(), _ => format!("status: {status}"), } } #[derive(Clone)] pub struct Alerter { config: AlertConfig, client: reqwest::Client, pool: SqlitePool, instance_name: String, wam_url: Option, } impl Alerter { pub fn new(config: AlertConfig, pool: SqlitePool, instance_name: String) -> Self { let client = reqwest::Client::builder() .timeout(std::time::Duration::from_secs(10)) .build() .unwrap_or_default(); let wam_url = config.wam_url.clone(); Self { config, client, pool, instance_name, wam_url } } #[instrument(skip_all)] pub async fn send_health_alert( &self, target: &str, label: &str, from_status: &str, to_status: &str, error: Option<&str>, ) { let alert_key = format!("health:{target}"); if self.is_within_cooldown(&alert_key).await { info!("alert cooldown active for {alert_key}, skipping"); return; } let subject = format!("[PoM] {target}: {from_status} -> {to_status}"); let mut body = format!( "Target: {label} ({target})\n\ Status: {from_status} -> {to_status}\n\ Instance: {}\n\ Time: {}\n", self.instance_name, chrono::Utc::now().to_rfc3339(), ); if let Some(err) = error { body.push_str(&format!("Error: {err}\n")); } body.push_str("\n- PoM"); let priority = health_status_priority(to_status); self.wam_ticket(&subject, &body, priority, "pom-health", Some(target)).await; self.record_alert(&alert_key, AlertCategory::Health, Some(from_status), Some(to_status), error).await; } #[instrument(skip_all)] pub async fn send_health_recovery( &self, target: &str, label: &str, from_status: &str, ) { let alert_key = format!("health:{target}"); // No cooldown on recovery — always send let subject = format!("[PoM] {target}: recovered"); let body = format!( "Target: {label} ({target})\n\ Status: {from_status} -> operational\n\ 
Instance: {}\n\ Time: {}\n\n\ - PoM", self.instance_name, chrono::Utc::now().to_rfc3339(), ); self.send_email(&subject, &body).await; self.record_alert(&alert_key, AlertCategory::Recovery, Some(from_status), Some("operational"), None).await; } #[instrument(skip_all)] pub async fn send_tls_expiry_alert( &self, target: &str, host: &str, days_remaining: i64, not_after: &str, ) { let alert_key = format!("tls:{target}"); if self.is_within_cooldown(&alert_key).await { info!("alert cooldown active for {alert_key}, skipping"); return; } let subject = format!("[PoM] {target}: TLS cert expires in {days_remaining} days"); let body = format!( "Target: {target}\n\ Host: {host}\n\ Days remaining: {days_remaining}\n\ Expires: {not_after}\n\ Instance: {}\n\ Time: {}\n\n\ - PoM", self.instance_name, chrono::Utc::now().to_rfc3339(), ); let priority = tls_expiry_priority(days_remaining); self.wam_ticket(&subject, &body, priority, "pom-tls", Some(&format!("{target}:{host}"))).await; self.record_alert(&alert_key, AlertCategory::TlsExpiry, None, None, None).await; } #[instrument(skip_all)] pub async fn send_tls_error_alert( &self, target: &str, host: &str, error: &str, ) { let alert_key = format!("tls:{target}"); if self.is_within_cooldown(&alert_key).await { info!("alert cooldown active for {alert_key}, skipping"); return; } let subject = format!("[PoM] {target}: TLS check failed"); let body = format!( "Target: {target}\n\ Host: {host}\n\ Error: {error}\n\ Instance: {}\n\ Time: {}\n\n\ - PoM", self.instance_name, chrono::Utc::now().to_rfc3339(), ); self.wam_ticket(&subject, &body, "high", "pom-tls", Some(&format!("{target}:{host}"))).await; self.record_alert(&alert_key, AlertCategory::TlsError, None, None, Some(error)).await; } #[instrument(skip_all)] pub async fn send_tls_recovery( &self, target: &str, label: &str, days_remaining: i64, ) { let alert_key = format!("tls:{target}"); // No cooldown on recovery — always send let subject = format!("[PoM] {target}: TLS cert renewed"); let 
body = format!( "Target: {label} ({target})\n\ Days remaining: {days_remaining}\n\ Instance: {}\n\ Time: {}\n\n\ - PoM", self.instance_name, chrono::Utc::now().to_rfc3339(), ); self.send_email(&subject, &body).await; self.record_alert(&alert_key, AlertCategory::TlsRecovery, None, None, None).await; } #[instrument(skip_all)] pub async fn send_peer_missing( &self, peer_name: &str, address: &str, consecutive_failures: u32, ) { let alert_key = format!("peer:{peer_name}"); if self.is_within_cooldown(&alert_key).await { info!("alert cooldown active for {alert_key}, skipping"); return; } let subject = format!("[PoM] peer {peer_name}: missing"); let body = format!( "Peer: {peer_name}\n\ Address: {address}\n\ Consecutive failures: {consecutive_failures}\n\ Instance: {}\n\ Time: {}\n\n\ - PoM", self.instance_name, chrono::Utc::now().to_rfc3339(), ); self.wam_ticket(&subject, &body, "high", "pom-peer", Some(peer_name)).await; self.record_alert(&alert_key, AlertCategory::PeerMissing, None, None, None).await; } #[instrument(skip_all)] pub async fn send_peer_recovery( &self, peer_name: &str, address: &str, ) { let subject = format!("[PoM] peer {peer_name}: recovered"); let body = format!( "Peer: {peer_name}\n\ Address: {address}\n\ Instance: {}\n\ Time: {}\n\n\ - PoM", self.instance_name, chrono::Utc::now().to_rfc3339(), ); let alert_key = format!("peer:{peer_name}"); self.send_email(&subject, &body).await; self.record_alert(&alert_key, AlertCategory::PeerRecovery, None, None, None).await; } #[instrument(skip_all)] pub async fn send_route_failure_alert( &self, target: &str, label: &str, failed_paths: &[String], ) { let alert_key = format!("route:{target}"); if self.is_within_cooldown(&alert_key).await { info!("alert cooldown active for {alert_key}, skipping"); return; } let n = failed_paths.len(); let subject = format!("[PoM] {label}: {n} route(s) failing"); let body = format!( "Target: {label} ({target})\n\ Failed routes:\n{}\n\ Instance: {}\n\ Time: {}\n\n\ - PoM", 
failed_paths.iter().map(|p| format!(" - {p}")).collect::>().join("\n"), self.instance_name, chrono::Utc::now().to_rfc3339(), ); self.wam_ticket(&subject, &body, "high", "pom-routes", Some(target)).await; self.record_alert(&alert_key, AlertCategory::RouteFailure, None, None, None).await; } #[instrument(skip_all)] pub async fn send_route_recovery_alert( &self, target: &str, label: &str, recovered_paths: &[String], ) { // No cooldown on recovery — always send let alert_key = format!("route:{target}"); let subject = format!("[PoM] {label}: routes recovered"); let body = format!( "Target: {label} ({target})\n\ Recovered routes:\n{}\n\ Instance: {}\n\ Time: {}\n\n\ - PoM", recovered_paths.iter().map(|p| format!(" - {p}")).collect::>().join("\n"), self.instance_name, chrono::Utc::now().to_rfc3339(), ); self.send_email(&subject, &body).await; self.record_alert(&alert_key, AlertCategory::RouteRecovery, None, None, None).await; } #[instrument(skip_all)] pub async fn send_dns_mismatch_alert( &self, target: &str, label: &str, mismatches: &[crate::types::DnsCheckResult], ) { let alert_key = format!("dns:{target}"); if self.is_within_cooldown(&alert_key).await { info!("alert cooldown active for {alert_key}, skipping"); return; } let n = mismatches.len(); let subject = format!("[PoM] {label}: {n} DNS record(s) mismatched"); let details: Vec = mismatches .iter() .map(|m| { if let Some(ref err) = m.error { format!(" - {} {}: {err}", m.name, m.record_type) } else { format!( " - {} {}: expected {:?}, got {:?}", m.name, m.record_type, m.expected, m.actual ) } }) .collect(); let body = format!( "Target: {label} ({target})\n\ DNS mismatches:\n{}\n\ Instance: {}\n\ Time: {}\n\n\ - PoM", details.join("\n"), self.instance_name, chrono::Utc::now().to_rfc3339(), ); self.wam_ticket(&subject, &body, "high", "pom-dns", Some(target)).await; self.record_alert(&alert_key, AlertCategory::DnsMismatch, None, None, None).await; } #[instrument(skip_all)] pub async fn send_dns_recovery_alert( &self, 
target: &str, label: &str, ) { // No cooldown on recovery — always send let alert_key = format!("dns:{target}"); let subject = format!("[PoM] {label}: DNS records recovered"); let body = format!( "Target: {label} ({target})\n\ All DNS records now match expected values.\n\ Instance: {}\n\ Time: {}\n\n\ - PoM", self.instance_name, chrono::Utc::now().to_rfc3339(), ); self.send_email(&subject, &body).await; self.record_alert(&alert_key, AlertCategory::DnsRecovery, None, None, None).await; } #[instrument(skip_all)] pub async fn send_whois_expiry_alert( &self, target: &str, label: &str, domain: &str, days_remaining: i64, ) { let alert_key = format!("whois:{target}"); if self.is_within_cooldown(&alert_key).await { info!("alert cooldown active for {alert_key}, skipping"); return; } let subject = format!("[PoM] {label}: domain {domain} expires in {days_remaining} days"); let body = format!( "Target: {label} ({target})\n\ Domain: {domain}\n\ Days remaining: {days_remaining}\n\ Instance: {}\n\ Time: {}\n\n\ - PoM", self.instance_name, chrono::Utc::now().to_rfc3339(), ); let priority = whois_expiry_priority(days_remaining); self.wam_ticket(&subject, &body, priority, "pom-whois", Some(&format!("{target}:{domain}"))).await; self.record_alert(&alert_key, AlertCategory::WhoisExpiry, None, None, None).await; } #[instrument(skip_all)] pub async fn send_whois_error_alert( &self, target: &str, label: &str, domain: &str, error: &str, ) { let alert_key = format!("whois:{target}"); if self.is_within_cooldown(&alert_key).await { info!("alert cooldown active for {alert_key}, skipping"); return; } let subject = format!("[PoM] {label}: WHOIS check failed for {domain}"); let body = format!( "Target: {label} ({target})\n\ Domain: {domain}\n\ Error: {error}\n\ Instance: {}\n\ Time: {}\n\n\ - PoM", self.instance_name, chrono::Utc::now().to_rfc3339(), ); self.wam_ticket(&subject, &body, "high", "pom-whois", Some(&format!("{target}:{domain}"))).await; self.record_alert(&alert_key, 
AlertCategory::WhoisError, None, None, Some(error)).await; } #[instrument(skip_all)] pub async fn send_cors_failure_alert( &self, target: &str, label: &str, failures: &[crate::types::CorsCheckResult], ) { let alert_key = format!("cors:{target}"); if self.is_within_cooldown(&alert_key).await { info!("alert cooldown active for {alert_key}, skipping"); return; } let n = failures.len(); let subject = format!("[PoM] {label}: {n} CORS preflight(s) failing"); let details: Vec = failures .iter() .map(|f| { if let Some(ref err) = f.error { format!(" - {} {} from {}: {err}", f.method, f.url, f.origin) } else { format!(" - {} {} from {}: no CORS headers", f.method, f.url, f.origin) } }) .collect(); let body = format!( "Target: {label} ({target})\n\ CORS preflight failures:\n{}\n\ Instance: {}\n\ Time: {}\n\n\ Browser-side uploads will silently fail without CORS.\n\n\ - PoM", details.join("\n"), self.instance_name, chrono::Utc::now().to_rfc3339(), ); self.wam_ticket(&subject, &body, "high", "pom-cors", Some(target)).await; self.record_alert(&alert_key, AlertCategory::CorsFailure, None, None, None).await; } #[instrument(skip_all)] pub async fn send_cors_recovery_alert( &self, target: &str, label: &str, ) { // No cooldown on recovery — always send let alert_key = format!("cors:{target}"); let subject = format!("[PoM] {label}: CORS preflights recovered"); let body = format!( "Target: {label} ({target})\n\ All CORS preflight checks passing.\n\ Instance: {}\n\ Time: {}\n\n\ - PoM", self.instance_name, chrono::Utc::now().to_rfc3339(), ); self.send_email(&subject, &body).await; self.record_alert(&alert_key, AlertCategory::CorsRecovery, None, None, None).await; } #[instrument(skip_all)] pub async fn send_latency_drift_alert( &self, target: &str, label: &str, drift_message: &str, ) { let alert_key = format!("latency:{target}"); if self.is_within_cooldown(&alert_key).await { info!("alert cooldown active for {alert_key}, skipping"); return; } let subject = format!("[PoM] {target}: 
latency drift detected"); let body = format!( "Target: {label} ({target})\n\ {drift_message}\n\ Instance: {}\n\ Time: {}\n\n\ - PoM", self.instance_name, chrono::Utc::now().to_rfc3339(), ); self.wam_ticket(&subject, &body, "medium", "pom-latency", Some(target)).await; self.record_alert(&alert_key, AlertCategory::LatencyDrift, None, None, Some(drift_message)).await; } #[instrument(skip_all)] pub async fn send_latency_recovery( &self, target: &str, label: &str, ) { // No cooldown on recovery — always send let alert_key = format!("latency:{target}"); let subject = format!("[PoM] {target}: latency recovered"); let body = format!( "Target: {label} ({target})\n\ Latency returned to normal.\n\ Instance: {}\n\ Time: {}\n\n\ - PoM", self.instance_name, chrono::Utc::now().to_rfc3339(), ); self.send_email(&subject, &body).await; self.record_alert(&alert_key, AlertCategory::LatencyRecovery, None, None, None).await; } #[instrument(skip_all)] pub async fn send_test_duration_drift_alert( &self, target: &str, label: &str, drift_message: &str, ) { let alert_key = format!("test_duration:{target}"); if self.is_within_cooldown(&alert_key).await { info!("alert cooldown active for {alert_key}, skipping"); return; } let subject = format!("[PoM] {target}: test duration drift detected"); let body = format!( "Target: {label} ({target})\n\ {drift_message}\n\ Instance: {}\n\ Time: {}\n\n\ - PoM", self.instance_name, chrono::Utc::now().to_rfc3339(), ); self.wam_ticket(&subject, &body, "medium", "pom-test-duration", Some(target)).await; self.record_alert(&alert_key, AlertCategory::TestDurationDrift, None, None, Some(drift_message)).await; } #[instrument(skip_all)] pub async fn send_backup_stale_alert( &self, target: &str, label: &str, database: &str, status: &str, age_hours: Option, ) { let alert_key = format!("backup:{target}:{database}"); if self.is_within_cooldown(&alert_key).await { info!("alert cooldown active for {alert_key}, skipping"); return; } let detail = backup_status_detail(status, 
age_hours); let subject = format!("[PoM] {label}: {database} backup {status}"); let body = format!( "Target: {label} ({target})\n\ Database: {database}\n\ Status: {status}\n\ Detail: {detail}\n\ Instance: {}\n\ Time: {}\n\n\ - PoM", self.instance_name, chrono::Utc::now().to_rfc3339(), ); let priority = backup_status_priority(status); self.wam_ticket(&subject, &body, priority, "pom-backup", Some(&format!("{target}:{database}"))).await; self.record_alert(&alert_key, AlertCategory::BackupStale, None, Some(status), None).await; } #[instrument(skip_all)] pub async fn send_backup_recovery( &self, target: &str, label: &str, database: &str, ) { // No cooldown on recovery — always send let alert_key = format!("backup:{target}:{database}"); let subject = format!("[PoM] {label}: {database} backup recovered"); let body = format!( "Target: {label} ({target})\n\ Database: {database}\n\ Backup is now current.\n\ Instance: {}\n\ Time: {}\n\n\ - PoM", self.instance_name, chrono::Utc::now().to_rfc3339(), ); self.send_email(&subject, &body).await; self.record_alert(&alert_key, AlertCategory::BackupRecovery, None, Some("ok"), None).await; } /// Fire when the scan pipeline transitions from operational → degraded or /// unreachable. Includes the audit-doc threshold issues that fired. 
#[instrument(skip_all)] pub async fn send_scan_pipeline_alert( &self, target: &str, label: &str, status: &str, issues: &[String], ) { let alert_key = format!("scan_pipeline:{target}"); if self.is_within_cooldown(&alert_key).await { info!("alert cooldown active for {alert_key}, skipping"); return; } let subject = format!("[PoM] {label}: scan pipeline {status}"); let body = format!( "Target: {label} ({target})\n\ Status: {status}\n\ Issues:\n{}\n\ Instance: {}\n\ Time: {}\n\n\ Dashboard: \n\n\ - PoM", issues.iter().map(|i| format!(" - {i}")).collect::>().join("\n"), self.instance_name, chrono::Utc::now().to_rfc3339(), target, ); let priority = if status == "unreachable" { "high" } else { "medium" }; self.wam_ticket(&subject, &body, priority, "pom-scan-pipeline", Some(target)).await; self.record_alert(&alert_key, AlertCategory::ScanPipelineDegraded, None, Some(status), None).await; } /// Fire on recovery from a degraded / unreachable scan-pipeline state. #[instrument(skip_all)] pub async fn send_scan_pipeline_recovery(&self, target: &str, label: &str) { let alert_key = format!("scan_pipeline:{target}"); let subject = format!("[PoM] {label}: scan pipeline recovered"); let body = format!( "Target: {label} ({target})\n\ Scan pipeline is operational.\n\ Instance: {}\n\ Time: {}\n\n\ - PoM", self.instance_name, chrono::Utc::now().to_rfc3339(), ); self.send_email(&subject, &body).await; self.record_alert(&alert_key, AlertCategory::ScanPipelineRecovery, None, Some("operational"), None).await; } /// All monitored targets are unreachable — likely a network issue with PoM itself. 
#[instrument(skip_all)] pub async fn send_monitoring_offline_alert(&self, target_count: usize) { let alert_key = "monitoring:self"; if self.is_within_cooldown(alert_key).await { info!("alert cooldown active for {alert_key}, skipping"); return; } let subject = format!("[PoM] all {target_count} targets unreachable"); let body = format!( "All {target_count} monitored targets are non-operational.\n\ This likely indicates a network issue with the PoM instance itself,\n\ not an actual outage of all targets.\n\n\ Instance: {}\n\ Time: {}\n\n\ - PoM", self.instance_name, chrono::Utc::now().to_rfc3339(), ); self.wam_ticket(&subject, &body, "critical", "pom-monitoring", Some("self")).await; self.record_alert(alert_key, AlertCategory::MonitoringOffline, None, None, None).await; } /// At least one target is reachable again after a monitoring-offline event. #[instrument(skip_all)] pub async fn send_monitoring_recovery(&self) { let alert_key = "monitoring:self"; let subject = "[PoM] monitoring recovered".to_string(); let body = format!( "At least one target is reachable again.\n\ Instance: {}\n\ Time: {}\n\n\ - PoM", self.instance_name, chrono::Utc::now().to_rfc3339(), ); self.send_email(&subject, &body).await; self.record_alert(alert_key, AlertCategory::MonitoringRecovery, None, None, None).await; } async fn is_within_cooldown(&self, target: &str) -> bool { let latest = match db::get_latest_alert_for_target(&self.pool, target).await { Ok(Some(row)) => row, _ => return false, }; let sent_at = match chrono::DateTime::parse_from_rfc3339(&latest.sent_at) { Ok(dt) => dt, Err(_) => return false, }; let elapsed = chrono::Utc::now().signed_duration_since(sent_at); elapsed.num_seconds() < self.config.cooldown_secs as i64 } async fn send_email(&self, subject: &str, body: &str) { let Some(ref token) = self.config.postmark_token else { info!("[dev] alert: {subject}"); info!("[dev] {body}"); return; }; let payload = serde_json::json!({ "From": self.config.from, "To": self.config.to, 
"Subject": subject, "TextBody": body, }); let send_fut = self.client .post("https://api.postmarkapp.com/email") .header("X-Postmark-Server-Token", token) .header("Content-Type", "application/json") .header("Accept", "application/json") .json(&payload) .send(); // Wrap in a 30-second timeout to prevent Postmark latency from blocking // the alert task. The reqwest client has its own 10s timeout, but this // guards against DNS resolution stalls and connection pool exhaustion. match tokio::time::timeout(std::time::Duration::from_secs(30), send_fut).await { Ok(Ok(resp)) if resp.status().is_success() => { info!("alert sent: {subject}"); } Ok(Ok(resp)) => { let status = resp.status(); let text = resp.text().await.unwrap_or_default(); warn!("postmark error ({status}): {text}"); } Ok(Err(e)) => { warn!("failed to send alert: {e}"); } Err(_) => { warn!("alert send timed out after 30s: {subject}"); } } } async fn record_alert( &self, target: &str, alert_type: AlertCategory, from_status: Option<&str>, to_status: Option<&str>, error: Option<&str>, ) { let alert_type_str = alert_type.to_string(); if let Err(e) = db::insert_alert(&self.pool, target, &alert_type_str, from_status, to_status, error).await { warn!("failed to record alert: {e}"); } } /// Create a WAM ticket (best-effort, fire-and-forget). 
///
    /// Does nothing when no WAM URL is configured. Otherwise posts a JSON
    /// payload with `title`, `body`, `priority`, `source` and, when given,
    /// `source_ref` to `<base>/tickets`, then logs the outcome.
    async fn wam_ticket(
        &self,
        title: &str,
        body: &str,
        priority: &str,
        source: &str,
        source_ref: Option<&str>,
    ) {
        let Some(ref base_url) = self.wam_url else { return };

        // Strip trailing slashes so a base URL configured as `https://host/`
        // does not turn into `https://host//tickets`.
        let base = base_url.to_string();
        let url = format!("{}/tickets", base.trim_end_matches('/'));

        let mut payload = serde_json::json!({
            "title": title,
            "body": body,
            "priority": priority,
            "source": source,
        });
        if let Some(r) = source_ref {
            payload["source_ref"] = serde_json::json!(r);
        }

        match self.client.post(&url).json(&payload).send().await {
            Ok(resp) if resp.status().is_success() => {
                info!("WAM ticket created: {title}");
            }
            Ok(resp) => {
                warn!("WAM ticket creation returned {}: {title}", resp.status());
            }
            Err(e) => {
                warn!("WAM unreachable: {e}");
            }
        }
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Builds an alerter in dev mode: no Postmark token and no WAM URL, so no
    /// HTTP request is made.
    fn test_alerter(pool: SqlitePool) -> Alerter {
        let config = AlertConfig {
            postmark_token: None, // dev mode
            to: "test@example.com".to_string(),
            from: "PoM Alerts ".to_string(),
            cooldown_secs: 300,
            wam_url: None,
        };
        Alerter::new(config, pool, "test-instance".to_string())
    }

    #[tokio::test]
    async fn cooldown_prevents_duplicate_alerts() {
        let pool = db::connect_in_memory().await.unwrap();
        let alerter = test_alerter(pool.clone());

        // First alert — not in cooldown
        assert!(!alerter.is_within_cooldown("health:mnw").await);

        // Record an alert
        db::insert_alert(&pool, "health:mnw", "health", Some("operational"), Some("error"), None)
            .await
            .unwrap();

        // Now should be in cooldown
        assert!(alerter.is_within_cooldown("health:mnw").await);
    }

    #[tokio::test]
    async fn cooldown_does_not_affect_other_targets() {
        let pool = db::connect_in_memory().await.unwrap();
        let alerter = test_alerter(pool.clone());

        db::insert_alert(&pool, "health:mnw", "health", None, None, None)
            .await
            .unwrap();

        // Different target should not be in cooldown
        assert!(!alerter.is_within_cooldown("health:other").await);
    }

    #[tokio::test]
    async fn dev_mode_does_not_send_http() {
        let pool = db::connect_in_memory().await.unwrap();
        let alerter = test_alerter(pool.clone());

        // This should log instead of making HTTP calls (no panic, no error)
        alerter.send_health_alert("mnw", "MakeNotWork", "operational", "error", None).await;

        // Verify alert was recorded in DB with the prefixed key (health:mnw),
        // matching the cooldown lookup key format.
        let row = db::get_latest_alert_for_target(&pool, "health:mnw")
            .await
            .unwrap()
            .expect("health alert should have been recorded");
        assert_eq!(row.alert_type, "health");
        assert_eq!(row.from_status.as_deref(), Some("operational"));
        assert_eq!(row.to_status.as_deref(), Some("error"));
    }

    #[tokio::test]
    async fn route_alert_cooldown_key() {
        let pool = db::connect_in_memory().await.unwrap();
        let alerter = test_alerter(pool.clone());

        assert!(!alerter.is_within_cooldown("route:mnw").await);

        alerter.send_route_failure_alert("mnw", "MakeNotWork", &["/docs/faq".to_string()]).await;

        assert!(alerter.is_within_cooldown("route:mnw").await);
        assert!(!alerter.is_within_cooldown("route:mt").await);
    }

    #[tokio::test]
    async fn recovery_does_not_start_cooldown_for_next_failure() {
        let pool = db::connect_in_memory().await.unwrap();
        let alerter = test_alerter(pool.clone());

        // Send a failure alert — starts cooldown
        alerter.send_health_alert("mnw", "MakeNotWork", "operational", "error", None).await;
        assert!(alerter.is_within_cooldown("health:mnw").await);

        // Send a recovery alert (always sends, no cooldown check)
        alerter.send_health_recovery("mnw", "MakeNotWork", "error").await;

        // The recovery alert should NOT reset cooldown for failures.
        // is_within_cooldown now excludes recovery-type alerts, so it checks
        // the original failure alert's timestamp — which is still within cooldown.
        assert!(alerter.is_within_cooldown("health:mnw").await);
    }

    #[tokio::test]
    async fn dns_alert_cooldown_key() {
        let pool = db::connect_in_memory().await.unwrap();
        let alerter = test_alerter(pool.clone());

        assert!(!alerter.is_within_cooldown("dns:mnw").await);

        let mismatches = vec![crate::types::DnsCheckResult {
            target: "mnw".to_string(),
            name: "makenot.work".to_string(),
            record_type: crate::types::DnsRecordType::A,
            expected: vec!["1.2.3.4".to_string()],
            actual: vec!["5.6.7.8".to_string()],
            matches: false,
            checked_at: chrono::Utc::now().to_rfc3339(),
            error: None,
        }];
        alerter.send_dns_mismatch_alert("mnw", "MakeNotWork", &mismatches).await;

        assert!(alerter.is_within_cooldown("dns:mnw").await);
        assert!(!alerter.is_within_cooldown("dns:other").await);
    }

    #[tokio::test]
    async fn whois_alert_cooldown_key() {
        let pool = db::connect_in_memory().await.unwrap();
        let alerter = test_alerter(pool.clone());

        assert!(!alerter.is_within_cooldown("whois:mnw").await);

        alerter.send_whois_expiry_alert("mnw", "MakeNotWork", "makenot.work", 15).await;

        assert!(alerter.is_within_cooldown("whois:mnw").await);
        assert!(!alerter.is_within_cooldown("whois:other").await);
    }

    #[tokio::test]
    async fn health_alert_cooldown_key_matches_record_key() {
        let pool = db::connect_in_memory().await.unwrap();
        let alerter = test_alerter(pool.clone());

        // Not in cooldown initially
        assert!(!alerter.is_within_cooldown("health:example.com").await);

        // Send an alert for "example.com"
        alerter.send_health_alert("example.com", "Example", "operational", "error", None).await;

        // Same target should now be in cooldown
        assert!(alerter.is_within_cooldown("health:example.com").await);

        // Different target should NOT be in cooldown
        assert!(!alerter.is_within_cooldown("health:other.com").await);
    }

    #[tokio::test]
    async fn tls_expiry_alert_cooldown_key() {
        let pool = db::connect_in_memory().await.unwrap();
        let alerter = test_alerter(pool.clone());

        assert!(!alerter.is_within_cooldown("tls:mnw").await);
        alerter.send_tls_expiry_alert("mnw", "makenot.work", 10, "2026-04-01T00:00:00Z").await;
        assert!(alerter.is_within_cooldown("tls:mnw").await);
    }

    #[tokio::test]
    async fn tls_error_alert_cooldown_key() {
        let pool = db::connect_in_memory().await.unwrap();
        let alerter = test_alerter(pool.clone());

        assert!(!alerter.is_within_cooldown("tls:mnw").await);
        alerter.send_tls_error_alert("mnw", "makenot.work", "certificate expired").await;
        assert!(alerter.is_within_cooldown("tls:mnw").await);
    }

    #[tokio::test]
    async fn latency_drift_alert_cooldown_key() {
        let pool = db::connect_in_memory().await.unwrap();
        let alerter = test_alerter(pool.clone());

        assert!(!alerter.is_within_cooldown("latency:mnw").await);
        alerter.send_latency_drift_alert("mnw", "MakeNotWork", "avg 500ms, baseline 100ms").await;
        assert!(alerter.is_within_cooldown("latency:mnw").await);
    }

    #[tokio::test]
    async fn test_duration_drift_alert_cooldown_key() {
        let pool = db::connect_in_memory().await.unwrap();
        let alerter = test_alerter(pool.clone());

        assert!(!alerter.is_within_cooldown("test_duration:mnw").await);
        alerter.send_test_duration_drift_alert("mnw", "MakeNotWork", "drift: 120s vs 60s baseline").await;
        assert!(alerter.is_within_cooldown("test_duration:mnw").await);
    }

    #[tokio::test]
    async fn monitoring_offline_alert_cooldown_key() {
        let pool = db::connect_in_memory().await.unwrap();
        let alerter = test_alerter(pool.clone());

        assert!(!alerter.is_within_cooldown("monitoring:self").await);
        alerter.send_monitoring_offline_alert(3).await;
        assert!(alerter.is_within_cooldown("monitoring:self").await);
    }

    #[tokio::test]
    async fn route_recovery_does_not_start_cooldown() {
        let pool = db::connect_in_memory().await.unwrap();
        let alerter = test_alerter(pool.clone());

        alerter.send_route_recovery_alert("mnw", "MakeNotWork", &["/health".to_string()]).await;

        // Recovery alerts are excluded from cooldown lookups, so sending a recovery
        // should NOT put the key into cooldown.
        assert!(!alerter.is_within_cooldown("route:mnw").await);
    }

    #[tokio::test]
    async fn dns_recovery_does_not_start_cooldown() {
        let pool = db::connect_in_memory().await.unwrap();
        let alerter = test_alerter(pool.clone());

        alerter.send_dns_recovery_alert("mnw", "MakeNotWork").await;
        assert!(!alerter.is_within_cooldown("dns:mnw").await);
    }

    #[tokio::test]
    async fn tls_recovery_does_not_start_cooldown() {
        let pool = db::connect_in_memory().await.unwrap();
        let alerter = test_alerter(pool.clone());

        alerter.send_tls_recovery("mnw", "MakeNotWork", 90).await;
        assert!(!alerter.is_within_cooldown("tls:mnw").await);
    }

    // ── Pure priority/severity helpers (pin the <= boundaries) ──

    #[test]
    fn tls_expiry_priority_boundaries() {
        // critical: days <= 3
        assert_eq!(tls_expiry_priority(-5), "critical", "negative days = already expired");
        assert_eq!(tls_expiry_priority(0), "critical");
        assert_eq!(tls_expiry_priority(3), "critical");
        // high: 4..=7
        assert_eq!(tls_expiry_priority(4), "high");
        assert_eq!(tls_expiry_priority(7), "high");
        // medium: > 7
        assert_eq!(tls_expiry_priority(8), "medium");
        assert_eq!(tls_expiry_priority(90), "medium");
    }

    #[test]
    fn whois_expiry_priority_boundaries() {
        // critical: days <= 7
        assert_eq!(whois_expiry_priority(-1), "critical");
        assert_eq!(whois_expiry_priority(7), "critical");
        // high: 8..=14
        assert_eq!(whois_expiry_priority(8), "high");
        assert_eq!(whois_expiry_priority(14), "high");
        // medium: > 14
        assert_eq!(whois_expiry_priority(15), "medium");
        assert_eq!(whois_expiry_priority(180), "medium");
    }

    #[test]
    fn backup_status_priority_missing_is_critical() {
        assert_eq!(backup_status_priority("missing"), "critical");
        assert_eq!(backup_status_priority("stale"), "high");
        assert_eq!(backup_status_priority("error"), "high");
        assert_eq!(backup_status_priority("anything-else"), "high");
        assert_eq!(backup_status_priority(""), "high");
    }

    #[test]
    fn backup_status_detail_arms() {
        assert_eq!(
            backup_status_detail("stale", Some(12)),
            "last backup is 12h old"
        );
        // `stale` with no age falls through to the default arm.
        assert_eq!(backup_status_detail("stale", None), "status: stale");
        assert_eq!(backup_status_detail("missing", None), "no backup files found");
        assert_eq!(backup_status_detail("missing", Some(5)), "no backup files found");
        assert_eq!(backup_status_detail("error", None), "backup check failed");
        assert_eq!(backup_status_detail("error", Some(99)), "backup check failed");
        assert_eq!(backup_status_detail("weird", None), "status: weird");
    }

    #[test]
    fn health_status_priority_arms() {
        assert_eq!(health_status_priority("error"), "critical");
        assert_eq!(health_status_priority("unreachable"), "critical");
        assert_eq!(health_status_priority("degraded"), "high");
        // Anything else (operational, unknown values) falls through to medium.
        assert_eq!(health_status_priority("operational"), "medium");
        assert_eq!(health_status_priority("flapping"), "medium");
        assert_eq!(health_status_priority(""), "medium");
    }
}