Skip to main content

max / pom

29.8 KB · 888 lines History Blame Raw
1 //! Email alerting via Postmark API.
2 //!
3 //! Sends alerts on health status transitions and peer disappearance/recovery.
4 //! If no `postmark_token` is configured, alerts are logged to stdout instead.
5
6 use sqlx::SqlitePool;
7 use tracing::{info, instrument, warn};
8
9 use crate::config::AlertConfig;
10 use crate::db;
11 use crate::types::AlertCategory;
12
/// Sends alert emails and records each alert through the shared database pool.
///
/// Derives `Clone`, so a handle can be duplicated and passed to several callers.
#[derive(Clone)]
pub struct Alerter {
    /// Alerting settings; this file reads `postmark_token`, `from`, `to`, and `cooldown_secs`.
    config: AlertConfig,
    /// HTTP client used to POST to the Postmark email endpoint.
    client: reqwest::Client,
    /// SQLite pool handed to the `db` helpers for cooldown lookups and alert inserts.
    pool: SqlitePool,
    /// Name of this instance; included in the body of every alert.
    instance_name: String,
}
20
21 impl Alerter {
22 pub fn new(config: AlertConfig, pool: SqlitePool, instance_name: String) -> Self {
23 let client = reqwest::Client::builder()
24 .timeout(std::time::Duration::from_secs(10))
25 .build()
26 .unwrap_or_default();
27 Self { config, client, pool, instance_name }
28 }
29
30 #[instrument(skip_all)]
31 pub async fn send_health_alert(
32 &self,
33 target: &str,
34 label: &str,
35 from_status: &str,
36 to_status: &str,
37 error: Option<&str>,
38 ) {
39 let alert_key = format!("health:{target}");
40 if self.is_within_cooldown(&alert_key).await {
41 info!("alert cooldown active for {alert_key}, skipping");
42 return;
43 }
44
45 let subject = format!("[PoM] {target}: {from_status} -> {to_status}");
46 let mut body = format!(
47 "Target: {label} ({target})\n\
48 Status: {from_status} -> {to_status}\n\
49 Instance: {}\n\
50 Time: {}\n",
51 self.instance_name,
52 chrono::Utc::now().to_rfc3339(),
53 );
54 if let Some(err) = error {
55 body.push_str(&format!("Error: {err}\n"));
56 }
57 body.push_str("\n- PoM");
58
59 self.send_email(&subject, &body).await;
60 self.record_alert(&alert_key, AlertCategory::Health, Some(from_status), Some(to_status), error).await;
61 }
62
63 #[instrument(skip_all)]
64 pub async fn send_health_recovery(
65 &self,
66 target: &str,
67 label: &str,
68 from_status: &str,
69 ) {
70 let alert_key = format!("health:{target}");
71 // No cooldown on recovery — always send
72 let subject = format!("[PoM] {target}: recovered");
73 let body = format!(
74 "Target: {label} ({target})\n\
75 Status: {from_status} -> operational\n\
76 Instance: {}\n\
77 Time: {}\n\n\
78 - PoM",
79 self.instance_name,
80 chrono::Utc::now().to_rfc3339(),
81 );
82
83 self.send_email(&subject, &body).await;
84 self.record_alert(&alert_key, AlertCategory::Recovery, Some(from_status), Some("operational"), None).await;
85 }
86
87 #[instrument(skip_all)]
88 pub async fn send_tls_expiry_alert(
89 &self,
90 target: &str,
91 host: &str,
92 days_remaining: i64,
93 not_after: &str,
94 ) {
95 let alert_key = format!("tls:{target}");
96 if self.is_within_cooldown(&alert_key).await {
97 info!("alert cooldown active for {alert_key}, skipping");
98 return;
99 }
100
101 let subject = format!("[PoM] {target}: TLS cert expires in {days_remaining} days");
102 let body = format!(
103 "Target: {target}\n\
104 Host: {host}\n\
105 Days remaining: {days_remaining}\n\
106 Expires: {not_after}\n\
107 Instance: {}\n\
108 Time: {}\n\n\
109 - PoM",
110 self.instance_name,
111 chrono::Utc::now().to_rfc3339(),
112 );
113
114 self.send_email(&subject, &body).await;
115 self.record_alert(&alert_key, AlertCategory::TlsExpiry, None, None, None).await;
116 }
117
118 #[instrument(skip_all)]
119 pub async fn send_tls_error_alert(
120 &self,
121 target: &str,
122 host: &str,
123 error: &str,
124 ) {
125 let alert_key = format!("tls:{target}");
126 if self.is_within_cooldown(&alert_key).await {
127 info!("alert cooldown active for {alert_key}, skipping");
128 return;
129 }
130
131 let subject = format!("[PoM] {target}: TLS check failed");
132 let body = format!(
133 "Target: {target}\n\
134 Host: {host}\n\
135 Error: {error}\n\
136 Instance: {}\n\
137 Time: {}\n\n\
138 - PoM",
139 self.instance_name,
140 chrono::Utc::now().to_rfc3339(),
141 );
142
143 self.send_email(&subject, &body).await;
144 self.record_alert(&alert_key, AlertCategory::TlsError, None, None, Some(error)).await;
145 }
146
147 #[instrument(skip_all)]
148 pub async fn send_tls_recovery(
149 &self,
150 target: &str,
151 label: &str,
152 days_remaining: i64,
153 ) {
154 let alert_key = format!("tls:{target}");
155 // No cooldown on recovery — always send
156 let subject = format!("[PoM] {target}: TLS cert renewed");
157 let body = format!(
158 "Target: {label} ({target})\n\
159 Days remaining: {days_remaining}\n\
160 Instance: {}\n\
161 Time: {}\n\n\
162 - PoM",
163 self.instance_name,
164 chrono::Utc::now().to_rfc3339(),
165 );
166
167 self.send_email(&subject, &body).await;
168 self.record_alert(&alert_key, AlertCategory::TlsRecovery, None, None, None).await;
169 }
170
171 #[instrument(skip_all)]
172 pub async fn send_peer_missing(
173 &self,
174 peer_name: &str,
175 address: &str,
176 consecutive_failures: u32,
177 ) {
178 let alert_key = format!("peer:{peer_name}");
179 if self.is_within_cooldown(&alert_key).await {
180 info!("alert cooldown active for {alert_key}, skipping");
181 return;
182 }
183
184 let subject = format!("[PoM] peer {peer_name}: missing");
185 let body = format!(
186 "Peer: {peer_name}\n\
187 Address: {address}\n\
188 Consecutive failures: {consecutive_failures}\n\
189 Instance: {}\n\
190 Time: {}\n\n\
191 - PoM",
192 self.instance_name,
193 chrono::Utc::now().to_rfc3339(),
194 );
195
196 self.send_email(&subject, &body).await;
197 self.record_alert(&alert_key, AlertCategory::PeerMissing, None, None, None).await;
198 }
199
200 #[instrument(skip_all)]
201 pub async fn send_peer_recovery(
202 &self,
203 peer_name: &str,
204 address: &str,
205 ) {
206 let subject = format!("[PoM] peer {peer_name}: recovered");
207 let body = format!(
208 "Peer: {peer_name}\n\
209 Address: {address}\n\
210 Instance: {}\n\
211 Time: {}\n\n\
212 - PoM",
213 self.instance_name,
214 chrono::Utc::now().to_rfc3339(),
215 );
216
217 let alert_key = format!("peer:{peer_name}");
218 self.send_email(&subject, &body).await;
219 self.record_alert(&alert_key, AlertCategory::PeerRecovery, None, None, None).await;
220 }
221
222 #[instrument(skip_all)]
223 pub async fn send_route_failure_alert(
224 &self,
225 target: &str,
226 label: &str,
227 failed_paths: &[String],
228 ) {
229 let alert_key = format!("route:{target}");
230 if self.is_within_cooldown(&alert_key).await {
231 info!("alert cooldown active for {alert_key}, skipping");
232 return;
233 }
234
235 let n = failed_paths.len();
236 let subject = format!("[PoM] {label}: {n} route(s) failing");
237 let body = format!(
238 "Target: {label} ({target})\n\
239 Failed routes:\n{}\n\
240 Instance: {}\n\
241 Time: {}\n\n\
242 - PoM",
243 failed_paths.iter().map(|p| format!(" - {p}")).collect::<Vec<_>>().join("\n"),
244 self.instance_name,
245 chrono::Utc::now().to_rfc3339(),
246 );
247
248 self.send_email(&subject, &body).await;
249 self.record_alert(&alert_key, AlertCategory::RouteFailure, None, None, None).await;
250 }
251
252 #[instrument(skip_all)]
253 pub async fn send_route_recovery_alert(
254 &self,
255 target: &str,
256 label: &str,
257 recovered_paths: &[String],
258 ) {
259 // No cooldown on recovery — always send
260 let alert_key = format!("route:{target}");
261 let subject = format!("[PoM] {label}: routes recovered");
262 let body = format!(
263 "Target: {label} ({target})\n\
264 Recovered routes:\n{}\n\
265 Instance: {}\n\
266 Time: {}\n\n\
267 - PoM",
268 recovered_paths.iter().map(|p| format!(" - {p}")).collect::<Vec<_>>().join("\n"),
269 self.instance_name,
270 chrono::Utc::now().to_rfc3339(),
271 );
272
273 self.send_email(&subject, &body).await;
274 self.record_alert(&alert_key, AlertCategory::RouteRecovery, None, None, None).await;
275 }
276
277 #[instrument(skip_all)]
278 pub async fn send_dns_mismatch_alert(
279 &self,
280 target: &str,
281 label: &str,
282 mismatches: &[crate::types::DnsCheckResult],
283 ) {
284 let alert_key = format!("dns:{target}");
285 if self.is_within_cooldown(&alert_key).await {
286 info!("alert cooldown active for {alert_key}, skipping");
287 return;
288 }
289
290 let n = mismatches.len();
291 let subject = format!("[PoM] {label}: {n} DNS record(s) mismatched");
292 let details: Vec<String> = mismatches
293 .iter()
294 .map(|m| {
295 if let Some(ref err) = m.error {
296 format!(" - {} {}: {err}", m.name, m.record_type)
297 } else {
298 format!(
299 " - {} {}: expected {:?}, got {:?}",
300 m.name, m.record_type, m.expected, m.actual
301 )
302 }
303 })
304 .collect();
305 let body = format!(
306 "Target: {label} ({target})\n\
307 DNS mismatches:\n{}\n\
308 Instance: {}\n\
309 Time: {}\n\n\
310 - PoM",
311 details.join("\n"),
312 self.instance_name,
313 chrono::Utc::now().to_rfc3339(),
314 );
315
316 self.send_email(&subject, &body).await;
317 self.record_alert(&alert_key, AlertCategory::DnsMismatch, None, None, None).await;
318 }
319
320 #[instrument(skip_all)]
321 pub async fn send_dns_recovery_alert(
322 &self,
323 target: &str,
324 label: &str,
325 ) {
326 // No cooldown on recovery — always send
327 let alert_key = format!("dns:{target}");
328 let subject = format!("[PoM] {label}: DNS records recovered");
329 let body = format!(
330 "Target: {label} ({target})\n\
331 All DNS records now match expected values.\n\
332 Instance: {}\n\
333 Time: {}\n\n\
334 - PoM",
335 self.instance_name,
336 chrono::Utc::now().to_rfc3339(),
337 );
338
339 self.send_email(&subject, &body).await;
340 self.record_alert(&alert_key, AlertCategory::DnsRecovery, None, None, None).await;
341 }
342
343 #[instrument(skip_all)]
344 pub async fn send_whois_expiry_alert(
345 &self,
346 target: &str,
347 label: &str,
348 domain: &str,
349 days_remaining: i64,
350 ) {
351 let alert_key = format!("whois:{target}");
352 if self.is_within_cooldown(&alert_key).await {
353 info!("alert cooldown active for {alert_key}, skipping");
354 return;
355 }
356
357 let subject = format!("[PoM] {label}: domain {domain} expires in {days_remaining} days");
358 let body = format!(
359 "Target: {label} ({target})\n\
360 Domain: {domain}\n\
361 Days remaining: {days_remaining}\n\
362 Instance: {}\n\
363 Time: {}\n\n\
364 - PoM",
365 self.instance_name,
366 chrono::Utc::now().to_rfc3339(),
367 );
368
369 self.send_email(&subject, &body).await;
370 self.record_alert(&alert_key, AlertCategory::WhoisExpiry, None, None, None).await;
371 }
372
373 #[instrument(skip_all)]
374 pub async fn send_whois_error_alert(
375 &self,
376 target: &str,
377 label: &str,
378 domain: &str,
379 error: &str,
380 ) {
381 let alert_key = format!("whois:{target}");
382 if self.is_within_cooldown(&alert_key).await {
383 info!("alert cooldown active for {alert_key}, skipping");
384 return;
385 }
386
387 let subject = format!("[PoM] {label}: WHOIS check failed for {domain}");
388 let body = format!(
389 "Target: {label} ({target})\n\
390 Domain: {domain}\n\
391 Error: {error}\n\
392 Instance: {}\n\
393 Time: {}\n\n\
394 - PoM",
395 self.instance_name,
396 chrono::Utc::now().to_rfc3339(),
397 );
398
399 self.send_email(&subject, &body).await;
400 self.record_alert(&alert_key, AlertCategory::WhoisError, None, None, Some(error)).await;
401 }
402
403 #[instrument(skip_all)]
404 pub async fn send_cors_failure_alert(
405 &self,
406 target: &str,
407 label: &str,
408 failures: &[crate::types::CorsCheckResult],
409 ) {
410 let alert_key = format!("cors:{target}");
411 if self.is_within_cooldown(&alert_key).await {
412 info!("alert cooldown active for {alert_key}, skipping");
413 return;
414 }
415
416 let n = failures.len();
417 let subject = format!("[PoM] {label}: {n} CORS preflight(s) failing");
418 let details: Vec<String> = failures
419 .iter()
420 .map(|f| {
421 if let Some(ref err) = f.error {
422 format!(" - {} {} from {}: {err}", f.method, f.url, f.origin)
423 } else {
424 format!(" - {} {} from {}: no CORS headers", f.method, f.url, f.origin)
425 }
426 })
427 .collect();
428 let body = format!(
429 "Target: {label} ({target})\n\
430 CORS preflight failures:\n{}\n\
431 Instance: {}\n\
432 Time: {}\n\n\
433 Browser-side uploads will silently fail without CORS.\n\n\
434 - PoM",
435 details.join("\n"),
436 self.instance_name,
437 chrono::Utc::now().to_rfc3339(),
438 );
439
440 self.send_email(&subject, &body).await;
441 self.record_alert(&alert_key, AlertCategory::CorsFailure, None, None, None).await;
442 }
443
444 #[instrument(skip_all)]
445 pub async fn send_cors_recovery_alert(
446 &self,
447 target: &str,
448 label: &str,
449 ) {
450 // No cooldown on recovery — always send
451 let alert_key = format!("cors:{target}");
452 let subject = format!("[PoM] {label}: CORS preflights recovered");
453 let body = format!(
454 "Target: {label} ({target})\n\
455 All CORS preflight checks passing.\n\
456 Instance: {}\n\
457 Time: {}\n\n\
458 - PoM",
459 self.instance_name,
460 chrono::Utc::now().to_rfc3339(),
461 );
462
463 self.send_email(&subject, &body).await;
464 self.record_alert(&alert_key, AlertCategory::CorsRecovery, None, None, None).await;
465 }
466
467 #[instrument(skip_all)]
468 pub async fn send_latency_drift_alert(
469 &self,
470 target: &str,
471 label: &str,
472 drift_message: &str,
473 ) {
474 let alert_key = format!("latency:{target}");
475 if self.is_within_cooldown(&alert_key).await {
476 info!("alert cooldown active for {alert_key}, skipping");
477 return;
478 }
479
480 let subject = format!("[PoM] {target}: latency drift detected");
481 let body = format!(
482 "Target: {label} ({target})\n\
483 {drift_message}\n\
484 Instance: {}\n\
485 Time: {}\n\n\
486 - PoM",
487 self.instance_name,
488 chrono::Utc::now().to_rfc3339(),
489 );
490
491 self.send_email(&subject, &body).await;
492 self.record_alert(&alert_key, AlertCategory::LatencyDrift, None, None, Some(drift_message)).await;
493 }
494
495 #[instrument(skip_all)]
496 pub async fn send_latency_recovery(
497 &self,
498 target: &str,
499 label: &str,
500 ) {
501 // No cooldown on recovery — always send
502 let alert_key = format!("latency:{target}");
503 let subject = format!("[PoM] {target}: latency recovered");
504 let body = format!(
505 "Target: {label} ({target})\n\
506 Latency returned to normal.\n\
507 Instance: {}\n\
508 Time: {}\n\n\
509 - PoM",
510 self.instance_name,
511 chrono::Utc::now().to_rfc3339(),
512 );
513
514 self.send_email(&subject, &body).await;
515 self.record_alert(&alert_key, AlertCategory::LatencyRecovery, None, None, None).await;
516 }
517
518 #[instrument(skip_all)]
519 pub async fn send_test_duration_drift_alert(
520 &self,
521 target: &str,
522 label: &str,
523 drift_message: &str,
524 ) {
525 let alert_key = format!("test_duration:{target}");
526 if self.is_within_cooldown(&alert_key).await {
527 info!("alert cooldown active for {alert_key}, skipping");
528 return;
529 }
530
531 let subject = format!("[PoM] {target}: test duration drift detected");
532 let body = format!(
533 "Target: {label} ({target})\n\
534 {drift_message}\n\
535 Instance: {}\n\
536 Time: {}\n\n\
537 - PoM",
538 self.instance_name,
539 chrono::Utc::now().to_rfc3339(),
540 );
541
542 self.send_email(&subject, &body).await;
543 self.record_alert(&alert_key, AlertCategory::TestDurationDrift, None, None, Some(drift_message)).await;
544 }
545
546 /// All monitored targets are unreachable — likely a network issue with PoM itself.
547 #[instrument(skip_all)]
548 pub async fn send_monitoring_offline_alert(&self, target_count: usize) {
549 let alert_key = "monitoring:self";
550 if self.is_within_cooldown(alert_key).await {
551 info!("alert cooldown active for {alert_key}, skipping");
552 return;
553 }
554
555 let subject = format!("[PoM] all {target_count} targets unreachable");
556 let body = format!(
557 "All {target_count} monitored targets are non-operational.\n\
558 This likely indicates a network issue with the PoM instance itself,\n\
559 not an actual outage of all targets.\n\n\
560 Instance: {}\n\
561 Time: {}\n\n\
562 - PoM",
563 self.instance_name,
564 chrono::Utc::now().to_rfc3339(),
565 );
566
567 self.send_email(&subject, &body).await;
568 self.record_alert(alert_key, AlertCategory::MonitoringOffline, None, None, None).await;
569 }
570
571 /// At least one target is reachable again after a monitoring-offline event.
572 #[instrument(skip_all)]
573 pub async fn send_monitoring_recovery(&self) {
574 let alert_key = "monitoring:self";
575 let subject = "[PoM] monitoring recovered".to_string();
576 let body = format!(
577 "At least one target is reachable again.\n\
578 Instance: {}\n\
579 Time: {}\n\n\
580 - PoM",
581 self.instance_name,
582 chrono::Utc::now().to_rfc3339(),
583 );
584
585 self.send_email(&subject, &body).await;
586 self.record_alert(alert_key, AlertCategory::MonitoringRecovery, None, None, None).await;
587 }
588
589 async fn is_within_cooldown(&self, target: &str) -> bool {
590 let latest = match db::get_latest_alert_for_target(&self.pool, target).await {
591 Ok(Some(row)) => row,
592 _ => return false,
593 };
594
595 let sent_at = match chrono::DateTime::parse_from_rfc3339(&latest.sent_at) {
596 Ok(dt) => dt,
597 Err(_) => return false,
598 };
599
600 let elapsed = chrono::Utc::now().signed_duration_since(sent_at);
601 elapsed.num_seconds() < self.config.cooldown_secs as i64
602 }
603
604 async fn send_email(&self, subject: &str, body: &str) {
605 let Some(ref token) = self.config.postmark_token else {
606 info!("[dev] alert: {subject}");
607 info!("[dev] {body}");
608 return;
609 };
610
611 let payload = serde_json::json!({
612 "From": self.config.from,
613 "To": self.config.to,
614 "Subject": subject,
615 "TextBody": body,
616 });
617
618 let send_fut = self.client
619 .post("https://api.postmarkapp.com/email")
620 .header("X-Postmark-Server-Token", token)
621 .header("Content-Type", "application/json")
622 .header("Accept", "application/json")
623 .json(&payload)
624 .send();
625
626 // Wrap in a 30-second timeout to prevent Postmark latency from blocking
627 // the alert task. The reqwest client has its own 10s timeout, but this
628 // guards against DNS resolution stalls and connection pool exhaustion.
629 match tokio::time::timeout(std::time::Duration::from_secs(30), send_fut).await {
630 Ok(Ok(resp)) if resp.status().is_success() => {
631 info!("alert sent: {subject}");
632 }
633 Ok(Ok(resp)) => {
634 let status = resp.status();
635 let text = resp.text().await.unwrap_or_default();
636 warn!("postmark error ({status}): {text}");
637 }
638 Ok(Err(e)) => {
639 warn!("failed to send alert: {e}");
640 }
641 Err(_) => {
642 warn!("alert send timed out after 30s: {subject}");
643 }
644 }
645 }
646
647 async fn record_alert(
648 &self,
649 target: &str,
650 alert_type: AlertCategory,
651 from_status: Option<&str>,
652 to_status: Option<&str>,
653 error: Option<&str>,
654 ) {
655 let alert_type_str = alert_type.to_string();
656 if let Err(e) = db::insert_alert(&self.pool, target, &alert_type_str, from_status, to_status, error).await {
657 warn!("failed to record alert: {e}");
658 }
659 }
660 }
661
662 #[cfg(test)]
663 mod tests {
664 use super::*;
665
666 fn test_alerter(pool: SqlitePool) -> Alerter {
667 let config = AlertConfig {
668 postmark_token: None, // dev mode
669 to: "test@example.com".to_string(),
670 from: "PoM Alerts <pom-alerts@makenot.work>".to_string(),
671 cooldown_secs: 300,
672 };
673 Alerter::new(config, pool, "test-instance".to_string())
674 }
675
676 #[tokio::test]
677 async fn cooldown_prevents_duplicate_alerts() {
678 let pool = db::connect_in_memory().await.unwrap();
679 let alerter = test_alerter(pool.clone());
680
681 // First alert — not in cooldown
682 assert!(!alerter.is_within_cooldown("health:mnw").await);
683
684 // Record an alert
685 db::insert_alert(&pool, "health:mnw", "health", Some("operational"), Some("error"), None)
686 .await
687 .unwrap();
688
689 // Now should be in cooldown
690 assert!(alerter.is_within_cooldown("health:mnw").await);
691 }
692
693 #[tokio::test]
694 async fn cooldown_does_not_affect_other_targets() {
695 let pool = db::connect_in_memory().await.unwrap();
696 let alerter = test_alerter(pool.clone());
697
698 db::insert_alert(&pool, "health:mnw", "health", None, None, None)
699 .await
700 .unwrap();
701
702 // Different target should not be in cooldown
703 assert!(!alerter.is_within_cooldown("health:other").await);
704 }
705
706 #[tokio::test]
707 async fn dev_mode_does_not_send_http() {
708 let pool = db::connect_in_memory().await.unwrap();
709 let alerter = test_alerter(pool.clone());
710
711 // This should log instead of making HTTP calls (no panic, no error)
712 alerter.send_health_alert("mnw", "MakeNotWork", "operational", "error", None).await;
713
714 // Verify alert was recorded in DB with the prefixed key (health:mnw),
715 // matching the cooldown lookup key format.
716 let latest = db::get_latest_alert_for_target(&pool, "health:mnw").await.unwrap();
717 assert!(latest.is_some());
718 let row = latest.unwrap();
719 assert_eq!(row.alert_type, "health");
720 assert_eq!(row.from_status.as_deref(), Some("operational"));
721 assert_eq!(row.to_status.as_deref(), Some("error"));
722 }
723
724 #[tokio::test]
725 async fn route_alert_cooldown_key() {
726 let pool = db::connect_in_memory().await.unwrap();
727 let alerter = test_alerter(pool.clone());
728
729 assert!(!alerter.is_within_cooldown("route:mnw").await);
730
731 alerter.send_route_failure_alert("mnw", "MakeNotWork", &["/docs/faq".to_string()]).await;
732
733 assert!(alerter.is_within_cooldown("route:mnw").await);
734 assert!(!alerter.is_within_cooldown("route:mt").await);
735 }
736
737 #[tokio::test]
738 async fn recovery_does_not_start_cooldown_for_next_failure() {
739 let pool = db::connect_in_memory().await.unwrap();
740 let alerter = test_alerter(pool.clone());
741
742 // Send a failure alert — starts cooldown
743 alerter.send_health_alert("mnw", "MakeNotWork", "operational", "error", None).await;
744 assert!(alerter.is_within_cooldown("health:mnw").await);
745
746 // Send a recovery alert (always sends, no cooldown check)
747 alerter.send_health_recovery("mnw", "MakeNotWork", "error").await;
748
749 // The recovery alert should NOT reset cooldown for failures.
750 // is_within_cooldown now excludes recovery-type alerts, so it checks
751 // the original failure alert's timestamp — which is still within cooldown.
752 assert!(alerter.is_within_cooldown("health:mnw").await);
753 }
754
755 #[tokio::test]
756 async fn dns_alert_cooldown_key() {
757 let pool = db::connect_in_memory().await.unwrap();
758 let alerter = test_alerter(pool.clone());
759
760 assert!(!alerter.is_within_cooldown("dns:mnw").await);
761
762 let mismatches = vec![crate::types::DnsCheckResult {
763 target: "mnw".to_string(),
764 name: "makenot.work".to_string(),
765 record_type: crate::types::DnsRecordType::A,
766 expected: vec!["1.2.3.4".to_string()],
767 actual: vec!["5.6.7.8".to_string()],
768 matches: false,
769 checked_at: chrono::Utc::now().to_rfc3339(),
770 error: None,
771 }];
772 alerter.send_dns_mismatch_alert("mnw", "MakeNotWork", &mismatches).await;
773
774 assert!(alerter.is_within_cooldown("dns:mnw").await);
775 assert!(!alerter.is_within_cooldown("dns:other").await);
776 }
777
778 #[tokio::test]
779 async fn whois_alert_cooldown_key() {
780 let pool = db::connect_in_memory().await.unwrap();
781 let alerter = test_alerter(pool.clone());
782
783 assert!(!alerter.is_within_cooldown("whois:mnw").await);
784
785 alerter.send_whois_expiry_alert("mnw", "MakeNotWork", "makenot.work", 15).await;
786
787 assert!(alerter.is_within_cooldown("whois:mnw").await);
788 assert!(!alerter.is_within_cooldown("whois:other").await);
789 }
790
791 #[tokio::test]
792 async fn health_alert_cooldown_key_matches_record_key() {
793 let pool = db::connect_in_memory().await.unwrap();
794 let alerter = test_alerter(pool.clone());
795
796 // Not in cooldown initially
797 assert!(!alerter.is_within_cooldown("health:example.com").await);
798
799 // Send an alert for "example.com"
800 alerter.send_health_alert("example.com", "Example", "operational", "error", None).await;
801
802 // Same target should now be in cooldown
803 assert!(alerter.is_within_cooldown("health:example.com").await);
804
805 // Different target should NOT be in cooldown
806 assert!(!alerter.is_within_cooldown("health:other.com").await);
807 }
808
809 #[tokio::test]
810 async fn tls_expiry_alert_cooldown_key() {
811 let pool = db::connect_in_memory().await.unwrap();
812 let alerter = test_alerter(pool.clone());
813
814 assert!(!alerter.is_within_cooldown("tls:mnw").await);
815 alerter.send_tls_expiry_alert("mnw", "makenot.work", 10, "2026-04-01T00:00:00Z").await;
816 assert!(alerter.is_within_cooldown("tls:mnw").await);
817 }
818
819 #[tokio::test]
820 async fn tls_error_alert_cooldown_key() {
821 let pool = db::connect_in_memory().await.unwrap();
822 let alerter = test_alerter(pool.clone());
823
824 assert!(!alerter.is_within_cooldown("tls:mnw").await);
825 alerter.send_tls_error_alert("mnw", "makenot.work", "certificate expired").await;
826 assert!(alerter.is_within_cooldown("tls:mnw").await);
827 }
828
829 #[tokio::test]
830 async fn latency_drift_alert_cooldown_key() {
831 let pool = db::connect_in_memory().await.unwrap();
832 let alerter = test_alerter(pool.clone());
833
834 assert!(!alerter.is_within_cooldown("latency:mnw").await);
835 alerter.send_latency_drift_alert("mnw", "MakeNotWork", "avg 500ms, baseline 100ms").await;
836 assert!(alerter.is_within_cooldown("latency:mnw").await);
837 }
838
839 #[tokio::test]
840 async fn test_duration_drift_alert_cooldown_key() {
841 let pool = db::connect_in_memory().await.unwrap();
842 let alerter = test_alerter(pool.clone());
843
844 assert!(!alerter.is_within_cooldown("test_duration:mnw").await);
845 alerter.send_test_duration_drift_alert("mnw", "MakeNotWork", "drift: 120s vs 60s baseline").await;
846 assert!(alerter.is_within_cooldown("test_duration:mnw").await);
847 }
848
849 #[tokio::test]
850 async fn monitoring_offline_alert_cooldown_key() {
851 let pool = db::connect_in_memory().await.unwrap();
852 let alerter = test_alerter(pool.clone());
853
854 assert!(!alerter.is_within_cooldown("monitoring:self").await);
855 alerter.send_monitoring_offline_alert(3).await;
856 assert!(alerter.is_within_cooldown("monitoring:self").await);
857 }
858
859 #[tokio::test]
860 async fn route_recovery_does_not_start_cooldown() {
861 let pool = db::connect_in_memory().await.unwrap();
862 let alerter = test_alerter(pool.clone());
863
864 alerter.send_route_recovery_alert("mnw", "MakeNotWork", &["/health".to_string()]).await;
865 // Recovery alerts are excluded from cooldown lookups, so sending a recovery
866 // should NOT put the key into cooldown.
867 assert!(!alerter.is_within_cooldown("route:mnw").await);
868 }
869
870 #[tokio::test]
871 async fn dns_recovery_does_not_start_cooldown() {
872 let pool = db::connect_in_memory().await.unwrap();
873 let alerter = test_alerter(pool.clone());
874
875 alerter.send_dns_recovery_alert("mnw", "MakeNotWork").await;
876 assert!(!alerter.is_within_cooldown("dns:mnw").await);
877 }
878
879 #[tokio::test]
880 async fn tls_recovery_does_not_start_cooldown() {
881 let pool = db::connect_in_memory().await.unwrap();
882 let alerter = test_alerter(pool.clone());
883
884 alerter.send_tls_recovery("mnw", "MakeNotWork", 90).await;
885 assert!(!alerter.is_within_cooldown("tls:mnw").await);
886 }
887 }
888