Skip to main content

max / makenotwork

fix: suppress spurious 'recovered' alert on monitor startup

The health monitor initializes previous_status as None, so the first successful check was treated as a None->Operational transition and fired a 'MNW recovered — all services operational' email on every clean restart. Skip the alert (and WAM ticket / subscriber notifications) for the bootstrap case while still recording previous_status so the next real degradation alerts normally.
Author: Max J. <87768334+MaxJMath@users.noreply.github.com> · 2026-05-21 03:19 UTC
Commit: 0467b78bf2b8a7ce50674542ba63564d1669826f
Parent: 39d5998
1 file changed, +7 insertions, -4 deletions
@@ -131,9 +131,12 @@ pub fn spawn_monitor(
131 131 // Update Prometheus DB pool gauges on every tick
132 132 crate::metrics::record_db_pool_stats(&state.db);
133 133
134 - // Log status changes
134 + // Log status changes. Skip the bootstrap None->Operational transition
135 + // so clean restarts don't fire a spurious "recovered" alert.
135 136 let status_changed = previous_status != Some(snap.status);
136 - if status_changed {
137 + let is_bootstrap_ok =
138 + previous_status.is_none() && snap.status == MonitorStatus::Operational;
139 + if status_changed && !is_bootstrap_ok {
137 140 match snap.status {
138 141 MonitorStatus::Operational => {
139 142 if previous_status.is_some() {
@@ -222,10 +225,10 @@ pub fn spawn_monitor(
222 225 );
223 226 wam.create_ticket(&title, Some(&body), priority, "health-status-change", None).await;
224 227 }
225 -
226 - previous_status = Some(snap.status);
227 228 }
228 229
230 + previous_status = Some(snap.status);
231 +
229 232 // DB pool pressure check (>80% active connections), with cooldown
230 233 {
231 234 let pool_size = state.db.size();