Skip to main content

max / makenotwork

26.8 KB · 747 lines History Blame Raw
1 //! System health dashboard and JSON monitoring endpoint.
2 //!
3 //! Two layers:
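//! - `GET /health` (HTML): runs full live checks (DB queries, S3 probe). HTTP endpoint
//!   self-tests are currently disabled, so the dashboard's public test list is always empty.
//! - `GET /api/health` (JSON): reads cached results from the background monitor's database.
//!   Fast (<10ms), no live probes except a single `SELECT 1` fallback used before the first
//!   monitor snapshot exists. This is what PoM and other external services should poll.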
7
8 mod pom;
9 use pom::*;
10
11 use std::sync::Arc;
12
13 use axum::extract::State;
14 use axum::http::StatusCode;
15 use axum::response::IntoResponse;
16 use axum::Json;
17 use tower_sessions::Session;
18
19 use crate::{
20 db,
21 error::Result,
22 helpers::get_csrf_token,
23 templates::*,
24 AppState,
25 };
26
/// Render an elapsed [`std::time::Duration`] as a compact uptime label.
///
/// Sub-minute precision is discarded. The largest non-zero unit decides the shape of the
/// output: `"{d}d {h}h {m}m"`, `"{h}h {m}m"`, or just `"{m}m"`.
fn format_uptime(d: std::time::Duration) -> String {
    const SECS_PER_MINUTE: u64 = 60;
    const SECS_PER_HOUR: u64 = 60 * SECS_PER_MINUTE;
    const SECS_PER_DAY: u64 = 24 * SECS_PER_HOUR;

    let elapsed = d.as_secs();
    let whole_days = elapsed / SECS_PER_DAY;
    let hours_of_day = (elapsed % SECS_PER_DAY) / SECS_PER_HOUR;
    let minutes_of_hour = (elapsed % SECS_PER_HOUR) / SECS_PER_MINUTE;

    match (whole_days, hours_of_day) {
        // Less than an hour: minutes only.
        (0, 0) => format!("{}m", minutes_of_hour),
        // Less than a day: hours and minutes.
        (0, h) => format!("{}h {}m", h, minutes_of_hour),
        // A day or more: always show all three units, even when hours is zero.
        (days, h) => format!("{}d {}h {}m", days, h, minutes_of_hour),
    }
}
41
/// Three-level summary of overall service health.
///
/// Every variant has three fixed renderings: a headline for the HTML dashboard, a CSS
/// class for styling, and a lowercase label for the JSON API.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum OverallStatus {
    Operational,
    Degraded,
    Error,
}

impl OverallStatus {
    /// All renderings of this status as `(headline, css class, api label)`.
    ///
    /// Keeping them in one table makes it hard to add a variant and forget one rendering.
    fn renderings(self) -> (&'static str, &'static str, &'static str) {
        match self {
            Self::Operational => ("All systems operational", "status-ok", "operational"),
            Self::Degraded => ("Degraded performance", "status-warn", "degraded"),
            Self::Error => ("Issues detected", "status-error", "error"),
        }
    }

    /// Human-readable headline shown on the dashboard.
    fn label(self) -> &'static str {
        self.renderings().0
    }

    /// CSS class applied to the dashboard's status indicator.
    fn css_class(self) -> &'static str {
        self.renderings().1
    }

    /// Lowercase machine-readable label used in the JSON response.
    fn api_label(self) -> &'static str {
        self.renderings().2
    }
}
75
76 /// All computed health data, used by the HTML dashboard handler.
77 #[allow(dead_code)] // fields read transitively via status strings
78 struct HealthData {
79 // Overall
80 overall: OverallStatus,
81 uptime: String,
82 version: String,
83 check_duration_ms: u64,
84
85 // Database
86 db_ok: bool,
87 db_status: &'static str,
88 db_status_class: &'static str,
89 db_pool_active: u32,
90 db_pool_max: u32,
91 stats: db::health::DbHealthStats,
92
93 // Sessions
94 session_ok: bool,
95 session_status: &'static str,
96 session_status_class: &'static str,
97
98 // Storage
99 storage_configured: bool,
100 s3_reachable: bool,
101 storage_status: &'static str,
102 storage_status_class: &'static str,
103 storage_bucket: String,
104 storage_region: String,
105
106 // Stripe
107 stripe_configured: bool,
108 stripe_status: &'static str,
109 stripe_status_class: &'static str,
110 stripe_mode: &'static str,
111
112 // Email
113 #[allow(dead_code)] // used only by the HTML template via other fields
114 email_configured: bool,
115 email_status: &'static str,
116 email_status_class: &'static str,
117 email_provider: &'static str,
118
119 // SyncKit
120 synckit_configured: bool,
121 synckit_status: &'static str,
122 synckit_status_class: &'static str,
123
124 // Security & Monitoring
125 admin_configured: bool,
126
127 // Background monitor
128 monitor_enabled: bool,
129 monitor_interval_secs: u64,
130 alerts_configured: bool,
131 uptime_24h: Option<f64>,
132 uptime_7d: Option<f64>,
133 last_incident: Option<String>,
134 recent_snapshots: Vec<db::monitor::DbHealthSnapshot>,
135
136 // Server
137 environment: &'static str,
138 host: Arc<str>,
139 started_at: String,
140
141 // Tests
142 public_tests: Vec<HealthTest>,
143 db_tests: Vec<HealthTest>,
144
145 // External monitoring (PoM)
146 pom_available: bool,
147 pom_status: Option<String>,
148 pom_status_class: Option<String>,
149 pom_response_time_ms: Option<i64>,
150 pom_checked_at: Option<String>,
151 pom_uptime_24h: Option<f64>,
152 pom_uptime_7d: Option<f64>,
153 pom_recent: Vec<PomSnapshotJson>,
154 pom_incident: Option<PomIncidentJson>,
155 pom_recent_incidents: Vec<PomIncidentJson>,
156 pom_avg_latency: Option<String>,
157 pom_p95_latency: Option<String>,
158 pom_routes_total: usize,
159 pom_routes_ok: usize,
160 pom_routes_failed: Vec<String>,
161
162 // Privacy & Compliance
163 privacy_jobs: Vec<db::scheduler_jobs::SchedulerJobRun>,
164 }
165
166 /// Format scheduler job runs into display structs for the health template.
167 fn format_privacy_jobs(
168 jobs: &[db::scheduler_jobs::SchedulerJobRun],
169 now: chrono::DateTime<chrono::Utc>,
170 ) -> Vec<PrivacyJobDisplay> {
171 // Jobs we want to display, with human names and expected frequency in hours
172 let job_meta: &[(&str, &str, i64)] = &[
173 ("ip_scrub", "Session IP scrub (30-day)", 26),
174 ("session_prune", "Session prune (90-day)", 26),
175 ("terminated_account_cleanup", "Terminated account cleanup (30-day)", 26),
176 ("content_removal_cleanup", "Content removal cleanup (90-day)", 26),
177 ];
178
179 job_meta
180 .iter()
181 .map(|(key, description, max_hours)| {
182 let run = jobs.iter().find(|j| j.job_name == *key);
183 match run {
184 Some(r) => {
185 let age = now.signed_duration_since(r.last_ran_at);
186 let last_ran = if age.num_hours() < 1 {
187 format!("{}m ago", age.num_minutes().max(0))
188 } else if age.num_hours() < 48 {
189 format!("{}h ago", age.num_hours())
190 } else {
191 format!("{}d ago", age.num_days())
192 };
193 let status_class = if age.num_hours() <= *max_hours {
194 "status-ok"
195 } else {
196 "status-warn"
197 };
198 PrivacyJobDisplay {
199 name: key.to_string(),
200 description: description.to_string(),
201 last_ran,
202 rows_affected: r.rows_affected.to_string(),
203 status_class: status_class.to_string(),
204 }
205 }
206 None => PrivacyJobDisplay {
207 name: key.to_string(),
208 description: description.to_string(),
209 last_ran: "never".to_string(),
210 rows_affected: "-".to_string(),
211 status_class: "status-unknown".to_string(),
212 },
213 }
214 })
215 .collect()
216 }
217
218 /// Run all health checks and return computed data.
219 /// Used by the HTML dashboard; runs live probes (DB, S3, HTTP self-tests).
220 async fn collect_health(state: &AppState) -> HealthData {
221 use std::time::Instant;
222
223 let check_start = Instant::now();
224
225 // Helper to run a timed test
226 async fn run_test<F, Fut>(name: &str, f: F) -> HealthTest
227 where
228 F: FnOnce() -> Fut,
229 Fut: std::future::Future<Output = bool>,
230 {
231 let start = Instant::now();
232 let passed = f().await;
233 HealthTest {
234 name: name.to_string(),
235 passed,
236 latency_ms: start.elapsed().as_millis() as u64,
237 }
238 }
239
240 // Database health checks
241 let db_test_users = run_test("Count users", || async {
242 sqlx::query_scalar::<_, i64>("SELECT COUNT(*) FROM users")
243 .fetch_one(&state.db)
244 .await
245 .is_ok()
246 }).await;
247
248 let db_test_projects = run_test("Count projects", || async {
249 sqlx::query_scalar::<_, i64>("SELECT COUNT(*) FROM projects")
250 .fetch_one(&state.db)
251 .await
252 .is_ok()
253 }).await;
254
255 let db_test_items = run_test("Count items", || async {
256 sqlx::query_scalar::<_, i64>("SELECT COUNT(*) FROM items")
257 .fetch_one(&state.db)
258 .await
259 .is_ok()
260 }).await;
261
262 let db_test_transactions = run_test("Count transactions", || async {
263 sqlx::query_scalar::<_, i64>("SELECT COUNT(*) FROM transactions")
264 .fetch_one(&state.db)
265 .await
266 .is_ok()
267 }).await;
268
269 // Get actual counts
270 let stats = db::health::get_health_stats(&state.db).await.unwrap_or(db::health::DbHealthStats {
271 user_count: 0,
272 project_count: 0,
273 item_count: 0,
274 active_session_count: 0,
275 active_creator_count: 0,
276 transaction_count: 0,
277 blog_post_count: 0,
278 sync_app_count: 0,
279 sync_device_count: 0,
280 sync_log_entries: 0,
281 });
282
283 // Database status
284 let db_ok = db_test_users.passed && db_test_projects.passed;
285 let db_status = if db_ok { "Connected" } else { "Error" };
286 let db_status_class = if db_ok { "status-ok" } else { "status-error" };
287
288 // Pool info
289 let pool_max = state.db.size();
290 let pool_idle = state.db.num_idle();
291 let pool_active = pool_max.saturating_sub(pool_idle as u32);
292
293 // S3 storage status with connectivity check
294 let storage_configured = state.s3.is_some();
295 let s3_reachable = if let Some(ref s3) = state.s3 {
296 s3.check_connectivity().await.is_ok()
297 } else {
298 false
299 };
300 let (storage_status, storage_status_class) = if storage_configured && s3_reachable {
301 ("Connected", "status-ok")
302 } else if storage_configured {
303 ("Configured (unreachable)", "status-warn")
304 } else {
305 ("Not configured", "status-warn")
306 };
307 let (storage_bucket, storage_region) = if let Some(ref storage) = state.config.storage {
308 (storage.bucket.clone(), storage.region.clone())
309 } else {
310 (String::new(), String::new())
311 };
312
313 // Stripe status
314 let stripe_configured = state.stripe.is_some();
315 let stripe_status = if stripe_configured { "Configured" } else { "Not configured" };
316 let stripe_status_class = if stripe_configured { "status-ok" } else { "status-warn" };
317 let stripe_mode = if stripe_configured {
318 if state.config.stripe.as_ref().map(|s| s.secret_key.starts_with("sk_live")).unwrap_or(false) {
319 "Live"
320 } else {
321 "Test"
322 }
323 } else {
324 "-"
325 };
326
327 // Email status
328 let email_configured = std::env::var("POSTMARK_TOKEN").is_ok();
329 let email_status = if email_configured { "Configured" } else { "Dev mode (logging)" };
330 let email_status_class = if email_configured { "status-ok" } else { "status-warn" };
331 let email_provider = if email_configured { "Postmark" } else { "Console" };
332
333 // Session status
334 let session_ok = stats.active_session_count > 0;
335 let session_status = if session_ok { "Active" } else { "Error" };
336 let session_status_class = if session_ok { "status-ok" } else { "status-error" };
337
338 // SyncKit status
339 let synckit_configured = state.config.synckit_jwt_secret.is_some();
340 let synckit_status = if synckit_configured { "Configured" } else { "Not configured" };
341 let synckit_status_class = if synckit_configured { "status-ok" } else { "status-warn" };
342
343 // Security & Monitoring
344 let admin_configured = state.config.admin_user_id.is_some();
345
346 // Overall tri-state status
347 let overall = if !db_ok || !session_ok {
348 OverallStatus::Error
349 } else if storage_configured && !s3_reachable {
350 OverallStatus::Degraded
351 } else {
352 OverallStatus::Operational
353 };
354
355 // Environment
356 let environment = if cfg!(debug_assertions) { "Development" } else { "Production" };
357 let host = state.config.host_url.clone();
358
359 // Real uptime from AppState
360 let uptime = format_uptime(state.start_instant.elapsed());
361 let started_at = state.started_at.format("%Y-%m-%d %H:%M:%S UTC").to_string();
362
363 // Version with git hash
364 let version = match option_env!("GIT_HASH") {
365 Some(hash) if !hash.is_empty() => format!("{} ({})", env!("CARGO_PKG_VERSION"), hash),
366 _ => env!("CARGO_PKG_VERSION").to_string(),
367 };
368
369 // Internal route-existence checks (no HTTP self-call — avoids pool contention)
370 let public_tests: Vec<HealthTest> = vec![];
371
372 let db_tests = vec![
373 db_test_users,
374 db_test_projects,
375 db_test_items,
376 db_test_transactions,
377 ];
378
379 let check_duration_ms = check_start.elapsed().as_millis() as u64;
380
381 // External monitoring (PoM) — best-effort, don't delay the page
382 let pom = fetch_pom_status().await;
383 let (pom_available, pom_status, pom_status_class, pom_response_time_ms, pom_checked_at, pom_uptime_24h, pom_uptime_7d, pom_recent, pom_incident, pom_recent_incidents, pom_avg_latency, pom_p95_latency, pom_routes_total, pom_routes_ok, pom_routes_failed) =
384 if let Some(ref pom) = pom {
385 let latest = pom.latest.as_ref();
386 let status = latest.map(|s| s.status.clone());
387 let status_class = status.as_deref().map(|s| match s {
388 "operational" => "status-ok".to_string(),
389 "degraded" => "status-warn".to_string(),
390 _ => "status-error".to_string(),
391 });
392 let avg_latency = pom.latency_24h.as_ref().map(|l| format!("{:.0}ms", l.avg_ms));
393 let p95_latency = pom.latency_24h.as_ref().map(|l| format!("{}ms", l.p95_ms));
394 let routes_total = pom.route_status.len();
395 let routes_ok = pom.route_status.iter().filter(|r| r.ok).count();
396 let routes_failed: Vec<String> = pom.route_status.iter().filter(|r| !r.ok).map(|r| r.path.clone()).collect();
397 (
398 true,
399 status,
400 status_class,
401 latest.map(|s| s.response_time_ms),
402 latest.map(|s| format_pom_timestamp(&s.checked_at)),
403 pom.uptime_24h,
404 pom.uptime_7d,
405 pom.recent.clone(),
406 pom.current_incident.clone(),
407 pom.incidents.iter().filter(|i| i.ended_at.is_some()).cloned().collect(),
408 avg_latency,
409 p95_latency,
410 routes_total,
411 routes_ok,
412 routes_failed,
413 )
414 } else {
415 (false, None, None, None, None, None, None, Vec::new(), None, Vec::new(), None, None, 0, 0, Vec::new())
416 };
417
418 // Background monitor data (best-effort — don't fail the page if queries fail)
419 let monitor_interval_secs = std::env::var("HEALTH_CHECK_INTERVAL_SECS")
420 .ok()
421 .and_then(|v| v.parse::<u64>().ok())
422 .unwrap_or(crate::constants::HEALTH_CHECK_INTERVAL_SECS);
423
424 let alerts_configured = std::env::var("ALERT_EMAIL").is_ok();
425
426 let uptime_24h = db::monitor::get_health_uptime_percent(&state.db, 24)
427 .await
428 .unwrap_or(None);
429 let uptime_7d = db::monitor::get_health_uptime_percent(&state.db, 168)
430 .await
431 .unwrap_or(None);
432 let last_incident = db::monitor::get_last_incident(&state.db)
433 .await
434 .unwrap_or(None)
435 .map(|dt| dt.format("%Y-%m-%d %H:%M UTC").to_string());
436 let recent_snapshots = db::monitor::get_recent_health_history(&state.db, 10)
437 .await
438 .unwrap_or_default();
439
440 HealthData {
441 overall,
442 uptime,
443 version,
444 check_duration_ms,
445 db_ok,
446 db_status,
447 db_status_class,
448 db_pool_active: pool_active,
449 db_pool_max: pool_max,
450 stats,
451 session_ok,
452 session_status,
453 session_status_class,
454 storage_configured,
455 s3_reachable,
456 storage_status,
457 storage_status_class,
458 storage_bucket,
459 storage_region,
460 stripe_configured,
461 stripe_status,
462 stripe_status_class,
463 stripe_mode,
464 email_configured,
465 email_status,
466 email_status_class,
467 email_provider,
468 synckit_configured,
469 synckit_status,
470 synckit_status_class,
471 admin_configured,
472 monitor_enabled: true,
473 monitor_interval_secs,
474 alerts_configured,
475 uptime_24h,
476 uptime_7d,
477 last_incident,
478 recent_snapshots,
479 environment,
480 host,
481 started_at,
482 public_tests,
483 db_tests,
484 pom_available,
485 pom_status,
486 pom_status_class,
487 pom_response_time_ms,
488 pom_checked_at,
489 pom_uptime_24h,
490 pom_uptime_7d,
491 pom_recent,
492 pom_incident,
493 pom_recent_incidents,
494 pom_avg_latency,
495 pom_p95_latency,
496 pom_routes_total,
497 pom_routes_ok,
498 pom_routes_failed,
499 privacy_jobs: db::scheduler_jobs::get_job_runs(&state.db).await.unwrap_or_default(),
500 }
501 }
502
503 /// Render the system health dashboard with database, storage, and service status.
504 #[tracing::instrument(skip_all, name = "health::health")]
505 pub(super) async fn health(
506 State(state): State<AppState>,
507 session: Session,
508 ) -> Result<impl IntoResponse> {
509 let data = collect_health(&state).await;
510 let now = chrono::Utc::now();
511
512 let pool_utilization = if data.db_pool_max > 0 {
513 format!("{}%", (data.db_pool_active as f64 / data.db_pool_max as f64 * 100.0) as u32)
514 } else {
515 "0%".to_string()
516 };
517
518 Ok(HealthTemplate {
519 csrf_token: get_csrf_token(&session).await,
520 session_user: None,
521 overall_status: data.overall.label().to_string(),
522 overall_status_class: data.overall.css_class().to_string(),
523 uptime: data.uptime,
524 version: data.version,
525 check_duration_ms: data.check_duration_ms,
526 db_status: data.db_status.to_string(),
527 db_status_class: data.db_status_class.to_string(),
528 db_pool_size: data.db_pool_max.to_string(),
529 db_pool_max: data.db_pool_max.to_string(),
530 db_pool_utilization: pool_utilization,
531 db_active_connections: data.db_pool_active.to_string(),
532 user_count: data.stats.user_count.to_string(),
533 project_count: data.stats.project_count.to_string(),
534 item_count: data.stats.item_count.to_string(),
535 transaction_count: data.stats.transaction_count.to_string(),
536 blog_post_count: data.stats.blog_post_count.to_string(),
537 session_status: data.session_status.to_string(),
538 session_status_class: data.session_status_class.to_string(),
539 active_sessions: data.stats.active_session_count.to_string(),
540 storage_status: data.storage_status.to_string(),
541 storage_status_class: data.storage_status_class.to_string(),
542 storage_configured: data.storage_configured,
543 storage_bucket: data.storage_bucket,
544 storage_region: data.storage_region,
545 stripe_status: data.stripe_status.to_string(),
546 stripe_status_class: data.stripe_status_class.to_string(),
547 stripe_configured: data.stripe_configured,
548 stripe_mode: data.stripe_mode.to_string(),
549 connected_creators: data.stats.active_creator_count.to_string(),
550 email_status: data.email_status.to_string(),
551 email_status_class: data.email_status_class.to_string(),
552 email_provider: data.email_provider.to_string(),
553 synckit_status: data.synckit_status.to_string(),
554 synckit_status_class: data.synckit_status_class.to_string(),
555 synckit_configured: data.synckit_configured,
556 synckit_app_count: data.stats.sync_app_count.to_string(),
557 synckit_device_count: data.stats.sync_device_count.to_string(),
558 synckit_log_entries: data.stats.sync_log_entries.to_string(),
559 admin_status: if data.admin_configured { "Configured".to_string() } else { "Not configured".to_string() },
560 monitor_enabled: data.monitor_enabled,
561 monitor_interval_secs: data.monitor_interval_secs,
562 alerts_configured: data.alerts_configured,
563 uptime_24h: data.uptime_24h.map(|v| format!("{:.1}", v)),
564 uptime_7d: data.uptime_7d.map(|v| format!("{:.1}", v)),
565 last_incident: data.last_incident,
566 recent_snapshots: data.recent_snapshots.into_iter().map(|s| {
567 let status_class = match s.status.as_str() {
568 "operational" => "status-ok".to_string(),
569 "degraded" => "status-warn".to_string(),
570 _ => "status-error".to_string(),
571 };
572 HealthSnapshotDisplay {
573 checked_at: s.checked_at.format("%H:%M:%S UTC").to_string(),
574 status: s.status,
575 status_class,
576 duration_ms: s.check_duration_ms,
577 }
578 }).collect(),
579 environment: data.environment.to_string(),
580 host: data.host,
581 started_at: data.started_at,
582 public_tests: data.public_tests,
583 db_tests: data.db_tests,
584 generated_at: now.format("%Y-%m-%d %H:%M:%S UTC").to_string(),
585 pom_available: data.pom_available,
586 pom_status: data.pom_status,
587 pom_status_class: data.pom_status_class,
588 pom_response_time_ms: data.pom_response_time_ms,
589 pom_checked_at: data.pom_checked_at,
590 pom_uptime_24h: data.pom_uptime_24h.map(|v| format!("{:.1}", v)),
591 pom_uptime_7d: data.pom_uptime_7d.map(|v| format!("{:.1}", v)),
592 pom_recent: data.pom_recent.into_iter().map(|s| {
593 let status_class = match s.status.as_str() {
594 "operational" => "status-ok".to_string(),
595 "degraded" => "status-warn".to_string(),
596 _ => "status-error".to_string(),
597 };
598 PomSnapshotDisplay {
599 checked_at: format_pom_timestamp(&s.checked_at),
600 status: s.status,
601 status_class,
602 response_time_ms: s.response_time_ms,
603 }
604 }).collect(),
605 pom_avg_latency: data.pom_avg_latency,
606 pom_p95_latency: data.pom_p95_latency,
607 pom_incident_active: data.pom_incident.is_some(),
608 pom_incident_status: data.pom_incident.as_ref().map(|i| i.to_status.clone()),
609 pom_incident_since: data.pom_incident.as_ref().map(|i| format_pom_timestamp(&i.started_at)),
610 pom_recent_incidents: data.pom_recent_incidents.into_iter().map(|i| PomIncidentDisplay {
611 started_at: format_pom_timestamp(&i.started_at),
612 duration: i.duration_secs.map(format_incident_duration).unwrap_or_else(|| "-".to_string()),
613 to_status: i.to_status,
614 }).collect(),
615 pom_routes_total: data.pom_routes_total,
616 pom_routes_ok: data.pom_routes_ok,
617 pom_routes_failed: data.pom_routes_failed,
618 privacy_jobs: format_privacy_jobs(&data.privacy_jobs, now),
619 })
620 }
621
622 // ============================================================================
623 // JSON Health Endpoint (fast — reads from background monitor cache)
624 // ============================================================================
625
// Note: the HTML /health page has full diagnostics. The JSON endpoint is
// intentionally minimal (overall status, crate version, and a database flag) so that
// uptime, git hash, and service configuration are not exposed to unauthenticated callers.
629
630 /// `GET /api/health`: fast JSON health endpoint.
631 ///
632 /// Reads the latest snapshot from the background monitor's database instead of
633 /// running live probes. Returns 200 if operational or degraded, 503 if error.
634 #[tracing::instrument(skip_all, name = "health::health_json")]
635 pub(super) async fn health_json(
636 State(state): State<AppState>,
637 ) -> impl IntoResponse {
638 // Read the latest snapshot from the background monitor (single DB row)
639 let latest = db::monitor::get_recent_health_history(&state.db, 1)
640 .await
641 .unwrap_or_default();
642
643 // Use cached monitor data only — no live probes (fast <10ms as documented).
644 // Falls back to a single DB probe only when no monitor snapshots exist yet
645 // (fresh startup before the first monitor tick).
646 let (overall, db_ok) = if let Some(snap) = latest.first() {
647 let status = match snap.status.as_str() {
648 "operational" => OverallStatus::Operational,
649 "degraded" => OverallStatus::Degraded,
650 _ => OverallStatus::Error,
651 };
652 let db_healthy = status != OverallStatus::Error;
653 (status, db_healthy)
654 } else {
655 // No monitor data yet — single minimal probe
656 let db_ok = sqlx::query_scalar::<_, i32>("SELECT 1")
657 .fetch_one(&state.db)
658 .await
659 .is_ok();
660 let status = if db_ok { OverallStatus::Operational } else { OverallStatus::Error };
661 (status, db_ok)
662 };
663
664 let http_status = if overall == OverallStatus::Error {
665 StatusCode::SERVICE_UNAVAILABLE
666 } else {
667 StatusCode::OK
668 };
669
670 (http_status, Json(health_json_body(overall, db_ok)))
671 }
672
673 /// Build the JSON body for the `/api/health` response.
674 ///
675 /// Kept as a pure function (no AppState, no DB) so the schema-drift guard
676 /// test in this module can exercise it directly. PoM polls this endpoint
677 /// and runs key-by-key assertions from `pom/deploy/pom-hetzner.toml`; the
678 /// guard test validates that every asserted path still resolves here.
679 fn health_json_body(overall: OverallStatus, db_ok: bool) -> serde_json::Value {
680 serde_json::json!({
681 "status": overall.api_label(),
682 "version": env!("CARGO_PKG_VERSION"),
683 "checks": {
684 "database": db_ok,
685 },
686 })
687 }
688
#[cfg(test)]
mod tests {
    use super::*;

    /// Check `format_uptime` against a table of `(seconds, expected label)` pairs.
    fn assert_uptime_cases(cases: &[(u64, &str)]) {
        for &(secs, expected) in cases {
            assert_eq!(format_uptime(std::time::Duration::from_secs(secs)), expected);
        }
    }

    /// The three statuses, in declaration order, for table-driven assertions.
    const STATUSES: [OverallStatus; 3] = [
        OverallStatus::Operational,
        OverallStatus::Degraded,
        OverallStatus::Error,
    ];

    #[test]
    fn format_uptime_minutes_only() {
        assert_uptime_cases(&[(0, "0m"), (59, "0m"), (60, "1m"), (300, "5m")]);
    }

    #[test]
    fn format_uptime_hours_and_minutes() {
        assert_uptime_cases(&[(3600, "1h 0m"), (3660, "1h 1m"), (7200, "2h 0m")]);
    }

    #[test]
    fn format_uptime_days() {
        assert_uptime_cases(&[
            (86400, "1d 0h 0m"),
            (90061, "1d 1h 1m"),
            (259200, "3d 0h 0m"),
        ]);
    }

    #[test]
    fn overall_status_labels() {
        let expected = [
            "All systems operational",
            "Degraded performance",
            "Issues detected",
        ];
        for (status, want) in STATUSES.iter().zip(expected.iter()) {
            assert_eq!(status.label(), *want);
        }
    }

    #[test]
    fn overall_status_css_classes() {
        let expected = ["status-ok", "status-warn", "status-error"];
        for (status, want) in STATUSES.iter().zip(expected.iter()) {
            assert_eq!(status.css_class(), *want);
        }
    }

    #[test]
    fn overall_status_api_labels() {
        let expected = ["operational", "degraded", "error"];
        for (status, want) in STATUSES.iter().zip(expected.iter()) {
            assert_eq!(status.api_label(), *want);
        }
    }

    /// Schema-drift guard for the `mnw` target. See `shared/pom-contract/`.
    ///
    /// Builds a healthy response body and checks that every path PoM asserts on for this
    /// target still resolves in it.
    #[test]
    fn pom_hetzner_health_expectations_resolve() {
        let healthy_body = health_json_body(OverallStatus::Operational, true);
        pom_contract::assert_health_expectations_resolve(
            "../pom/deploy/pom-hetzner.toml",
            "mnw",
            &healthy_body,
        );
    }
}
747