use schemars::JsonSchema; use serde::Deserialize; use tracing::instrument; use crate::checks::http; use crate::db; use crate::types::{LatencyStats, TargetInfo}; use super::PomServer; #[derive(Debug, Deserialize, JsonSchema)] pub struct CheckHealthParams { /// Target name to check (omit to check all targets) pub target: Option, } #[derive(Debug, Deserialize, JsonSchema)] pub struct HealthHistoryParams { /// Filter by target name pub target: Option, /// Number of results to return (default 10) pub limit: Option, } impl PomServer { #[instrument(skip_all)] pub async fn get_status_impl( &self, ) -> crate::error::Result { let mut status_parts = Vec::new(); for name in self.config.target_names() { let target = self.config.get_target(&name).unwrap(); let mut target_status = format!("## {name} ({})\n", target.label); // Latest health if let Ok(Some(health)) = db::get_latest_health(&self.pool, &name).await { target_status.push_str(&format!( "Health: {} ({}ms, {})\n", health.status, health.response_time_ms, health.checked_at )); if let Some(details) = &health.details { if let Some(v) = &details.version { target_status.push_str(&format!("Version: {v}\n")); } if let Some(u) = &details.uptime { target_status.push_str(&format!("Uptime: {u}\n")); } } if let Some(err) = &health.error { target_status.push_str(&format!("Error: {err}\n")); } } else { target_status.push_str("Health: no data\n"); } // 24h latency stats let latency_cutoff = (chrono::Utc::now() - chrono::Duration::hours(24)).to_rfc3339(); if let Ok(times) = db::get_response_times(&self.pool, &name, &latency_cutoff).await { let operational_times: Vec = times.iter() .filter(|(_, ms)| *ms > 0) .map(|(_, ms)| *ms) .collect(); if let Some(l) = LatencyStats::from_times(&operational_times) { target_status.push_str(&format!( "Latency (24h): avg {:.0}ms, p95 {}ms, range {}-{}ms ({} samples)\n", l.avg_ms, l.p95_ms, l.min_ms, l.max_ms, l.sample_count )); } } // Active incident if let Ok(Some(incident)) = db::get_open_incident(&self.pool, &name).await { target_status.push_str(&format!( "Incident: [ACTIVE] {} since {}\n", incident.to_status, incident.started_at )); } // Recent incidents if let Ok(incidents) = db::get_recent_incidents(&self.pool, &name, 5).await { let closed: Vec<_> = incidents.iter().filter(|i| i.ended_at.is_some()).collect(); if !closed.is_empty() { target_status.push_str("Recent incidents:\n"); for inc in closed { let duration = inc.duration_secs.map(|d| format!(" ({d}s)")).unwrap_or_default(); target_status.push_str(&format!( " {} -> {} at {}{}\n", inc.from_status, inc.to_status, inc.started_at, duration )); } } } // Latest test run let latest_test = db::get_latest_test_run(&self.pool, &name).await.ok().flatten(); if let Some(ref test) = latest_test { let result = if test.passed { "PASSED" } else { "FAILED" }; target_status.push_str(&format!("Tests: {result}")); if let Some(d) = test.duration_secs { target_status.push_str(&format!(" ({d}s)")); } target_status.push_str(&format!(" ({})\n", test.started_at)); if let (Some(p), Some(f)) = (test.summary.total_passed, test.summary.total_failed) { target_status.push_str(&format!(" {p} passed, {f} failed\n")); } for step in &test.summary.steps { let mark = if step.passed { "PASS" } else { "FAIL" }; target_status.push_str(&format!(" {mark} {}\n", step.name)); } } else { target_status.push_str("Tests: no data\n"); } // Test staleness if let Some(tests_config) = &target.tests { let current_version = db::get_latest_health(&self.pool, &name) .await .ok() .flatten() .and_then(|h| h.details) .and_then(|d| d.version); let tested_version = if let Some(ref test) = latest_test { db::get_version_at_time(&self.pool, &name, &test.started_at) .await .unwrap_or(None) } else { None }; let staleness = http::compute_test_staleness( current_version.as_deref(), tested_version.as_deref(), latest_test.as_ref().map(|t| t.started_at.as_str()), tests_config.staleness_days, ); if staleness.stale && let Some(reason) = &staleness.reason { target_status.push_str(&format!("Tests: STALE \u{2014} {reason}\n")); } } status_parts.push(target_status); } if status_parts.is_empty() { return Ok("No targets configured.".to_string()); } Ok(status_parts.join("\n")) } #[instrument(skip_all)] pub async fn check_health_impl( &self, params: CheckHealthParams, ) -> crate::error::Result { let targets: Vec = match ¶ms.target { Some(t) => { if self.config.get_target(t).is_none() { return Ok(format!("Unknown target: {t}")); } vec![t.clone()] } None => self.config.target_names(), }; let mut results = Vec::new(); for name in &targets { let target = self.config.get_target(name).unwrap(); if let Some(health_config) = &target.health { let snapshot = http::check_health(name, health_config, health_config.expect.as_ref()).await; db::insert_health_check(&self.pool, &snapshot).await?; results.push(serde_json::to_string_pretty(&snapshot)?); } else { results.push(format!("{name}: no health endpoint configured")); } } Ok(results.join("\n\n")) } #[instrument(skip_all)] pub async fn health_history_impl( &self, params: HealthHistoryParams, ) -> crate::error::Result { let limit = params.limit.unwrap_or(10); let history = db::get_health_history(&self.pool, params.target.as_deref(), limit).await?; if history.is_empty() { return Ok("No health check history.".to_string()); } Ok(serde_json::to_string_pretty(&history)?) } #[instrument(skip_all)] pub async fn get_mesh_status_impl( &self, ) -> crate::error::Result { let listen = &self.config.serve.listen; let url = format!("http://{listen}/api/mesh"); let client = reqwest::Client::builder() .timeout(std::time::Duration::from_secs(5)) .build()?; let response = client.get(&url).send().await.map_err(|e| { crate::error::PomError::Config(format!( "Could not reach local PoM instance at {listen}: {e}" )) })?; let data: serde_json::Value = response.json().await?; let Some(instances) = data.get("instances").and_then(|v| v.as_object()) else { return Ok("No mesh data available. Is serve mode running?".to_string()); }; let mut output = String::from("# Peer Mesh Status\n\n"); for (name, instance_data) in instances { let instance = instance_data.get("instance"); let version = instance .and_then(|i| i.get("version")) .and_then(|v| v.as_str()) .unwrap_or("?"); output.push_str(&format!("## {name} (v{version})\n")); if let Some(targets) = instance_data.get("targets").and_then(|v| v.as_object()) { for (target_name, target_data) in targets { let status = target_data.get("status").and_then(|v| v.as_str()).unwrap_or("?"); let ms = target_data.get("response_time_ms").and_then(|v| v.as_i64()); let ms_str = ms.map(|m| format!(" ({m}ms)")).unwrap_or_default(); output.push_str(&format!("- Target {target_name}: {status}{ms_str}\n")); } } if let Some(peers) = instance_data.get("peers").and_then(|v| v.as_object()) { for (peer_name, peer_data) in peers { let status = peer_data.get("status").and_then(|v| v.as_str()).unwrap_or("?"); let latency = peer_data .get("latency_ms") .and_then(|v| v.as_u64()) .map(|ms| format!(" ({ms}ms)")) .unwrap_or_default(); output.push_str(&format!("- Peer {peer_name}: {status}{latency}\n")); } } if let Some(err) = instance_data.get("error").and_then(|v| v.as_str()) { output.push_str(&format!("- ({err})\n")); } output.push('\n'); } Ok(output) } #[instrument(skip_all)] pub async fn list_targets_impl( &self, ) -> crate::error::Result { let targets: Vec = self .config .target_names() .into_iter() .map(|name| { let t = self.config.get_target(&name).unwrap(); TargetInfo { name, label: t.label.clone(), has_health: t.health.is_some(), has_tests: t.tests.is_some(), } }) .collect(); Ok(serde_json::to_string_pretty(&targets)?) } }