| 1 |
use schemars::JsonSchema; |
| 2 |
use serde::Deserialize; |
| 3 |
use tracing::instrument; |
| 4 |
|
| 5 |
use crate::checks::http; |
| 6 |
use crate::db; |
| 7 |
use crate::types::{LatencyStats, TargetInfo}; |
| 8 |
|
| 9 |
use super::PomServer; |
| 10 |
|
| 11 |
#[derive(Debug, Deserialize, JsonSchema)] |
| 12 |
pub struct CheckHealthParams { |
| 13 |
|
| 14 |
pub target: Option<String>, |
| 15 |
} |
| 16 |
|
| 17 |
#[derive(Debug, Deserialize, JsonSchema)] |
| 18 |
pub struct HealthHistoryParams { |
| 19 |
|
| 20 |
pub target: Option<String>, |
| 21 |
|
| 22 |
pub limit: Option<i64>, |
| 23 |
} |
| 24 |
|
| 25 |
impl PomServer { |
| 26 |
#[instrument(skip_all)] |
| 27 |
pub async fn get_status_impl( |
| 28 |
&self, |
| 29 |
) -> crate::error::Result<String> { |
| 30 |
let mut status_parts = Vec::new(); |
| 31 |
|
| 32 |
for name in self.config.target_names() { |
| 33 |
let target = self.config.get_target(&name).unwrap(); |
| 34 |
let mut target_status = format!("## {name} ({})\n", target.label); |
| 35 |
|
| 36 |
|
| 37 |
if let Ok(Some(health)) = db::get_latest_health(&self.pool, &name).await { |
| 38 |
target_status.push_str(&format!( |
| 39 |
"Health: {} ({}ms, {})\n", |
| 40 |
health.status, health.response_time_ms, health.checked_at |
| 41 |
)); |
| 42 |
if let Some(details) = &health.details { |
| 43 |
if let Some(v) = &details.version { |
| 44 |
target_status.push_str(&format!("Version: {v}\n")); |
| 45 |
} |
| 46 |
if let Some(u) = &details.uptime { |
| 47 |
target_status.push_str(&format!("Uptime: {u}\n")); |
| 48 |
} |
| 49 |
} |
| 50 |
if let Some(err) = &health.error { |
| 51 |
target_status.push_str(&format!("Error: {err}\n")); |
| 52 |
} |
| 53 |
} else { |
| 54 |
target_status.push_str("Health: no data\n"); |
| 55 |
} |
| 56 |
|
| 57 |
|
| 58 |
let latency_cutoff = (chrono::Utc::now() - chrono::Duration::hours(24)).to_rfc3339(); |
| 59 |
if let Ok(times) = db::get_response_times(&self.pool, &name, &latency_cutoff).await { |
| 60 |
let operational_times: Vec<i64> = times.iter() |
| 61 |
.filter(|(_, ms)| *ms > 0) |
| 62 |
.map(|(_, ms)| *ms) |
| 63 |
.collect(); |
| 64 |
if let Some(l) = LatencyStats::from_times(&operational_times) { |
| 65 |
target_status.push_str(&format!( |
| 66 |
"Latency (24h): avg {:.0}ms, p95 {}ms, range {}-{}ms ({} samples)\n", |
| 67 |
l.avg_ms, l.p95_ms, l.min_ms, l.max_ms, l.sample_count |
| 68 |
)); |
| 69 |
} |
| 70 |
} |
| 71 |
|
| 72 |
|
| 73 |
if let Ok(Some(incident)) = db::get_open_incident(&self.pool, &name).await { |
| 74 |
target_status.push_str(&format!( |
| 75 |
"Incident: [ACTIVE] {} since {}\n", |
| 76 |
incident.to_status, incident.started_at |
| 77 |
)); |
| 78 |
} |
| 79 |
|
| 80 |
|
| 81 |
if let Ok(incidents) = db::get_recent_incidents(&self.pool, &name, 5).await { |
| 82 |
let closed: Vec<_> = incidents.iter().filter(|i| i.ended_at.is_some()).collect(); |
| 83 |
if !closed.is_empty() { |
| 84 |
target_status.push_str("Recent incidents:\n"); |
| 85 |
for inc in closed { |
| 86 |
let duration = inc.duration_secs.map(|d| format!(" ({d}s)")).unwrap_or_default(); |
| 87 |
target_status.push_str(&format!( |
| 88 |
" {} -> {} at {}{}\n", |
| 89 |
inc.from_status, inc.to_status, inc.started_at, duration |
| 90 |
)); |
| 91 |
} |
| 92 |
} |
| 93 |
} |
| 94 |
|
| 95 |
|
| 96 |
let latest_test = db::get_latest_test_run(&self.pool, &name).await.ok().flatten(); |
| 97 |
if let Some(ref test) = latest_test { |
| 98 |
let result = if test.passed { "PASSED" } else { "FAILED" }; |
| 99 |
target_status.push_str(&format!("Tests: {result}")); |
| 100 |
if let Some(d) = test.duration_secs { |
| 101 |
target_status.push_str(&format!(" ({d}s)")); |
| 102 |
} |
| 103 |
target_status.push_str(&format!(" ({})\n", test.started_at)); |
| 104 |
if let (Some(p), Some(f)) = (test.summary.total_passed, test.summary.total_failed) { |
| 105 |
target_status.push_str(&format!(" {p} passed, {f} failed\n")); |
| 106 |
} |
| 107 |
for step in &test.summary.steps { |
| 108 |
let mark = if step.passed { "PASS" } else { "FAIL" }; |
| 109 |
target_status.push_str(&format!(" {mark} {}\n", step.name)); |
| 110 |
} |
| 111 |
} else { |
| 112 |
target_status.push_str("Tests: no data\n"); |
| 113 |
} |
| 114 |
|
| 115 |
|
| 116 |
if let Some(tests_config) = &target.tests { |
| 117 |
let current_version = db::get_latest_health(&self.pool, &name) |
| 118 |
.await |
| 119 |
.ok() |
| 120 |
.flatten() |
| 121 |
.and_then(|h| h.details) |
| 122 |
.and_then(|d| d.version); |
| 123 |
|
| 124 |
let tested_version = if let Some(ref test) = latest_test { |
| 125 |
db::get_version_at_time(&self.pool, &name, &test.started_at) |
| 126 |
.await |
| 127 |
.unwrap_or(None) |
| 128 |
} else { |
| 129 |
None |
| 130 |
}; |
| 131 |
|
| 132 |
let staleness = http::compute_test_staleness( |
| 133 |
current_version.as_deref(), |
| 134 |
tested_version.as_deref(), |
| 135 |
latest_test.as_ref().map(|t| t.started_at.as_str()), |
| 136 |
tests_config.staleness_days, |
| 137 |
); |
| 138 |
|
| 139 |
if staleness.stale |
| 140 |
&& let Some(reason) = &staleness.reason |
| 141 |
{ |
| 142 |
target_status.push_str(&format!("Tests: STALE \u{2014} {reason}\n")); |
| 143 |
} |
| 144 |
} |
| 145 |
|
| 146 |
status_parts.push(target_status); |
| 147 |
} |
| 148 |
|
| 149 |
if status_parts.is_empty() { |
| 150 |
return Ok("No targets configured.".to_string()); |
| 151 |
} |
| 152 |
|
| 153 |
Ok(status_parts.join("\n")) |
| 154 |
} |
| 155 |
|
| 156 |
#[instrument(skip_all)] |
| 157 |
pub async fn check_health_impl( |
| 158 |
&self, |
| 159 |
params: CheckHealthParams, |
| 160 |
) -> crate::error::Result<String> { |
| 161 |
let targets: Vec<String> = match ¶ms.target { |
| 162 |
Some(t) => { |
| 163 |
if self.config.get_target(t).is_none() { |
| 164 |
return Ok(format!("Unknown target: {t}")); |
| 165 |
} |
| 166 |
vec![t.clone()] |
| 167 |
} |
| 168 |
None => self.config.target_names(), |
| 169 |
}; |
| 170 |
|
| 171 |
let mut results = Vec::new(); |
| 172 |
|
| 173 |
for name in &targets { |
| 174 |
let target = self.config.get_target(name).unwrap(); |
| 175 |
if let Some(health_config) = &target.health { |
| 176 |
let snapshot = http::check_health(name, health_config, health_config.expect.as_ref()).await; |
| 177 |
db::insert_health_check(&self.pool, &snapshot).await?; |
| 178 |
results.push(serde_json::to_string_pretty(&snapshot)?); |
| 179 |
} else { |
| 180 |
results.push(format!("{name}: no health endpoint configured")); |
| 181 |
} |
| 182 |
} |
| 183 |
|
| 184 |
Ok(results.join("\n\n")) |
| 185 |
} |
| 186 |
|
| 187 |
#[instrument(skip_all)] |
| 188 |
pub async fn health_history_impl( |
| 189 |
&self, |
| 190 |
params: HealthHistoryParams, |
| 191 |
) -> crate::error::Result<String> { |
| 192 |
let limit = params.limit.unwrap_or(10); |
| 193 |
let history = db::get_health_history(&self.pool, params.target.as_deref(), limit).await?; |
| 194 |
|
| 195 |
if history.is_empty() { |
| 196 |
return Ok("No health check history.".to_string()); |
| 197 |
} |
| 198 |
|
| 199 |
Ok(serde_json::to_string_pretty(&history)?) |
| 200 |
} |
| 201 |
|
| 202 |
#[instrument(skip_all)] |
| 203 |
pub async fn get_mesh_status_impl( |
| 204 |
&self, |
| 205 |
) -> crate::error::Result<String> { |
| 206 |
let listen = &self.config.serve.listen; |
| 207 |
let url = format!("http://{listen}/api/mesh"); |
| 208 |
|
| 209 |
let client = reqwest::Client::builder() |
| 210 |
.timeout(std::time::Duration::from_secs(5)) |
| 211 |
.build()?; |
| 212 |
|
| 213 |
let response = client.get(&url).send().await.map_err(|e| { |
| 214 |
crate::error::PomError::Config(format!( |
| 215 |
"Could not reach local PoM instance at {listen}: {e}" |
| 216 |
)) |
| 217 |
})?; |
| 218 |
|
| 219 |
let data: serde_json::Value = response.json().await?; |
| 220 |
|
| 221 |
let Some(instances) = data.get("instances").and_then(|v| v.as_object()) else { |
| 222 |
return Ok("No mesh data available. Is serve mode running?".to_string()); |
| 223 |
}; |
| 224 |
|
| 225 |
let mut output = String::from("# Peer Mesh Status\n\n"); |
| 226 |
|
| 227 |
for (name, instance_data) in instances { |
| 228 |
let instance = instance_data.get("instance"); |
| 229 |
let version = instance |
| 230 |
.and_then(|i| i.get("version")) |
| 231 |
.and_then(|v| v.as_str()) |
| 232 |
.unwrap_or("?"); |
| 233 |
|
| 234 |
output.push_str(&format!("## {name} (v{version})\n")); |
| 235 |
|
| 236 |
if let Some(targets) = instance_data.get("targets").and_then(|v| v.as_object()) { |
| 237 |
for (target_name, target_data) in targets { |
| 238 |
let status = target_data.get("status").and_then(|v| v.as_str()).unwrap_or("?"); |
| 239 |
let ms = target_data.get("response_time_ms").and_then(|v| v.as_i64()); |
| 240 |
let ms_str = ms.map(|m| format!(" ({m}ms)")).unwrap_or_default(); |
| 241 |
output.push_str(&format!("- Target {target_name}: {status}{ms_str}\n")); |
| 242 |
} |
| 243 |
} |
| 244 |
|
| 245 |
if let Some(peers) = instance_data.get("peers").and_then(|v| v.as_object()) { |
| 246 |
for (peer_name, peer_data) in peers { |
| 247 |
let status = peer_data.get("status").and_then(|v| v.as_str()).unwrap_or("?"); |
| 248 |
let latency = peer_data |
| 249 |
.get("latency_ms") |
| 250 |
.and_then(|v| v.as_u64()) |
| 251 |
.map(|ms| format!(" ({ms}ms)")) |
| 252 |
.unwrap_or_default(); |
| 253 |
output.push_str(&format!("- Peer {peer_name}: {status}{latency}\n")); |
| 254 |
} |
| 255 |
} |
| 256 |
|
| 257 |
if let Some(err) = instance_data.get("error").and_then(|v| v.as_str()) { |
| 258 |
output.push_str(&format!("- ({err})\n")); |
| 259 |
} |
| 260 |
|
| 261 |
output.push('\n'); |
| 262 |
} |
| 263 |
|
| 264 |
Ok(output) |
| 265 |
} |
| 266 |
|
| 267 |
#[instrument(skip_all)] |
| 268 |
pub async fn list_targets_impl( |
| 269 |
&self, |
| 270 |
) -> crate::error::Result<String> { |
| 271 |
let targets: Vec<TargetInfo> = self |
| 272 |
.config |
| 273 |
.target_names() |
| 274 |
.into_iter() |
| 275 |
.map(|name| { |
| 276 |
let t = self.config.get_target(&name).unwrap(); |
| 277 |
TargetInfo { |
| 278 |
name, |
| 279 |
label: t.label.clone(), |
| 280 |
has_health: t.health.is_some(), |
| 281 |
has_tests: t.tests.is_some(), |
| 282 |
} |
| 283 |
}) |
| 284 |
.collect(); |
| 285 |
|
| 286 |
Ok(serde_json::to_string_pretty(&targets)?) |
| 287 |
} |
| 288 |
} |
| 289 |
|