use std::time::Instant; use tracing::instrument; use crate::config::{HealthConfig, HealthExpectation}; use crate::types::{HealthDetails, HealthSnapshot, HealthStatus}; /// Maximum response body size we'll read into memory (10 MB). const MAX_RESPONSE_BYTES: u64 = 10 * 1024 * 1024; #[instrument(skip_all)] pub async fn check_health( target_name: &str, config: &HealthConfig, expect: Option<&HealthExpectation>, ) -> HealthSnapshot { let client = reqwest::Client::builder() .timeout(std::time::Duration::from_secs(config.timeout_secs)) .build() .unwrap_or_else(|_| reqwest::Client::new()); let start = Instant::now(); let checked_at = chrono::Utc::now().to_rfc3339(); match client.get(&config.url).send().await { Ok(response) => { let response_time_ms = start.elapsed().as_millis() as i64; let status_code = response.status().as_u16(); // Reject responses that declare a content-length exceeding our limit. if let Some(len) = response.content_length() && len > MAX_RESPONSE_BYTES { return HealthSnapshot { id: None, target: target_name.to_string(), status: HealthStatus::Degraded, checked_at, response_time_ms, details: None, error: Some(format!( "Response body too large: {len} bytes (limit: {MAX_RESPONSE_BYTES} bytes)" )), }; } // Read body with size cap (handles chunked/streaming responses without content-length). 
let body_result = match response.bytes().await { Ok(bytes) => { if bytes.len() as u64 > MAX_RESPONSE_BYTES { Err(format!( "Response body too large: {} bytes (limit: {MAX_RESPONSE_BYTES} bytes)", bytes.len() )) } else { String::from_utf8(bytes.to_vec()) .map_err(|e| format!("Response body not valid UTF-8: {e}")) } } Err(e) => Err(format!("Failed to read response body: {e}")), }; match body_result { Ok(body) => { let json: Option = serde_json::from_str(&body).ok(); let (mut status, details, mut error) = if let Some(ref json) = json { let (s, d) = classify_json_response(status_code, json); (s, Some(d), None) } else { (classify_non_json(status_code), None, Some("Failed to parse response as JSON".to_string())) }; // Apply expectation validation if let Some(exp) = expect { let failures = validate_expectations(exp, status_code, &body, json.as_ref()); if !failures.is_empty() { status = HealthStatus::Degraded; error = Some(failures.join("; ")); } else if json.is_none() { // Non-JSON response but all expectations passed — treat as operational status = HealthStatus::Operational; error = None; } } HealthSnapshot { id: None, target: target_name.to_string(), status, checked_at, response_time_ms, details, error, } } Err(e) => HealthSnapshot { id: None, target: target_name.to_string(), status: HealthStatus::Degraded, checked_at, response_time_ms, details: None, error: Some(e), }, } } Err(e) => { let response_time_ms = start.elapsed().as_millis() as i64; HealthSnapshot { id: None, target: target_name.to_string(), status: HealthStatus::Unreachable, checked_at, response_time_ms, details: None, error: Some(format!("{e}")), } } } } /// Walk a dot-separated path through nested JSON objects. pub fn resolve_json_path<'a>(value: &'a serde_json::Value, path: &str) -> Option<&'a serde_json::Value> { let mut current = value; for key in path.split('.') { current = current.get(key)?; } Some(current) } /// Validate response against expectations. Returns a list of failure descriptions. 
///
/// Three independent checks are applied, each only when configured on `expect`:
/// the exact HTTP status code, a substring of the raw body, and a set of
/// dot-path JSON fields. JSON string values are compared as-is; any other JSON
/// value is compared through its serialized text (e.g. `42`, `true`).
/// An empty result means every configured expectation passed.
pub fn validate_expectations(
    expect: &HealthExpectation,
    status_code: u16,
    body: &str,
    json: Option<&serde_json::Value>,
) -> Vec<String> {
    let mut failures = Vec::new();

    if let Some(expected_code) = expect.status_code.filter(|&code| code != status_code) {
        failures.push(format!("expected status {expected_code}, got {status_code}"));
    }

    if let Some(substring) = expect
        .body_contains
        .as_deref()
        .filter(|needle| !body.contains(*needle))
    {
        failures.push(format!("body missing expected substring \"{substring}\""));
    }

    if expect.json_fields.is_empty() {
        return failures;
    }

    // Field expectations can only be evaluated against a parsed JSON document.
    let Some(json) = json else {
        failures.push("expected JSON response for field validation, got non-JSON".to_string());
        return failures;
    };

    for (path, expected_value) in &expect.json_fields {
        let Some(actual) = resolve_json_path(json, path) else {
            failures.push(format!("json field \"{path}\" not found"));
            continue;
        };

        // Borrow string values directly; serialize everything else for comparison.
        let actual_str: std::borrow::Cow<'_, str> = match actual {
            serde_json::Value::String(s) => std::borrow::Cow::Borrowed(s.as_str()),
            other => std::borrow::Cow::Owned(other.to_string()),
        };

        if actual_str.as_ref() != expected_value.as_str() {
            failures.push(format!(
                "json field \"{path}\": expected \"{expected_value}\", got \"{actual_str}\""
            ));
        }
    }

    failures
}

/// Classify a JSON health response into status + details.
///
/// The top-level `"status"` string decides the outcome: `"operational"` and
/// `"degraded"` map to their namesake variants. Any other value (or a missing
/// or non-string field) yields `Degraded` for a 2xx status code and `Error`
/// otherwise. `version`/`uptime` are copied when they are JSON strings;
/// `checks`/`monitoring` are cloned verbatim when present.
pub fn classify_json_response(
    status_code: u16,
    json: &serde_json::Value,
) -> (HealthStatus, HealthDetails) {
    let text_field = |key: &str| json.get(key).and_then(|v| v.as_str()).map(String::from);

    let api_status = json
        .get("status")
        .and_then(|s| s.as_str())
        .unwrap_or("unknown");

    let status = match api_status {
        "operational" => HealthStatus::Operational,
        "degraded" => HealthStatus::Degraded,
        _ if (200..300).contains(&status_code) => HealthStatus::Degraded,
        _ => HealthStatus::Error,
    };

    let details = HealthDetails {
        version: text_field("version"),
        uptime: text_field("uptime"),
        checks: json.get("checks").cloned(),
        monitoring: json.get("monitoring").cloned(),
    };

    (status, details)
}

/// Classify a response that couldn't be parsed as JSON.
///
/// Only the HTTP status code is available as a signal here: a 2xx code yields
/// `Degraded`, every other code yields `Error`.
pub fn classify_non_json(status_code: u16) -> HealthStatus {
    match status_code {
        200..=299 => HealthStatus::Degraded,
        _ => HealthStatus::Error,
    }
}

/// Detect sustained latency drift by checking if all recent response times
/// exceed the baseline average by the given threshold multiplier.
///
/// Returns a description string if drift is detected, `None` otherwise.
/// Requires at least 10 baseline samples to avoid false positives.
///
/// The comparison is strict: a response time exactly equal to
/// `baseline.avg_ms * threshold` does not count as drift. An empty
/// `recent_times` slice never reports drift.
pub fn detect_latency_drift(
    recent_times: &[i64],
    baseline: &crate::types::LatencyStats,
    threshold: f64,
) -> Option<String> {
    if baseline.sample_count < 10 || recent_times.is_empty() {
        return None;
    }

    let drift_threshold = baseline.avg_ms * threshold;
    let sustained = recent_times.iter().all(|&t| t as f64 > drift_threshold);
    if !sustained {
        return None;
    }

    let avg_recent = recent_times.iter().sum::<i64>() as f64 / recent_times.len() as f64;
    Some(format!(
        "latency drift: last {} checks avg {:.0}ms (baseline avg {:.0}ms, threshold {:.0}ms)",
        recent_times.len(),
        avg_recent,
        baseline.avg_ms,
        drift_threshold,
    ))
}

/// Detect sustained test duration drift by checking if all recent durations
/// exceed the baseline average by the given threshold multiplier.
///
/// Returns a description string if drift is detected, `None` otherwise.
/// Requires at least `baseline_count` samples for the baseline window.
pub fn detect_test_duration_drift( durations: &[(String, i64)], baseline_count: usize, recent_count: usize, threshold: f64, ) -> Option { if durations.len() < baseline_count + recent_count { return None; } // durations are ordered most recent first from get_test_durations let recent = &durations[..recent_count]; let baseline = &durations[recent_count..]; if baseline.is_empty() { return None; } let baseline_avg = baseline.iter().map(|(_, d)| *d).sum::() as f64 / baseline.len() as f64; let drift_threshold = baseline_avg * threshold; let all_over = recent.iter().all(|(_, d)| *d as f64 > drift_threshold); if all_over { let recent_avg = recent.iter().map(|(_, d)| *d).sum::() as f64 / recent.len() as f64; Some(format!( "test duration drift: last {} runs avg {:.0}s (baseline avg {:.0}s, threshold {:.0}s)", recent_count, recent_avg, baseline_avg, drift_threshold, )) } else { None } } /// Compute test staleness from version and timing data. /// /// A target's tests are considered stale when: /// 1. No tests have ever been run /// 2. Tests are older than `staleness_days` /// 3. 
The deployed version has changed since the last test run pub fn compute_test_staleness( current_version: Option<&str>, tested_version: Option<&str>, last_test_at: Option<&str>, staleness_days: u64, ) -> crate::types::TestStaleness { let Some(last_test_at) = last_test_at else { return crate::types::TestStaleness { stale: true, reason: Some("no tests have been run".to_string()), current_version: current_version.map(String::from), tested_version: None, last_test_at: None, days_since_test: None, }; }; let days_since = chrono::DateTime::parse_from_rfc3339(last_test_at) .ok() .map(|dt| { let now = chrono::Utc::now(); (now - dt.with_timezone(&chrono::Utc)).num_days() }); if let Some(days) = days_since && days >= staleness_days as i64 { return crate::types::TestStaleness { stale: true, reason: Some(format!("tests are {days} days old (threshold: {staleness_days}d)")), current_version: current_version.map(String::from), tested_version: tested_version.map(String::from), last_test_at: Some(last_test_at.to_string()), days_since_test: Some(days), }; } if let (Some(current), Some(tested)) = (current_version, tested_version) && current != tested { return crate::types::TestStaleness { stale: true, reason: Some(format!("version changed: {tested} -> {current}")), current_version: Some(current.to_string()), tested_version: Some(tested.to_string()), last_test_at: Some(last_test_at.to_string()), days_since_test: days_since, }; } crate::types::TestStaleness { stale: false, reason: None, current_version: current_version.map(String::from), tested_version: tested_version.map(String::from), last_test_at: Some(last_test_at.to_string()), days_since_test: days_since, } } #[cfg(test)] mod tests { use super::*; use std::collections::HashMap; #[test] fn classify_operational() { let json = serde_json::json!({ "status": "operational", "version": "2.1.0", "uptime": "3d 12h", }); let (status, details) = classify_json_response(200, &json); assert_eq!(status, HealthStatus::Operational); 
assert_eq!(details.version.as_deref(), Some("2.1.0")); assert_eq!(details.uptime.as_deref(), Some("3d 12h")); } #[test] fn classify_degraded_explicit() { let json = serde_json::json!({ "status": "degraded" }); let (status, _) = classify_json_response(200, &json); assert_eq!(status, HealthStatus::Degraded); } #[test] fn classify_unknown_status_with_success_code() { let json = serde_json::json!({ "status": "starting_up" }); let (status, _) = classify_json_response(200, &json); assert_eq!(status, HealthStatus::Degraded); } #[test] fn classify_unknown_status_with_error_code() { let json = serde_json::json!({ "status": "starting_up" }); let (status, _) = classify_json_response(503, &json); assert_eq!(status, HealthStatus::Error); } #[test] fn classify_missing_status_field() { let json = serde_json::json!({ "version": "1.0.0" }); let (status, details) = classify_json_response(200, &json); assert_eq!(status, HealthStatus::Degraded); // "unknown" falls through assert_eq!(details.version.as_deref(), Some("1.0.0")); } #[test] fn classify_extracts_checks_and_monitoring() { let json = serde_json::json!({ "status": "operational", "checks": { "db": "ok", "redis": "ok" }, "monitoring": { "external": true }, }); let (_, details) = classify_json_response(200, &json); assert!(details.checks.is_some()); assert!(details.monitoring.is_some()); } #[test] fn classify_non_json_success() { assert_eq!(classify_non_json(200), HealthStatus::Degraded); assert_eq!(classify_non_json(204), HealthStatus::Degraded); } #[test] fn classify_non_json_error() { assert_eq!(classify_non_json(500), HealthStatus::Error); assert_eq!(classify_non_json(404), HealthStatus::Error); } // --- resolve_json_path --- #[test] fn resolve_json_path_top_level() { let json = serde_json::json!({"status": "operational"}); let val = resolve_json_path(&json, "status").unwrap(); assert_eq!(val, "operational"); } #[test] fn resolve_json_path_nested() { let json = serde_json::json!({"checks": {"db": "ok", "redis": "warn"}}); let 
val = resolve_json_path(&json, "checks.db").unwrap(); assert_eq!(val, "ok"); } #[test] fn resolve_json_path_deeply_nested() { let json = serde_json::json!({"a": {"b": {"c": 42}}}); let val = resolve_json_path(&json, "a.b.c").unwrap(); assert_eq!(val, 42); } #[test] fn resolve_json_path_missing() { let json = serde_json::json!({"status": "operational"}); assert!(resolve_json_path(&json, "missing").is_none()); } #[test] fn resolve_json_path_partial_missing() { let json = serde_json::json!({"checks": {"db": "ok"}}); assert!(resolve_json_path(&json, "checks.redis").is_none()); } // --- validate_expectations --- #[test] fn validate_status_code_match() { let expect = HealthExpectation { status_code: Some(200), ..Default::default() }; let failures = validate_expectations(&expect, 200, "", None); assert!(failures.is_empty()); } #[test] fn validate_status_code_mismatch() { let expect = HealthExpectation { status_code: Some(200), ..Default::default() }; let failures = validate_expectations(&expect, 503, "", None); assert_eq!(failures.len(), 1); assert!(failures[0].contains("expected status 200")); assert!(failures[0].contains("got 503")); } #[test] fn validate_body_contains_match() { let expect = HealthExpectation { body_contains: Some("operational".to_string()), ..Default::default() }; let failures = validate_expectations(&expect, 200, r#"{"status":"operational"}"#, None); assert!(failures.is_empty()); } #[test] fn validate_body_contains_mismatch() { let expect = HealthExpectation { body_contains: Some("operational".to_string()), ..Default::default() }; let failures = validate_expectations(&expect, 200, r#"{"status":"error"}"#, None); assert_eq!(failures.len(), 1); assert!(failures[0].contains("body missing")); } #[test] fn validate_json_fields_match() { let mut fields = HashMap::new(); fields.insert("status".to_string(), "operational".to_string()); fields.insert("checks.db".to_string(), "ok".to_string()); let expect = HealthExpectation { json_fields: fields, 
..Default::default() }; let json = serde_json::json!({"status": "operational", "checks": {"db": "ok"}}); let failures = validate_expectations(&expect, 200, "", Some(&json)); assert!(failures.is_empty()); } #[test] fn validate_json_fields_mismatch() { let mut fields = HashMap::new(); fields.insert("status".to_string(), "operational".to_string()); let expect = HealthExpectation { json_fields: fields, ..Default::default() }; let json = serde_json::json!({"status": "degraded"}); let failures = validate_expectations(&expect, 200, "", Some(&json)); assert_eq!(failures.len(), 1); assert!(failures[0].contains("expected \"operational\"")); assert!(failures[0].contains("got \"degraded\"")); } #[test] fn validate_json_field_missing() { let mut fields = HashMap::new(); fields.insert("checks.redis".to_string(), "ok".to_string()); let expect = HealthExpectation { json_fields: fields, ..Default::default() }; let json = serde_json::json!({"checks": {"db": "ok"}}); let failures = validate_expectations(&expect, 200, "", Some(&json)); assert_eq!(failures.len(), 1); assert!(failures[0].contains("not found")); } #[test] fn validate_json_fields_on_non_json() { let mut fields = HashMap::new(); fields.insert("status".to_string(), "ok".to_string()); let expect = HealthExpectation { json_fields: fields, ..Default::default() }; let failures = validate_expectations(&expect, 200, "not json", None); assert_eq!(failures.len(), 1); assert!(failures[0].contains("non-JSON")); } #[test] fn validate_mixed_failures() { let mut fields = HashMap::new(); fields.insert("status".to_string(), "operational".to_string()); let expect = HealthExpectation { status_code: Some(200), body_contains: Some("healthy".to_string()), json_fields: fields, }; let json = serde_json::json!({"status": "degraded"}); let failures = validate_expectations(&expect, 503, r#"{"status":"degraded"}"#, Some(&json)); assert_eq!(failures.len(), 3); // status code + body + json field } #[test] fn validate_empty_expectations_always_pass() { 
let expect = HealthExpectation::default(); let failures = validate_expectations(&expect, 500, "garbage", None); assert!(failures.is_empty()); } // --- detect_latency_drift --- fn baseline(avg: f64, count: i64) -> crate::types::LatencyStats { crate::types::LatencyStats { min_ms: avg as i64 / 2, max_ms: avg as i64 * 2, avg_ms: avg, p95_ms: (avg * 1.5) as i64, sample_count: count, } } #[test] fn drift_all_over_threshold() { let bl = baseline(100.0, 100); let recent = vec![250, 260, 270]; // all > 200 (100 * 2.0) let result = detect_latency_drift(&recent, &bl, 2.0); assert!(result.is_some()); assert!(result.unwrap().contains("latency drift")); } #[test] fn drift_one_under_threshold() { let bl = baseline(100.0, 100); let recent = vec![250, 150, 270]; // 150 < 200 let result = detect_latency_drift(&recent, &bl, 2.0); assert!(result.is_none()); } #[test] fn drift_insufficient_baseline() { let bl = baseline(100.0, 5); // < 10 samples let recent = vec![250, 260, 270]; let result = detect_latency_drift(&recent, &bl, 2.0); assert!(result.is_none()); } #[test] fn drift_empty_recent() { let bl = baseline(100.0, 100); let result = detect_latency_drift(&[], &bl, 2.0); assert!(result.is_none()); } #[test] fn drift_threshold_edge() { let bl = baseline(100.0, 100); // Exactly at threshold (200): not strictly over let recent = vec![200, 200, 200]; let result = detect_latency_drift(&recent, &bl, 2.0); assert!(result.is_none()); // must be strictly greater } #[test] fn drift_just_over_threshold() { let bl = baseline(100.0, 100); let recent = vec![201, 201, 201]; let result = detect_latency_drift(&recent, &bl, 2.0); assert!(result.is_some()); } // --- compute_test_staleness --- #[test] fn staleness_no_test_run() { let result = compute_test_staleness(Some("1.0.0"), None, None, 7); assert!(result.stale); assert_eq!(result.reason.as_deref(), Some("no tests have been run")); assert!(result.last_test_at.is_none()); } #[test] fn staleness_stale_by_age() { let old = (chrono::Utc::now() - 
chrono::Duration::days(10)).to_rfc3339(); let result = compute_test_staleness(Some("1.0.0"), Some("1.0.0"), Some(&old), 7); assert!(result.stale); let reason = result.reason.unwrap(); assert!(reason.contains("days old"), "reason was: {reason}"); assert!(reason.contains("threshold: 7d"), "reason was: {reason}"); } #[test] fn staleness_stale_by_version() { let recent = chrono::Utc::now().to_rfc3339(); let result = compute_test_staleness(Some("1.1.0"), Some("1.0.0"), Some(&recent), 7); assert!(result.stale); let reason = result.reason.unwrap(); assert!(reason.contains("version changed: 1.0.0 -> 1.1.0"), "reason was: {reason}"); } #[test] fn staleness_fresh() { let recent = chrono::Utc::now().to_rfc3339(); let result = compute_test_staleness(Some("1.0.0"), Some("1.0.0"), Some(&recent), 7); assert!(!result.stale); assert!(result.reason.is_none()); } #[test] fn staleness_missing_versions_not_stale() { let recent = chrono::Utc::now().to_rfc3339(); let result = compute_test_staleness(None, None, Some(&recent), 7); assert!(!result.stale); } #[test] fn staleness_at_threshold_is_stale() { // Pins `days >= staleness_days` (vs `>`). days == 7, threshold == 7 // must report stale. let exactly = (chrono::Utc::now() - chrono::Duration::days(7)).to_rfc3339(); let result = compute_test_staleness(Some("1.0.0"), Some("1.0.0"), Some(&exactly), 7); assert!(result.stale, "days == threshold must be stale"); } #[test] fn staleness_one_day_under_threshold_is_fresh() { let just_under = (chrono::Utc::now() - chrono::Duration::days(6) - chrono::Duration::hours(20)).to_rfc3339(); let result = compute_test_staleness(Some("1.0.0"), Some("1.0.0"), Some(&just_under), 7); assert!(!result.stale, "days < threshold must not be stale"); } // --- classify_non_json: 200..300 range boundaries --- #[test] fn classify_non_json_status_boundaries() { // Pins the `(200..300).contains(&status_code)` range. 
assert_eq!(classify_non_json(199), HealthStatus::Error, "199 is below 2xx"); assert_eq!(classify_non_json(200), HealthStatus::Degraded, "200 is start of 2xx"); assert_eq!(classify_non_json(299), HealthStatus::Degraded, "299 is end of 2xx"); assert_eq!(classify_non_json(300), HealthStatus::Error, "300 is start of 3xx"); } #[test] fn classify_json_unknown_status_3xx_is_error() { // Pins the `_ if (200..300).contains(&status_code)` guard in // classify_json_response: status_code 300 with unknown api_status // must fall through to Error, not Degraded. let json = serde_json::json!({ "status": "starting_up" }); assert_eq!(classify_json_response(300, &json).0, HealthStatus::Error); assert_eq!(classify_json_response(199, &json).0, HealthStatus::Error); } // --- detect_test_duration_drift --- fn dur(name: &str, secs: i64) -> (String, i64) { (name.to_string(), secs) } #[test] fn duration_drift_all_recent_over_threshold() { // 3 recent + 4 baseline, all recent > 2x baseline avg → drift detected. // baseline_count=4, recent_count=3, total=7 let durations = vec![ dur("a", 200), dur("b", 210), dur("c", 220), // recent (most recent first) dur("d", 80), dur("e", 100), dur("f", 90), dur("g", 110), // baseline avg = 95 ]; let result = detect_test_duration_drift(&durations, 4, 3, 2.0); let msg = result.expect("drift should be detected"); assert!(msg.contains("test duration drift")); assert!(msg.contains("last 3 runs")); } #[test] fn duration_drift_one_recent_under_threshold_no_drift() { // One recent (105) is below 2x baseline avg (95 * 2 = 190). let durations = vec![ dur("a", 250), dur("b", 105), dur("c", 270), dur("d", 80), dur("e", 100), dur("f", 90), dur("g", 110), ]; assert!(detect_test_duration_drift(&durations, 4, 3, 2.0).is_none()); } #[test] fn duration_drift_insufficient_samples() { // Total samples < baseline_count + recent_count → no drift, return None. 
let durations = vec![dur("a", 500), dur("b", 500), dur("c", 100)]; assert!(detect_test_duration_drift(&durations, 4, 3, 2.0).is_none()); } #[test] fn duration_drift_at_threshold_is_not_drift() { // Pins `*d as f64 > drift_threshold` (strictly greater). At exactly // 2x baseline avg, must NOT report drift. // baseline avg = 100, threshold = 100 * 2.0 = 200. Recent values == 200. let durations = vec![ dur("a", 200), dur("b", 200), dur("c", 200), dur("d", 100), dur("e", 100), dur("f", 100), dur("g", 100), ]; assert!(detect_test_duration_drift(&durations, 4, 3, 2.0).is_none()); } #[test] fn duration_drift_just_over_threshold_detects() { let durations = vec![ dur("a", 201), dur("b", 201), dur("c", 201), dur("d", 100), dur("e", 100), dur("f", 100), dur("g", 100), ]; assert!(detect_test_duration_drift(&durations, 4, 3, 2.0).is_some()); } #[test] fn duration_drift_zero_baseline_count_returns_none() { // Edge: baseline_count=0 means `baseline` slice is empty; should None // (pins the `if baseline.is_empty()` early return). let durations = vec![dur("a", 100), dur("b", 200)]; assert!(detect_test_duration_drift(&durations, 0, 2, 2.0).is_none()); } // --- resolve_json_path edge cases --- #[test] fn resolve_json_path_empty_path_segment_is_none() { // path "a..b" splits to ["a", "", "b"]; `.get("")` returns None. let json = serde_json::json!({"a": {"b": 1}}); assert!(resolve_json_path(&json, "a..b").is_none()); } #[test] fn resolve_json_path_through_non_object_is_none() { // Trying to descend into a string value should return None. let json = serde_json::json!({"name": "hello"}); assert!(resolve_json_path(&json, "name.length").is_none()); } }