Skip to main content

max / makenotwork

shared/pom-contract: schema-drift guard for /api/health producers

New test-only crate exposing one helper:

    pom_contract::assert_health_expectations_resolve(
        "../pom/deploy/pom-hetzner.toml",
        "<target>",
        &body,
    );

It loads the PoM config, pulls targets.<target>.health.expect.json_fields, walks each dot-path against the supplied body using PoM's exact path semantics, and panics with a precise diagnostic on drift (missing field, value mismatch, or unknown target). 5 internal unit tests covering each case.

Producer integration (both refactored to expose a pure JSON-body builder so the guard can run without spinning up the app):

- server: health_json now delegates to health_json_body(overall, db_ok); guard test wired against the "mnw" target
- multithreaded: health handler now delegates to health_body(db_ok); guard test wired against the "mt" target

Both guards verified to fail on intentional drift and pass on the real code path. Pattern documented in MNW/CLAUDE.md (gitignored, kept local).

Catches the same class of bug that produced the 13-day MNW Degraded and 33-day MT Degraded false-positive incidents.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
Author: Max J. <87768334+MaxJMath@users.noreply.github.com> · 2026-05-25 19:56 UTC
Commit: 87e3e66bb28cb22be348210054f8e581d273910c
Parent: 62876b1
9 files changed, +504 insertions, -6 deletions
@@ -1080,7 +1080,7 @@ dependencies = [
1080 1080
1081 1081 [[package]]
1082 1082 name = "docengine"
1083 - version = "0.3.1"
1083 + version = "0.3.4"
1084 1084 dependencies = [
1085 1085 "ammonia",
1086 1086 "pulldown-cmark",
@@ -2241,6 +2241,7 @@ dependencies = [
2241 2241 "http-body-util",
2242 2242 "mt-core",
2243 2243 "mt-db",
2244 + "pom-contract",
2244 2245 "pulldown-cmark",
2245 2246 "rand 0.8.5",
2246 2247 "regex-lite",
@@ -2561,6 +2562,14 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
2561 2562 checksum = "b4596b6d070b27117e987119b4dac604f3c58cfb0b191112e24771b2faeac1a6"
2562 2563
2563 2564 [[package]]
2565 + name = "pom-contract"
2566 + version = "0.1.0"
2567 + dependencies = [
2568 + "serde_json",
2569 + "toml",
2570 + ]
2571 +
2572 + [[package]]
2564 2573 name = "portable-atomic"
2565 2574 version = "1.13.1"
2566 2575 source = "registry+https://github.com/rust-lang/crates.io-index"
@@ -3179,6 +3188,15 @@ dependencies = [
3179 3188 ]
3180 3189
3181 3190 [[package]]
3191 + name = "serde_spanned"
3192 + version = "0.6.9"
3193 + source = "registry+https://github.com/rust-lang/crates.io-index"
3194 + checksum = "bf41e0cfaf7226dca15e8197172c295a782857fcb97fad1808a166870dee75a3"
3195 + dependencies = [
3196 + "serde",
3197 + ]
3198 +
3199 + [[package]]
3182 3200 name = "serde_urlencoded"
3183 3201 version = "0.7.1"
3184 3202 source = "registry+https://github.com/rust-lang/crates.io-index"
@@ -3847,6 +3865,47 @@ dependencies = [
3847 3865 ]
3848 3866
3849 3867 [[package]]
3868 + name = "toml"
3869 + version = "0.8.23"
3870 + source = "registry+https://github.com/rust-lang/crates.io-index"
3871 + checksum = "dc1beb996b9d83529a9e75c17a1686767d148d70663143c7854d8b4a09ced362"
3872 + dependencies = [
3873 + "serde",
3874 + "serde_spanned",
3875 + "toml_datetime",
3876 + "toml_edit",
3877 + ]
3878 +
3879 + [[package]]
3880 + name = "toml_datetime"
3881 + version = "0.6.11"
3882 + source = "registry+https://github.com/rust-lang/crates.io-index"
3883 + checksum = "22cddaf88f4fbc13c51aebbf5f8eceb5c7c5a9da2ac40a13519eb5b0a0e8f11c"
3884 + dependencies = [
3885 + "serde",
3886 + ]
3887 +
3888 + [[package]]
3889 + name = "toml_edit"
3890 + version = "0.22.27"
3891 + source = "registry+https://github.com/rust-lang/crates.io-index"
3892 + checksum = "41fe8c660ae4257887cf66394862d21dbca4a6ddd26f04a3560410406a2f819a"
3893 + dependencies = [
3894 + "indexmap",
3895 + "serde",
3896 + "serde_spanned",
3897 + "toml_datetime",
3898 + "toml_write",
3899 + "winnow",
3900 + ]
3901 +
3902 + [[package]]
3903 + name = "toml_write"
3904 + version = "0.1.2"
3905 + source = "registry+https://github.com/rust-lang/crates.io-index"
3906 + checksum = "5d99f8c9a7727884afe522e9bd5edbfc91a3312b36a77b5fb8926e4c31a41801"
3907 +
3908 + [[package]]
3850 3909 name = "tower"
3851 3910 version = "0.5.3"
3852 3911 source = "registry+https://github.com/rust-lang/crates.io-index"
@@ -97,3 +97,4 @@ time = "0.3"
97 97 [dev-dependencies]
98 98 http-body-util = "0.1"
99 99 wiremock = "0.6"
100 + pom-contract = { path = "../shared/pom-contract" }
@@ -314,13 +314,38 @@ async fn health(
314 314 .await
315 315 .is_ok();
316 316
317 - let status = if db_ok { "operational" } else { "degraded" };
317 + Json(health_body(db_ok))
318 + }
318 319
319 - Json(serde_json::json!({
320 + /// Build the JSON body for the `/api/health` response.
321 + ///
322 + /// Kept as a pure function (no AppState, no DB) so the schema-drift guard
323 + /// test in this module can exercise it directly. PoM polls this endpoint
324 + /// and runs key-by-key assertions from `pom/deploy/pom-hetzner.toml`; the
325 + /// guard test validates that every asserted path still resolves here.
326 + fn health_body(db_ok: bool) -> serde_json::Value {
327 + let status = if db_ok { "operational" } else { "degraded" };
328 + serde_json::json!({
320 329 "status": status,
321 330 "version": env!("CARGO_PKG_VERSION"),
322 331 "database": db_ok,
323 - }))
332 + })
333 + }
334 +
335 + #[cfg(test)]
336 + mod health_tests {
337 + use super::health_body;
338 +
339 + /// Schema-drift guard for the `mt` target. See `shared/pom-contract/`.
340 + #[test]
341 + fn pom_hetzner_health_expectations_resolve() {
342 + let body = health_body(true);
343 + pom_contract::assert_health_expectations_resolve(
344 + "../pom/deploy/pom-hetzner.toml",
345 + "mt",
346 + &body,
347 + );
348 + }
324 349 }
325 350
326 351 // ============================================================================
@@ -4171,6 +4171,7 @@ dependencies = [
4171 4171 "metrics-exporter-prometheus",
4172 4172 "object 0.37.3",
4173 4173 "openssl",
4174 + "pom-contract",
4174 4175 "proptest",
4175 4176 "rand 0.9.2",
4176 4177 "regex",
@@ -5049,6 +5050,14 @@ dependencies = [
5049 5050 ]
5050 5051
5051 5052 [[package]]
5053 + name = "pom-contract"
5054 + version = "0.1.0"
5055 + dependencies = [
5056 + "serde_json",
5057 + "toml",
5058 + ]
5059 +
5060 + [[package]]
5052 5061 name = "portable-atomic"
5053 5062 version = "1.13.1"
5054 5063 source = "registry+https://github.com/rust-lang/crates.io-index"
@@ -153,3 +153,4 @@ webauthn-authenticator-rs = { version = "0.5", features = ["softpasskey"] }
153 153 tempfile = "3"
154 154 proptest = "1"
155 155 wiremock = "0.6"
156 + pom-contract = { path = "../shared/pom-contract" }
@@ -667,13 +667,23 @@ pub(super) async fn health_json(
667 667 StatusCode::OK
668 668 };
669 669
670 - (http_status, Json(serde_json::json!({
670 + (http_status, Json(health_json_body(overall, db_ok)))
671 + }
672 +
673 + /// Build the JSON body for the `/api/health` response.
674 + ///
675 + /// Kept as a pure function (no AppState, no DB) so the schema-drift guard
676 + /// test in this module can exercise it directly. PoM polls this endpoint
677 + /// and runs key-by-key assertions from `pom/deploy/pom-hetzner.toml`; the
678 + /// guard test validates that every asserted path still resolves here.
679 + fn health_json_body(overall: OverallStatus, db_ok: bool) -> serde_json::Value {
680 + serde_json::json!({
671 681 "status": overall.api_label(),
672 682 "version": env!("CARGO_PKG_VERSION"),
673 683 "checks": {
674 684 "database": db_ok,
675 685 },
676 - })))
686 + })
677 687 }
678 688
679 689 #[cfg(test)]
@@ -722,4 +732,15 @@ mod tests {
722 732 assert_eq!(OverallStatus::Degraded.api_label(), "degraded");
723 733 assert_eq!(OverallStatus::Error.api_label(), "error");
724 734 }
735 +
736 + /// Schema-drift guard for the `mnw` target. See `shared/pom-contract/`.
737 + #[test]
738 + fn pom_hetzner_health_expectations_resolve() {
739 + let body = health_json_body(OverallStatus::Operational, true);
740 + pom_contract::assert_health_expectations_resolve(
741 + "../pom/deploy/pom-hetzner.toml",
742 + "mnw",
743 + &body,
744 + );
745 + }
725 746 }
@@ -0,0 +1,187 @@
1 + # This file is automatically @generated by Cargo.
2 + # It is not intended for manual editing.
3 + version = 4
4 +
5 + [[package]]
6 + name = "equivalent"
7 + version = "1.0.2"
8 + source = "registry+https://github.com/rust-lang/crates.io-index"
9 + checksum = "877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f"
10 +
11 + [[package]]
12 + name = "hashbrown"
13 + version = "0.17.1"
14 + source = "registry+https://github.com/rust-lang/crates.io-index"
15 + checksum = "ed5909b6e89a2db4456e54cd5f673791d7eca6732202bbf2a9cc504fe2f9b84a"
16 +
17 + [[package]]
18 + name = "indexmap"
19 + version = "2.14.0"
20 + source = "registry+https://github.com/rust-lang/crates.io-index"
21 + checksum = "d466e9454f08e4a911e14806c24e16fba1b4c121d1ea474396f396069cf949d9"
22 + dependencies = [
23 + "equivalent",
24 + "hashbrown",
25 + ]
26 +
27 + [[package]]
28 + name = "itoa"
29 + version = "1.0.18"
30 + source = "registry+https://github.com/rust-lang/crates.io-index"
31 + checksum = "8f42a60cbdf9a97f5d2305f08a87dc4e09308d1276d28c869c684d7777685682"
32 +
33 + [[package]]
34 + name = "memchr"
35 + version = "2.8.0"
36 + source = "registry+https://github.com/rust-lang/crates.io-index"
37 + checksum = "f8ca58f447f06ed17d5fc4043ce1b10dd205e060fb3ce5b979b8ed8e59ff3f79"
38 +
39 + [[package]]
40 + name = "pom-contract"
41 + version = "0.1.0"
42 + dependencies = [
43 + "serde_json",
44 + "toml",
45 + ]
46 +
47 + [[package]]
48 + name = "proc-macro2"
49 + version = "1.0.106"
50 + source = "registry+https://github.com/rust-lang/crates.io-index"
51 + checksum = "8fd00f0bb2e90d81d1044c2b32617f68fcb9fa3bb7640c23e9c748e53fb30934"
52 + dependencies = [
53 + "unicode-ident",
54 + ]
55 +
56 + [[package]]
57 + name = "quote"
58 + version = "1.0.45"
59 + source = "registry+https://github.com/rust-lang/crates.io-index"
60 + checksum = "41f2619966050689382d2b44f664f4bc593e129785a36d6ee376ddf37259b924"
61 + dependencies = [
62 + "proc-macro2",
63 + ]
64 +
65 + [[package]]
66 + name = "serde"
67 + version = "1.0.228"
68 + source = "registry+https://github.com/rust-lang/crates.io-index"
69 + checksum = "9a8e94ea7f378bd32cbbd37198a4a91436180c5bb472411e48b5ec2e2124ae9e"
70 + dependencies = [
71 + "serde_core",
72 + ]
73 +
74 + [[package]]
75 + name = "serde_core"
76 + version = "1.0.228"
77 + source = "registry+https://github.com/rust-lang/crates.io-index"
78 + checksum = "41d385c7d4ca58e59fc732af25c3983b67ac852c1a25000afe1175de458b67ad"
79 + dependencies = [
80 + "serde_derive",
81 + ]
82 +
83 + [[package]]
84 + name = "serde_derive"
85 + version = "1.0.228"
86 + source = "registry+https://github.com/rust-lang/crates.io-index"
87 + checksum = "d540f220d3187173da220f885ab66608367b6574e925011a9353e4badda91d79"
88 + dependencies = [
89 + "proc-macro2",
90 + "quote",
91 + "syn",
92 + ]
93 +
94 + [[package]]
95 + name = "serde_json"
96 + version = "1.0.150"
97 + source = "registry+https://github.com/rust-lang/crates.io-index"
98 + checksum = "e8014e44b4736ed0538adeecded0fce2a272f22dc9578a7eb6b2d9993c74cfb9"
99 + dependencies = [
100 + "itoa",
101 + "memchr",
102 + "serde",
103 + "serde_core",
104 + "zmij",
105 + ]
106 +
107 + [[package]]
108 + name = "serde_spanned"
109 + version = "0.6.9"
110 + source = "registry+https://github.com/rust-lang/crates.io-index"
111 + checksum = "bf41e0cfaf7226dca15e8197172c295a782857fcb97fad1808a166870dee75a3"
112 + dependencies = [
113 + "serde",
114 + ]
115 +
116 + [[package]]
117 + name = "syn"
118 + version = "2.0.117"
119 + source = "registry+https://github.com/rust-lang/crates.io-index"
120 + checksum = "e665b8803e7b1d2a727f4023456bbbbe74da67099c585258af0ad9c5013b9b99"
121 + dependencies = [
122 + "proc-macro2",
123 + "quote",
124 + "unicode-ident",
125 + ]
126 +
127 + [[package]]
128 + name = "toml"
129 + version = "0.8.23"
130 + source = "registry+https://github.com/rust-lang/crates.io-index"
131 + checksum = "dc1beb996b9d83529a9e75c17a1686767d148d70663143c7854d8b4a09ced362"
132 + dependencies = [
133 + "serde",
134 + "serde_spanned",
135 + "toml_datetime",
136 + "toml_edit",
137 + ]
138 +
139 + [[package]]
140 + name = "toml_datetime"
141 + version = "0.6.11"
142 + source = "registry+https://github.com/rust-lang/crates.io-index"
143 + checksum = "22cddaf88f4fbc13c51aebbf5f8eceb5c7c5a9da2ac40a13519eb5b0a0e8f11c"
144 + dependencies = [
145 + "serde",
146 + ]
147 +
148 + [[package]]
149 + name = "toml_edit"
150 + version = "0.22.27"
151 + source = "registry+https://github.com/rust-lang/crates.io-index"
152 + checksum = "41fe8c660ae4257887cf66394862d21dbca4a6ddd26f04a3560410406a2f819a"
153 + dependencies = [
154 + "indexmap",
155 + "serde",
156 + "serde_spanned",
157 + "toml_datetime",
158 + "toml_write",
159 + "winnow",
160 + ]
161 +
162 + [[package]]
163 + name = "toml_write"
164 + version = "0.1.2"
165 + source = "registry+https://github.com/rust-lang/crates.io-index"
166 + checksum = "5d99f8c9a7727884afe522e9bd5edbfc91a3312b36a77b5fb8926e4c31a41801"
167 +
168 + [[package]]
169 + name = "unicode-ident"
170 + version = "1.0.24"
171 + source = "registry+https://github.com/rust-lang/crates.io-index"
172 + checksum = "e6e4313cd5fcd3dad5cafa179702e2b244f760991f45397d14d4ebf38247da75"
173 +
174 + [[package]]
175 + name = "winnow"
176 + version = "0.7.15"
177 + source = "registry+https://github.com/rust-lang/crates.io-index"
178 + checksum = "df79d97927682d2fd8adb29682d1140b343be4ac0f08fd68b7765d9c059d3945"
179 + dependencies = [
180 + "memchr",
181 + ]
182 +
183 + [[package]]
184 + name = "zmij"
185 + version = "1.0.21"
186 + source = "registry+https://github.com/rust-lang/crates.io-index"
187 + checksum = "b8848ee67ecc8aedbaf3e4122217aff892639231befc6a1b58d29fff4c2cabaa"
@@ -0,0 +1,9 @@
1 + [package]
2 + name = "pom-contract"
3 + version = "0.1.0"
4 + edition = "2024"
5 + description = "Test-only helper for asserting that a service's /api/health response still matches PoM's json_fields expectations from pom-hetzner.toml."
6 +
7 + [dependencies]
8 + serde_json = "1"
9 + toml = "0.8"
@@ -0,0 +1,186 @@
1 + //! Schema-drift guard for services monitored by PoM.
2 + //!
3 + //! Background: PoM polls each target's `/api/health` and runs key-by-key
4 + //! assertions from `pom/deploy/pom-hetzner.toml` (`json_fields = { ... }`).
5 + //! If a producer changes the response shape without updating PoM — or vice
6 + //! versa — every snapshot becomes `Degraded` and an incident sits open
7 + //! until someone notices. The May-12 (MNW) and April-22 (MT) incidents
8 + //! each ran for weeks before discovery.
9 + //!
10 + //! This crate provides a single test helper that producer crates wire into
11 + //! their `#[cfg(test)]` blocks. Run alongside the health endpoint's pure
12 + //! body builder, it fails at PR time the moment the response shape stops
13 + //! satisfying PoM's expectations.
14 + //!
15 + //! # Usage
16 + //!
17 + //! ```ignore
18 + //! #[test]
19 + //! fn pom_hetzner_health_expectations_resolve() {
20 + //! let body = health_body(/* db_ok: */ true);
21 + //! pom_contract::assert_health_expectations_resolve(
22 + //! "../pom/deploy/pom-hetzner.toml",
23 + //! "mnw",
24 + //! &body,
25 + //! );
26 + //! }
27 + //! ```
28 + //!
29 + //! A relative `pom_config_path` is resolved against the process's current
30 + //! working directory, which `cargo test` sets to the root of the package under test. The helper panics with a precise
31 + //! diagnostic listing every missing or mismatched field.
32 +
33 + use std::path::Path;
34 +
35 + /// Load the PoM config at `pom_config_path`, look up the `json_fields` table under
36 + /// `targets.<target>.health.expect`, and check that each dot-path key resolves in `body`
37 + /// to a value whose string form equals the expected one (non-string values on either side are compared via `to_string()`).
38 + ///
39 + /// Panics if the config cannot be read or parsed, if that `json_fields` table is absent, or (with a multi-line diagnostic listing every missing or mismatched field) on drift; returns silently on success.
40 + pub fn assert_health_expectations_resolve(
41 + pom_config_path: impl AsRef<Path>,
42 + target: &str,
43 + body: &serde_json::Value,
44 + ) {
45 + let path = pom_config_path.as_ref();
46 + let raw = std::fs::read_to_string(path)
47 + .unwrap_or_else(|e| panic!("failed to read {}: {e}", path.display()));
48 + let cfg: toml::Value = raw
49 + .parse()
50 + .unwrap_or_else(|e| panic!("failed to parse {}: {e}", path.display()));
51 +
52 + let json_fields = cfg
53 + .get("targets")
54 + .and_then(|t| t.get(target))
55 + .and_then(|t| t.get("health"))
56 + .and_then(|h| h.get("expect"))
57 + .and_then(|e| e.get("json_fields"))
58 + .and_then(|f| f.as_table())
59 + .unwrap_or_else(|| {
60 + panic!(
61 + "{} has no targets.{target}.health.expect.json_fields",
62 + path.display()
63 + )
64 + });
65 +
66 + let mut failures = Vec::new();
67 + for (key, expected) in json_fields {
68 + let expected_str = expected
69 + .as_str()
70 + .map(|s| s.to_string())
71 + .unwrap_or_else(|| expected.to_string());
72 +
73 + match resolve_json_path(body, key) {
74 + None => failures.push(format!(
75 + " json field \"{key}\" missing from response (expected \"{expected_str}\")",
76 + )),
77 + Some(actual) => {
78 + let actual_str = match actual {
79 + serde_json::Value::String(s) => s.clone(),
80 + other => other.to_string(),
81 + };
82 + if actual_str != expected_str {
83 + failures.push(format!(
84 + " json field \"{key}\": PoM expects \"{expected_str}\", response yields \"{actual_str}\"",
85 + ));
86 + }
87 + }
88 + }
89 + }
90 +
91 + if !failures.is_empty() {
92 + panic!(
93 + "PoM schema-drift detected for target \"{target}\" — {} expectation(s) no longer resolve:\n{}\n\nFix: either restore the missing field in the response builder or drop the assertion from `{}`.",
94 + failures.len(),
95 + failures.join("\n"),
96 + path.display(),
97 + );
98 + }
99 + }
100 +
101 + /// Walk a dot-separated path through `value`, one `Value::get` lookup per segment, returning `None` as soon as a segment is missing (string keys only match JSON objects, so array elements are not addressable).
102 + /// Intended to mirror PoM's `resolve_json_path` so this helper's path semantics match what runs against prod. NOTE(review): PoM's implementation is not visible here; confirm the two stay in sync.
103 + fn resolve_json_path<'a>(
104 + value: &'a serde_json::Value,
105 + path: &str,
106 + ) -> Option<&'a serde_json::Value> {
107 + let mut current = value;
108 + for key in path.split('.') {
109 + current = current.get(key)?;
110 + }
111 + Some(current)
112 + }
113 +
114 + #[cfg(test)]
115 + mod tests {
116 + use super::*;
117 + use serde_json::json;
118 +
119 + fn write_config(dir: &std::path::Path, fields: &str) -> std::path::PathBuf {
120 + let path = dir.join("pom.toml");
121 + let content = format!(
122 + "[targets.demo.health.expect]\nstatus_code = 200\njson_fields = {{ {fields} }}\n"
123 + );
124 + std::fs::write(&path, content).unwrap();
125 + path
126 + }
127 +
128 + fn tempdir() -> std::path::PathBuf {
129 + let p = std::env::temp_dir().join(format!(
130 + "pom-contract-test-{}-{}",
131 + std::process::id(),
132 + std::time::SystemTime::now()
133 + .duration_since(std::time::UNIX_EPOCH)
134 + .unwrap()
135 + .as_nanos()
136 + ));
137 + std::fs::create_dir_all(&p).unwrap();
138 + p
139 + }
140 +
141 + #[test]
142 + fn passes_when_all_fields_resolve() {
143 + let dir = tempdir();
144 + let cfg = write_config(&dir, r#""status" = "operational", "database" = "true""#);
145 + let body = json!({ "status": "operational", "database": true });
146 + assert_health_expectations_resolve(&cfg, "demo", &body);
147 + }
148 +
149 + #[test]
150 + fn passes_with_nested_path() {
151 + let dir = tempdir();
152 + let cfg = write_config(&dir, r#""status" = "operational", "checks.database" = "true""#);
153 + let body = json!({ "status": "operational", "checks": { "database": true } });
154 + assert_health_expectations_resolve(&cfg, "demo", &body);
155 + }
156 +
157 + #[test]
158 + #[should_panic(expected = "json field \"checks.git_storage\" missing")]
159 + fn fails_when_field_missing() {
160 + let dir = tempdir();
161 + let cfg = write_config(
162 + &dir,
163 + r#""status" = "operational", "checks.git_storage" = "true""#,
164 + );
165 + let body = json!({ "status": "operational", "checks": { "database": true } });
166 + assert_health_expectations_resolve(&cfg, "demo", &body);
167 + }
168 +
169 + #[test]
170 + #[should_panic(expected = "PoM expects \"true\", response yields \"false\"")]
171 + fn fails_when_value_mismatches() {
172 + let dir = tempdir();
173 + let cfg = write_config(&dir, r#""status" = "operational", "database" = "true""#);
174 + let body = json!({ "status": "operational", "database": false });
175 + assert_health_expectations_resolve(&cfg, "demo", &body);
176 + }
177 +
178 + #[test]
179 + #[should_panic(expected = "no targets.unknown.health.expect.json_fields")]
180 + fn fails_when_target_absent() {
181 + let dir = tempdir();
182 + let cfg = write_config(&dir, r#""status" = "operational""#);
183 + let body = json!({ "status": "operational" });
184 + assert_health_expectations_resolve(&cfg, "unknown", &body);
185 + }
186 + }