max / pom
25 files changed,
+6037 insertions,
-466 deletions
| @@ -2,3 +2,10 @@ | |||
| 2 | 2 | *.db | |
| 3 | 3 | *.db-wal | |
| 4 | 4 | *.db-shm | |
| 5 | + | ||
| 6 | + | # OS | |
| 7 | + | .DS_Store | |
| 8 | + | ||
| 9 | + | # IDE | |
| 10 | + | .idea/ | |
| 11 | + | .vscode/ |
| @@ -77,6 +77,51 @@ dependencies = [ | |||
| 77 | 77 | ] | |
| 78 | 78 | ||
| 79 | 79 | [[package]] | |
| 80 | + | name = "anyhow" | |
| 81 | + | version = "1.0.102" | |
| 82 | + | source = "registry+https://github.com/rust-lang/crates.io-index" | |
| 83 | + | checksum = "7f202df86484c868dbad7eaa557ef785d5c66295e41b460ef922eca0723b842c" | |
| 84 | + | ||
| 85 | + | [[package]] | |
| 86 | + | name = "asn1-rs" | |
| 87 | + | version = "0.6.2" | |
| 88 | + | source = "registry+https://github.com/rust-lang/crates.io-index" | |
| 89 | + | checksum = "5493c3bedbacf7fd7382c6346bbd66687d12bbaad3a89a2d2c303ee6cf20b048" | |
| 90 | + | dependencies = [ | |
| 91 | + | "asn1-rs-derive", | |
| 92 | + | "asn1-rs-impl", | |
| 93 | + | "displaydoc", | |
| 94 | + | "nom", | |
| 95 | + | "num-traits", | |
| 96 | + | "rusticata-macros", | |
| 97 | + | "thiserror 1.0.69", | |
| 98 | + | "time", | |
| 99 | + | ] | |
| 100 | + | ||
| 101 | + | [[package]] | |
| 102 | + | name = "asn1-rs-derive" | |
| 103 | + | version = "0.5.1" | |
| 104 | + | source = "registry+https://github.com/rust-lang/crates.io-index" | |
| 105 | + | checksum = "965c2d33e53cb6b267e148a4cb0760bc01f4904c1cd4bb4002a085bb016d1490" | |
| 106 | + | dependencies = [ | |
| 107 | + | "proc-macro2", | |
| 108 | + | "quote", | |
| 109 | + | "syn", | |
| 110 | + | "synstructure", | |
| 111 | + | ] | |
| 112 | + | ||
| 113 | + | [[package]] | |
| 114 | + | name = "asn1-rs-impl" | |
| 115 | + | version = "0.2.0" | |
| 116 | + | source = "registry+https://github.com/rust-lang/crates.io-index" | |
| 117 | + | checksum = "7b18050c2cd6fe86c3a76584ef5e0baf286d038cda203eb6223df2cc413565f7" | |
| 118 | + | dependencies = [ | |
| 119 | + | "proc-macro2", | |
| 120 | + | "quote", | |
| 121 | + | "syn", | |
| 122 | + | ] | |
| 123 | + | ||
| 124 | + | [[package]] | |
| 80 | 125 | name = "atoi" | |
| 81 | 126 | version = "2.0.0" | |
| 82 | 127 | source = "registry+https://github.com/rust-lang/crates.io-index" | |
| @@ -98,6 +143,78 @@ source = "registry+https://github.com/rust-lang/crates.io-index" | |||
| 98 | 143 | checksum = "c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8" | |
| 99 | 144 | ||
| 100 | 145 | [[package]] | |
| 146 | + | name = "aws-lc-rs" | |
| 147 | + | version = "1.16.1" | |
| 148 | + | source = "registry+https://github.com/rust-lang/crates.io-index" | |
| 149 | + | checksum = "94bffc006df10ac2a68c83692d734a465f8ee6c5b384d8545a636f81d858f4bf" | |
| 150 | + | dependencies = [ | |
| 151 | + | "aws-lc-sys", | |
| 152 | + | "zeroize", | |
| 153 | + | ] | |
| 154 | + | ||
| 155 | + | [[package]] | |
| 156 | + | name = "aws-lc-sys" | |
| 157 | + | version = "0.38.0" | |
| 158 | + | source = "registry+https://github.com/rust-lang/crates.io-index" | |
| 159 | + | checksum = "4321e568ed89bb5a7d291a7f37997c2c0df89809d7b6d12062c81ddb54aa782e" | |
| 160 | + | dependencies = [ | |
| 161 | + | "cc", | |
| 162 | + | "cmake", | |
| 163 | + | "dunce", | |
| 164 | + | "fs_extra", | |
| 165 | + | ] | |
| 166 | + | ||
| 167 | + | [[package]] | |
| 168 | + | name = "axum" | |
| 169 | + | version = "0.8.8" | |
| 170 | + | source = "registry+https://github.com/rust-lang/crates.io-index" | |
| 171 | + | checksum = "8b52af3cb4058c895d37317bb27508dccc8e5f2d39454016b297bf4a400597b8" | |
| 172 | + | dependencies = [ | |
| 173 | + | "axum-core", | |
| 174 | + | "bytes", | |
| 175 | + | "form_urlencoded", | |
| 176 | + | "futures-util", | |
| 177 | + | "http", | |
| 178 | + | "http-body", | |
| 179 | + | "http-body-util", | |
| 180 | + | "hyper", | |
| 181 | + | "hyper-util", | |
| 182 | + | "itoa", | |
| 183 | + | "matchit", | |
| 184 | + | "memchr", | |
| 185 | + | "mime", | |
| 186 | + | "percent-encoding", | |
| 187 | + | "pin-project-lite", | |
| 188 | + | "serde_core", | |
| 189 | + | "serde_json", | |
| 190 | + | "serde_path_to_error", | |
| 191 | + | "serde_urlencoded", | |
| 192 | + | "sync_wrapper", | |
| 193 | + | "tokio", | |
| 194 | + | "tower", | |
| 195 | + | "tower-layer", | |
| 196 | + | "tower-service", | |
| 197 | + | ] | |
| 198 | + | ||
| 199 | + | [[package]] | |
| 200 | + | name = "axum-core" | |
| 201 | + | version = "0.5.6" | |
| 202 | + | source = "registry+https://github.com/rust-lang/crates.io-index" | |
| 203 | + | checksum = "08c78f31d7b1291f7ee735c1c6780ccde7785daae9a9206026862dab7d8792d1" | |
| 204 | + | dependencies = [ | |
| 205 | + | "bytes", | |
| 206 | + | "futures-core", | |
| 207 | + | "http", | |
| 208 | + | "http-body", | |
| 209 | + | "http-body-util", | |
| 210 | + | "mime", | |
| 211 | + | "pin-project-lite", | |
| 212 | + | "sync_wrapper", | |
| 213 | + | "tower-layer", | |
| 214 | + | "tower-service", | |
| 215 | + | ] | |
| 216 | + | ||
| 217 | + | [[package]] | |
| 101 | 218 | name = "base64" | |
| 102 | 219 | version = "0.21.7" | |
| 103 | 220 | source = "registry+https://github.com/rust-lang/crates.io-index" | |
| @@ -158,6 +275,8 @@ source = "registry+https://github.com/rust-lang/crates.io-index" | |||
| 158 | 275 | checksum = "aebf35691d1bfb0ac386a69bac2fde4dd276fb618cf8bf4f5318fe285e821bb2" | |
| 159 | 276 | dependencies = [ | |
| 160 | 277 | "find-msvc-tools", | |
| 278 | + | "jobserver", | |
| 279 | + | "libc", | |
| 161 | 280 | "shlex", | |
| 162 | 281 | ] | |
| 163 | 282 | ||
| @@ -228,6 +347,15 @@ source = "registry+https://github.com/rust-lang/crates.io-index" | |||
| 228 | 347 | checksum = "3a822ea5bc7590f9d40f1ba12c0dc3c2760f3482c6984db1573ad11031420831" | |
| 229 | 348 | ||
| 230 | 349 | [[package]] | |
| 350 | + | name = "cmake" | |
| 351 | + | version = "0.1.57" | |
| 352 | + | source = "registry+https://github.com/rust-lang/crates.io-index" | |
| 353 | + | checksum = "75443c44cd6b379beb8c5b45d85d0773baf31cce901fe7bb252f4eff3008ef7d" | |
| 354 | + | dependencies = [ | |
| 355 | + | "cc", | |
| 356 | + | ] | |
| 357 | + | ||
| 358 | + | [[package]] | |
| 231 | 359 | name = "colorchoice" | |
| 232 | 360 | version = "1.0.4" | |
| 233 | 361 | source = "registry+https://github.com/rust-lang/crates.io-index" | |
| @@ -304,6 +432,12 @@ dependencies = [ | |||
| 304 | 432 | ] | |
| 305 | 433 | ||
| 306 | 434 | [[package]] | |
| 435 | + | name = "data-encoding" | |
| 436 | + | version = "2.10.0" | |
| 437 | + | source = "registry+https://github.com/rust-lang/crates.io-index" | |
| 438 | + | checksum = "d7a1e2f27636f116493b8b860f5546edb47c8d8f8ea73e1d2a20be88e28d1fea" | |
| 439 | + | ||
| 440 | + | [[package]] | |
| 307 | 441 | name = "der" | |
| 308 | 442 | version = "0.7.10" | |
| 309 | 443 | source = "registry+https://github.com/rust-lang/crates.io-index" | |
| @@ -315,6 +449,29 @@ dependencies = [ | |||
| 315 | 449 | ] | |
| 316 | 450 | ||
| 317 | 451 | [[package]] | |
| 452 | + | name = "der-parser" | |
| 453 | + | version = "9.0.0" | |
| 454 | + | source = "registry+https://github.com/rust-lang/crates.io-index" | |
| 455 | + | checksum = "5cd0a5c643689626bec213c4d8bd4d96acc8ffdb4ad4bb6bc16abf27d5f4b553" | |
| 456 | + | dependencies = [ | |
| 457 | + | "asn1-rs", | |
| 458 | + | "displaydoc", | |
| 459 | + | "nom", | |
| 460 | + | "num-bigint", | |
| 461 | + | "num-traits", | |
| 462 | + | "rusticata-macros", | |
| 463 | + | ] | |
| 464 | + | ||
| 465 | + | [[package]] | |
| 466 | + | name = "deranged" | |
| 467 | + | version = "0.5.8" | |
| 468 | + | source = "registry+https://github.com/rust-lang/crates.io-index" | |
| 469 | + | checksum = "7cd812cc2bc1d69d4764bd80df88b4317eaef9e773c75226407d9bc0876b211c" | |
| 470 | + | dependencies = [ | |
| 471 | + | "powerfmt", | |
| 472 | + | ] | |
| 473 | + | ||
| 474 | + | [[package]] | |
| 318 | 475 | name = "digest" | |
| 319 | 476 | version = "0.10.7" | |
| 320 | 477 | source = "registry+https://github.com/rust-lang/crates.io-index" | |
| @@ -365,6 +522,12 @@ source = "registry+https://github.com/rust-lang/crates.io-index" | |||
| 365 | 522 | checksum = "1aaf95b3e5c8f23aa320147307562d361db0ae0d51242340f558153b4eb2439b" | |
| 366 | 523 | ||
| 367 | 524 | [[package]] | |
| 525 | + | name = "dunce" | |
| 526 | + | version = "1.0.5" | |
| 527 | + | source = "registry+https://github.com/rust-lang/crates.io-index" | |
| 528 | + | checksum = "92773504d58c093f6de2459af4af33faa518c13451eb8f2b5698ed3d36e7c813" | |
| 529 | + | ||
| 530 | + | [[package]] | |
| 368 | 531 | name = "dyn-clone" | |
| 369 | 532 | version = "1.0.20" | |
| 370 | 533 | source = "registry+https://github.com/rust-lang/crates.io-index" | |
| @@ -450,6 +613,12 @@ dependencies = [ | |||
| 450 | 613 | ] | |
| 451 | 614 | ||
| 452 | 615 | [[package]] | |
| 616 | + | name = "fs_extra" | |
| 617 | + | version = "1.3.0" | |
| 618 | + | source = "registry+https://github.com/rust-lang/crates.io-index" | |
| 619 | + | checksum = "42703706b716c37f96a77aea830392ad231f44c9e9a67872fa5548707e11b11c" | |
| 620 | + | ||
| 621 | + | [[package]] | |
| 453 | 622 | name = "futures" | |
| 454 | 623 | version = "0.3.32" | |
| 455 | 624 | source = "registry+https://github.com/rust-lang/crates.io-index" | |
| @@ -580,12 +749,25 @@ dependencies = [ | |||
| 580 | 749 | "cfg-if", | |
| 581 | 750 | "js-sys", | |
| 582 | 751 | "libc", | |
| 583 | - | "r-efi", | |
| 752 | + | "r-efi 5.3.0", | |
| 584 | 753 | "wasip2", | |
| 585 | 754 | "wasm-bindgen", | |
| 586 | 755 | ] | |
| 587 | 756 | ||
| 588 | 757 | [[package]] | |
| 758 | + | name = "getrandom" | |
| 759 | + | version = "0.4.2" | |
| 760 | + | source = "registry+https://github.com/rust-lang/crates.io-index" | |
| 761 | + | checksum = "0de51e6874e94e7bf76d726fc5d13ba782deca734ff60d5bb2fb2607c7406555" | |
| 762 | + | dependencies = [ | |
| 763 | + | "cfg-if", | |
| 764 | + | "libc", | |
| 765 | + | "r-efi 6.0.0", | |
| 766 | + | "wasip2", | |
| 767 | + | "wasip3", | |
| 768 | + | ] | |
| 769 | + | ||
| 770 | + | [[package]] | |
| 589 | 771 | name = "hashbrown" | |
| 590 | 772 | version = "0.15.5" | |
| 591 | 773 | source = "registry+https://github.com/rust-lang/crates.io-index" | |
| @@ -651,6 +833,17 @@ dependencies = [ | |||
| 651 | 833 | ] | |
| 652 | 834 | ||
| 653 | 835 | [[package]] | |
| 836 | + | name = "hostname" | |
| 837 | + | version = "0.4.2" | |
| 838 | + | source = "registry+https://github.com/rust-lang/crates.io-index" | |
| 839 | + | checksum = "617aaa3557aef3810a6369d0a99fac8a080891b68bd9f9812a1eeda0c0730cbd" | |
| 840 | + | dependencies = [ | |
| 841 | + | "cfg-if", | |
| 842 | + | "libc", | |
| 843 | + | "windows-link", | |
| 844 | + | ] | |
| 845 | + | ||
| 846 | + | [[package]] | |
| 654 | 847 | name = "http" | |
| 655 | 848 | version = "1.4.0" | |
| 656 | 849 | source = "registry+https://github.com/rust-lang/crates.io-index" | |
| @@ -690,6 +883,12 @@ source = "registry+https://github.com/rust-lang/crates.io-index" | |||
| 690 | 883 | checksum = "6dbf3de79e51f3d586ab4cb9d5c3e2c14aa28ed23d180cf89b4df0454a69cc87" | |
| 691 | 884 | ||
| 692 | 885 | [[package]] | |
| 886 | + | name = "httpdate" | |
| 887 | + | version = "1.0.3" | |
| 888 | + | source = "registry+https://github.com/rust-lang/crates.io-index" | |
| 889 | + | checksum = "df3b46402a9d5adb4c86a0cf463f42e19994e3ee891101b1841f30a545cb49a9" | |
| 890 | + | ||
| 891 | + | [[package]] | |
| 693 | 892 | name = "hyper" | |
| 694 | 893 | version = "1.8.1" | |
| 695 | 894 | source = "registry+https://github.com/rust-lang/crates.io-index" | |
| @@ -702,6 +901,7 @@ dependencies = [ | |||
| 702 | 901 | "http", | |
| 703 | 902 | "http-body", | |
| 704 | 903 | "httparse", | |
| 904 | + | "httpdate", | |
| 705 | 905 | "itoa", | |
| 706 | 906 | "pin-project-lite", | |
| 707 | 907 | "pin-utils", | |
| @@ -856,6 +1056,12 @@ dependencies = [ | |||
| 856 | 1056 | ] | |
| 857 | 1057 | ||
| 858 | 1058 | [[package]] | |
| 1059 | + | name = "id-arena" | |
| 1060 | + | version = "2.3.0" | |
| 1061 | + | source = "registry+https://github.com/rust-lang/crates.io-index" | |
| 1062 | + | checksum = "3d3067d79b975e8844ca9eb072e16b31c3c1c36928edf9c6789548c524d0d954" | |
| 1063 | + | ||
| 1064 | + | [[package]] | |
| 859 | 1065 | name = "idna" | |
| 860 | 1066 | version = "1.1.0" | |
| 861 | 1067 | source = "registry+https://github.com/rust-lang/crates.io-index" | |
| @@ -884,6 +1090,8 @@ checksum = "7714e70437a7dc3ac8eb7e6f8df75fd8eb422675fc7678aff7364301092b1017" | |||
| 884 | 1090 | dependencies = [ | |
| 885 | 1091 | "equivalent", | |
| 886 | 1092 | "hashbrown 0.16.1", | |
| 1093 | + | "serde", | |
| 1094 | + | "serde_core", | |
| 887 | 1095 | ] | |
| 888 | 1096 | ||
| 889 | 1097 | [[package]] | |
| @@ -915,6 +1123,16 @@ source = "registry+https://github.com/rust-lang/crates.io-index" | |||
| 915 | 1123 | checksum = "92ecc6618181def0457392ccd0ee51198e065e016d1d527a7ac1b6dc7c1f09d2" | |
| 916 | 1124 | ||
| 917 | 1125 | [[package]] | |
| 1126 | + | name = "jobserver" | |
| 1127 | + | version = "0.1.34" | |
| 1128 | + | source = "registry+https://github.com/rust-lang/crates.io-index" | |
| 1129 | + | checksum = "9afb3de4395d6b3e67a780b6de64b51c978ecf11cb9a462c66be7d4ca9039d33" | |
| 1130 | + | dependencies = [ | |
| 1131 | + | "getrandom 0.3.4", | |
| 1132 | + | "libc", | |
| 1133 | + | ] | |
| 1134 | + | ||
| 1135 | + | [[package]] | |
| 918 | 1136 | name = "js-sys" | |
| 919 | 1137 | version = "0.3.91" | |
| 920 | 1138 | source = "registry+https://github.com/rust-lang/crates.io-index" | |
| @@ -934,6 +1152,12 @@ dependencies = [ | |||
| 934 | 1152 | ] | |
| 935 | 1153 | ||
| 936 | 1154 | [[package]] | |
| 1155 | + | name = "leb128fmt" | |
| 1156 | + | version = "0.1.0" | |
| 1157 | + | source = "registry+https://github.com/rust-lang/crates.io-index" | |
| 1158 | + | checksum = "09edd9e8b54e49e587e4f6295a7d29c3ea94d469cb40ab8ca70b288248a81db2" | |
| 1159 | + | ||
| 1160 | + | [[package]] | |
| 937 | 1161 | name = "libc" | |
| 938 | 1162 | version = "0.2.183" | |
| 939 | 1163 | source = "registry+https://github.com/rust-lang/crates.io-index" | |
| @@ -1005,6 +1229,12 @@ dependencies = [ | |||
| 1005 | 1229 | ] | |
| 1006 | 1230 | ||
| 1007 | 1231 | [[package]] | |
| 1232 | + | name = "matchit" | |
| 1233 | + | version = "0.8.4" | |
| 1234 | + | source = "registry+https://github.com/rust-lang/crates.io-index" | |
| 1235 | + | checksum = "47e1ffaa40ddd1f3ed91f717a33c8c0ee23fff369e3aa8772b9605cc1d22f4c3" | |
| 1236 | + | ||
| 1237 | + | [[package]] | |
| 1008 | 1238 | name = "md-5" | |
| 1009 | 1239 | version = "0.10.6" | |
| 1010 | 1240 | source = "registry+https://github.com/rust-lang/crates.io-index" | |
| @@ -1021,6 +1251,18 @@ source = "registry+https://github.com/rust-lang/crates.io-index" | |||
| 1021 | 1251 | checksum = "f8ca58f447f06ed17d5fc4043ce1b10dd205e060fb3ce5b979b8ed8e59ff3f79" | |
| 1022 | 1252 | ||
| 1023 | 1253 | [[package]] | |
| 1254 | + | name = "mime" | |
| 1255 | + | version = "0.3.17" | |
| 1256 | + | source = "registry+https://github.com/rust-lang/crates.io-index" | |
| 1257 | + | checksum = "6877bb514081ee2a7ff5ef9de3281f14a4dd4bceac4c09388074a6b5df8a139a" | |
| 1258 | + | ||
| 1259 | + | [[package]] | |
| 1260 | + | name = "minimal-lexical" | |
| 1261 | + | version = "0.2.1" | |
| 1262 | + | source = "registry+https://github.com/rust-lang/crates.io-index" | |
| 1263 | + | checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a" | |
| 1264 | + | ||
| 1265 | + | [[package]] | |
| 1024 | 1266 | name = "mio" | |
| 1025 | 1267 | version = "1.1.1" | |
| 1026 | 1268 | source = "registry+https://github.com/rust-lang/crates.io-index" | |
| @@ -1032,6 +1274,16 @@ dependencies = [ | |||
| 1032 | 1274 | ] | |
| 1033 | 1275 | ||
| 1034 | 1276 | [[package]] | |
| 1277 | + | name = "nom" | |
| 1278 | + | version = "7.1.3" | |
| 1279 | + | source = "registry+https://github.com/rust-lang/crates.io-index" | |
| 1280 | + | checksum = "d273983c5a657a70a3e8f2a01329822f3b8c8172b73826411a55751e404a0a4a" | |
| 1281 | + | dependencies = [ | |
| 1282 | + | "memchr", | |
| 1283 | + | "minimal-lexical", | |
| 1284 | + | ] | |
| 1285 | + | ||
| 1286 | + | [[package]] | |
| 1035 | 1287 | name = "nu-ansi-term" | |
| 1036 | 1288 | version = "0.50.3" | |
| 1037 | 1289 | source = "registry+https://github.com/rust-lang/crates.io-index" | |
| @@ -1041,6 +1293,16 @@ dependencies = [ | |||
| 1041 | 1293 | ] | |
| 1042 | 1294 | ||
| 1043 | 1295 | [[package]] | |
| 1296 | + | name = "num-bigint" | |
| 1297 | + | version = "0.4.6" | |
| 1298 | + | source = "registry+https://github.com/rust-lang/crates.io-index" | |
| 1299 | + | checksum = "a5e44f723f1133c9deac646763579fdb3ac745e418f2a7af9cd0c431da1f20b9" | |
| 1300 | + | dependencies = [ | |
| 1301 | + | "num-integer", | |
| 1302 | + | "num-traits", | |
| 1303 | + | ] | |
| 1304 | + | ||
| 1305 | + | [[package]] | |
| 1044 | 1306 | name = "num-bigint-dig" | |
| 1045 | 1307 | version = "0.8.6" | |
| 1046 | 1308 | source = "registry+https://github.com/rust-lang/crates.io-index" | |
| @@ -1057,6 +1319,12 @@ dependencies = [ | |||
| 1057 | 1319 | ] | |
| 1058 | 1320 | ||
| 1059 | 1321 | [[package]] | |
| 1322 | + | name = "num-conv" | |
| 1323 | + | version = "0.2.0" | |
| 1324 | + | source = "registry+https://github.com/rust-lang/crates.io-index" | |
| 1325 | + | checksum = "cf97ec579c3c42f953ef76dbf8d55ac91fb219dde70e49aa4a6b7d74e9919050" | |
| 1326 | + | ||
| 1327 | + | [[package]] | |
| 1060 | 1328 | name = "num-integer" | |
| 1061 | 1329 | version = "0.1.46" | |
| 1062 | 1330 | source = "registry+https://github.com/rust-lang/crates.io-index" | |
| @@ -1087,6 +1355,15 @@ dependencies = [ | |||
| 1087 | 1355 | ] | |
| 1088 | 1356 | ||
| 1089 | 1357 | [[package]] | |
| 1358 | + | name = "oid-registry" | |
| 1359 | + | version = "0.7.1" | |
| 1360 | + | source = "registry+https://github.com/rust-lang/crates.io-index" | |
| 1361 | + | checksum = "a8d8034d9489cdaf79228eb9f6a3b8d7bb32ba00d6645ebd48eef4077ceb5bd9" | |
| 1362 | + | dependencies = [ | |
| 1363 | + | "asn1-rs", | |
| 1364 | + | ] | |
| 1365 | + | ||
| 1366 | + | [[package]] | |
| 1090 | 1367 | name = "once_cell" | |
| 1091 | 1368 | version = "1.21.3" | |
| 1092 | 1369 | source = "registry+https://github.com/rust-lang/crates.io-index" | |
| @@ -1201,21 +1478,31 @@ checksum = "b4596b6d070b27117e987119b4dac604f3c58cfb0b191112e24771b2faeac1a6" | |||
| 1201 | 1478 | ||
| 1202 | 1479 | [[package]] | |
| 1203 | 1480 | name = "pom" | |
| 1204 | - | version = "0.1.0" | |
| 1481 | + | version = "0.2.1" | |
| 1205 | 1482 | dependencies = [ | |
| 1483 | + | "axum", | |
| 1206 | 1484 | "chrono", | |
| 1207 | 1485 | "clap", | |
| 1208 | 1486 | "dirs", | |
| 1487 | + | "hostname", | |
| 1488 | + | "http-body-util", | |
| 1209 | 1489 | "reqwest", | |
| 1210 | 1490 | "rmcp", | |
| 1491 | + | "rustls-pki-types", | |
| 1211 | 1492 | "schemars", | |
| 1212 | 1493 | "serde", | |
| 1213 | 1494 | "serde_json", | |
| 1214 | 1495 | "sqlx", | |
| 1496 | + | "thiserror 2.0.18", | |
| 1215 | 1497 | "tokio", | |
| 1498 | + | "tokio-rustls", | |
| 1216 | 1499 | "toml", | |
| 1500 | + | "tower", | |
| 1217 | 1501 | "tracing", | |
| 1218 | 1502 | "tracing-subscriber", | |
| 1503 | + | "uuid", | |
| 1504 | + | "webpki-roots", | |
| 1505 | + | "x509-parser", | |
| 1219 | 1506 | ] | |
| 1220 | 1507 | ||
| 1221 | 1508 | [[package]] | |
| @@ -1228,6 +1515,12 @@ dependencies = [ | |||
| 1228 | 1515 | ] | |
| 1229 | 1516 | ||
| 1230 | 1517 | [[package]] | |
| 1518 | + | name = "powerfmt" | |
| 1519 | + | version = "0.2.0" | |
| 1520 | + | source = "registry+https://github.com/rust-lang/crates.io-index" | |
| 1521 | + | checksum = "439ee305def115ba05938db6eb1644ff94165c5ab5e9420d1c1bcedbba909391" | |
| 1522 | + | ||
| 1523 | + | [[package]] | |
| 1231 | 1524 | name = "ppv-lite86" | |
| 1232 | 1525 | version = "0.2.21" | |
| 1233 | 1526 | source = "registry+https://github.com/rust-lang/crates.io-index" | |
| @@ -1237,6 +1530,16 @@ dependencies = [ | |||
| 1237 | 1530 | ] | |
| 1238 | 1531 | ||
| 1239 | 1532 | [[package]] | |
| 1533 | + | name = "prettyplease" | |
| 1534 | + | version = "0.2.37" | |
| 1535 | + | source = "registry+https://github.com/rust-lang/crates.io-index" | |
| 1536 | + | checksum = "479ca8adacdd7ce8f1fb39ce9ecccbfe93a3f1344b3d0d97f20bc0196208f62b" | |
| 1537 | + | dependencies = [ | |
| 1538 | + | "proc-macro2", | |
| 1539 | + | "syn", | |
| 1540 | + | ] | |
| 1541 | + | ||
| 1542 | + | [[package]] | |
| 1240 | 1543 | name = "proc-macro2" | |
| 1241 | 1544 | version = "1.0.106" | |
| 1242 | 1545 | source = "registry+https://github.com/rust-lang/crates.io-index" | |
| @@ -1259,7 +1562,7 @@ dependencies = [ | |||
| 1259 | 1562 | "rustc-hash", | |
| 1260 | 1563 | "rustls", | |
| 1261 | 1564 | "socket2", | |
| 1262 | - | "thiserror", | |
| 1565 | + | "thiserror 2.0.18", | |
| 1263 | 1566 | "tokio", | |
| 1264 | 1567 | "tracing", | |
| 1265 | 1568 | "web-time", | |
| @@ -1280,7 +1583,7 @@ dependencies = [ | |||
| 1280 | 1583 | "rustls", | |
| 1281 | 1584 | "rustls-pki-types", | |
| 1282 | 1585 | "slab", | |
| 1283 | - | "thiserror", | |
| 1586 | + | "thiserror 2.0.18", | |
| 1284 | 1587 | "tinyvec", | |
| 1285 | 1588 | "tracing", | |
| 1286 | 1589 | "web-time", | |
| @@ -1316,6 +1619,12 @@ source = "registry+https://github.com/rust-lang/crates.io-index" | |||
| 1316 | 1619 | checksum = "69cdb34c158ceb288df11e18b4bd39de994f6657d83847bdffdbd7f346754b0f" | |
| 1317 | 1620 | ||
| 1318 | 1621 | [[package]] | |
| 1622 | + | name = "r-efi" | |
| 1623 | + | version = "6.0.0" | |
| 1624 | + | source = "registry+https://github.com/rust-lang/crates.io-index" | |
| 1625 | + | checksum = "f8dcc9c7d52a811697d2151c701e0d08956f92b0e24136cf4cf27b57a6a0d9bf" | |
| 1626 | + |
Lines truncated
| @@ -1,6 +1,6 @@ | |||
| 1 | 1 | [package] | |
| 2 | 2 | name = "pom" | |
| 3 | - | version = "0.1.0" | |
| 3 | + | version = "0.2.2" | |
| 4 | 4 | edition = "2024" | |
| 5 | 5 | ||
| 6 | 6 | [lib] | |
| @@ -24,6 +24,9 @@ tokio = { version = "1", features = ["rt-multi-thread", "macros", "io-std", "io- | |||
| 24 | 24 | # HTTP client | |
| 25 | 25 | reqwest = { version = "0.12", default-features = false, features = ["json", "rustls-tls"] } | |
| 26 | 26 | ||
| 27 | + | # HTTP server (API in serve mode) | |
| 28 | + | axum = { version = "0.8", default-features = false, features = ["json", "tokio", "http1", "query"] } | |
| 29 | + | ||
| 27 | 30 | # Database | |
| 28 | 31 | sqlx = { version = "0.8", features = ["runtime-tokio", "sqlite"] } | |
| 29 | 32 | ||
| @@ -35,12 +38,29 @@ schemars = "0.8" | |||
| 35 | 38 | # Config | |
| 36 | 39 | toml = "0.8" | |
| 37 | 40 | ||
| 41 | + | # Errors | |
| 42 | + | thiserror = "2" | |
| 43 | + | ||
| 38 | 44 | # Time | |
| 39 | 45 | chrono = { version = "0.4", features = ["serde"] } | |
| 40 | 46 | ||
| 41 | 47 | # Paths | |
| 42 | 48 | dirs = "6" | |
| 43 | 49 | ||
| 50 | + | # Identity | |
| 51 | + | uuid = { version = "1", features = ["v4"] } | |
| 52 | + | hostname = "0.4" | |
| 53 | + | ||
| 54 | + | # TLS certificate checking | |
| 55 | + | x509-parser = "0.16" | |
| 56 | + | tokio-rustls = "0.26" | |
| 57 | + | rustls-pki-types = "1" | |
| 58 | + | webpki-roots = "1" | |
| 59 | + | ||
| 44 | 60 | # Logging | |
| 45 | 61 | tracing = "0.1" | |
| 46 | 62 | tracing-subscriber = { version = "0.3", features = ["env-filter"] } | |
| 63 | + | ||
| 64 | + | [dev-dependencies] | |
| 65 | + | tower = { version = "0.5", features = ["util"] } | |
| 66 | + | http-body-util = "0.1" |
| @@ -2,19 +2,20 @@ | |||
| 2 | 2 | set -euo pipefail | |
| 3 | 3 | ||
| 4 | 4 | ASTRA_HOST="max@100.106.221.39" | |
| 5 | - | HETZNER_HOST="root@5.78.144.244" | |
| 5 | + | HETZNER_HOST="root@100.120.174.96" | |
| 6 | 6 | ||
| 7 | 7 | SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)" | |
| 8 | 8 | PROJECT_DIR="$(dirname "$SCRIPT_DIR")" | |
| 9 | 9 | ||
| 10 | 10 | deploy_target() { | |
| 11 | 11 | local name="$1" | |
| 12 | - | local host target | |
| 12 | + | local host target sudo_prefix="" | |
| 13 | 13 | ||
| 14 | 14 | case "$name" in | |
| 15 | 15 | astra) | |
| 16 | 16 | host="$ASTRA_HOST" | |
| 17 | 17 | target="aarch64-unknown-linux-gnu" | |
| 18 | + | sudo_prefix="sudo" | |
| 18 | 19 | ;; | |
| 19 | 20 | hetzner) | |
| 20 | 21 | host="$HETZNER_HOST" | |
| @@ -31,16 +32,23 @@ deploy_target() { | |||
| 31 | 32 | ||
| 32 | 33 | local binary="$PROJECT_DIR/target/$target/release/pom" | |
| 33 | 34 | ||
| 35 | + | local config_file="$SCRIPT_DIR/pom-${name}.toml" | |
| 36 | + | if [ ! -f "$config_file" ]; then | |
| 37 | + | echo "Config not found: $config_file" | |
| 38 | + | exit 1 | |
| 39 | + | fi | |
| 40 | + | ||
| 34 | 41 | echo "=== Deploying to $name ($host) ===" | |
| 35 | - | ssh "$host" "mkdir -p /etc/pom" | |
| 36 | - | scp "$binary" "$host:/usr/local/bin/pom" | |
| 37 | - | scp "$PROJECT_DIR/pom.toml" "$host:/etc/pom/pom.toml" | |
| 38 | - | scp "$SCRIPT_DIR/pom.service" "$host:/etc/systemd/system/pom.service" | |
| 42 | + | ssh "$host" "$sudo_prefix mkdir -p /etc/pom" | |
| 43 | + | scp "$binary" "$host:/tmp/pom" | |
| 44 | + | scp "$config_file" "$host:/tmp/pom.toml" | |
| 45 | + | scp "$SCRIPT_DIR/pom.service" "$host:/tmp/pom.service" | |
| 39 | 46 | ||
| 40 | - | ssh "$host" "systemctl daemon-reload && systemctl enable pom && systemctl restart pom" | |
| 47 | + | ssh "$host" "$sudo_prefix mv /tmp/pom /usr/local/bin/pom && $sudo_prefix chmod +x /usr/local/bin/pom && $sudo_prefix mv /tmp/pom.toml /etc/pom/pom.toml && $sudo_prefix mv /tmp/pom.service /etc/systemd/system/pom.service" | |
| 48 | + | ssh "$host" "$sudo_prefix systemctl daemon-reload && $sudo_prefix systemctl enable pom && $sudo_prefix systemctl restart pom" | |
| 41 | 49 | ||
| 42 | 50 | echo "=== $name: deployed ===" | |
| 43 | - | ssh "$host" "systemctl status pom --no-pager" | |
| 51 | + | ssh "$host" "$sudo_prefix systemctl status pom --no-pager" | |
| 44 | 52 | } | |
| 45 | 53 | ||
| 46 | 54 | if [ $# -eq 0 ]; then |
| @@ -0,0 +1,44 @@ | |||
| 1 | + | [serve] | |
| 2 | + | interval_secs = 300 | |
| 3 | + | prune_days = 30 | |
| 4 | + | listen = "0.0.0.0:9100" | |
| 5 | + | peer_heartbeat_secs = 60 | |
| 6 | + | ||
| 7 | + | [instance] | |
| 8 | + | name = "astra" | |
| 9 | + | ||
| 10 | + | [targets.mnw] | |
| 11 | + | label = "Makenotwork Production" | |
| 12 | + | ||
| 13 | + | [targets.mnw.health] | |
| 14 | + | url = "https://makenot.work/api/health" | |
| 15 | + | timeout_secs = 10 | |
| 16 | + | ||
| 17 | + | [targets.mnw.health.expect] | |
| 18 | + | status_code = 200 | |
| 19 | + | json_fields = { "status" = "operational" } | |
| 20 | + | ||
| 21 | + | [targets.mnw.health.trending] | |
| 22 | + | baseline_window_hours = 168 | |
| 23 | + | spike_threshold = 2.0 | |
| 24 | + | ||
| 25 | + | [targets.mnw.tls] | |
| 26 | + | host = "makenot.work" | |
| 27 | + | ||
| 28 | + | [targets.mnw.tests] | |
| 29 | + | ssh = "max@100.106.221.39" | |
| 30 | + | command = "/home/max/staging/run-ci.sh" | |
| 31 | + | timeout_secs = 600 | |
| 32 | + | staleness_days = 7 | |
| 33 | + | ||
| 34 | + | [peers.hetzner] | |
| 35 | + | address = "100.120.174.96:9100" | |
| 36 | + | on_missing = "alert" | |
| 37 | + | ||
| 38 | + | [peers.macbook] | |
| 39 | + | address = "100.100.246.136:9100" | |
| 40 | + | on_missing = "log" | |
| 41 | + | ||
| 42 | + | [alerts] | |
| 43 | + | # postmark_token = "" # set in production — omit for dev mode (log only) | |
| 44 | + | to = "pom-alerts@makenot.work" |
| @@ -0,0 +1,44 @@ | |||
| 1 | + | [serve] | |
| 2 | + | interval_secs = 300 | |
| 3 | + | prune_days = 30 | |
| 4 | + | listen = "0.0.0.0:9100" | |
| 5 | + | peer_heartbeat_secs = 60 | |
| 6 | + | ||
| 7 | + | [instance] | |
| 8 | + | name = "hetzner" | |
| 9 | + | ||
| 10 | + | [targets.mnw] | |
| 11 | + | label = "Makenotwork Production" | |
| 12 | + | ||
| 13 | + | [targets.mnw.health] | |
| 14 | + | url = "https://makenot.work/api/health" | |
| 15 | + | timeout_secs = 10 | |
| 16 | + | ||
| 17 | + | [targets.mnw.health.expect] | |
| 18 | + | status_code = 200 | |
| 19 | + | json_fields = { "status" = "operational" } | |
| 20 | + | ||
| 21 | + | [targets.mnw.health.trending] | |
| 22 | + | baseline_window_hours = 168 | |
| 23 | + | spike_threshold = 2.0 | |
| 24 | + | ||
| 25 | + | [targets.mnw.tls] | |
| 26 | + | host = "makenot.work" | |
| 27 | + | ||
| 28 | + | [targets.mnw.tests] | |
| 29 | + | ssh = "max@100.106.221.39" | |
| 30 | + | command = "/home/max/staging/run-ci.sh" | |
| 31 | + | timeout_secs = 600 | |
| 32 | + | staleness_days = 7 | |
| 33 | + | ||
| 34 | + | [peers.astra] | |
| 35 | + | address = "100.106.221.39:9100" | |
| 36 | + | on_missing = "alert" | |
| 37 | + | ||
| 38 | + | [peers.macbook] | |
| 39 | + | address = "100.100.246.136:9100" | |
| 40 | + | on_missing = "log" | |
| 41 | + | ||
| 42 | + | [alerts] | |
| 43 | + | # postmark_token = "" # set in production — omit for dev mode (log only) | |
| 44 | + | to = "pom-alerts@makenot.work" |
| @@ -1,6 +1,7 @@ | |||
| 1 | 1 | [serve] | |
| 2 | 2 | interval_secs = 300 | |
| 3 | 3 | prune_days = 30 | |
| 4 | + | listen = "127.0.0.1:9100" | |
| 4 | 5 | ||
| 5 | 6 | [targets.mnw] | |
| 6 | 7 | label = "Makenotwork Production" |
| @@ -0,0 +1,389 @@ | |||
| 1 | + | //! Email alerting via Postmark API. | |
| 2 | + | //! | |
| 3 | + | //! Sends alerts on health status transitions and peer disappearance/recovery. | |
| 4 | + | //! If no `postmark_token` is configured, alerts are logged to stdout instead. | |
| 5 | + | ||
| 6 | + | use sqlx::SqlitePool; | |
| 7 | + | use tracing::{info, warn}; | |
| 8 | + | ||
| 9 | + | use crate::config::AlertConfig; | |
| 10 | + | use crate::db; | |
| 11 | + | ||
| 12 | + | #[derive(Clone)] | |
| 13 | + | pub struct Alerter { | |
| 14 | + | config: AlertConfig, | |
| 15 | + | client: reqwest::Client, | |
| 16 | + | pool: SqlitePool, | |
| 17 | + | instance_name: String, | |
| 18 | + | } | |
| 19 | + | ||
| 20 | + | impl Alerter { | |
| 21 | + | pub fn new(config: AlertConfig, pool: SqlitePool, instance_name: String) -> Self { | |
| 22 | + | let client = reqwest::Client::builder() | |
| 23 | + | .timeout(std::time::Duration::from_secs(10)) | |
| 24 | + | .build() | |
| 25 | + | .unwrap_or_default(); | |
| 26 | + | Self { config, client, pool, instance_name } | |
| 27 | + | } | |
| 28 | + | ||
| 29 | + | pub async fn send_health_alert( | |
| 30 | + | &self, | |
| 31 | + | target: &str, | |
| 32 | + | label: &str, | |
| 33 | + | from_status: &str, | |
| 34 | + | to_status: &str, | |
| 35 | + | error: Option<&str>, | |
| 36 | + | ) { | |
| 37 | + | let alert_key = format!("health:{target}"); | |
| 38 | + | if self.is_within_cooldown(&alert_key).await { | |
| 39 | + | info!("alert cooldown active for {alert_key}, skipping"); | |
| 40 | + | return; | |
| 41 | + | } | |
| 42 | + | ||
| 43 | + | let subject = format!("[PoM] {target}: {from_status} -> {to_status}"); | |
| 44 | + | let mut body = format!( | |
| 45 | + | "Target: {label} ({target})\n\ | |
| 46 | + | Status: {from_status} -> {to_status}\n\ | |
| 47 | + | Instance: {}\n\ | |
| 48 | + | Time: {}\n", | |
| 49 | + | self.instance_name, | |
| 50 | + | chrono::Utc::now().to_rfc3339(), | |
| 51 | + | ); | |
| 52 | + | if let Some(err) = error { | |
| 53 | + | body.push_str(&format!("Error: {err}\n")); | |
| 54 | + | } | |
| 55 | + | body.push_str("\n- PoM"); | |
| 56 | + | ||
| 57 | + | self.send_email(&subject, &body).await; | |
| 58 | + | self.record_alert(target, "health", Some(from_status), Some(to_status), error).await; | |
| 59 | + | } | |
| 60 | + | ||
| 61 | + | pub async fn send_health_recovery( | |
| 62 | + | &self, | |
| 63 | + | target: &str, | |
| 64 | + | label: &str, | |
| 65 | + | from_status: &str, | |
| 66 | + | ) { | |
| 67 | + | let alert_key = format!("health:{target}"); | |
| 68 | + | // No cooldown on recovery — always send | |
| 69 | + | let subject = format!("[PoM] {target}: recovered"); | |
| 70 | + | let body = format!( | |
| 71 | + | "Target: {label} ({target})\n\ | |
| 72 | + | Status: {from_status} -> operational\n\ | |
| 73 | + | Instance: {}\n\ | |
| 74 | + | Time: {}\n\n\ | |
| 75 | + | - PoM", | |
| 76 | + | self.instance_name, | |
| 77 | + | chrono::Utc::now().to_rfc3339(), | |
| 78 | + | ); | |
| 79 | + | ||
| 80 | + | self.send_email(&subject, &body).await; | |
| 81 | + | self.record_alert(&alert_key, "recovery", Some(from_status), Some("operational"), None).await; | |
| 82 | + | } | |
| 83 | + | ||
| 84 | + | pub async fn send_tls_expiry_alert( | |
| 85 | + | &self, | |
| 86 | + | target: &str, | |
| 87 | + | host: &str, | |
| 88 | + | days_remaining: i64, | |
| 89 | + | not_after: &str, | |
| 90 | + | ) { | |
| 91 | + | let alert_key = format!("tls:{target}"); | |
| 92 | + | if self.is_within_cooldown(&alert_key).await { | |
| 93 | + | info!("alert cooldown active for {alert_key}, skipping"); | |
| 94 | + | return; | |
| 95 | + | } | |
| 96 | + | ||
| 97 | + | let subject = format!("[PoM] {target}: TLS cert expires in {days_remaining} days"); | |
| 98 | + | let body = format!( | |
| 99 | + | "Target: {target}\n\ | |
| 100 | + | Host: {host}\n\ | |
| 101 | + | Days remaining: {days_remaining}\n\ | |
| 102 | + | Expires: {not_after}\n\ | |
| 103 | + | Instance: {}\n\ | |
| 104 | + | Time: {}\n\n\ | |
| 105 | + | - PoM", | |
| 106 | + | self.instance_name, | |
| 107 | + | chrono::Utc::now().to_rfc3339(), | |
| 108 | + | ); | |
| 109 | + | ||
| 110 | + | self.send_email(&subject, &body).await; | |
| 111 | + | self.record_alert(&alert_key, "tls_expiry", None, None, None).await; | |
| 112 | + | } | |
| 113 | + | ||
| 114 | + | pub async fn send_tls_error_alert( | |
| 115 | + | &self, | |
| 116 | + | target: &str, | |
| 117 | + | host: &str, | |
| 118 | + | error: &str, | |
| 119 | + | ) { | |
| 120 | + | let alert_key = format!("tls:{target}"); | |
| 121 | + | if self.is_within_cooldown(&alert_key).await { | |
| 122 | + | info!("alert cooldown active for {alert_key}, skipping"); | |
| 123 | + | return; | |
| 124 | + | } | |
| 125 | + | ||
| 126 | + | let subject = format!("[PoM] {target}: TLS check failed"); | |
| 127 | + | let body = format!( | |
| 128 | + | "Target: {target}\n\ | |
| 129 | + | Host: {host}\n\ | |
| 130 | + | Error: {error}\n\ | |
| 131 | + | Instance: {}\n\ | |
| 132 | + | Time: {}\n\n\ | |
| 133 | + | - PoM", | |
| 134 | + | self.instance_name, | |
| 135 | + | chrono::Utc::now().to_rfc3339(), | |
| 136 | + | ); | |
| 137 | + | ||
| 138 | + | self.send_email(&subject, &body).await; | |
| 139 | + | self.record_alert(&alert_key, "tls_error", None, None, Some(error)).await; | |
| 140 | + | } | |
| 141 | + | ||
| 142 | + | pub async fn send_tls_recovery( | |
| 143 | + | &self, | |
| 144 | + | target: &str, | |
| 145 | + | label: &str, | |
| 146 | + | days_remaining: i64, | |
| 147 | + | ) { | |
| 148 | + | let alert_key = format!("tls:{target}"); | |
| 149 | + | // No cooldown on recovery — always send | |
| 150 | + | let subject = format!("[PoM] {target}: TLS cert renewed"); | |
| 151 | + | let body = format!( | |
| 152 | + | "Target: {label} ({target})\n\ | |
| 153 | + | Days remaining: {days_remaining}\n\ | |
| 154 | + | Instance: {}\n\ | |
| 155 | + | Time: {}\n\n\ | |
| 156 | + | - PoM", | |
| 157 | + | self.instance_name, | |
| 158 | + | chrono::Utc::now().to_rfc3339(), | |
| 159 | + | ); | |
| 160 | + | ||
| 161 | + | self.send_email(&subject, &body).await; | |
| 162 | + | self.record_alert(&alert_key, "tls_recovery", None, None, None).await; | |
| 163 | + | } | |
| 164 | + | ||
| 165 | + | pub async fn send_peer_missing( | |
| 166 | + | &self, | |
| 167 | + | peer_name: &str, | |
| 168 | + | address: &str, | |
| 169 | + | consecutive_failures: u32, | |
| 170 | + | ) { | |
| 171 | + | let alert_key = format!("peer:{peer_name}"); | |
| 172 | + | if self.is_within_cooldown(&alert_key).await { | |
| 173 | + | info!("alert cooldown active for {alert_key}, skipping"); | |
| 174 | + | return; | |
| 175 | + | } | |
| 176 | + | ||
| 177 | + | let subject = format!("[PoM] peer {peer_name}: missing"); | |
| 178 | + | let body = format!( | |
| 179 | + | "Peer: {peer_name}\n\ | |
| 180 | + | Address: {address}\n\ | |
| 181 | + | Consecutive failures: {consecutive_failures}\n\ | |
| 182 | + | Instance: {}\n\ | |
| 183 | + | Time: {}\n\n\ | |
| 184 | + | - PoM", | |
| 185 | + | self.instance_name, | |
| 186 | + | chrono::Utc::now().to_rfc3339(), | |
| 187 | + | ); | |
| 188 | + | ||
| 189 | + | self.send_email(&subject, &body).await; | |
| 190 | + | self.record_alert(&alert_key, "peer_missing", None, None, None).await; | |
| 191 | + | } | |
| 192 | + | ||
| 193 | + | pub async fn send_peer_recovery( | |
| 194 | + | &self, | |
| 195 | + | peer_name: &str, | |
| 196 | + | address: &str, | |
| 197 | + | ) { | |
| 198 | + | let subject = format!("[PoM] peer {peer_name}: recovered"); | |
| 199 | + | let body = format!( | |
| 200 | + | "Peer: {peer_name}\n\ | |
| 201 | + | Address: {address}\n\ | |
| 202 | + | Instance: {}\n\ | |
| 203 | + | Time: {}\n\n\ | |
| 204 | + | - PoM", | |
| 205 | + | self.instance_name, | |
| 206 | + | chrono::Utc::now().to_rfc3339(), | |
| 207 | + | ); | |
| 208 | + | ||
| 209 | + | let alert_key = format!("peer:{peer_name}"); | |
| 210 | + | self.send_email(&subject, &body).await; | |
| 211 | + | self.record_alert(&alert_key, "peer_recovery", None, None, None).await; | |
| 212 | + | } | |
| 213 | + | ||
| 214 | + | pub async fn send_latency_drift_alert( | |
| 215 | + | &self, | |
| 216 | + | target: &str, | |
| 217 | + | label: &str, | |
| 218 | + | drift_message: &str, | |
| 219 | + | ) { | |
| 220 | + | let alert_key = format!("latency:{target}"); | |
| 221 | + | if self.is_within_cooldown(&alert_key).await { | |
| 222 | + | info!("alert cooldown active for {alert_key}, skipping"); | |
| 223 | + | return; | |
| 224 | + | } | |
| 225 | + | ||
| 226 | + | let subject = format!("[PoM] {target}: latency drift detected"); | |
| 227 | + | let body = format!( | |
| 228 | + | "Target: {label} ({target})\n\ | |
| 229 | + | {drift_message}\n\ | |
| 230 | + | Instance: {}\n\ | |
| 231 | + | Time: {}\n\n\ | |
| 232 | + | - PoM", | |
| 233 | + | self.instance_name, | |
| 234 | + | chrono::Utc::now().to_rfc3339(), | |
| 235 | + | ); | |
| 236 | + | ||
| 237 | + | self.send_email(&subject, &body).await; | |
| 238 | + | self.record_alert(&alert_key, "latency_drift", None, None, Some(drift_message)).await; | |
| 239 | + | } | |
| 240 | + | ||
| 241 | + | pub async fn send_latency_recovery( | |
| 242 | + | &self, | |
| 243 | + | target: &str, | |
| 244 | + | label: &str, | |
| 245 | + | ) { | |
| 246 | + | // No cooldown on recovery — always send | |
| 247 | + | let alert_key = format!("latency:{target}"); | |
| 248 | + | let subject = format!("[PoM] {target}: latency recovered"); | |
| 249 | + | let body = format!( | |
| 250 | + | "Target: {label} ({target})\n\ | |
| 251 | + | Latency returned to normal.\n\ | |
| 252 | + | Instance: {}\n\ | |
| 253 | + | Time: {}\n\n\ | |
| 254 | + | - PoM", | |
| 255 | + | self.instance_name, | |
| 256 | + | chrono::Utc::now().to_rfc3339(), | |
| 257 | + | ); | |
| 258 | + | ||
| 259 | + | self.send_email(&subject, &body).await; | |
| 260 | + | self.record_alert(&alert_key, "latency_recovery", None, None, None).await; | |
| 261 | + | } | |
| 262 | + | ||
| 263 | + | async fn is_within_cooldown(&self, target: &str) -> bool { | |
| 264 | + | let latest = match db::get_latest_alert_for_target(&self.pool, target).await { | |
| 265 | + | Ok(Some(row)) => row, | |
| 266 | + | _ => return false, | |
| 267 | + | }; | |
| 268 | + | ||
| 269 | + | let sent_at = match chrono::DateTime::parse_from_rfc3339(&latest.sent_at) { | |
| 270 | + | Ok(dt) => dt, | |
| 271 | + | Err(_) => return false, | |
| 272 | + | }; | |
| 273 | + | ||
| 274 | + | let elapsed = chrono::Utc::now().signed_duration_since(sent_at); | |
| 275 | + | elapsed.num_seconds() < self.config.cooldown_secs as i64 | |
| 276 | + | } | |
| 277 | + | ||
| 278 | + | async fn send_email(&self, subject: &str, body: &str) { | |
| 279 | + | let Some(ref token) = self.config.postmark_token else { | |
| 280 | + | info!("[dev] alert: {subject}"); | |
| 281 | + | info!("[dev] {body}"); | |
| 282 | + | return; | |
| 283 | + | }; | |
| 284 | + | ||
| 285 | + | let payload = serde_json::json!({ | |
| 286 | + | "From": self.config.from, | |
| 287 | + | "To": self.config.to, | |
| 288 | + | "Subject": subject, | |
| 289 | + | "TextBody": body, | |
| 290 | + | }); | |
| 291 | + | ||
| 292 | + | match self.client | |
| 293 | + | .post("https://api.postmarkapp.com/email") | |
| 294 | + | .header("X-Postmark-Server-Token", token) | |
| 295 | + | .header("Content-Type", "application/json") | |
| 296 | + | .header("Accept", "application/json") | |
| 297 | + | .json(&payload) | |
| 298 | + | .send() | |
| 299 | + | .await | |
| 300 | + | { | |
| 301 | + | Ok(resp) if resp.status().is_success() => { | |
| 302 | + | info!("alert sent: {subject}"); | |
| 303 | + | } | |
| 304 | + | Ok(resp) => { | |
| 305 | + | let status = resp.status(); | |
| 306 | + | let text = resp.text().await.unwrap_or_default(); | |
| 307 | + | warn!("postmark error ({status}): {text}"); | |
| 308 | + | } | |
| 309 | + | Err(e) => { | |
| 310 | + | warn!("failed to send alert: {e}"); | |
| 311 | + | } | |
| 312 | + | } | |
| 313 | + | } | |
| 314 | + | ||
| 315 | + | async fn record_alert( | |
| 316 | + | &self, | |
| 317 | + | target: &str, | |
| 318 | + | alert_type: &str, | |
| 319 | + | from_status: Option<&str>, | |
| 320 | + | to_status: Option<&str>, | |
| 321 | + | error: Option<&str>, | |
| 322 | + | ) { | |
| 323 | + | if let Err(e) = db::insert_alert(&self.pool, target, alert_type, from_status, to_status, error).await { | |
| 324 | + | warn!("failed to record alert: {e}"); | |
| 325 | + | } | |
| 326 | + | } | |
| 327 | + | } | |
| 328 | + | ||
#[cfg(test)]
mod tests {
    use super::*;

    /// Build an `Alerter` in dev mode (no Postmark token) with a 300-second cooldown.
    fn test_alerter(pool: SqlitePool) -> Alerter {
        let config = AlertConfig {
            postmark_token: None, // dev mode
            to: "test@example.com".to_string(),
            from: "PoM Alerts <pom-alerts@makenot.work>".to_string(),
            cooldown_secs: 300,
        };
        Alerter::new(config, pool, "test-instance".to_string())
    }

    #[tokio::test]
    async fn cooldown_prevents_duplicate_alerts() {
        let pool = db::connect_in_memory().await.unwrap();
        let alerter = test_alerter(pool.clone());

        // First alert — not in cooldown
        assert!(!alerter.is_within_cooldown("health:mnw").await);

        // Record an alert
        db::insert_alert(&pool, "health:mnw", "health", Some("operational"), Some("error"), None)
            .await
            .unwrap();

        // Now should be in cooldown
        assert!(alerter.is_within_cooldown("health:mnw").await);
    }

    #[tokio::test]
    async fn cooldown_does_not_affect_other_targets() {
        let pool = db::connect_in_memory().await.unwrap();
        let alerter = test_alerter(pool.clone());

        db::insert_alert(&pool, "health:mnw", "health", None, None, None)
            .await
            .unwrap();

        // Different target should not be in cooldown
        assert!(!alerter.is_within_cooldown("health:other").await);
    }

    #[tokio::test]
    async fn dev_mode_does_not_send_http() {
        let pool = db::connect_in_memory().await.unwrap();
        let alerter = test_alerter(pool.clone());

        // This should log instead of making HTTP calls (no panic, no error)
        alerter.send_health_alert("mnw", "MakeNotWork", "operational", "error", None).await;

        // Verify alert was recorded in DB. This test is about the recorded fields, not the
        // key format, so accept the row under either the namespaced cooldown key or the
        // bare target name.
        let namespaced = db::get_latest_alert_for_target(&pool, "health:mnw").await.unwrap();
        let latest = match namespaced {
            Some(row) => Some(row),
            None => db::get_latest_alert_for_target(&pool, "mnw").await.unwrap(),
        };
        assert!(latest.is_some());
        let row = latest.unwrap();
        assert_eq!(row.alert_type, "health");
        assert_eq!(row.from_status.as_deref(), Some("operational"));
        assert_eq!(row.to_status.as_deref(), Some("error"));
    }
}
| @@ -0,0 +1,411 @@ | |||
| 1 | + | //! HTTP API for serve mode — exposes health check data to consumers like MNW. | |
| 2 | + | ||
| 3 | + | use std::collections::HashMap; | |
| 4 | + | use std::sync::Arc; | |
| 5 | + | ||
| 6 | + | use axum::extract::{Path, State as AxumState}; | |
| 7 | + | use axum::http::StatusCode; | |
| 8 | + | use axum::response::IntoResponse; | |
| 9 | + | use axum::routing::get; | |
| 10 | + | use axum::{Json, Router}; | |
| 11 | + | use serde::Serialize; | |
| 12 | + | ||
| 13 | + | use crate::checks::http::compute_test_staleness; | |
| 14 | + | use crate::config::Config; | |
| 15 | + | use crate::db; | |
| 16 | + | use crate::peer::SharedMeshState; | |
| 17 | + | use crate::types::{HealthSnapshot, LatencyBucket, LatencyStats, TestStaleness}; | |
| 18 | + | ||
| 19 | + | /// Shared state for the API server. | |
| 20 | + | #[derive(Clone)] | |
| 21 | + | pub struct ApiState { | |
| 22 | + | pub pool: sqlx::SqlitePool, | |
| 23 | + | pub config: Arc<Config>, | |
| 24 | + | pub mesh: Option<SharedMeshState>, | |
| 25 | + | } | |
| 26 | + | ||
| 27 | + | /// Build the axum router for the PoM API. | |
| 28 | + | pub fn router(pool: sqlx::SqlitePool, config: Config, mesh: Option<SharedMeshState>) -> Router { | |
| 29 | + | let state = ApiState { | |
| 30 | + | pool, | |
| 31 | + | config: Arc::new(config), | |
| 32 | + | mesh, | |
| 33 | + | }; | |
| 34 | + | ||
| 35 | + | Router::new() | |
| 36 | + | .route("/api/status", get(status_all)) | |
| 37 | + | .route("/api/status/{target}", get(status_target)) | |
| 38 | + | .route("/api/trends/{target}", get(trends)) | |
| 39 | + | .route("/api/peer/info", get(peer_info)) | |
| 40 | + | .route("/api/peer/status", get(peer_status)) | |
| 41 | + | .route("/api/mesh", get(mesh_view)) | |
| 42 | + | .with_state(state) | |
| 43 | + | } | |
| 44 | + | ||
| 45 | + | // --- Response types --- | |
| 46 | + | ||
| 47 | + | #[derive(Serialize)] | |
| 48 | + | struct StatusResponse { | |
| 49 | + | targets: HashMap<String, TargetStatus>, | |
| 50 | + | } | |
| 51 | + | ||
| 52 | + | #[derive(Serialize)] | |
| 53 | + | struct TargetStatus { | |
| 54 | + | label: String, | |
| 55 | + | latest: Option<SnapshotJson>, | |
| 56 | + | recent: Vec<SnapshotJson>, | |
| 57 | + | uptime_24h: Option<f64>, | |
| 58 | + | uptime_7d: Option<f64>, | |
| 59 | + | #[serde(skip_serializing_if = "Option::is_none")] | |
| 60 | + | latency_24h: Option<LatencyStats>, | |
| 61 | + | #[serde(skip_serializing_if = "Option::is_none")] | |
| 62 | + | tls: Option<db::TlsCheckRow>, | |
| 63 | + | #[serde(skip_serializing_if = "Option::is_none")] | |
| 64 | + | test_staleness: Option<TestStaleness>, | |
| 65 | + | #[serde(skip_serializing_if = "Option::is_none")] | |
| 66 | + | current_incident: Option<db::IncidentRow>, | |
| 67 | + | #[serde(skip_serializing_if = "Vec::is_empty")] | |
| 68 | + | incidents: Vec<db::IncidentRow>, | |
| 69 | + | } | |
| 70 | + | ||
| 71 | + | #[derive(Serialize)] | |
| 72 | + | struct SnapshotJson { | |
| 73 | + | status: String, | |
| 74 | + | checked_at: String, | |
| 75 | + | response_time_ms: i64, | |
| 76 | + | #[serde(skip_serializing_if = "Option::is_none")] | |
| 77 | + | details: Option<serde_json::Value>, | |
| 78 | + | #[serde(skip_serializing_if = "Option::is_none")] | |
| 79 | + | error: Option<String>, | |
| 80 | + | } | |
| 81 | + | ||
| 82 | + | impl From<HealthSnapshot> for SnapshotJson { | |
| 83 | + | fn from(s: HealthSnapshot) -> Self { | |
| 84 | + | Self { | |
| 85 | + | status: s.status.to_string(), | |
| 86 | + | checked_at: s.checked_at, | |
| 87 | + | response_time_ms: s.response_time_ms, | |
| 88 | + | details: s.details.map(|d| serde_json::to_value(d).unwrap_or_default()), | |
| 89 | + | error: s.error, | |
| 90 | + | } | |
| 91 | + | } | |
| 92 | + | } | |
| 93 | + | ||
| 94 | + | /// Build a `TargetStatus` for a single target. | |
| 95 | + | async fn build_target_status( | |
| 96 | + | pool: &sqlx::SqlitePool, | |
| 97 | + | name: &str, | |
| 98 | + | label: &str, | |
| 99 | + | config: &Config, | |
| 100 | + | ) -> TargetStatus { | |
| 101 | + | let recent = db::get_health_history(pool, Some(name), 10) | |
| 102 | + | .await | |
| 103 | + | .unwrap_or_default(); | |
| 104 | + | ||
| 105 | + | let latest_snapshot = recent.first().cloned(); | |
| 106 | + | let latest = latest_snapshot.clone().map(SnapshotJson::from); | |
| 107 | + | let recent_json: Vec<SnapshotJson> = recent.into_iter().map(SnapshotJson::from).collect(); | |
| 108 | + | ||
| 109 | + | let uptime_24h = db::get_uptime_percent(pool, name, 24) | |
| 110 | + | .await | |
| 111 | + | .unwrap_or(None); | |
| 112 | + | let uptime_7d = db::get_uptime_percent(pool, name, 168) | |
| 113 | + | .await | |
| 114 | + | .unwrap_or(None); | |
| 115 | + | ||
| 116 | + | // Compute 24h latency stats from operational checks | |
| 117 | + | let latency_24h = { | |
| 118 | + | let cutoff = (chrono::Utc::now() - chrono::Duration::hours(24)).to_rfc3339(); | |
| 119 | + | let times = db::get_response_times(pool, name, &cutoff) | |
| 120 | + | .await | |
| 121 | + | .unwrap_or_default(); | |
| 122 | + | let operational_times: Vec<i64> = times.iter() | |
| 123 | + | .filter(|(_, ms)| *ms > 0) | |
| 124 | + | .map(|(_, ms)| *ms) | |
| 125 | + | .collect(); | |
| 126 | + | LatencyStats::from_times(&operational_times) | |
| 127 | + | }; | |
| 128 | + | ||
| 129 | + | let tls = db::get_latest_tls_check(pool, name) | |
| 130 | + | .await | |
| 131 | + | .unwrap_or(None); | |
| 132 | + | ||
| 133 | + | // Compute test staleness for targets with test config | |
| 134 | + | let test_staleness = if let Some(target_config) = config.get_target(name) | |
| 135 | + | && let Some(tests_config) = &target_config.tests | |
| 136 | + | { | |
| 137 | + | let current_version = latest_snapshot | |
| 138 | + | .as_ref() | |
| 139 | + | .and_then(|s| s.details.as_ref()) | |
| 140 | + | .and_then(|d| d.version.clone()); | |
| 141 | + | ||
| 142 | + | let latest_test = db::get_latest_test_run(pool, name).await.unwrap_or(None); | |
| 143 | + | ||
| 144 | + | let tested_version = if let Some(ref test) = latest_test { | |
| 145 | + | db::get_version_at_time(pool, name, &test.started_at) | |
| 146 | + | .await | |
| 147 | + | .unwrap_or(None) | |
| 148 | + | } else { | |
| 149 | + | None | |
| 150 | + | }; | |
| 151 | + | ||
| 152 | + | let staleness = compute_test_staleness( | |
| 153 | + | current_version.as_deref(), | |
| 154 | + | tested_version.as_deref(), | |
| 155 | + | latest_test.as_ref().map(|t| t.started_at.as_str()), | |
| 156 | + | tests_config.staleness_days, | |
| 157 | + | ); | |
| 158 | + | Some(staleness) | |
| 159 | + | } else { | |
| 160 | + | None | |
| 161 | + | }; | |
| 162 | + | ||
| 163 | + | let current_incident = db::get_open_incident(pool, name) | |
| 164 | + | .await | |
| 165 | + | .unwrap_or(None); | |
| 166 | + | ||
| 167 | + | let incidents = db::get_recent_incidents(pool, name, 10) | |
| 168 | + | .await | |
| 169 | + | .unwrap_or_default(); | |
| 170 | + | ||
| 171 | + | TargetStatus { | |
| 172 | + | label: label.to_string(), | |
| 173 | + | latest, | |
| 174 | + | recent: recent_json, | |
| 175 | + | uptime_24h, | |
| 176 | + | uptime_7d, | |
| 177 | + | latency_24h, | |
| 178 | + | tls, | |
| 179 | + | test_staleness, | |
| 180 | + | current_incident, | |
| 181 | + | incidents, | |
| 182 | + | } | |
| 183 | + | } | |
| 184 | + | ||
| 185 | + | /// `GET /api/status` — JSON summary for all targets. | |
| 186 | + | async fn status_all( | |
| 187 | + | AxumState(state): AxumState<ApiState>, | |
| 188 | + | ) -> impl IntoResponse { | |
| 189 | + | let mut targets = HashMap::new(); | |
| 190 | + | ||
| 191 | + | for name in state.config.target_names() { | |
| 192 | + | if let Some(target_config) = state.config.get_target(&name) { | |
| 193 | + | let status = build_target_status(&state.pool, &name, &target_config.label, &state.config).await; | |
| 194 | + | targets.insert(name, status); | |
| 195 | + | } | |
| 196 | + | } | |
| 197 | + | ||
| 198 | + | Json(StatusResponse { targets }) | |
| 199 | + | } | |
| 200 | + | ||
| 201 | + | /// `GET /api/status/{target}` — JSON summary for a single target. | |
| 202 | + | async fn status_target( | |
| 203 | + | AxumState(state): AxumState<ApiState>, | |
| 204 | + | Path(target): Path<String>, | |
| 205 | + | ) -> impl IntoResponse { | |
| 206 | + | let Some(target_config) = state.config.get_target(&target) else { | |
| 207 | + | return Err((StatusCode::NOT_FOUND, Json(serde_json::json!({ | |
| 208 | + | "error": format!("unknown target: {target}") | |
| 209 | + | })))); | |
| 210 | + | }; | |
| 211 | + | ||
| 212 | + | let status = build_target_status(&state.pool, &target, &target_config.label, &state.config).await; | |
| 213 | + | Ok(Json(status)) | |
| 214 | + | } | |
| 215 | + | ||
| 216 | + | // --- Peer endpoints --- | |
| 217 | + | ||
| 218 | + | /// `GET /api/peer/info` — Returns this instance's identity info. | |
| 219 | + | async fn peer_info( | |
| 220 | + | AxumState(state): AxumState<ApiState>, | |
| 221 | + | ) -> impl IntoResponse { | |
| 222 | + | let Some(ref mesh) = state.mesh else { | |
| 223 | + | return Err((StatusCode::SERVICE_UNAVAILABLE, Json(serde_json::json!({ | |
| 224 | + | "error": "peer mesh not enabled" | |
| 225 | + | })))); | |
| 226 | + | }; | |
| 227 | + | ||
| 228 | + | let mesh_state = mesh.read().await; | |
| 229 | + | Ok(Json(serde_json::to_value(&mesh_state.instance).unwrap_or_default())) | |
| 230 | + | } | |
| 231 | + | ||
| 232 | + | /// `GET /api/peer/status` — This instance's full view: own info + target statuses + peer summaries. | |
| 233 | + | async fn peer_status( | |
| 234 | + | AxumState(state): AxumState<ApiState>, | |
| 235 | + | ) -> impl IntoResponse { | |
| 236 | + | let Some(ref mesh) = state.mesh else { | |
| 237 | + | return Err((StatusCode::SERVICE_UNAVAILABLE, Json(serde_json::json!({ | |
| 238 | + | "error": "peer mesh not enabled" | |
| 239 | + | })))); | |
| 240 | + | }; | |
| 241 | + | ||
| 242 | + | // Collect mesh data under lock, then drop lock before DB queries | |
| 243 | + | let (instance, peers) = { | |
| 244 | + | let mesh_state = mesh.read().await; | |
| 245 | + | let instance = mesh_state.instance.clone(); | |
| 246 | + | let peers: HashMap<String, serde_json::Value> = mesh_state.peers.iter().map(|(name, peer)| { | |
| 247 | + | (name.clone(), serde_json::json!({ | |
| 248 | + | "status": peer.status, | |
| 249 | + | "last_seen": peer.last_seen, | |
| 250 | + | "latency_ms": peer.latency_ms, | |
| 251 | + | })) | |
| 252 | + | }).collect(); | |
| 253 | + | (instance, peers) | |
| 254 | + | }; | |
| 255 | + | ||
| 256 | + | // Build target statuses (DB queries with no lock held) | |
| 257 | + | let mut targets = HashMap::new(); | |
| 258 | + | for name in state.config.target_names() { | |
| 259 | + | if let Some(target_config) = state.config.get_target(&name) | |
| 260 | + | && let Ok(Some(latest)) = db::get_latest_health(&state.pool, &name).await | |
| 261 | + | { | |
| 262 | + | targets.insert(name, serde_json::json!({ | |
| 263 | + | "label": target_config.label, | |
| 264 | + | "status": latest.status.to_string(), | |
| 265 | + | "response_time_ms": latest.response_time_ms, | |
| 266 | + | "checked_at": latest.checked_at, | |
| 267 | + | })); | |
| 268 | + | } | |
| 269 | + | } | |
| 270 | + | ||
| 271 | + | Ok(Json(serde_json::json!({ | |
| 272 | + | "instance": instance, | |
| 273 | + | "targets": targets, | |
| 274 | + | "peers": peers, | |
| 275 | + | }))) | |
| 276 | + | } | |
| 277 | + | ||
| 278 | + | /// `GET /api/mesh` — Aggregated view: self + each peer's cached status. | |
| 279 | + | async fn mesh_view( | |
| 280 | + | AxumState(state): AxumState<ApiState>, | |
| 281 | + | ) -> impl IntoResponse { | |
| 282 | + | let Some(ref mesh) = state.mesh else { | |
| 283 | + | return Err((StatusCode::SERVICE_UNAVAILABLE, Json(serde_json::json!({ | |
| 284 | + | "error": "peer mesh not enabled" | |
| 285 | + | })))); | |
| 286 | + | }; | |
| 287 | + | ||
| 288 | + | // Collect all mesh data under lock, then drop lock before DB queries | |
| 289 | + | let (instance, own_peers_json, peer_entries) = { | |
| 290 | + | let mesh_state = mesh.read().await; | |
| 291 | + | let instance = mesh_state.instance.clone(); | |
| 292 | + | let own_peers: HashMap<String, serde_json::Value> = mesh_state.peers.iter().map(|(name, peer)| { | |
| 293 | + | (name.clone(), serde_json::json!({ | |
| 294 | + | "status": peer.status, | |
| 295 | + | "last_seen": peer.last_seen, | |
| 296 | + | "latency_ms": peer.latency_ms, | |
| 297 | + | })) | |
| 298 | + | }).collect(); | |
| 299 | + | let peer_entries: Vec<(String, Option<serde_json::Value>, serde_json::Value)> = mesh_state.peers.iter().map(|(name, peer)| { | |
| 300 | + | let fallback = serde_json::json!({ | |
| 301 | + | "status": peer.status, | |
| 302 | + | "last_seen": peer.last_seen, | |
| 303 | + | "error": "no status data cached", | |
| 304 | + | }); | |
| 305 | + | (name.clone(), peer.status_data.clone(), fallback) | |
| 306 | + | }).collect(); | |
| 307 | + | (instance, own_peers, peer_entries) | |
| 308 | + | }; | |
| 309 | + | ||
| 310 | + | // Build target statuses (DB queries with no lock held) | |
| 311 | + | let mut targets = HashMap::new(); | |
| 312 | + | for name in state.config.target_names() { | |
| 313 | + | if let Some(target_config) = state.config.get_target(&name) | |
| 314 | + | && let Ok(Some(latest)) = db::get_latest_health(&state.pool, &name).await | |
| 315 | + | { | |
| 316 | + | targets.insert(name, serde_json::json!({ | |
| 317 | + | "label": target_config.label, | |
| 318 | + | "status": latest.status.to_string(), | |
| 319 | + | "response_time_ms": latest.response_time_ms, | |
| 320 | + | "checked_at": latest.checked_at, | |
| 321 | + | })); | |
| 322 | + | } | |
| 323 | + | } | |
| 324 | + | ||
| 325 | + | let self_entry = serde_json::json!({ | |
| 326 | + | "instance": instance, | |
| 327 | + | "targets": targets, | |
| 328 | + | "peers": own_peers_json, | |
| 329 | + | }); | |
| 330 | + | ||
| 331 | + | let mut instances = serde_json::Map::new(); | |
| 332 | + | instances.insert(instance.name.clone(), self_entry); | |
| 333 | + | ||
| 334 | + | for (name, status_data, fallback) in peer_entries { | |
| 335 | + | instances.insert(name, status_data.unwrap_or(fallback)); | |
| 336 | + | } | |
| 337 | + | ||
| 338 | + | Ok(Json(serde_json::json!({ | |
| 339 | + | "instances": instances, | |
| 340 | + | }))) | |
| 341 | + | } | |
| 342 | + | ||
| 343 | + | // --- Trends endpoint --- | |
| 344 | + | ||
| 345 | + | #[derive(Serialize)] | |
| 346 | + | struct TrendResponse { | |
| 347 | + | target: String, | |
| 348 | + | window_hours: u64, | |
| 349 | + | bucket_minutes: u64, | |
| 350 | + | buckets: Vec<LatencyBucket>, | |
| 351 | + | overall: Option<LatencyStats>, | |
| 352 | + | baseline: Option<LatencyStats>, | |
| 353 | + | } | |
| 354 | + | ||
| 355 | + | /// `GET /api/trends/{target}?hours=24&bucket_minutes=60` — latency trend data. | |
| 356 | + | async fn trends( | |
| 357 | + | AxumState(state): AxumState<ApiState>, | |
| 358 | + | Path(target): Path<String>, | |
| 359 | + | axum::extract::Query(params): axum::extract::Query<TrendQueryParams>, | |
| 360 | + | ) -> impl IntoResponse { | |
| 361 | + | let Some(_target_config) = state.config.get_target(&target) else { | |
| 362 | + | return Err((StatusCode::NOT_FOUND, Json(serde_json::json!({ | |
| 363 | + | "error": format!("unknown target: {target}") | |
| 364 | + | })))); | |
| 365 | + | }; | |
| 366 | + | ||
| 367 | + | let hours = params.hours.unwrap_or(24); | |
| 368 | + | let bucket_minutes = params.bucket_minutes.unwrap_or(60); | |
| 369 | + | ||
| 370 | + | let cutoff = (chrono::Utc::now() - chrono::Duration::hours(hours as i64)).to_rfc3339(); | |
| 371 | + | let times = db::get_response_times(&state.pool, &target, &cutoff) | |
| 372 | + | .await | |
| 373 | + | .unwrap_or_default(); | |
| 374 | + | ||
| 375 | + | let operational_times: Vec<i64> = times.iter() | |
| 376 | + | .filter(|(_, ms)| *ms > 0) | |
| 377 | + | .map(|(_, ms)| *ms) | |
| 378 | + | .collect(); | |
| 379 | + | let overall = LatencyStats::from_times(&operational_times); | |
| 380 | + | ||
| 381 | + | let operational_data: Vec<(String, i64)> = times.into_iter() | |
| 382 | + | .filter(|(_, ms)| *ms > 0) | |
| 383 | + | .collect(); | |
| 384 | + | let buckets = LatencyStats::bucket_by_time(&operational_data, bucket_minutes); | |
| 385 | + | ||
| 386 | + | // 7d baseline for reference | |
| 387 | + | let baseline_cutoff = (chrono::Utc::now() - chrono::Duration::hours(168)).to_rfc3339(); | |
| 388 | + | let baseline_times = db::get_response_times(&state.pool, &target, &baseline_cutoff) | |
| 389 | + | .await | |
| 390 | + | .unwrap_or_default(); | |
| 391 | + | let baseline_operational: Vec<i64> = baseline_times.iter() | |
| 392 | + | .filter(|(_, ms)| *ms > 0) | |
| 393 | + | .map(|(_, ms)| *ms) | |
| 394 | + | .collect(); | |
| 395 | + | let baseline = LatencyStats::from_times(&baseline_operational); | |
| 396 | + | ||
| 397 | + | Ok(Json(TrendResponse { | |
| 398 | + | target, | |
| 399 | + | window_hours: hours, | |
| 400 | + | bucket_minutes, | |
| 401 | + | buckets, | |
| 402 | + | overall, | |
| 403 | + | baseline, | |
| 404 | + | })) | |
| 405 | + | } | |
| 406 | + | ||
| 407 | + | #[derive(serde::Deserialize)] | |
| 408 | + | struct TrendQueryParams { | |
| 409 | + | hours: Option<u64>, | |
| 410 | + | bucket_minutes: Option<u64>, | |
| 411 | + | } |
| @@ -1,11 +1,12 @@ | |||
| 1 | 1 | use std::time::Instant; | |
| 2 | 2 | ||
| 3 | - | use crate::config::HealthConfig; | |
| 3 | + | use crate::config::{HealthConfig, HealthExpectation}; | |
| 4 | 4 | use crate::types::{HealthDetails, HealthSnapshot, HealthStatus}; | |
| 5 | 5 | ||
| 6 | 6 | pub async fn check_health( | |
| 7 | 7 | target_name: &str, | |
| 8 | 8 | config: &HealthConfig, | |
| 9 | + | expect: Option<&HealthExpectation>, | |
| 9 | 10 | ) -> HealthSnapshot { | |
| 10 | 11 | let client = reqwest::Client::builder() | |
| 11 | 12 | .timeout(std::time::Duration::from_secs(config.timeout_secs)) | |
| @@ -18,51 +19,46 @@ pub async fn check_health( | |||
| 18 | 19 | match client.get(&config.url).send().await { | |
| 19 | 20 | Ok(response) => { | |
| 20 | 21 | let response_time_ms = start.elapsed().as_millis() as i64; | |
| 21 | - | let status_code = response.status(); | |
| 22 | - | ||
| 23 | - | match response.json::<serde_json::Value>().await { | |
| 24 | - | Ok(json) => { | |
| 25 | - | let api_status = json | |
| 26 | - | .get("status") | |
| 27 | - | .and_then(|s| s.as_str()) | |
| 28 | - | .unwrap_or("unknown"); | |
| 29 | - | ||
| 30 | - | let status = match api_status { | |
| 31 | - | "operational" => HealthStatus::Operational, | |
| 32 | - | "degraded" => HealthStatus::Degraded, | |
| 33 | - | _ if status_code.is_success() => HealthStatus::Degraded, | |
| 34 | - | _ => HealthStatus::Error, | |
| 35 | - | }; | |
| 22 | + | let status_code = response.status().as_u16(); | |
| 23 | + | ||
| 24 | + | match response.text().await { | |
| 25 | + | Ok(body) => { | |
| 26 | + | let json: Option<serde_json::Value> = serde_json::from_str(&body).ok(); | |
| 36 | 27 | ||
| 37 | - | let details = HealthDetails { | |
| 38 | - | version: json.get("version").and_then(|v| v.as_str()).map(String::from), | |
| 39 | - | uptime: json.get("uptime").and_then(|v| v.as_str()).map(String::from), | |
| 40 | - | checks: json.get("checks").cloned(), | |
| 41 | - | monitoring: json.get("monitoring").cloned(), | |
| 28 | + | let (mut status, details, mut error) = if let Some(ref json) = json { | |
| 29 | + | let (s, d) = classify_json_response(status_code, json); | |
| 30 | + | (s, Some(d), None) | |
| 31 | + | } else { | |
| 32 | + | (classify_non_json(status_code), None, Some("Failed to parse response as JSON".to_string())) | |
| 42 | 33 | }; | |
| 43 | 34 | ||
| 35 | + | // Apply expectation validation | |
| 36 | + | if let Some(exp) = expect { | |
| 37 | + | let failures = validate_expectations(exp, status_code, &body, json.as_ref()); | |
| 38 | + | if !failures.is_empty() { | |
| 39 | + | status = HealthStatus::Degraded; | |
| 40 | + | error = Some(failures.join("; ")); | |
| 41 | + | } | |
| 42 | + | } | |
| 43 | + | ||
| 44 | 44 | HealthSnapshot { | |
| 45 | 45 | id: None, | |
| 46 | 46 | target: target_name.to_string(), | |
| 47 | 47 | status, | |
| 48 | 48 | checked_at, | |
| 49 | 49 | response_time_ms, | |
| 50 | - | details: Some(details), | |
| 51 | - | error: None, | |
| 50 | + | details, | |
| 51 | + | error, | |
| 52 | 52 | } | |
| 53 | 53 | } | |
| 54 | 54 | Err(e) => HealthSnapshot { | |
| 55 | 55 | id: None, | |
| 56 | 56 | target: target_name.to_string(), | |
| 57 | - | status: if status_code.is_success() { | |
| 58 | - | HealthStatus::Degraded | |
| 59 | - | } else { | |
| 60 | - | HealthStatus::Error | |
| 61 | - | }, | |
| 57 | + | status: classify_non_json(status_code), | |
| 62 | 58 | checked_at, | |
| 63 | 59 | response_time_ms, | |
| 64 | 60 | details: None, | |
| 65 | - | error: Some(format!("Failed to parse response: {e}")), | |
| 61 | + | error: Some(format!("Failed to read response body: {e}")), | |
| 66 | 62 | }, | |
| 67 | 63 | } | |
| 68 | 64 | } | |
| @@ -80,3 +76,524 @@ pub async fn check_health( | |||
| 80 | 76 | } | |
| 81 | 77 | } | |
| 82 | 78 | } | |
| 79 | + | ||
| 80 | + | /// Walk a dot-separated path through nested JSON objects. | |
| 81 | + | pub fn resolve_json_path<'a>(value: &'a serde_json::Value, path: &str) -> Option<&'a serde_json::Value> { | |
| 82 | + | let mut current = value; | |
| 83 | + | for key in path.split('.') { | |
| 84 | + | current = current.get(key)?; | |
| 85 | + | } | |
| 86 | + | Some(current) | |
| 87 | + | } | |
| 88 | + | ||
| 89 | + | /// Validate response against expectations. Returns a list of failure descriptions. | |
| 90 | + | pub fn validate_expectations( | |
| 91 | + | expect: &HealthExpectation, | |
| 92 | + | status_code: u16, | |
| 93 | + | body: &str, | |
| 94 | + | json: Option<&serde_json::Value>, | |
| 95 | + | ) -> Vec<String> { | |
| 96 | + | let mut failures = Vec::new(); | |
| 97 | + | ||
| 98 | + | if let Some(expected_code) = expect.status_code | |
| 99 | + | && status_code != expected_code | |
| 100 | + | { | |
| 101 | + | failures.push(format!("expected status {expected_code}, got {status_code}")); | |
| 102 | + | } | |
| 103 | + | ||
| 104 | + | if let Some(ref substring) = expect.body_contains | |
| 105 | + | && !body.contains(substring.as_str()) | |
| 106 | + | { | |
| 107 | + | failures.push(format!("body missing expected substring \"{substring}\"")); | |
| 108 | + | } | |
| 109 | + | ||
| 110 | + | if !expect.json_fields.is_empty() { | |
| 111 | + | if let Some(json) = json { | |
| 112 | + | for (path, expected_value) in &expect.json_fields { | |
| 113 | + | match resolve_json_path(json, path) { | |
| 114 | + | Some(actual) => { | |
| 115 | + | let actual_str = match actual { | |
| 116 | + | serde_json::Value::String(s) => s.clone(), | |
| 117 | + | other => other.to_string(), | |
| 118 | + | }; | |
| 119 | + | if actual_str != *expected_value { | |
| 120 | + | failures.push(format!("json field \"{path}\": expected \"{expected_value}\", got \"{actual_str}\"")); | |
| 121 | + | } | |
| 122 | + | } | |
| 123 | + | None => { | |
| 124 | + | failures.push(format!("json field \"{path}\" not found")); | |
| 125 | + | } | |
| 126 | + | } | |
| 127 | + | } | |
| 128 | + | } else { | |
| 129 | + | failures.push("expected JSON response for field validation, got non-JSON".to_string()); | |
| 130 | + | } | |
| 131 | + | } | |
| 132 | + | ||
| 133 | + | failures | |
| 134 | + | } | |
| 135 | + | ||
| 136 | + | /// Classify a JSON health response into status + details. | |
| 137 | + | pub fn classify_json_response( | |
| 138 | + | status_code: u16, | |
| 139 | + | json: &serde_json::Value, | |
| 140 | + | ) -> (HealthStatus, HealthDetails) { | |
| 141 | + | let api_status = json | |
| 142 | + | .get("status") | |
| 143 | + | .and_then(|s| s.as_str()) | |
| 144 | + | .unwrap_or("unknown"); | |
| 145 | + | ||
| 146 | + | let status = match api_status { | |
| 147 | + | "operational" => HealthStatus::Operational, | |
| 148 | + | "degraded" => HealthStatus::Degraded, | |
| 149 | + | _ if (200..300).contains(&status_code) => HealthStatus::Degraded, | |
| 150 | + | _ => HealthStatus::Error, | |
| 151 | + | }; | |
| 152 | + | ||
| 153 | + | let details = HealthDetails { | |
| 154 | + | version: json.get("version").and_then(|v| v.as_str()).map(String::from), | |
| 155 | + | uptime: json.get("uptime").and_then(|v| v.as_str()).map(String::from), | |
| 156 | + | checks: json.get("checks").cloned(), | |
| 157 | + | monitoring: json.get("monitoring").cloned(), | |
| 158 | + | }; | |
| 159 | + | ||
| 160 | + | (status, details) | |
| 161 | + | } | |
| 162 | + | ||
| 163 | + | /// Classify a response that couldn't be parsed as JSON. | |
| 164 | + | pub fn classify_non_json(status_code: u16) -> HealthStatus { | |
| 165 | + | if (200..300).contains(&status_code) { | |
| 166 | + | HealthStatus::Degraded | |
| 167 | + | } else { | |
| 168 | + | HealthStatus::Error | |
| 169 | + | } | |
| 170 | + | } | |
| 171 | + | ||
| 172 | + | /// Detect sustained latency drift by checking if all recent response times | |
| 173 | + | /// exceed the baseline average by the given threshold multiplier. | |
| 174 | + | /// | |
| 175 | + | /// Returns a description string if drift is detected, `None` otherwise. | |
| 176 | + | /// Requires at least 10 baseline samples to avoid false positives. | |
| 177 | + | pub fn detect_latency_drift( | |
| 178 | + | recent_times: &[i64], | |
| 179 | + | baseline: &crate::types::LatencyStats, | |
| 180 | + | threshold: f64, | |
| 181 | + | ) -> Option<String> { | |
| 182 | + | if baseline.sample_count < 10 || recent_times.is_empty() { | |
| 183 | + | return None; | |
| 184 | + | } | |
| 185 | + | let drift_threshold = baseline.avg_ms * threshold; | |
| 186 | + | let all_over = recent_times.iter().all(|&t| t as f64 > drift_threshold); | |
| 187 | + | if all_over { | |
| 188 | + | let avg_recent: f64 = recent_times.iter().sum::<i64>() as f64 / recent_times.len() as f64; | |
| 189 | + | Some(format!( | |
| 190 | + | "latency drift: last {} checks avg {:.0}ms (baseline avg {:.0}ms, threshold {:.0}ms)", | |
| 191 | + | recent_times.len(), | |
| 192 | + | avg_recent, | |
| 193 | + | baseline.avg_ms, | |
| 194 | + | drift_threshold, | |
| 195 | + | )) | |
| 196 | + | } else { | |
| 197 | + | None | |
| 198 | + | } | |
| 199 | + | } | |
| 200 | + | ||
| 201 | + | /// Compute test staleness from version and timing data. | |
| 202 | + | /// | |
| 203 | + | /// A target's tests are considered stale when: | |
| 204 | + | /// 1. No tests have ever been run | |
| 205 | + | /// 2. Tests are older than `staleness_days` | |
| 206 | + | /// 3. The deployed version has changed since the last test run | |
| 207 | + | pub fn compute_test_staleness( | |
| 208 | + | current_version: Option<&str>, | |
| 209 | + | tested_version: Option<&str>, | |
| 210 | + | last_test_at: Option<&str>, | |
| 211 | + | staleness_days: u64, | |
| 212 | + | ) -> crate::types::TestStaleness { | |
| 213 | + | let Some(last_test_at) = last_test_at else { | |
| 214 | + | return crate::types::TestStaleness { | |
| 215 | + | stale: true, | |
| 216 | + | reason: Some("no tests have been run".to_string()), | |
| 217 | + | current_version: current_version.map(String::from), | |
| 218 | + | tested_version: None, | |
| 219 | + | last_test_at: None, | |
| 220 | + | days_since_test: None, | |
| 221 | + | }; | |
| 222 | + | }; | |
| 223 | + | ||
| 224 | + | let days_since = chrono::DateTime::parse_from_rfc3339(last_test_at) | |
| 225 | + | .ok() | |
| 226 | + | .map(|dt| { | |
| 227 | + | let now = chrono::Utc::now(); | |
| 228 | + | (now - dt.with_timezone(&chrono::Utc)).num_days() | |
| 229 | + | }); | |
| 230 | + | ||
| 231 | + | if let Some(days) = days_since | |
| 232 | + | && days >= staleness_days as i64 | |
| 233 | + | { | |
| 234 | + | return crate::types::TestStaleness { | |
| 235 | + | stale: true, | |
| 236 | + | reason: Some(format!("tests are {days} days old (threshold: {staleness_days}d)")), | |
| 237 | + | current_version: current_version.map(String::from), | |
| 238 | + | tested_version: tested_version.map(String::from), | |
| 239 | + | last_test_at: Some(last_test_at.to_string()), | |
| 240 | + | days_since_test: Some(days), | |
| 241 | + | }; | |
| 242 | + | } | |
| 243 | + | ||
| 244 | + | if let (Some(current), Some(tested)) = (current_version, tested_version) | |
| 245 | + | && current != tested | |
| 246 | + | { | |
| 247 | + | return crate::types::TestStaleness { | |
| 248 | + | stale: true, | |
| 249 | + | reason: Some(format!("version changed: {tested} -> {current}")), | |
| 250 | + | current_version: Some(current.to_string()), | |
| 251 | + | tested_version: Some(tested.to_string()), | |
| 252 | + | last_test_at: Some(last_test_at.to_string()), | |
| 253 | + | days_since_test: days_since, | |
| 254 | + | }; | |
| 255 | + | } | |
| 256 | + | ||
| 257 | + | crate::types::TestStaleness { | |
| 258 | + | stale: false, | |
| 259 | + | reason: None, | |
| 260 | + | current_version: current_version.map(String::from), | |
| 261 | + | tested_version: tested_version.map(String::from), | |
| 262 | + | last_test_at: Some(last_test_at.to_string()), | |
| 263 | + | days_since_test: days_since, | |
| 264 | + | } | |
| 265 | + | } | |
| 266 | + | ||
| 267 | + | #[cfg(test)] | |
| 268 | + | mod tests { | |
| 269 | + | use super::*; | |
| 270 | + | use std::collections::HashMap; | |
| 271 | + | ||
| 272 | + | #[test] | |
| 273 | + | fn classify_operational() { | |
| 274 | + | let json = serde_json::json!({ | |
| 275 | + | "status": "operational", | |
| 276 | + | "version": "2.1.0", | |
| 277 | + | "uptime": "3d 12h", | |
| 278 | + | }); | |
| 279 | + | let (status, details) = classify_json_response(200, &json); | |
| 280 | + | assert_eq!(status, HealthStatus::Operational); | |
| 281 | + | assert_eq!(details.version.as_deref(), Some("2.1.0")); | |
| 282 | + | assert_eq!(details.uptime.as_deref(), Some("3d 12h")); | |
| 283 | + | } | |
| 284 | + | ||
| 285 | + | #[test] | |
| 286 | + | fn classify_degraded_explicit() { | |
| 287 | + | let json = serde_json::json!({ "status": "degraded" }); | |
| 288 | + | let (status, _) = classify_json_response(200, &json); | |
| 289 | + | assert_eq!(status, HealthStatus::Degraded); | |
| 290 | + | } | |
| 291 | + | ||
| 292 | + | #[test] | |
| 293 | + | fn classify_unknown_status_with_success_code() { | |
| 294 | + | let json = serde_json::json!({ "status": "starting_up" }); | |
| 295 | + | let (status, _) = classify_json_response(200, &json); | |
| 296 | + | assert_eq!(status, HealthStatus::Degraded); | |
| 297 | + | } | |
| 298 | + | ||
| 299 | + | #[test] | |
| 300 | + | fn classify_unknown_status_with_error_code() { | |
| 301 | + | let json = serde_json::json!({ "status": "starting_up" }); | |
| 302 | + | let (status, _) = classify_json_response(503, &json); | |
| 303 | + | assert_eq!(status, HealthStatus::Error); | |
| 304 | + | } | |
| 305 | + | ||
| 306 | + | #[test] | |
| 307 | + | fn classify_missing_status_field() { | |
| 308 | + | let json = serde_json::json!({ "version": "1.0.0" }); | |
| 309 | + | let (status, details) = classify_json_response(200, &json); | |
| 310 | + | assert_eq!(status, HealthStatus::Degraded); // "unknown" falls through | |
| 311 | + | assert_eq!(details.version.as_deref(), Some("1.0.0")); | |
| 312 | + | } | |
| 313 | + | ||
| 314 | + | #[test] | |
| 315 | + | fn classify_extracts_checks_and_monitoring() { | |
| 316 | + | let json = serde_json::json!({ | |
| 317 | + | "status": "operational", | |
| 318 | + | "checks": { "db": "ok", "redis": "ok" }, | |
| 319 | + | "monitoring": { "external": true }, | |
| 320 | + | }); | |
| 321 | + | let (_, details) = classify_json_response(200, &json); | |
| 322 | + | assert!(details.checks.is_some()); | |
| 323 | + | assert!(details.monitoring.is_some()); | |
| 324 | + | } | |
| 325 | + | ||
| 326 | + | #[test] | |
| 327 | + | fn classify_non_json_success() { | |
| 328 | + | assert_eq!(classify_non_json(200), HealthStatus::Degraded); | |
| 329 | + | assert_eq!(classify_non_json(204), HealthStatus::Degraded); | |
| 330 | + | } | |
| 331 | + | ||
| 332 | + | #[test] | |
| 333 | + | fn classify_non_json_error() { | |
| 334 | + | assert_eq!(classify_non_json(500), HealthStatus::Error); | |
| 335 | + | assert_eq!(classify_non_json(404), HealthStatus::Error); | |
| 336 | + | } | |
| 337 | + | ||
| 338 | + | // --- resolve_json_path --- | |
| 339 | + | ||
| 340 | + | #[test] | |
| 341 | + | fn resolve_json_path_top_level() { | |
| 342 | + | let json = serde_json::json!({"status": "operational"}); | |
| 343 | + | let val = resolve_json_path(&json, "status").unwrap(); | |
| 344 | + | assert_eq!(val, "operational"); | |
| 345 | + | } | |
| 346 | + | ||
| 347 | + | #[test] | |
| 348 | + | fn resolve_json_path_nested() { | |
| 349 | + | let json = serde_json::json!({"checks": {"db": "ok", "redis": "warn"}}); | |
| 350 | + | let val = resolve_json_path(&json, "checks.db").unwrap(); | |
| 351 | + | assert_eq!(val, "ok"); | |
| 352 | + | } | |
| 353 | + | ||
| 354 | + | #[test] | |
| 355 | + | fn resolve_json_path_deeply_nested() { | |
| 356 | + | let json = serde_json::json!({"a": {"b": {"c": 42}}}); | |
| 357 | + | let val = resolve_json_path(&json, "a.b.c").unwrap(); | |
| 358 | + | assert_eq!(val, 42); | |
| 359 | + | } | |
| 360 | + | ||
| 361 | + | #[test] | |
| 362 | + | fn resolve_json_path_missing() { | |
| 363 | + | let json = serde_json::json!({"status": "operational"}); | |
| 364 | + | assert!(resolve_json_path(&json, "missing").is_none()); | |
| 365 | + | } | |
| 366 | + | ||
| 367 | + | #[test] | |
| 368 | + | fn resolve_json_path_partial_missing() { | |
| 369 | + | let json = serde_json::json!({"checks": {"db": "ok"}}); | |
| 370 | + | assert!(resolve_json_path(&json, "checks.redis").is_none()); | |
| 371 | + | } | |
| 372 | + | ||
| 373 | + | // --- validate_expectations --- | |
| 374 | + | ||
| 375 | + | #[test] | |
| 376 | + | fn validate_status_code_match() { | |
| 377 | + | let expect = HealthExpectation { | |
| 378 | + | status_code: Some(200), | |
| 379 | + | ..Default::default() | |
| 380 | + | }; | |
| 381 | + | let failures = validate_expectations(&expect, 200, "", None); | |
| 382 | + | assert!(failures.is_empty()); | |
| 383 | + | } | |
| 384 | + | ||
| 385 | + | #[test] | |
| 386 | + | fn validate_status_code_mismatch() { | |
| 387 | + | let expect = HealthExpectation { | |
| 388 | + | status_code: Some(200), | |
| 389 | + | ..Default::default() | |
| 390 | + | }; | |
| 391 | + | let failures = validate_expectations(&expect, 503, "", None); | |
| 392 | + | assert_eq!(failures.len(), 1); | |
| 393 | + | assert!(failures[0].contains("expected status 200")); | |
| 394 | + | assert!(failures[0].contains("got 503")); | |
| 395 | + | } | |
| 396 | + | ||
| 397 | + | #[test] | |
| 398 | + | fn validate_body_contains_match() { | |
| 399 | + | let expect = HealthExpectation { | |
| 400 | + | body_contains: Some("operational".to_string()), | |
| 401 | + | ..Default::default() | |
| 402 | + | }; | |
| 403 | + | let failures = validate_expectations(&expect, 200, r#"{"status":"operational"}"#, None); | |
| 404 | + | assert!(failures.is_empty()); | |
| 405 | + | } | |
| 406 | + | ||
| 407 | + | #[test] | |
| 408 | + | fn validate_body_contains_mismatch() { | |
| 409 | + | let expect = HealthExpectation { | |
| 410 | + | body_contains: Some("operational".to_string()), | |
| 411 | + | ..Default::default() | |
| 412 | + | }; | |
| 413 | + | let failures = validate_expectations(&expect, 200, r#"{"status":"error"}"#, None); | |
| 414 | + | assert_eq!(failures.len(), 1); | |
| 415 | + | assert!(failures[0].contains("body missing")); | |
| 416 | + | } | |
| 417 | + | ||
| 418 | + | #[test] | |
| 419 | + | fn validate_json_fields_match() { | |
| 420 | + | let mut fields = HashMap::new(); | |
| 421 | + | fields.insert("status".to_string(), "operational".to_string()); | |
| 422 | + | fields.insert("checks.db".to_string(), "ok".to_string()); | |
| 423 | + | let expect = HealthExpectation { | |
| 424 | + | json_fields: fields, | |
| 425 | + | ..Default::default() | |
| 426 | + | }; | |
| 427 | + | let json = serde_json::json!({"status": "operational", "checks": {"db": "ok"}}); | |
| 428 | + | let failures = validate_expectations(&expect, 200, "", Some(&json)); | |
| 429 | + | assert!(failures.is_empty()); | |
| 430 | + | } | |
| 431 | + | ||
| 432 | + | #[test] | |
| 433 | + | fn validate_json_fields_mismatch() { | |
| 434 | + | let mut fields = HashMap::new(); | |
| 435 | + | fields.insert("status".to_string(), "operational".to_string()); | |
| 436 | + | let expect = HealthExpectation { | |
| 437 | + | json_fields: fields, | |
| 438 | + | ..Default::default() | |
| 439 | + | }; | |
| 440 | + | let json = serde_json::json!({"status": "degraded"}); | |
| 441 | + | let failures = validate_expectations(&expect, 200, "", Some(&json)); | |
| 442 | + | assert_eq!(failures.len(), 1); | |
| 443 | + | assert!(failures[0].contains("expected \"operational\"")); | |
| 444 | + | assert!(failures[0].contains("got \"degraded\"")); | |
| 445 | + | } | |
| 446 | + | ||
| 447 | + | #[test] | |
| 448 | + | fn validate_json_field_missing() { | |
| 449 | + | let mut fields = HashMap::new(); | |
| 450 | + | fields.insert("checks.redis".to_string(), "ok".to_string()); | |
| 451 | + | let expect = HealthExpectation { | |
| 452 | + | json_fields: fields, | |
| 453 | + | ..Default::default() | |
| 454 | + | }; | |
| 455 | + | let json = serde_json::json!({"checks": {"db": "ok"}}); | |
| 456 | + | let failures = validate_expectations(&expect, 200, "", Some(&json)); | |
| 457 | + | assert_eq!(failures.len(), 1); | |
| 458 | + | assert!(failures[0].contains("not found")); | |
| 459 | + | } | |
| 460 | + | ||
| 461 | + | #[test] | |
| 462 | + | fn validate_json_fields_on_non_json() { | |
| 463 | + | let mut fields = HashMap::new(); | |
| 464 | + | fields.insert("status".to_string(), "ok".to_string()); | |
| 465 | + | let expect = HealthExpectation { | |
| 466 | + | json_fields: fields, | |
| 467 | + | ..Default::default() | |
| 468 | + | }; | |
| 469 | + | let failures = validate_expectations(&expect, 200, "not json", None); | |
| 470 | + | assert_eq!(failures.len(), 1); | |
| 471 | + | assert!(failures[0].contains("non-JSON")); | |
| 472 | + | } | |
| 473 | + | ||
| 474 | + | #[test] | |
| 475 | + | fn validate_mixed_failures() { | |
| 476 | + | let mut fields = HashMap::new(); | |
| 477 | + | fields.insert("status".to_string(), "operational".to_string()); | |
| 478 | + | let expect = HealthExpectation { | |
| 479 | + | status_code: Some(200), | |
| 480 | + | body_contains: Some("healthy".to_string()), | |
| 481 | + | json_fields: fields, | |
| 482 | + | }; | |
| 483 | + | let json = serde_json::json!({"status": "degraded"}); | |
| 484 | + | let failures = validate_expectations(&expect, 503, r#"{"status":"degraded"}"#, Some(&json)); | |
| 485 | + | assert_eq!(failures.len(), 3); // status code + body + json field | |
| 486 | + | } | |
| 487 | + | ||
| 488 | + | #[test] |
Lines truncated
| @@ -1,3 +1,4 @@ | |||
| 1 | 1 | pub mod http; | |
| 2 | 2 | pub mod parse; | |
| 3 | 3 | pub mod ssh; | |
| 4 | + | pub mod tls; |
| @@ -0,0 +1,153 @@ | |||
| 1 | + | //! TLS certificate probing — connect to a host, inspect the leaf cert, track expiry. | |
| 2 | + | ||
| 3 | + | use std::sync::Arc; | |
| 4 | + | ||
| 5 | + | use tokio::net::TcpStream; | |
| 6 | + | use tokio_rustls::rustls; | |
| 7 | + | use tokio_rustls::TlsConnector; | |
| 8 | + | ||
| 9 | + | use crate::config::TlsConfig; | |
| 10 | + | use crate::types::TlsStatus; | |
| 11 | + | ||
| 12 | + | /// Connect to host:port, complete TLS handshake, and extract leaf cert fields. | |
| 13 | + | pub async fn check_tls(target_name: &str, config: &TlsConfig) -> TlsStatus { | |
| 14 | + | let checked_at = chrono::Utc::now().to_rfc3339(); | |
| 15 | + | let addr = format!("{}:{}", config.host, config.port); | |
| 16 | + | ||
| 17 | + | // TCP connect with timeout | |
| 18 | + | let tcp = match tokio::time::timeout( | |
| 19 | + | std::time::Duration::from_secs(10), | |
| 20 | + | TcpStream::connect(&addr), | |
| 21 | + | ) | |
| 22 | + | .await | |
| 23 | + | { | |
| 24 | + | Ok(Ok(stream)) => stream, | |
| 25 | + | Ok(Err(e)) => return tls_error(target_name, config, &checked_at, &format!("TCP connect failed: {e}")), | |
| 26 | + | Err(_) => return tls_error(target_name, config, &checked_at, "TCP connect timed out"), | |
| 27 | + | }; | |
| 28 | + | ||
| 29 | + | // Build rustls config with webpki trust store | |
| 30 | + | let mut root_store = rustls::RootCertStore::empty(); | |
| 31 | + | root_store.extend(webpki_roots::TLS_SERVER_ROOTS.iter().cloned()); | |
| 32 | + | let tls_config = rustls::ClientConfig::builder() | |
| 33 | + | .with_root_certificates(root_store) | |
| 34 | + | .with_no_client_auth(); | |
| 35 | + | ||
| 36 | + | let connector = TlsConnector::from(Arc::new(tls_config)); | |
| 37 | + | let server_name = match rustls_pki_types::ServerName::try_from(config.host.clone()) { | |
| 38 | + | Ok(name) => name, | |
| 39 | + | Err(e) => return tls_error(target_name, config, &checked_at, &format!("invalid server name: {e}")), | |
| 40 | + | }; | |
| 41 | + | ||
| 42 | + | // TLS handshake with timeout | |
| 43 | + | let tls_stream = match tokio::time::timeout( | |
| 44 | + | std::time::Duration::from_secs(10), | |
| 45 | + | connector.connect(server_name, tcp), | |
| 46 | + | ) | |
| 47 | + | .await | |
| 48 | + | { | |
| 49 | + | Ok(Ok(stream)) => stream, | |
| 50 | + | Ok(Err(e)) => return tls_error(target_name, config, &checked_at, &format!("TLS handshake failed: {e}")), | |
| 51 | + | Err(_) => return tls_error(target_name, config, &checked_at, "TLS handshake timed out"), | |
| 52 | + | }; | |
| 53 | + | ||
| 54 | + | // Extract peer certificates | |
| 55 | + | let (_io, client_conn) = tls_stream.into_inner(); | |
| 56 | + | let certs = match client_conn.peer_certificates() { | |
| 57 | + | Some(certs) if !certs.is_empty() => certs, | |
| 58 | + | _ => return tls_error(target_name, config, &checked_at, "no peer certificates"), | |
| 59 | + | }; | |
| 60 | + | ||
| 61 | + | // Parse the leaf (first) certificate | |
| 62 | + | parse_leaf_cert(target_name, config, &checked_at, certs[0].as_ref()) | |
| 63 | + | } | |
| 64 | + | ||
| 65 | + | /// Parse DER-encoded leaf cert bytes into a TlsStatus. | |
| 66 | + | pub fn parse_leaf_cert( | |
| 67 | + | target_name: &str, | |
| 68 | + | config: &TlsConfig, | |
| 69 | + | checked_at: &str, | |
| 70 | + | der_bytes: &[u8], | |
| 71 | + | ) -> TlsStatus { | |
| 72 | + | use x509_parser::prelude::FromDer; | |
| 73 | + | let (_, cert) = match x509_parser::prelude::X509Certificate::from_der(der_bytes) { | |
| 74 | + | Ok(result) => result, | |
| 75 | + | Err(e) => return tls_error(target_name, config, checked_at, &format!("cert parse error: {e}")), | |
| 76 | + | }; | |
| 77 | + | ||
| 78 | + | let not_before_ts = cert.validity().not_before.timestamp(); | |
| 79 | + | let not_after_ts = cert.validity().not_after.timestamp(); | |
| 80 | + | ||
| 81 | + | let now = chrono::Utc::now(); | |
| 82 | + | let not_after_chrono = chrono::DateTime::from_timestamp(not_after_ts, 0) | |
| 83 | + | .unwrap_or(now); | |
| 84 | + | let not_before_chrono = chrono::DateTime::from_timestamp(not_before_ts, 0) | |
| 85 | + | .unwrap_or(now); | |
| 86 | + | let days_remaining = (not_after_chrono - now).num_days(); | |
| 87 | + | ||
| 88 | + | let subject = cert.subject().to_string(); | |
| 89 | + | let issuer = cert.issuer().to_string(); | |
| 90 | + | ||
| 91 | + | TlsStatus { | |
| 92 | + | target: target_name.to_string(), | |
| 93 | + | host: config.host.clone(), | |
| 94 | + | port: config.port, | |
| 95 | + | valid: days_remaining > 0, | |
| 96 | + | days_remaining, | |
| 97 | + | not_before: not_before_chrono.to_rfc3339(), | |
| 98 | + | not_after: not_after_chrono.to_rfc3339(), | |
| 99 | + | subject, | |
| 100 | + | issuer, | |
| 101 | + | checked_at: checked_at.to_string(), | |
| 102 | + | error: None, | |
| 103 | + | } | |
| 104 | + | } | |
| 105 | + | ||
| 106 | + | fn tls_error(target_name: &str, config: &TlsConfig, checked_at: &str, error: &str) -> TlsStatus { | |
| 107 | + | TlsStatus { | |
| 108 | + | target: target_name.to_string(), | |
| 109 | + | host: config.host.clone(), | |
| 110 | + | port: config.port, | |
| 111 | + | valid: false, | |
| 112 | + | days_remaining: 0, | |
| 113 | + | not_before: String::new(), | |
| 114 | + | not_after: String::new(), | |
| 115 | + | subject: String::new(), | |
| 116 | + | issuer: String::new(), | |
| 117 | + | checked_at: checked_at.to_string(), | |
| 118 | + | error: Some(error.to_string()), | |
| 119 | + | } | |
| 120 | + | } | |
| 121 | + | ||
#[cfg(test)]
mod tests {
    use super::*;

    /// Fixed timestamp shared by the tests below.
    const CHECKED_AT: &str = "2026-03-11T00:00:00Z";

    /// Minimal probe configuration pointing at a placeholder host.
    fn test_config() -> TlsConfig {
        TlsConfig {
            host: "example.com".to_string(),
            port: 443,
            warn_days: 14,
        }
    }

    /// Garbage bytes must produce an error status, not a panic.
    #[test]
    fn parse_leaf_cert_with_invalid_der() {
        let status = parse_leaf_cert("test", &test_config(), CHECKED_AT, b"not-a-cert");

        assert!(!status.valid);
        let message = status.error.as_deref().unwrap();
        assert!(message.contains("cert parse error"));
    }

    /// The error constructor echoes the target and endpoint and flags the probe as failed.
    #[test]
    fn tls_error_populates_all_fields() {
        let status = tls_error("test", &test_config(), CHECKED_AT, "connection refused");

        assert_eq!(status.target, "test");
        assert_eq!(status.host, "example.com");
        assert_eq!(status.port, 443);
        assert!(!status.valid);
        assert_eq!(status.days_remaining, 0);
        assert_eq!(status.error.as_deref(), Some("connection refused"));
    }
}
| @@ -0,0 +1,564 @@ | |||
| 1 | + | //! CLI command handlers for PoM subcommands. | |
| 2 | + | ||
| 3 | + | use clap::Subcommand; | |
| 4 | + | use tracing::info; | |
| 5 | + | ||
| 6 | + | use pom::alerts::Alerter; | |
| 7 | + | use pom::checks::{http, ssh, tls}; | |
| 8 | + | use pom::config::Config; | |
| 9 | + | use pom::db; | |
| 10 | + | use pom::display; | |
| 11 | + | use pom::error::{PomError, Result}; | |
| 12 | + | use pom::peer; | |
| 13 | + | use pom::types::LatencyStats; | |
| 14 | + | use pom::types::TestStaleness; | |
| 15 | + | ||
// Selects which kind of stored history to list; the two variants take the same
// arguments.
//
// NOTE: the `///` comments inside this enum are picked up by clap's derive as
// the user-facing help text, so edit them only when the CLI help should change.
#[derive(Subcommand)]
pub(crate) enum HistoryKind {
    /// Health check history
    Health {
        /// Filter by target
        target: Option<String>,
        /// Number of results
        // Exposed as the short flag `-n`; defaults to 10 when omitted.
        #[arg(short, default_value = "10")]
        n: i64,
        /// Output as JSON
        #[arg(long)]
        json: bool,
    },
    /// Test run history
    Tests {
        /// Filter by target
        target: Option<String>,
        /// Number of results
        // Exposed as the short flag `-n`; defaults to 10 when omitted.
        #[arg(short, default_value = "10")]
        n: i64,
        /// Output as JSON
        #[arg(long)]
        json: bool,
    },
}
| 41 | + | ||
| 42 | + | pub(crate) async fn cmd_health( | |
| 43 | + | pool: &sqlx::SqlitePool, | |
| 44 | + | config: &Config, | |
| 45 | + | target: Option<&str>, | |
| 46 | + | json: bool, | |
| 47 | + | ) -> Result<()> { | |
| 48 | + | let targets: Vec<String> = match target { | |
| 49 | + | Some(t) => { | |
| 50 | + | if config.get_target(t).is_none() { | |
| 51 | + | eprintln!("Unknown target: {t}"); | |
| 52 | + | std::process::exit(1); | |
| 53 | + | } | |
| 54 | + | vec![t.to_string()] | |
| 55 | + | } | |
| 56 | + | None => config.target_names(), | |
| 57 | + | }; | |
| 58 | + | ||
| 59 | + | let mut snapshots = Vec::new(); | |
| 60 | + | ||
| 61 | + | for name in &targets { | |
| 62 | + | let target_config = config.get_target(name).unwrap(); | |
| 63 | + | if let Some(health_config) = &target_config.health { | |
| 64 | + | let snapshot = http::check_health(name, health_config, health_config.expect.as_ref()).await; | |
| 65 | + | db::insert_health_check(pool, &snapshot).await?; | |
| 66 | + | snapshots.push(snapshot); | |
| 67 | + | } else { | |
| 68 | + | eprintln!("{name}: no health endpoint configured"); | |
| 69 | + | } | |
| 70 | + | } | |
| 71 | + | ||
| 72 | + | if json { | |
| 73 | + | println!("{}", serde_json::to_string_pretty(&snapshots)?); | |
| 74 | + | } else { | |
| 75 | + | print!("{}", display::format_health_snapshots(&snapshots)); | |
| 76 | + | } | |
| 77 | + | ||
| 78 | + | Ok(()) | |
| 79 | + | } | |
| 80 | + | ||
| 81 | + | pub(crate) async fn cmd_test( | |
| 82 | + | pool: &sqlx::SqlitePool, | |
| 83 | + | config: &Config, | |
| 84 | + | target_name: &str, | |
| 85 | + | filter: Option<&str>, | |
| 86 | + | json: bool, | |
| 87 | + | ) -> Result<()> { | |
| 88 | + | let target = config.get_target(target_name).ok_or_else(|| { | |
| 89 | + | PomError::Config(format!("Unknown target: {target_name}")) | |
| 90 | + | })?; | |
| 91 | + | let tests_config = target.tests.as_ref().ok_or_else(|| { | |
| 92 | + | PomError::Config(format!("Target '{target_name}' has no test configuration")) | |
| 93 | + | })?; | |
| 94 | + | ||
| 95 | + | eprintln!("Running tests on {target_name}..."); | |
| 96 | + | let run = ssh::run_tests(target_name, tests_config, filter).await; | |
| 97 | + | db::insert_test_run(pool, &run).await?; | |
| 98 | + | ||
| 99 | + | if json { | |
| 100 | + | let summary = serde_json::json!({ | |
| 101 | + | "target": run.target, | |
| 102 | + | "passed": run.passed, | |
| 103 | + | "exit_code": run.exit_code, | |
| 104 | + | "duration_secs": run.duration_secs, | |
| 105 | + | "started_at": run.started_at, | |
| 106 | + | "finished_at": run.finished_at, | |
| 107 | + | "filter": run.filter, | |
| 108 | + | "summary": run.summary, | |
| 109 | + | }); | |
| 110 | + | println!("{}", serde_json::to_string_pretty(&summary)?); | |
| 111 | + | } else { | |
| 112 | + | print!("{}", display::format_test_result(target_name, &run)); | |
| 113 | + | } | |
| 114 | + | ||
| 115 | + | Ok(()) | |
| 116 | + | } | |
| 117 | + | ||
| 118 | + | pub(crate) async fn cmd_status( | |
| 119 | + | pool: &sqlx::SqlitePool, | |
| 120 | + | config: &Config, | |
| 121 | + | json: bool, | |
| 122 | + | ) -> Result<()> { | |
| 123 | + | let mut target_statuses = Vec::new(); | |
| 124 | + | ||
| 125 | + | for name in config.target_names() { | |
| 126 | + | let target = config.get_target(&name).unwrap(); | |
| 127 | + | let health = db::get_latest_health(pool, &name).await?; | |
| 128 | + | let tls_check = db::get_latest_tls_check(pool, &name).await?; | |
| 129 | + | let test = db::get_latest_test_run(pool, &name).await?; | |
| 130 | + | let incident = db::get_open_incident(pool, &name).await?; | |
| 131 | + | ||
| 132 | + | // Compute 24h latency stats | |
| 133 | + | let latency_24h = { | |
| 134 | + | let cutoff = (chrono::Utc::now() - chrono::Duration::hours(24)).to_rfc3339(); | |
| 135 | + | let times = db::get_response_times(pool, &name, &cutoff).await.unwrap_or_default(); | |
| 136 | + | let operational_times: Vec<i64> = times.iter() | |
| 137 | + | .filter(|(_, ms)| *ms > 0) | |
| 138 | + | .map(|(_, ms)| *ms) | |
| 139 | + | .collect(); | |
| 140 | + | LatencyStats::from_times(&operational_times) | |
| 141 | + | }; | |
| 142 | + | ||
| 143 | + | // Compute test staleness | |
| 144 | + | let staleness: Option<TestStaleness> = if let Some(tests_config) = &target.tests { | |
| 145 | + | let current_version = health.as_ref() | |
| 146 | + | .and_then(|h| h.details.as_ref()) | |
| 147 | + | .and_then(|d| d.version.clone()); | |
| 148 | + | ||
| 149 | + | let tested_version = if let Some(ref t) = test { | |
| 150 | + | db::get_version_at_time(pool, &name, &t.started_at).await.unwrap_or(None) | |
| 151 | + | } else { | |
| 152 | + | None | |
| 153 | + | }; | |
| 154 | + | ||
| 155 | + | Some(http::compute_test_staleness( | |
| 156 | + | current_version.as_deref(), | |
| 157 | + | tested_version.as_deref(), | |
| 158 | + | test.as_ref().map(|t| t.started_at.as_str()), | |
| 159 | + | tests_config.staleness_days, | |
| 160 | + | )) | |
| 161 | + | } else { | |
| 162 | + | None | |
| 163 | + | }; | |
| 164 | + | ||
| 165 | + | if json { | |
| 166 | + | target_statuses.push(serde_json::json!({ | |
| 167 | + | "target": name, | |
| 168 | + | "label": target.label, | |
| 169 | + | "health": health, | |
| 170 | + | "tls": tls_check, | |
| 171 | + | "latency_24h": latency_24h, | |
| 172 | + | "last_test": test.map(|t| serde_json::json!({ | |
| 173 | + | "passed": t.passed, | |
| 174 | + | "exit_code": t.exit_code, | |
| 175 | + | "duration_secs": t.duration_secs, | |
| 176 | + | "started_at": t.started_at, | |
| 177 | + | "summary": t.summary, | |
| 178 | + | })), | |
| 179 | + | "test_staleness": staleness, | |
| 180 | + | "incident": incident, | |
| 181 | + | })); | |
| 182 | + | } else { | |
| 183 | + | print!( | |
| 184 | + | "{}", | |
| 185 | + | display::format_status_target( | |
| 186 | + | &name, | |
| 187 | + | &target.label, | |
| 188 | + | health.as_ref(), | |
| 189 | + | latency_24h.as_ref(), | |
| 190 | + | tls_check.as_ref(), | |
| 191 | + | test.as_ref(), | |
| 192 | + | staleness.as_ref(), | |
| 193 | + | incident.as_ref(), | |
| 194 | + | ) | |
| 195 | + | ); | |
| 196 | + | } | |
| 197 | + | } | |
| 198 | + | ||
| 199 | + | if json { | |
| 200 | + | println!("{}", serde_json::to_string_pretty(&target_statuses)?); | |
| 201 | + | } | |
| 202 | + | ||
| 203 | + | Ok(()) | |
| 204 | + | } | |
| 205 | + | ||
| 206 | + | pub(crate) async fn cmd_history( | |
| 207 | + | pool: &sqlx::SqlitePool, | |
| 208 | + | kind: HistoryKind, | |
| 209 | + | ) -> Result<()> { | |
| 210 | + | match kind { | |
| 211 | + | HistoryKind::Health { target, n, json } => { | |
| 212 | + | let history = db::get_health_history(pool, target.as_deref(), n).await?; | |
| 213 | + | if json { | |
| 214 | + | println!("{}", serde_json::to_string_pretty(&history)?); | |
| 215 | + | } else { | |
| 216 | + | print!("{}", display::format_health_history(&history)); | |
| 217 | + | } | |
| 218 | + | } | |
| 219 | + | HistoryKind::Tests { target, n, json } => { | |
| 220 | + | let history = db::get_test_history(pool, target.as_deref(), n).await?; | |
| 221 | + | if json { | |
| 222 | + | let summaries: Vec<serde_json::Value> = history | |
| 223 | + | .iter() | |
| 224 | + | .map(|r| serde_json::json!({ | |
| 225 | + | "id": r.id, | |
| 226 | + | "target": r.target, | |
| 227 | + | "passed": r.passed, | |
| 228 | + | "exit_code": r.exit_code, | |
| 229 | + | "duration_secs": r.duration_secs, | |
| 230 | + | "started_at": r.started_at, | |
| 231 | + | "summary": r.summary, | |
| 232 | + | })) | |
| 233 | + | .collect(); | |
| 234 | + | println!("{}", serde_json::to_string_pretty(&summaries)?); | |
| 235 | + | } else { | |
| 236 | + | print!("{}", display::format_test_history(&history)); | |
| 237 | + | } | |
| 238 | + | } | |
| 239 | + | } | |
| 240 | + | ||
| 241 | + | Ok(()) | |
| 242 | + | } | |
| 243 | + | ||
| 244 | + | pub(crate) async fn cmd_prune( | |
| 245 | + | pool: &sqlx::SqlitePool, | |
| 246 | + | days: i64, | |
| 247 | + | ) -> Result<()> { | |
| 248 | + | let (health_pruned, test_pruned, heartbeat_pruned, alerts_pruned, tls_pruned, incidents_pruned) = db::prune_old_records(pool, days).await?; | |
| 249 | + | print!( | |
| 250 | + | "{}", | |
| 251 | + | display::format_prune(health_pruned, test_pruned, heartbeat_pruned, alerts_pruned, tls_pruned, incidents_pruned, days), | |
| 252 | + | ); | |
| 253 | + | Ok(()) | |
| 254 | + | } | |
| 255 | + | ||
| 256 | + | pub(crate) async fn cmd_serve( | |
| 257 | + | pool: &sqlx::SqlitePool, | |
| 258 | + | config: &Config, | |
| 259 | + | ) -> Result<()> { | |
| 260 | + | let default_interval = config.serve.interval_secs; | |
| 261 | + | let prune_days = config.serve.prune_days; | |
| 262 | + | let listen_addr = config.serve.listen.clone(); | |
| 263 | + | ||
| 264 | + | // --- Instance identity --- | |
| 265 | + | let instance_id = peer::load_or_create_instance_id(config.instance.id.as_deref())?; | |
| 266 | + | let instance_name = config.instance_name(); | |
| 267 | + | let instance_info = peer::InstanceInfo { | |
| 268 | + | id: instance_id.clone(), | |
| 269 | + | name: instance_name.clone(), | |
| 270 | + | version: env!("CARGO_PKG_VERSION").to_string(), | |
| 271 | + | targets: config.target_names(), | |
| 272 | + | started_at: chrono::Utc::now().to_rfc3339(), | |
| 273 | + | }; | |
| 274 | + | ||
| 275 | + | // --- Alerter --- | |
| 276 | + | let alerter = config.alerts.as_ref().map(|alert_config| { | |
| 277 | + | info!("Alerts enabled (to: {})", alert_config.to); | |
| 278 | + | Alerter::new(alert_config.clone(), pool.clone(), instance_name.clone()) | |
| 279 | + | }); | |
| 280 | + | ||
| 281 | + | info!("Instance: {instance_name} (id={instance_id})"); | |
| 282 | + | info!("Starting serve mode (default interval: {default_interval}s, prune: {prune_days}d)"); | |
| 283 | + | ||
| 284 | + | // --- Mesh state --- | |
| 285 | + | let mesh = peer::new_mesh_state(instance_info, &config.peers); | |
| 286 | + | ||
| 287 | + | // Load known peer identities from DB | |
| 288 | + | { | |
| 289 | + | let mut state = mesh.write().await; | |
| 290 | + | for (name, peer) in state.peers.iter_mut() { | |
| 291 | + | if let Ok(Some(known_id)) = db::get_peer_identity(pool, name).await { | |
| 292 | + | peer.known_id = Some(known_id); | |
| 293 | + | } | |
| 294 | + | } | |
| 295 | + | } | |
| 296 | + | ||
| 297 | + | // Spawn a health check task per target | |
| 298 | + | let mut handles = Vec::new(); | |
| 299 | + | ||
| 300 | + | for name in config.target_names() { | |
| 301 | + | let target_config = config.get_target(&name).unwrap().clone(); | |
| 302 | + | if let Some(health_config) = target_config.health { | |
| 303 | + | let interval_secs = health_config.interval_secs.unwrap_or(default_interval); | |
| 304 | + | let pool = pool.clone(); | |
| 305 | + | let name = name.clone(); | |
| 306 | + | let label = target_config.label.clone(); | |
| 307 | + | let alerter = alerter.clone(); | |
| 308 | + | ||
| 309 | + | info!("{name}: health check every {interval_secs}s"); | |
| 310 | + | ||
| 311 | + | let trending_config = health_config.trending.clone(); | |
| 312 | + | ||
| 313 | + | handles.push(tokio::spawn(async move { | |
| 314 | + | let mut interval = tokio::time::interval( | |
| 315 | + | std::time::Duration::from_secs(interval_secs), | |
| 316 | + | ); | |
| 317 | + | let expect = health_config.expect.as_ref(); | |
| 318 | + | let mut in_drift = false; | |
| 319 | + | loop { | |
| 320 | + | interval.tick().await; | |
| 321 | + | let previous = db::get_latest_health(&pool, &name).await.ok().flatten(); | |
| 322 | + | let snapshot = http::check_health(&name, &health_config, expect).await; | |
| 323 | + | info!("{}: {} ({}ms)", name, snapshot.status, snapshot.response_time_ms); | |
| 324 | + | if let Err(e) = db::insert_health_check(&pool, &snapshot).await { | |
| 325 | + | tracing::error!("{name}: failed to store health check: {e}"); | |
| 326 | + | } | |
| 327 | + | ||
| 328 | + | // Fire alerts on status transitions | |
| 329 | + | if let Some(ref alerter) = alerter | |
| 330 | + | && let Some(ref prev) = previous | |
| 331 | + | && prev.status != snapshot.status | |
| 332 | + | { | |
| 333 | + | let from = prev.status.to_string(); | |
| 334 | + | let to = snapshot.status.to_string(); | |
| 335 | + | if snapshot.status == pom::types::HealthStatus::Operational { | |
| 336 | + | alerter.send_health_recovery(&name, &label, &from).await; | |
| 337 | + | } else { | |
| 338 | + | alerter.send_health_alert( | |
| 339 | + | &name, | |
| 340 | + | &label, | |
| 341 | + | &from, | |
| 342 | + | &to, | |
| 343 | + | snapshot.error.as_deref(), | |
| 344 | + | ).await; | |
| 345 | + | } | |
| 346 | + | } | |
| 347 | + | ||
| 348 | + | // Track incidents on status transitions | |
| 349 | + | if let Some(ref prev) = previous | |
| 350 | + | && prev.status != snapshot.status | |
| 351 | + | { | |
| 352 | + | let prev_op = prev.status == pom::types::HealthStatus::Operational; | |
| 353 | + | let now_op = snapshot.status == pom::types::HealthStatus::Operational; | |
| 354 | + | ||
| 355 | + | if prev_op && !now_op { | |
| 356 | + | // Was operational, now unhealthy — open incident | |
| 357 | + | if let Err(e) = db::insert_incident(&pool, &name, &prev.status.to_string(), &snapshot.status.to_string()).await { | |
| 358 | + | tracing::error!("{name}: failed to open incident: {e}"); | |
| 359 | + | } | |
| 360 | + | } else if !prev_op && now_op { | |
| 361 | + | // Was unhealthy, now operational — close incidents | |
| 362 | + | if let Err(e) = db::close_open_incidents(&pool, &name).await { | |
| 363 | + | tracing::error!("{name}: failed to close incidents: {e}"); | |
| 364 | + | } | |
| 365 | + | } else { | |
| 366 | + | // Status changed between non-operational states — close old, open new | |
| 367 | + | if let Err(e) = db::close_open_incidents(&pool, &name).await { | |
| 368 | + | tracing::error!("{name}: failed to close incidents: {e}"); | |
| 369 | + | } | |
| 370 | + | if let Err(e) = db::insert_incident(&pool, &name, &prev.status.to_string(), &snapshot.status.to_string()).await { | |
| 371 | + | tracing::error!("{name}: failed to open incident: {e}"); | |
| 372 | + | } | |
| 373 | + | } | |
| 374 | + | } | |
| 375 | + | ||
| 376 | + | // Latency drift detection | |
| 377 | + | if let Some(ref trending) = trending_config | |
| 378 | + | && snapshot.status == pom::types::HealthStatus::Operational | |
| 379 | + | { | |
| 380 | + | let baseline_cutoff = (chrono::Utc::now() | |
| 381 | + | - chrono::Duration::hours(trending.baseline_window_hours as i64)) | |
| 382 | + | .to_rfc3339(); | |
| 383 | + | let baseline_data = db::get_response_times(&pool, &name, &baseline_cutoff) | |
| 384 | + | .await | |
| 385 | + | .unwrap_or_default(); | |
| 386 | + | let operational_times: Vec<i64> = baseline_data.iter() | |
| 387 | + | .filter(|(_, ms)| *ms > 0) | |
| 388 | + | .map(|(_, ms)| *ms) | |
| 389 | + | .collect(); | |
| 390 | + | let baseline = LatencyStats::from_times(&operational_times); | |
| 391 | + | let recent = db::get_recent_response_times(&pool, &name, 3) | |
| 392 | + | .await | |
| 393 | + | .unwrap_or_default(); | |
| 394 | + | ||
| 395 | + | if let Some(ref bl) = baseline { | |
| 396 | + | if let Some(msg) = http::detect_latency_drift(&recent, bl, trending.spike_threshold) { | |
| 397 | + | if !in_drift { | |
| 398 | + | info!("{name}: {msg}"); | |
| 399 | + | if let Some(ref alerter) = alerter { | |
| 400 | + | alerter.send_latency_drift_alert(&name, &label, &msg).await; | |
| 401 | + | } | |
| 402 | + | in_drift = true; | |
| 403 | + | } | |
| 404 | + | } else if in_drift { | |
| 405 | + | info!("{name}: latency drift recovered"); | |
| 406 | + | if let Some(ref alerter) = alerter { | |
| 407 | + | alerter.send_latency_recovery(&name, &label).await; | |
| 408 | + | } | |
| 409 | + | in_drift = false; | |
| 410 | + | } | |
| 411 | + | } | |
| 412 | + | } | |
| 413 | + | } | |
| 414 | + | })); | |
| 415 | + | } | |
| 416 | + | } | |
| 417 | + | ||
| 418 | + | // Spawn TLS check tasks | |
| 419 | + | let tls_interval_secs = config.serve.tls_check_interval_secs; | |
| 420 | + | for name in config.target_names() { | |
| 421 | + | let target_config = config.get_target(&name).unwrap().clone(); | |
| 422 | + | if let Some(tls_config) = target_config.tls { | |
| 423 | + | let pool = pool.clone(); | |
| 424 | + | let name = name.clone(); | |
| 425 | + | let label = target_config.label.clone(); | |
| 426 | + | let alerter = alerter.clone(); | |
| 427 | + | let warn_days = tls_config.warn_days; | |
| 428 | + | ||
| 429 | + | info!("{name}: TLS check every {tls_interval_secs}s (host={})", tls_config.host); | |
| 430 | + | ||
| 431 | + | handles.push(tokio::spawn(async move { | |
| 432 | + | let mut interval = tokio::time::interval( | |
| 433 | + | std::time::Duration::from_secs(tls_interval_secs), | |
| 434 | + | ); | |
| 435 | + | loop { | |
| 436 | + | interval.tick().await; | |
| 437 | + | let previous = db::get_latest_tls_check(&pool, &name).await.ok().flatten(); | |
| 438 | + | let status = tls::check_tls(&name, &tls_config).await; | |
| 439 | + | info!("{}: TLS {} — {}d remaining", name, if status.valid { "valid" } else { "invalid" }, status.days_remaining); | |
| 440 | + | if let Err(e) = db::insert_tls_check(&pool, &status).await { | |
| 441 | + | tracing::error!("{name}: failed to store TLS check: {e}"); | |
| 442 | + | } | |
| 443 | + | ||
| 444 | + | // Fire alerts on TLS state transitions | |
| 445 | + | if let Some(ref alerter) = alerter { | |
| 446 | + | let was_ok = previous.as_ref().is_none_or(|p| p.valid && p.error.is_none()); | |
| 447 | + | let now_warn = status.valid && status.days_remaining <= warn_days as i64; | |
| 448 | + | let now_error = !status.valid || status.error.is_some(); | |
| 449 | + | ||
| 450 | + | if was_ok && now_error { | |
| 451 | + | alerter.send_tls_error_alert( | |
| 452 | + | &name, | |
| 453 | + | &tls_config.host, | |
| 454 | + | status.error.as_deref().unwrap_or("certificate invalid"), | |
| 455 | + | ).await; | |
| 456 | + | } else if was_ok && now_warn { | |
| 457 | + | alerter.send_tls_expiry_alert( | |
| 458 | + | &name, | |
| 459 | + | &tls_config.host, | |
| 460 | + | status.days_remaining, | |
| 461 | + | &status.not_after, | |
| 462 | + | ).await; | |
| 463 | + | } else if let Some(ref prev) = previous { | |
| 464 | + | let was_bad = !prev.valid || prev.error.is_some() || prev.days_remaining <= warn_days as i64; | |
| 465 | + | let now_ok = status.valid && status.error.is_none() && status.days_remaining > warn_days as i64; | |
| 466 | + | if was_bad && now_ok { | |
| 467 | + | alerter.send_tls_recovery( | |
| 468 | + | &name, | |
| 469 | + | &label, | |
| 470 | + | status.days_remaining, | |
| 471 | + | ).await; | |
| 472 | + | } | |
| 473 | + | } | |
| 474 | + | } | |
| 475 | + | } | |
| 476 | + | })); | |
| 477 | + | } | |
| 478 | + | } | |
| 479 | + | ||
| 480 | + | // Spawn daily prune task | |
| 481 | + | let prune_pool = pool.clone(); | |
| 482 | + | handles.push(tokio::spawn(async move { | |
| 483 | + | let mut interval = tokio::time::interval( | |
| 484 | + | std::time::Duration::from_secs(86400), | |
| 485 | + | ); | |
| 486 | + | loop { | |
| 487 | + | interval.tick().await; | |
| 488 | + | match db::prune_old_records(&prune_pool, prune_days).await { | |
| 489 | + | Ok((h, t, p, a, tl, inc)) => info!("Pruned {h} health checks, {t} test runs, {p} peer heartbeats, {a} alerts, {tl} TLS checks, {inc} incidents"), | |
| 490 | + | Err(e) => tracing::error!("Prune failed: {e}"), | |
| 491 | + | } | |
| 492 | + | } | |
| 493 | + | })); | |
| 494 | + | ||
| 495 | + | // Spawn peer heartbeat tasks | |
| 496 | + | if !config.peers.is_empty() { | |
| 497 | + | let heartbeat_secs = config.serve.peer_heartbeat_secs; | |
| 498 | + | info!("Peer mesh: {} peers, heartbeat every {heartbeat_secs}s", config.peers.len()); | |
| 499 | + | let hb_handles = peer::spawn_heartbeat_tasks( | |
| 500 | + | mesh.clone(), |
Lines truncated
| @@ -1,13 +1,47 @@ | |||
| 1 | + | //! TOML configuration loading and types. | |
| 2 | + | ||
| 1 | 3 | use serde::Deserialize; | |
| 2 | 4 | use std::collections::HashMap; | |
| 3 | 5 | use std::path::{Path, PathBuf}; | |
| 4 | 6 | ||
| 7 | + | use crate::error::{PomError, Result}; | |
| 8 | + | use crate::peer::OnMissing; | |
| 9 | + | ||
| 5 | 10 | #[derive(Debug, Clone, Deserialize)] | |
| 6 | 11 | pub struct Config { | |
| 7 | 12 | #[serde(default)] | |
| 8 | 13 | pub serve: ServeConfig, | |
| 9 | 14 | #[serde(default)] | |
| 15 | + | pub instance: InstanceConfig, | |
| 16 | + | #[serde(default)] | |
| 10 | 17 | pub targets: HashMap<String, TargetConfig>, | |
| 18 | + | #[serde(default)] | |
| 19 | + | pub peers: HashMap<String, PeerConfig>, | |
| 20 | + | pub alerts: Option<AlertConfig>, | |
| 21 | + | } | |
| 22 | + | ||
| 23 | + | #[derive(Debug, Clone, Deserialize)] | |
| 24 | + | pub struct AlertConfig { | |
| 25 | + | pub postmark_token: Option<String>, | |
| 26 | + | pub to: String, | |
| 27 | + | #[serde(default = "default_alert_from")] | |
| 28 | + | pub from: String, | |
| 29 | + | #[serde(default = "default_cooldown_secs")] | |
| 30 | + | pub cooldown_secs: u64, | |
| 31 | + | } | |
| 32 | + | ||
| 33 | + | #[derive(Debug, Clone, Default, Deserialize)] | |
| 34 | + | pub struct InstanceConfig { | |
| 35 | + | pub name: Option<String>, | |
| 36 | + | pub id: Option<String>, | |
| 37 | + | } | |
| 38 | + | ||
| 39 | + | #[derive(Debug, Clone, Deserialize)] | |
| 40 | + | pub struct PeerConfig { | |
| 41 | + | pub address: String, | |
| 42 | + | #[serde(default)] | |
| 43 | + | pub on_missing: OnMissing, | |
| 44 | + | pub grace_count: Option<u32>, | |
| 11 | 45 | } | |
| 12 | 46 | ||
| 13 | 47 | #[derive(Debug, Clone, Deserialize)] | |
| @@ -16,6 +50,12 @@ pub struct ServeConfig { | |||
| 16 | 50 | pub interval_secs: u64, | |
| 17 | 51 | #[serde(default = "default_prune_days")] | |
| 18 | 52 | pub prune_days: i64, | |
| 53 | + | #[serde(default = "default_listen")] | |
| 54 | + | pub listen: String, | |
| 55 | + | #[serde(default = "default_peer_heartbeat")] | |
| 56 | + | pub peer_heartbeat_secs: u64, | |
| 57 | + | #[serde(default = "default_tls_check_interval")] | |
| 58 | + | pub tls_check_interval_secs: u64, | |
| 19 | 59 | } | |
| 20 | 60 | ||
| 21 | 61 | impl Default for ServeConfig { | |
| @@ -23,10 +63,21 @@ impl Default for ServeConfig { | |||
| 23 | 63 | Self { | |
| 24 | 64 | interval_secs: 300, | |
| 25 | 65 | prune_days: 30, | |
| 66 | + | listen: default_listen(), | |
| 67 | + | peer_heartbeat_secs: 60, | |
| 68 | + | tls_check_interval_secs: 3600, | |
| 26 | 69 | } | |
| 27 | 70 | } | |
| 28 | 71 | } | |
| 29 | 72 | ||
/// Serde default for `ServeConfig::peer_heartbeat_secs`: 60 seconds.
fn default_peer_heartbeat() -> u64 {
    const PEER_HEARTBEAT_SECS: u64 = 60;
    PEER_HEARTBEAT_SECS
}
| 76 | + | ||
/// Serde default for `ServeConfig::tls_check_interval_secs`: 3600 seconds (one hour).
fn default_tls_check_interval() -> u64 {
    const TLS_CHECK_INTERVAL_SECS: u64 = 3600;
    TLS_CHECK_INTERVAL_SECS
}
| 80 | + | ||
/// Default serve interval: 300 seconds (five minutes).
///
/// NOTE(review): presumably the serde default for `ServeConfig::interval_secs` — confirm
/// against the field's `#[serde(default = ...)]` attribute.
fn default_serve_interval() -> u64 {
    const SERVE_INTERVAL_SECS: u64 = 300;
    SERVE_INTERVAL_SECS
}
| @@ -35,11 +86,33 @@ fn default_prune_days() -> i64 { | |||
| 35 | 86 | 30 | |
| 36 | 87 | } | |
| 37 | 88 | ||
/// Serde default for `ServeConfig::listen`: the loopback address `127.0.0.1:9100`.
fn default_listen() -> String {
    String::from("127.0.0.1:9100")
}
| 92 | + | ||
| 38 | 93 | #[derive(Debug, Clone, Deserialize)] | |
| 39 | 94 | pub struct TargetConfig { | |
| 40 | 95 | pub label: String, | |
| 41 | 96 | pub health: Option<HealthConfig>, | |
| 42 | 97 | pub tests: Option<TestsConfig>, | |
| 98 | + | pub tls: Option<TlsConfig>, | |
| 99 | + | } | |
| 100 | + | ||
| 101 | + | #[derive(Debug, Clone, Deserialize)] | |
| 102 | + | pub struct TlsConfig { | |
| 103 | + | pub host: String, | |
| 104 | + | #[serde(default = "default_tls_port")] | |
| 105 | + | pub port: u16, | |
| 106 | + | #[serde(default = "default_tls_warn_days")] | |
| 107 | + | pub warn_days: u32, | |
| 108 | + | } | |
| 109 | + | ||
/// Serde default for `TlsConfig::port`: 443.
fn default_tls_port() -> u16 {
    const TLS_PORT: u16 = 443;
    TLS_PORT
}
| 113 | + | ||
/// Serde default for `TlsConfig::warn_days`: 14 days.
fn default_tls_warn_days() -> u32 {
    const TLS_WARN_DAYS: u32 = 14;
    TLS_WARN_DAYS
}
| 44 | 117 | ||
| 45 | 118 | #[derive(Debug, Clone, Deserialize)] | |
| @@ -49,6 +122,34 @@ pub struct HealthConfig { | |||
| 49 | 122 | pub timeout_secs: u64, | |
| 50 | 123 | /// Per-target interval override for serve mode | |
| 51 | 124 | pub interval_secs: Option<u64>, | |
| 125 | + | /// Response validation expectations | |
| 126 | + | pub expect: Option<HealthExpectation>, | |
| 127 | + | /// Latency trending and drift detection | |
| 128 | + | pub trending: Option<TrendingConfig>, | |
| 129 | + | } | |
| 130 | + | ||
| 131 | + | #[derive(Debug, Clone, Deserialize)] | |
| 132 | + | pub struct TrendingConfig { | |
| 133 | + | #[serde(default = "default_baseline_window_hours")] | |
| 134 | + | pub baseline_window_hours: u64, | |
| 135 | + | #[serde(default = "default_spike_threshold")] | |
| 136 | + | pub spike_threshold: f64, | |
| 137 | + | } | |
| 138 | + | ||
/// Serde default for `TrendingConfig::baseline_window_hours`: 168 hours (seven days).
fn default_baseline_window_hours() -> u64 {
    const BASELINE_WINDOW_HOURS: u64 = 168;
    BASELINE_WINDOW_HOURS
}
| 142 | + | ||
/// Serde default for `TrendingConfig::spike_threshold`: 2.0.
fn default_spike_threshold() -> f64 {
    const SPIKE_THRESHOLD: f64 = 2.0;
    SPIKE_THRESHOLD
}
| 146 | + | ||
| 147 | + | #[derive(Debug, Clone, Deserialize, Default)] | |
| 148 | + | pub struct HealthExpectation { | |
| 149 | + | pub status_code: Option<u16>, | |
| 150 | + | #[serde(default)] | |
| 151 | + | pub json_fields: HashMap<String, String>, | |
| 152 | + | pub body_contains: Option<String>, | |
| 52 | 153 | } | |
| 53 | 154 | ||
| 54 | 155 | #[derive(Debug, Clone, Deserialize)] | |
| @@ -57,6 +158,12 @@ pub struct TestsConfig { | |||
| 57 | 158 | pub command: String, | |
| 58 | 159 | #[serde(default = "default_test_timeout")] | |
| 59 | 160 | pub timeout_secs: u64, | |
| 161 | + | #[serde(default = "default_staleness_days")] | |
| 162 | + | pub staleness_days: u64, | |
| 163 | + | } | |
| 164 | + | ||
/// Serde default for `TestsConfig::staleness_days`: 7 days.
fn default_staleness_days() -> u64 {
    const STALENESS_DAYS: u64 = 7;
    STALENESS_DAYS
}
| 61 | 168 | ||
| 62 | 169 | fn default_health_timeout() -> u64 { | |
| @@ -67,15 +174,26 @@ fn default_test_timeout() -> u64 { | |||
| 67 | 174 | 600 | |
| 68 | 175 | } | |
| 69 | 176 | ||
/// Serde default for `AlertConfig::from`: the built-in PoM alert sender.
fn default_alert_from() -> String {
    String::from("PoM Alerts <pom-alerts@makenot.work>")
}
| 180 | + | ||
/// Serde default for `AlertConfig::cooldown_secs`: 300 seconds (five minutes).
fn default_cooldown_secs() -> u64 {
    const COOLDOWN_SECS: u64 = 300;
    COOLDOWN_SECS
}
| 184 | + | ||
| 70 | 185 | impl Config { | |
| 71 | - | pub fn load(path: Option<&Path>) -> Result<Self, Box<dyn std::error::Error + Send + Sync>> { | |
| 186 | + | pub fn load(path: Option<&Path>) -> Result<Self> { | |
| 72 | 187 | let config_path = match path { | |
| 73 | 188 | Some(p) => p.to_path_buf(), | |
| 74 | 189 | None => default_config_path()?, | |
| 75 | 190 | }; | |
| 76 | 191 | ||
| 77 | 192 | if !config_path.exists() { | |
| 78 | - | return Err(format!("Config file not found: {}", config_path.display()).into()); | |
| 193 | + | return Err(PomError::Config(format!( | |
| 194 | + | "Config file not found: {}", | |
| 195 | + | config_path.display() | |
| 196 | + | ))); | |
| 79 | 197 | } | |
| 80 | 198 | ||
| 81 | 199 | let contents = std::fs::read_to_string(&config_path)?; | |
| @@ -92,16 +210,318 @@ impl Config { | |||
| 92 | 210 | names.sort(); | |
| 93 | 211 | names | |
| 94 | 212 | } | |
| 213 | + | ||
| 214 | + | pub fn instance_name(&self) -> String { | |
| 215 | + | self.instance | |
| 216 | + | .name | |
| 217 | + | .clone() | |
| 218 | + | .unwrap_or_else(|| hostname::get().map(|h| h.to_string_lossy().into_owned()).unwrap_or_else(|_| "unknown".to_string())) | |
| 219 | + | } | |
| 95 | 220 | } | |
| 96 | 221 | ||
| 97 | - | pub fn default_config_path() -> Result<PathBuf, Box<dyn std::error::Error + Send + Sync>> { | |
| 98 | - | let config_dir = dirs::config_dir().ok_or("Could not determine config directory")?; | |
| 99 | - | Ok(config_dir.join("pom").join("pom.toml")) | |
| 222 | + | pub fn default_config_path() -> Result<PathBuf> { | |
| 223 | + | let config_dir = | |
| 224 | + | dirs::config_dir().ok_or_else(|| PomError::Config("Could not determine config directory".into())); | |
| 225 | + | Ok(config_dir?.join("pom").join("pom.toml")) | |
| 100 | 226 | } | |
| 101 | 227 | ||
| 102 | - | pub fn db_path() -> Result<PathBuf, Box<dyn std::error::Error + Send + Sync>> { | |
| 103 | - | let data_dir = dirs::data_local_dir().ok_or("Could not determine data directory")?; | |
| 104 | - | let pom_dir = data_dir.join("pom"); | |
| 228 | + | pub fn db_path() -> Result<PathBuf> { | |
| 229 | + | let data_dir = | |
| 230 | + | dirs::data_local_dir().ok_or_else(|| PomError::Config("Could not determine data directory".into())); | |
| 231 | + | let pom_dir = data_dir?.join("pom"); | |
| 105 | 232 | std::fs::create_dir_all(&pom_dir)?; | |
| 106 | 233 | Ok(pom_dir.join("pom.db")) | |
| 107 | 234 | } | |
| 235 | + | ||
| 236 | + | #[cfg(test)] | |
| 237 | + | mod tests { | |
| 238 | + | use super::*; | |
| 239 | + | ||
| 240 | + | #[test] | |
| 241 | + | fn parse_full_config() { | |
| 242 | + | let toml = r#" | |
| 243 | + | [serve] | |
| 244 | + | interval_secs = 120 | |
| 245 | + | listen = "0.0.0.0:9100" | |
| 246 | + | peer_heartbeat_secs = 30 | |
| 247 | + | ||
| 248 | + | [instance] | |
| 249 | + | name = "hetzner" | |
| 250 | + | ||
| 251 | + | [targets.mnw] | |
| 252 | + | label = "MakeNotWork" | |
| 253 | + | [targets.mnw.health] | |
| 254 | + | url = "https://makenot.work/health" | |
| 255 | + | timeout_secs = 5 | |
| 256 | + | [targets.mnw.tests] | |
| 257 | + | ssh = "hetzner" | |
| 258 | + | command = "cd /srv/mnw && ./ci.sh" | |
| 259 | + | ||
| 260 | + | [peers.astra] | |
| 261 | + | address = "100.0.0.1:9100" | |
| 262 | + | on_missing = "alert" | |
| 263 | + | grace_count = 5 | |
| 264 | + | "#; | |
| 265 | + | ||
| 266 | + | let config: Config = toml::from_str(toml).unwrap(); | |
| 267 | + | assert_eq!(config.serve.interval_secs, 120); | |
| 268 | + | assert_eq!(config.serve.listen, "0.0.0.0:9100"); | |
| 269 | + | assert_eq!(config.serve.peer_heartbeat_secs, 30); | |
| 270 | + | assert_eq!(config.instance.name.as_deref(), Some("hetzner")); | |
| 271 | + | assert_eq!(config.target_names(), vec!["mnw"]); | |
| 272 | + | ||
| 273 | + | let mnw = config.get_target("mnw").unwrap(); | |
| 274 | + | assert_eq!(mnw.label, "MakeNotWork"); | |
| 275 | + | assert_eq!(mnw.health.as_ref().unwrap().timeout_secs, 5); | |
| 276 | + | assert_eq!(mnw.tests.as_ref().unwrap().ssh, "hetzner"); | |
| 277 | + | ||
| 278 | + | let astra = config.peers.get("astra").unwrap(); | |
| 279 | + | assert_eq!(astra.address, "100.0.0.1:9100"); | |
| 280 | + | assert_eq!(astra.on_missing, OnMissing::Alert); | |
| 281 | + | assert_eq!(astra.grace_count, Some(5)); | |
| 282 | + | } | |
| 283 | + | ||
| 284 | + | #[test] | |
| 285 | + | fn empty_config_uses_defaults() { | |
| 286 | + | let config: Config = toml::from_str("").unwrap(); | |
| 287 | + | assert_eq!(config.serve.interval_secs, 300); | |
| 288 | + | assert_eq!(config.serve.prune_days, 30); | |
| 289 | + | assert_eq!(config.serve.listen, "127.0.0.1:9100"); | |
| 290 | + | assert_eq!(config.serve.peer_heartbeat_secs, 60); | |
| 291 | + | assert!(config.targets.is_empty()); | |
| 292 | + | assert!(config.peers.is_empty()); | |
| 293 | + | assert!(config.instance.name.is_none()); | |
| 294 | + | } | |
| 295 | + | ||
| 296 | + | #[test] | |
| 297 | + | fn peer_on_missing_defaults_to_log() { | |
| 298 | + | let toml = r#" | |
| 299 | + | [peers.test] | |
| 300 | + | address = "10.0.0.1:9100" | |
| 301 | + | "#; | |
| 302 | + | let config: Config = toml::from_str(toml).unwrap(); | |
| 303 | + | let peer = config.peers.get("test").unwrap(); | |
| 304 | + | assert_eq!(peer.on_missing, OnMissing::Log); | |
| 305 | + | assert_eq!(peer.grace_count, None); | |
| 306 | + | } | |
| 307 | + | ||
| 308 | + | #[test] | |
| 309 | + | fn instance_name_falls_back_to_hostname() { | |
| 310 | + | let config: Config = toml::from_str("").unwrap(); | |
| 311 | + | let name = config.instance_name(); | |
| 312 | + | assert!(!name.is_empty()); | |
| 313 | + | } | |
| 314 | + | ||
| 315 | + | #[test] | |
| 316 | + | fn config_without_alerts_section() { | |
| 317 | + | let config: Config = toml::from_str("").unwrap(); | |
| 318 | + | assert!(config.alerts.is_none()); | |
| 319 | + | } | |
| 320 | + | ||
| 321 | + | #[test] | |
| 322 | + | fn config_with_alerts_section() { | |
| 323 | + | let toml = r#" | |
| 324 | + | [alerts] | |
| 325 | + | postmark_token = "test-token" | |
| 326 | + | to = "alerts@example.com" | |
| 327 | + | "#; | |
| 328 | + | let config: Config = toml::from_str(toml).unwrap(); | |
| 329 | + | let alerts = config.alerts.unwrap(); | |
| 330 | + | assert_eq!(alerts.postmark_token.as_deref(), Some("test-token")); | |
| 331 | + | assert_eq!(alerts.to, "alerts@example.com"); | |
| 332 | + | assert_eq!(alerts.from, "PoM Alerts <pom-alerts@makenot.work>"); | |
| 333 | + | assert_eq!(alerts.cooldown_secs, 300); | |
| 334 | + | } | |
| 335 | + | ||
| 336 | + | #[test] | |
| 337 | + | fn config_with_tls() { | |
| 338 | + | let toml = r#" | |
| 339 | + | [targets.mnw] | |
| 340 | + | label = "MakeNotWork" | |
| 341 | + | [targets.mnw.tls] | |
| 342 | + | host = "makenot.work" | |
| 343 | + | port = 8443 | |
| 344 | + | warn_days = 30 | |
| 345 | + | "#; | |
| 346 | + | let config: Config = toml::from_str(toml).unwrap(); | |
| 347 | + | let mnw = config.get_target("mnw").unwrap(); | |
| 348 | + | let tls = mnw.tls.as_ref().unwrap(); | |
| 349 | + | assert_eq!(tls.host, "makenot.work"); | |
| 350 | + | assert_eq!(tls.port, 8443); | |
| 351 | + | assert_eq!(tls.warn_days, 30); | |
| 352 | + | } | |
| 353 | + | ||
| 354 | + | #[test] | |
| 355 | + | fn config_tls_defaults() { | |
| 356 | + | let toml = r#" | |
| 357 | + | [targets.mnw] | |
| 358 | + | label = "MakeNotWork" | |
| 359 | + | [targets.mnw.tls] | |
| 360 | + | host = "makenot.work" | |
| 361 | + | "#; | |
| 362 | + | let config: Config = toml::from_str(toml).unwrap(); | |
| 363 | + | let tls = config.get_target("mnw").unwrap().tls.as_ref().unwrap(); | |
| 364 | + | assert_eq!(tls.port, 443); | |
| 365 | + | assert_eq!(tls.warn_days, 14); | |
| 366 | + | } | |
| 367 | + | ||
| 368 | + | #[test] | |
| 369 | + | fn config_without_tls() { | |
| 370 | + | let toml = r#" | |
| 371 | + | [targets.mnw] | |
| 372 | + | label = "MakeNotWork" | |
| 373 | + | "#; | |
| 374 | + | let config: Config = toml::from_str(toml).unwrap(); | |
| 375 | + | assert!(config.get_target("mnw").unwrap().tls.is_none()); | |
| 376 | + | } | |
| 377 | + | ||
| 378 | + | #[test] | |
| 379 | + | fn config_tls_check_interval_default() { | |
| 380 | + | let config: Config = toml::from_str("").unwrap(); | |
| 381 | + | assert_eq!(config.serve.tls_check_interval_secs, 3600); | |
| 382 | + | } | |
| 383 | + | ||
| 384 | + | #[test] | |
| 385 | + | fn config_tls_check_interval_custom() { | |
| 386 | + | let toml = r#" | |
| 387 | + | [serve] | |
| 388 | + | tls_check_interval_secs = 1800 | |
| 389 | + | "#; | |
| 390 | + | let config: Config = toml::from_str(toml).unwrap(); | |
| 391 | + | assert_eq!(config.serve.tls_check_interval_secs, 1800); | |
| 392 | + | } | |
| 393 | + | ||
| 394 | + | #[test] | |
| 395 | + | fn config_with_health_expect() { | |
| 396 | + | let toml = r#" | |
| 397 | + | [targets.mnw] | |
| 398 | + | label = "MakeNotWork" | |
| 399 | + | [targets.mnw.health] | |
| 400 | + | url = "https://makenot.work/health" | |
| 401 | + | [targets.mnw.health.expect] | |
| 402 | + | status_code = 200 | |
| 403 | + | body_contains = "operational" | |
| 404 | + | json_fields = { "status" = "operational", "checks.db" = "ok" } | |
| 405 | + | "#; | |
| 406 | + | let config: Config = toml::from_str(toml).unwrap(); | |
| 407 | + | let expect = config.get_target("mnw").unwrap().health.as_ref().unwrap().expect.as_ref().unwrap(); | |
| 408 | + | assert_eq!(expect.status_code, Some(200)); | |
| 409 | + | assert_eq!(expect.body_contains.as_deref(), Some("operational")); | |
| 410 | + | assert_eq!(expect.json_fields.get("status").unwrap(), "operational"); | |
| 411 | + | assert_eq!(expect.json_fields.get("checks.db").unwrap(), "ok"); | |
| 412 | + | } | |
| 413 | + | ||
| 414 | + | #[test] | |
| 415 | + | fn config_health_without_expect() { | |
| 416 | + | let toml = r#" | |
| 417 | + | [targets.mnw] | |
| 418 | + | label = "MakeNotWork" | |
| 419 | + | [targets.mnw.health] | |
| 420 | + | url = "https://makenot.work/health" | |
| 421 | + | "#; | |
| 422 | + | let config: Config = toml::from_str(toml).unwrap(); | |
| 423 | + | assert!(config.get_target("mnw").unwrap().health.as_ref().unwrap().expect.is_none()); | |
| 424 | + | } | |
| 425 | + | ||
| 426 | + | #[test] | |
| 427 | + | fn config_with_trending() { | |
| 428 | + | let toml = r#" | |
| 429 | + | [targets.mnw] | |
| 430 | + | label = "MakeNotWork" | |
| 431 | + | [targets.mnw.health] | |
| 432 | + | url = "https://makenot.work/health" | |
| 433 | + | [targets.mnw.health.trending] | |
| 434 | + | baseline_window_hours = 48 | |
| 435 | + | spike_threshold = 1.5 | |
| 436 | + | "#; | |
| 437 | + | let config: Config = toml::from_str(toml).unwrap(); | |
| 438 | + | let trending = config.get_target("mnw").unwrap().health.as_ref().unwrap().trending.as_ref().unwrap(); | |
| 439 | + | assert_eq!(trending.baseline_window_hours, 48); | |
| 440 | + | assert_eq!(trending.spike_threshold, 1.5); | |
| 441 | + | } | |
| 442 | + | ||
| 443 | + | #[test] | |
| 444 | + | fn config_trending_defaults() { | |
| 445 | + | let toml = r#" | |
| 446 | + | [targets.mnw] | |
| 447 | + | label = "MakeNotWork" | |
| 448 | + | [targets.mnw.health] | |
| 449 | + | url = "https://makenot.work/health" | |
| 450 | + | [targets.mnw.health.trending] | |
| 451 | + | "#; | |
| 452 | + | let config: Config = toml::from_str(toml).unwrap(); | |
| 453 | + | let trending = config.get_target("mnw").unwrap().health.as_ref().unwrap().trending.as_ref().unwrap(); | |
| 454 | + | assert_eq!(trending.baseline_window_hours, 168); | |
| 455 | + | assert_eq!(trending.spike_threshold, 2.0); | |
| 456 | + | } | |
| 457 | + | ||
| 458 | + | #[test] | |
| 459 | + | fn config_without_trending() { | |
| 460 | + | let toml = r#" | |
| 461 | + | [targets.mnw] | |
| 462 | + | label = "MakeNotWork" | |
| 463 | + | [targets.mnw.health] | |
| 464 | + | url = "https://makenot.work/health" | |
| 465 | + | "#; | |
| 466 | + | let config: Config = toml::from_str(toml).unwrap(); | |
| 467 | + | assert!(config.get_target("mnw").unwrap().health.as_ref().unwrap().trending.is_none()); | |
| 468 | + | } | |
| 469 | + | ||
| 470 | + | #[test] | |
| 471 | + | fn config_health_expect_empty() { | |
| 472 | + | let toml = r#" | |
| 473 | + | [targets.mnw] | |
| 474 | + | label = "MakeNotWork" | |
| 475 | + | [targets.mnw.health] | |
| 476 | + | url = "https://makenot.work/health" | |
| 477 | + | [targets.mnw.health.expect] | |
| 478 | + | "#; | |
| 479 | + | let config: Config = toml::from_str(toml).unwrap(); | |
| 480 | + | let expect = config.get_target("mnw").unwrap().health.as_ref().unwrap().expect.as_ref().unwrap(); | |
| 481 | + | assert_eq!(expect.status_code, None); | |
| 482 | + | assert!(expect.json_fields.is_empty()); | |
| 483 | + | assert_eq!(expect.body_contains, None); | |
| 484 | + | } | |
| 485 | + | ||
| 486 | + | #[test] | |
| 487 | + | fn config_staleness_days_default() { | |
| 488 | + | let toml = r#" | |
| 489 | + | [targets.mnw] | |
| 490 | + | label = "MakeNotWork" | |
| 491 | + | [targets.mnw.tests] | |
| 492 | + | ssh = "host" | |
| 493 | + | command = "./ci.sh" | |
| 494 | + | "#; | |
| 495 | + | let config: Config = toml::from_str(toml).unwrap(); | |
| 496 | + | assert_eq!(config.get_target("mnw").unwrap().tests.as_ref().unwrap().staleness_days, 7); | |
| 497 | + | } | |
| 498 | + | ||
| 499 | + | #[test] | |
| 500 | + | fn config_staleness_days_custom() { | |
| 501 | + | let toml = r#" | |
| 502 | + | [targets.mnw] | |
| 503 | + | label = "MakeNotWork" | |
| 504 | + | [targets.mnw.tests] | |
| 505 | + | ssh = "host" | |
| 506 | + | command = "./ci.sh" | |
| 507 | + | staleness_days = 14 | |
| 508 | + | "#; | |
| 509 | + | let config: Config = toml::from_str(toml).unwrap(); | |
| 510 | + | assert_eq!(config.get_target("mnw").unwrap().tests.as_ref().unwrap().staleness_days, 14); | |
| 511 | + | } | |
| 512 | + | ||
| 513 | + | #[test] | |
| 514 | + | fn config_with_alerts_custom_defaults() { | |
| 515 | + | let toml = r#" | |
| 516 | + | [alerts] |
Lines truncated
| @@ -1,10 +1,103 @@ | |||
| 1 | + | //! SQLite persistence — schema, health checks, test runs, and peer data. | |
| 2 | + | //! | |
| 3 | + | //! Uses a migration versioning system: each schema change is a numbered migration | |
| 4 | + | //! stored in [`MIGRATIONS`]. On startup, [`run_migrations`] checks the current | |
| 5 | + | //! version and runs any pending migrations. Existing databases (pre-migration) | |
| 6 | + | //! are detected by the presence of the `health_checks` table and marked as v1. | |
| 7 | + | ||
| 1 | 8 | use sqlx::sqlite::{SqliteConnectOptions, SqlitePool, SqlitePoolOptions}; | |
| 2 | 9 | use std::path::Path; | |
| 3 | 10 | use std::str::FromStr; | |
| 11 | + | use tracing::info; | |
| 4 | 12 | ||
| 5 | - | use crate::types::{HealthDetails, HealthSnapshot, HealthStatus, TestRun, TestSummary}; | |
| 13 | + | use crate::error::Result; | |
| 14 | + | use crate::types::{HealthDetails, HealthSnapshot, HealthStatus, TestRun, TestSummary, TlsStatus}; | |
| 6 | 15 | ||
| 7 | - | pub async fn connect(path: &Path) -> Result<SqlitePool, Box<dyn std::error::Error + Send + Sync>> { | |
| 16 | + | /// Each migration is a (version, description, SQL) tuple. Versions start at 1. | |
| 17 | + | /// The SQL may contain multiple statements separated by semicolons. | |
| 18 | + | const MIGRATIONS: &[(i64, &str, &str)] = &[ | |
| 19 | + | (1, "initial schema", r#" | |
| 20 | + | CREATE TABLE IF NOT EXISTS health_checks ( | |
| 21 | + | id INTEGER PRIMARY KEY AUTOINCREMENT, | |
| 22 | + | target TEXT NOT NULL, | |
| 23 | + | status TEXT NOT NULL, | |
| 24 | + | checked_at TEXT NOT NULL, | |
| 25 | + | response_time_ms INTEGER NOT NULL, | |
| 26 | + | details_json TEXT, | |
| 27 | + | error TEXT | |
| 28 | + | ); | |
| 29 | + | CREATE TABLE IF NOT EXISTS test_runs ( | |
| 30 | + | id INTEGER PRIMARY KEY AUTOINCREMENT, | |
| 31 | + | target TEXT NOT NULL, | |
| 32 | + | started_at TEXT NOT NULL, | |
| 33 | + | finished_at TEXT, | |
| 34 | + | duration_secs INTEGER, | |
| 35 | + | exit_code INTEGER, | |
| 36 | + | passed INTEGER NOT NULL, | |
| 37 | + | summary_json TEXT NOT NULL, | |
| 38 | + | raw_output TEXT NOT NULL, | |
| 39 | + | filter TEXT | |
| 40 | + | ); | |
| 41 | + | CREATE TABLE IF NOT EXISTS peer_identities ( | |
| 42 | + | peer_name TEXT PRIMARY KEY, | |
| 43 | + | instance_id TEXT NOT NULL, | |
| 44 | + | first_seen TEXT NOT NULL | |
| 45 | + | ); | |
| 46 | + | CREATE TABLE IF NOT EXISTS peer_heartbeats ( | |
| 47 | + | id INTEGER PRIMARY KEY AUTOINCREMENT, | |
| 48 | + | peer_name TEXT NOT NULL, | |
| 49 | + | status TEXT NOT NULL, | |
| 50 | + | latency_ms INTEGER NOT NULL, | |
| 51 | + | checked_at TEXT NOT NULL | |
| 52 | + | ); | |
| 53 | + | CREATE INDEX IF NOT EXISTS idx_health_checks_target_id ON health_checks(target, id DESC); | |
| 54 | + | CREATE INDEX IF NOT EXISTS idx_health_checks_target_checked ON health_checks(target, checked_at); | |
| 55 | + | CREATE INDEX IF NOT EXISTS idx_test_runs_target_id ON test_runs(target, id DESC); | |
| 56 | + | CREATE INDEX IF NOT EXISTS idx_peer_heartbeats_peer_id ON peer_heartbeats(peer_name, id DESC); | |
| 57 | + | "#), | |
| 58 | + | (2, "add alerts table", r#" | |
| 59 | + | CREATE TABLE IF NOT EXISTS alerts ( | |
| 60 | + | id INTEGER PRIMARY KEY AUTOINCREMENT, | |
| 61 | + | target TEXT NOT NULL, | |
| 62 | + | alert_type TEXT NOT NULL, | |
| 63 | + | from_status TEXT, | |
| 64 | + | to_status TEXT, | |
| 65 | + | sent_at TEXT NOT NULL, | |
| 66 | + | error TEXT | |
| 67 | + | ); | |
| 68 | + | CREATE INDEX IF NOT EXISTS idx_alerts_target_sent ON alerts(target, sent_at); | |
| 69 | + | "#), | |
| 70 | + | (3, "add tls_checks table", r#" | |
| 71 | + | CREATE TABLE tls_checks ( | |
| 72 | + | id INTEGER PRIMARY KEY AUTOINCREMENT, | |
| 73 | + | target TEXT NOT NULL, | |
| 74 | + | host TEXT NOT NULL, | |
| 75 | + | valid INTEGER NOT NULL, | |
| 76 | + | days_remaining INTEGER NOT NULL, | |
| 77 | + | not_before TEXT NOT NULL, | |
| 78 | + | not_after TEXT NOT NULL, | |
| 79 | + | subject TEXT NOT NULL, | |
| 80 | + | issuer TEXT NOT NULL, | |
| 81 | + | checked_at TEXT NOT NULL, | |
| 82 | + | error TEXT | |
| 83 | + | ); | |
| 84 | + | CREATE INDEX idx_tls_checks_target_id ON tls_checks(target, id DESC); | |
| 85 | + | "#), | |
| 86 | + | (4, "add incidents table", r#" | |
| 87 | + | CREATE TABLE incidents ( | |
| 88 | + | id INTEGER PRIMARY KEY AUTOINCREMENT, | |
| 89 | + | target TEXT NOT NULL, | |
| 90 | + | started_at TEXT NOT NULL, | |
| 91 | + | ended_at TEXT, | |
| 92 | + | duration_secs INTEGER, | |
| 93 | + | from_status TEXT NOT NULL, | |
| 94 | + | to_status TEXT NOT NULL | |
| 95 | + | ); | |
| 96 | + | CREATE INDEX idx_incidents_target_id ON incidents(target, id DESC); | |
| 97 | + | "#), | |
| 98 | + | ]; | |
| 99 | + | ||
| 100 | + | pub async fn connect(path: &Path) -> Result<SqlitePool> { | |
| 8 | 101 | let opts = SqliteConnectOptions::from_str(&format!("sqlite:{}", path.display()))? | |
| 9 | 102 | .create_if_missing(true) | |
| 10 | 103 | .journal_mode(sqlx::sqlite::SqliteJournalMode::Wal); | |
| @@ -14,53 +107,113 @@ pub async fn connect(path: &Path) -> Result<SqlitePool, Box<dyn std::error::Erro | |||
| 14 | 107 | .connect_with(opts) | |
| 15 | 108 | .await?; | |
| 16 | 109 | ||
| 17 | - | init_schema(&pool).await?; | |
| 110 | + | run_migrations(&pool).await?; | |
| 18 | 111 | Ok(pool) | |
| 19 | 112 | } | |
| 20 | 113 | ||
| 21 | - | pub async fn connect_in_memory() -> Result<SqlitePool, Box<dyn std::error::Error + Send + Sync>> { | |
| 114 | + | pub async fn connect_in_memory() -> Result<SqlitePool> { | |
| 22 | 115 | let opts = SqliteConnectOptions::from_str("sqlite::memory:")?; | |
| 23 | 116 | let pool = SqlitePoolOptions::new() | |
| 24 | 117 | .max_connections(1) | |
| 25 | 118 | .connect_with(opts) | |
| 26 | 119 | .await?; | |
| 27 | 120 | ||
| 28 | - | init_schema(&pool).await?; | |
| 121 | + | run_migrations(&pool).await?; | |
| 29 | 122 | Ok(pool) | |
| 30 | 123 | } | |
| 31 | 124 | ||
| 32 | - | async fn init_schema(pool: &SqlitePool) -> Result<(), sqlx::Error> { | |
| 125 | + | /// Run pending schema migrations. Detects pre-migration databases by checking | |
| 126 | + | /// for existing tables and stamps them as version 1 without re-running. | |
| 127 | + | pub async fn run_migrations(pool: &SqlitePool) -> Result<()> { | |
| 128 | + | // Ensure the schema_version table exists | |
| 33 | 129 | sqlx::query( | |
| 34 | - | "CREATE TABLE IF NOT EXISTS health_checks ( | |
| 35 | - | id INTEGER PRIMARY KEY AUTOINCREMENT, | |
| 36 | - | target TEXT NOT NULL, | |
| 37 | - | status TEXT NOT NULL, | |
| 38 | - | checked_at TEXT NOT NULL, | |
| 39 | - | response_time_ms INTEGER NOT NULL, | |
| 40 | - | details_json TEXT, | |
| 41 | - | error TEXT | |
| 130 | + | "CREATE TABLE IF NOT EXISTS schema_version ( | |
| 131 | + | version INTEGER NOT NULL, | |
| 132 | + | description TEXT NOT NULL, | |
| 133 | + | applied_at TEXT NOT NULL | |
| 42 | 134 | )", | |
| 43 | 135 | ) | |
| 44 | 136 | .execute(pool) | |
| 45 | 137 | .await?; | |
| 46 | 138 | ||
| 139 | + | let current_version = get_schema_version(pool).await?; | |
| 140 | + | ||
| 141 | + | // Detect pre-migration databases: if schema_version is empty but tables exist, | |
| 142 | + | // this is an existing database that predates the migration system. | |
| 143 | + | if current_version == 0 && has_existing_tables(pool).await? { | |
| 144 | + | info!("detected pre-migration database, stamping as version 1"); | |
| 145 | + | stamp_version(pool, 1, "initial schema (pre-existing)").await?; | |
| 146 | + | // Run remaining migrations (2+) if any | |
| 147 | + | for &(version, description, sql) in MIGRATIONS { | |
| 148 | + | if version > 1 { | |
| 149 | + | run_one_migration(pool, version, description, sql).await?; | |
| 150 | + | } | |
| 151 | + | } | |
| 152 | + | return Ok(()); | |
| 153 | + | } | |
| 154 | + | ||
| 155 | + | // Run all migrations newer than current version | |
| 156 | + | for &(version, description, sql) in MIGRATIONS { | |
| 157 | + | if version > current_version { | |
| 158 | + | run_one_migration(pool, version, description, sql).await?; | |
| 159 | + | } | |
| 160 | + | } | |
| 161 | + | ||
| 162 | + | Ok(()) | |
| 163 | + | } | |
| 164 | + | ||
| 165 | + | /// Get the current schema version (0 if no migrations have been applied). | |
| 166 | + | pub async fn get_schema_version(pool: &SqlitePool) -> Result<i64> { | |
| 167 | + | let row = sqlx::query_as::<_, (i64,)>( | |
| 168 | + | "SELECT COALESCE(MAX(version), 0) FROM schema_version", | |
| 169 | + | ) | |
| 170 | + | .fetch_one(pool) | |
| 171 | + | .await?; | |
| 172 | + | Ok(row.0) | |
| 173 | + | } | |
| 174 | + | ||
| 175 | + | /// Check whether the database has existing tables from before the migration system. | |
| 176 | + | async fn has_existing_tables(pool: &SqlitePool) -> Result<bool> { | |
| 177 | + | let row = sqlx::query_as::<_, (i64,)>( | |
| 178 | + | "SELECT COUNT(*) FROM sqlite_master WHERE type = 'table' AND name = 'health_checks'", | |
| 179 | + | ) | |
| 180 | + | .fetch_one(pool) | |
| 181 | + | .await?; | |
| 182 | + | Ok(row.0 > 0) | |
| 183 | + | } | |
| 184 | + | ||
| 185 | + | /// Execute a single migration's SQL and record it in schema_version. | |
| 186 | + | async fn run_one_migration( | |
| 187 | + | pool: &SqlitePool, | |
| 188 | + | version: i64, | |
| 189 | + | description: &str, | |
| 190 | + | sql: &str, | |
| 191 | + | ) -> Result<()> { | |
| 192 | + | info!(version, description, "running migration"); | |
| 193 | + | ||
| 194 | + | // Execute each statement in the migration SQL | |
| 195 | + | for statement in sql.split(';') { | |
| 196 | + | let trimmed = statement.trim(); | |
| 197 | + | if !trimmed.is_empty() { | |
| 198 | + | sqlx::query(trimmed).execute(pool).await?; | |
| 199 | + | } | |
| 200 | + | } | |
| 201 | + | ||
| 202 | + | stamp_version(pool, version, description).await?; | |
| 203 | + | Ok(()) | |
| 204 | + | } | |
| 205 | + | ||
| 206 | + | /// Record a version in the schema_version table. | |
| 207 | + | async fn stamp_version(pool: &SqlitePool, version: i64, description: &str) -> Result<()> { | |
| 208 | + | let now = chrono::Utc::now().to_rfc3339(); | |
| 47 | 209 | sqlx::query( | |
| 48 | - | "CREATE TABLE IF NOT EXISTS test_runs ( | |
| 49 | - | id INTEGER PRIMARY KEY AUTOINCREMENT, | |
| 50 | - | target TEXT NOT NULL, | |
| 51 | - | started_at TEXT NOT NULL, | |
| 52 | - | finished_at TEXT, | |
| 53 | - | duration_secs INTEGER, | |
| 54 | - | exit_code INTEGER, | |
| 55 | - | passed INTEGER NOT NULL, | |
| 56 | - | summary_json TEXT NOT NULL, | |
| 57 | - | raw_output TEXT NOT NULL, | |
| 58 | - | filter TEXT | |
| 59 | - | )", | |
| 210 | + | "INSERT INTO schema_version (version, description, applied_at) VALUES (?, ?, ?)", | |
| 60 | 211 | ) | |
| 212 | + | .bind(version) | |
| 213 | + | .bind(description) | |
| 214 | + | .bind(&now) | |
| 61 | 215 | .execute(pool) | |
| 62 | 216 | .await?; | |
| 63 | - | ||
| 64 | 217 | Ok(()) | |
| 65 | 218 | } | |
| 66 | 219 | ||
| @@ -69,7 +222,7 @@ async fn init_schema(pool: &SqlitePool) -> Result<(), sqlx::Error> { | |||
| 69 | 222 | pub async fn insert_health_check( | |
| 70 | 223 | pool: &SqlitePool, | |
| 71 | 224 | snapshot: &HealthSnapshot, | |
| 72 | - | ) -> Result<i64, sqlx::Error> { | |
| 225 | + | ) -> Result<i64> { | |
| 73 | 226 | let status = snapshot.status.to_string(); | |
| 74 | 227 | let details_json = snapshot | |
| 75 | 228 | .details | |
| @@ -96,7 +249,7 @@ pub async fn get_health_history( | |||
| 96 | 249 | pool: &SqlitePool, | |
| 97 | 250 | target: Option<&str>, | |
| 98 | 251 | limit: i64, | |
| 99 | - | ) -> Result<Vec<HealthSnapshot>, sqlx::Error> { | |
| 252 | + | ) -> Result<Vec<HealthSnapshot>> { | |
| 100 | 253 | let rows = match target { | |
| 101 | 254 | Some(t) => { | |
| 102 | 255 | sqlx::query_as::<_, HealthCheckRow>( | |
| @@ -125,7 +278,7 @@ pub async fn get_health_history( | |||
| 125 | 278 | pub async fn get_latest_health( | |
| 126 | 279 | pool: &SqlitePool, | |
| 127 | 280 | target: &str, | |
| 128 | - | ) -> Result<Option<HealthSnapshot>, sqlx::Error> { | |
| 281 | + | ) -> Result<Option<HealthSnapshot>> { | |
| 129 | 282 | let row = sqlx::query_as::<_, HealthCheckRow>( | |
| 130 | 283 | "SELECT id, target, status, checked_at, response_time_ms, details_json, error | |
| 131 | 284 | FROM health_checks WHERE target = ? ORDER BY id DESC LIMIT 1", | |
| @@ -142,7 +295,7 @@ pub async fn get_latest_health( | |||
| 142 | 295 | pub async fn insert_test_run( | |
| 143 | 296 | pool: &SqlitePool, | |
| 144 | 297 | run: &TestRun, | |
| 145 | - | ) -> Result<i64, sqlx::Error> { | |
| 298 | + | ) -> Result<i64> { | |
| 146 | 299 | let summary_json = serde_json::to_string(&run.summary).unwrap_or_default(); | |
| 147 | 300 | ||
| 148 | 301 | let result = sqlx::query( | |
| @@ -168,7 +321,7 @@ pub async fn get_test_history( | |||
| 168 | 321 | pool: &SqlitePool, | |
| 169 | 322 | target: Option<&str>, | |
| 170 | 323 | limit: i64, | |
| 171 | - | ) -> Result<Vec<TestRun>, sqlx::Error> { | |
| 324 | + | ) -> Result<Vec<TestRun>> { | |
| 172 | 325 | let rows = match target { | |
| 173 | 326 | Some(t) => { | |
| 174 | 327 | sqlx::query_as::<_, TestRunRow>( | |
| @@ -197,7 +350,7 @@ pub async fn get_test_history( | |||
| 197 | 350 | pub async fn get_latest_test_run( | |
| 198 | 351 | pool: &SqlitePool, | |
| 199 | 352 | target: &str, | |
| 200 | - | ) -> Result<Option<TestRun>, sqlx::Error> { | |
| 353 | + | ) -> Result<Option<TestRun>> { | |
| 201 | 354 | let row = sqlx::query_as::<_, TestRunRow>( | |
| 202 | 355 | "SELECT id, target, started_at, finished_at, duration_secs, exit_code, passed, summary_json, raw_output, filter | |
| 203 | 356 | FROM test_runs WHERE target = ? ORDER BY id DESC LIMIT 1", | |
| @@ -209,10 +362,285 @@ pub async fn get_latest_test_run( | |||
| 209 | 362 | Ok(row.map(|r| r.into_test_run())) | |
| 210 | 363 | } | |
| 211 | 364 | ||
| 365 | + | /// Get the version from the health check closest to (but before) a given timestamp. | |
| 366 | + | pub async fn get_version_at_time( | |
| 367 | + | pool: &SqlitePool, | |
| 368 | + | target: &str, | |
| 369 | + | before_rfc3339: &str, | |
| 370 | + | ) -> Result<Option<String>> { | |
| 371 | + | let row = sqlx::query_as::<_, (Option<String>,)>( | |
| 372 | + | "SELECT details_json FROM health_checks | |
| 373 | + | WHERE target = ? AND checked_at <= ? | |
| 374 | + | ORDER BY checked_at DESC LIMIT 1", | |
| 375 | + | ) | |
| 376 | + | .bind(target) | |
| 377 | + | .bind(before_rfc3339) | |
| 378 | + | .fetch_optional(pool) | |
| 379 | + | .await?; | |
| 380 | + | ||
| 381 | + | let version = row | |
| 382 | + | .and_then(|r| r.0) | |
| 383 | + | .and_then(|json_str| serde_json::from_str::<serde_json::Value>(&json_str).ok()) | |
| 384 | + | .and_then(|json| json.get("version").and_then(|v| v.as_str()).map(String::from)); | |
| 385 | + | ||
| 386 | + | Ok(version) | |
| 387 | + | } | |
| 388 | + | ||
| 389 | + | /// Calculate uptime percentage for a target over the given number of hours. | |
| 390 | + | /// Returns the percentage of health checks with "operational" status. | |
| 391 | + | pub async fn get_uptime_percent( | |
| 392 | + | pool: &SqlitePool, | |
| 393 | + | target: &str, | |
| 394 | + | hours: i64, | |
| 395 | + | ) -> Result<Option<f64>> { | |
| 396 | + | let cutoff = chrono::Utc::now() - chrono::Duration::hours(hours); | |
| 397 | + | let cutoff_str = cutoff.to_rfc3339(); | |
| 398 | + | ||
| 399 | + | let row = sqlx::query_as::<_, (i64, i64)>( | |
| 400 | + | "SELECT | |
| 401 | + | COUNT(*) as total, | |
| 402 | + | SUM(CASE WHEN status = 'operational' THEN 1 ELSE 0 END) as operational | |
| 403 | + | FROM health_checks | |
| 404 | + | WHERE target = ? AND checked_at >= ?", | |
| 405 | + | ) | |
| 406 | + | .bind(target) | |
| 407 | + | .bind(&cutoff_str) | |
| 408 | + | .fetch_one(pool) | |
| 409 | + | .await?; | |
| 410 | + | ||
| 411 | + | if row.0 == 0 { | |
| 412 | + | Ok(None) | |
| 413 | + | } else { | |
| 414 | + | Ok(Some(row.1 as f64 / row.0 as f64 * 100.0)) | |
| 415 | + | } | |
| 416 | + | } | |
| 417 | + | ||
| 418 | + | // --- Latency trending queries --- | |
| 419 | + | ||
| 420 | + | /// Fetch all response times for a target since a given timestamp, ordered ASC. | |
| 421 | + | pub async fn get_response_times( | |
| 422 | + | pool: &SqlitePool, | |
| 423 | + | target: &str, | |
| 424 | + | since_rfc3339: &str, | |
| 425 | + | ) -> Result<Vec<(String, i64)>> { | |
| 426 | + | let rows = sqlx::query_as::<_, (String, i64)>( | |
| 427 | + | "SELECT checked_at, response_time_ms FROM health_checks | |
| 428 | + | WHERE target = ? AND checked_at >= ? | |
| 429 | + | ORDER BY checked_at ASC", | |
| 430 | + | ) | |
| 431 | + | .bind(target) | |
| 432 | + | .bind(since_rfc3339) | |
| 433 | + | .fetch_all(pool) | |
| 434 | + | .await?; | |
| 435 | + | Ok(rows) | |
| 436 | + | } | |
| 437 | + | ||
| 438 | + | /// Fetch the last N response times for **operational** checks only (most recent first). | |
| 439 | + | pub async fn get_recent_response_times( | |
| 440 | + | pool: &SqlitePool, | |
| 441 | + | target: &str, | |
| 442 | + | count: i64, | |
| 443 | + | ) -> Result<Vec<i64>> { | |
| 444 | + | let rows = sqlx::query_as::<_, (i64,)>( | |
| 445 | + | "SELECT response_time_ms FROM health_checks | |
| 446 | + | WHERE target = ? AND status = 'operational' | |
| 447 | + | ORDER BY id DESC LIMIT ?", | |
| 448 | + | ) | |
| 449 | + | .bind(target) | |
| 450 | + | .bind(count) | |
| 451 | + | .fetch_all(pool) | |
| 452 | + | .await?; | |
| 453 | + | Ok(rows.into_iter().map(|r| r.0).collect()) | |
| 454 | + | } | |
| 455 | + | ||
| 456 | + | // --- Alert queries --- | |
| 457 | + | ||
| 458 | + | #[derive(Debug, sqlx::FromRow)] | |
| 459 | + | pub struct AlertRow { | |
| 460 | + | pub id: i64, | |
| 461 | + | pub target: String, | |
| 462 | + | pub alert_type: String, | |
| 463 | + | pub from_status: Option<String>, | |
| 464 | + | pub to_status: Option<String>, | |
| 465 | + | pub sent_at: String, | |
| 466 | + | pub error: Option<String>, | |
| 467 | + | } | |
| 468 | + | ||
| 469 | + | pub async fn insert_alert( | |
| 470 | + | pool: &SqlitePool, | |
| 471 | + | target: &str, | |
| 472 | + | alert_type: &str, | |
| 473 | + | from_status: Option<&str>, | |
| 474 | + | to_status: Option<&str>, | |
| 475 | + | error: Option<&str>, | |
| 476 | + | ) -> Result<i64> { | |
| 477 | + | let now = chrono::Utc::now().to_rfc3339(); | |
| 478 | + | let result = sqlx::query( | |
| 479 | + | "INSERT INTO alerts (target, alert_type, from_status, to_status, sent_at, error) | |
| 480 | + | VALUES (?, ?, ?, ?, ?, ?)", | |
| 481 | + | ) | |
| 482 | + | .bind(target) | |
| 483 | + | .bind(alert_type) | |
| 484 | + | .bind(from_status) | |
| 485 | + | .bind(to_status) | |
| 486 | + | .bind(&now) | |
| 487 | + | .bind(error) | |
| 488 | + | .execute(pool) | |
| 489 | + | .await?; | |
| 490 | + | Ok(result.last_insert_rowid()) | |
| 491 | + | } | |
| 492 | + | ||
| 493 | + | pub async fn get_latest_alert_for_target( | |
| 494 | + | pool: &SqlitePool, | |
| 495 | + | target: &str, | |
| 496 | + | ) -> Result<Option<AlertRow>> { | |
| 497 | + | Ok(sqlx::query_as::<_, AlertRow>( | |
| 498 | + | "SELECT id, target, alert_type, from_status, to_status, sent_at, error | |
| 499 | + | FROM alerts WHERE target = ? ORDER BY id DESC LIMIT 1", | |
| 500 | + | ) | |
| 501 | + | .bind(target) | |
| 502 | + | .fetch_optional(pool) | |
| 503 | + | .await?) | |
| 504 | + | } | |
| 505 | + | ||
| 506 | + | // --- TLS check queries --- | |
| 507 | + | ||
| 508 | + | #[derive(Debug, sqlx::FromRow, serde::Serialize)] | |
| 509 | + | pub struct TlsCheckRow { | |
| 510 | + | pub id: i64, | |
| 511 | + | pub target: String, | |
| 512 | + | pub host: String, | |
| 513 | + | pub valid: bool, | |
| 514 | + | pub days_remaining: i64, | |
| 515 | + | pub not_before: String, | |
| 516 | + | pub not_after: String, | |
| 517 | + | pub subject: String, | |
| 518 | + | pub issuer: String, | |
| 519 | + | pub checked_at: String, | |
| 520 | + | pub error: Option<String>, | |
| 521 | + | } | |
| 522 | + | ||
| 523 | + | pub async fn insert_tls_check( | |
| 524 | + | pool: &SqlitePool, | |
| 525 | + | status: &TlsStatus, | |
| 526 | + | ) -> Result<i64> { | |
| 527 | + | let result = sqlx::query( | |
| 528 | + | "INSERT INTO tls_checks (target, host, valid, days_remaining, not_before, not_after, subject, issuer, checked_at, error) | |
| 529 | + | VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)", | |
| 530 | + | ) | |
| 531 | + | .bind(&status.target) | |
| 532 | + | .bind(&status.host) | |
| 533 | + | .bind(status.valid) | |
| 534 | + | .bind(status.days_remaining) | |
| 535 | + | .bind(&status.not_before) | |
| 536 | + | .bind(&status.not_after) | |
| 537 | + | .bind(&status.subject) | |
| 538 | + | .bind(&status.issuer) | |
| 539 | + | .bind(&status.checked_at) | |
| 540 | + | .bind(&status.error) | |
| 541 | + | .execute(pool) | |
| 542 | + | .await?; | |
| 543 | + | ||
| 544 | + | Ok(result.last_insert_rowid()) | |
| 545 | + | } | |
| 546 | + | ||
| 547 | + | pub async fn get_latest_tls_check( | |
| 548 | + | pool: &SqlitePool, | |
| 549 | + | target: &str, | |
| 550 | + | ) -> Result<Option<TlsCheckRow>> { | |
| 551 | + | Ok(sqlx::query_as::<_, TlsCheckRow>( | |
| 552 | + | "SELECT id, target, host, valid, days_remaining, not_before, not_after, subject, issuer, checked_at, error | |
| 553 | + | FROM tls_checks WHERE target = ? ORDER BY id DESC LIMIT 1", | |
| 554 | + | ) | |
| 555 | + | .bind(target) | |
| 556 | + | .fetch_optional(pool) | |
| 557 | + | .await?) | |
| 558 | + | } | |
| 559 | + | ||
| 560 | + | // --- Incident queries --- | |
| 561 | + | ||
| 562 | + | #[derive(Debug, Clone, sqlx::FromRow, serde::Serialize)] | |
| 563 | + | pub struct IncidentRow { | |
| 564 | + | pub id: i64, | |
| 565 | + | pub target: String, | |
| 566 | + | pub started_at: String, | |
| 567 | + | pub ended_at: Option<String>, | |
| 568 | + | pub duration_secs: Option<i64>, | |
| 569 | + | pub from_status: String, | |
| 570 | + | pub to_status: String, |
Lines truncated
| @@ -0,0 +1,955 @@ | |||
| 1 | + | //! Pure formatting functions for CLI display output. | |
| 2 | + | //! | |
| 3 | + | //! Each function takes data types and writes formatted output to a `String`, | |
| 4 | + | //! keeping display logic separate from async I/O for testability. | |
| 5 | + | ||
| 6 | + | use std::fmt::Write; | |
| 7 | + | ||
| 8 | + | use crate::db::{IncidentRow, TlsCheckRow}; | |
| 9 | + | use crate::types::{HealthSnapshot, LatencyStats, TestRun, TestStaleness}; | |
| 10 | + | ||
| 11 | + | /// Format a single health snapshot as a human-readable line. | |
| 12 | + | pub fn format_health_snapshot(s: &HealthSnapshot) -> String { | |
| 13 | + | let mut out = String::new(); | |
| 14 | + | write!(out, "[{}] {} \u{2014} {}", s.status.icon(), s.target, s.status).unwrap(); | |
| 15 | + | write!(out, " ({}ms)", s.response_time_ms).unwrap(); | |
| 16 | + | if let Some(details) = &s.details { | |
| 17 | + | if let Some(v) = &details.version { | |
| 18 | + | write!(out, " v{v}").unwrap(); | |
| 19 | + | } | |
| 20 | + | if let Some(u) = &details.uptime { | |
| 21 | + | write!(out, " up {u}").unwrap(); | |
| 22 | + | } | |
| 23 | + | } | |
| 24 | + | writeln!(out).unwrap(); | |
| 25 | + | if let Some(err) = &s.error { | |
| 26 | + | writeln!(out, " {err}").unwrap(); | |
| 27 | + | } | |
| 28 | + | out | |
| 29 | + | } | |
| 30 | + | ||
| 31 | + | /// Format a list of health snapshots for CLI display. | |
| 32 | + | pub fn format_health_snapshots(snapshots: &[HealthSnapshot]) -> String { | |
| 33 | + | let mut out = String::new(); | |
| 34 | + | for s in snapshots { | |
| 35 | + | out.push_str(&format_health_snapshot(s)); | |
| 36 | + | } | |
| 37 | + | out | |
| 38 | + | } | |
| 39 | + | ||
| 40 | + | /// Format a test run result for CLI display. | |
| 41 | + | pub fn format_test_result(target_name: &str, run: &TestRun) -> String { | |
| 42 | + | let mut out = String::new(); | |
| 43 | + | let result = if run.passed { "PASSED" } else { "FAILED" }; | |
| 44 | + | writeln!(out, "{target_name}: {result}").unwrap(); | |
| 45 | + | if let Some(d) = run.duration_secs { | |
| 46 | + | writeln!(out, "Duration: {d}s").unwrap(); | |
| 47 | + | } | |
| 48 | + | if let (Some(p), Some(f)) = (run.summary.total_passed, run.summary.total_failed) { | |
| 49 | + | writeln!(out, "Tests: {p} passed, {f} failed").unwrap(); | |
| 50 | + | } | |
| 51 | + | for step in &run.summary.steps { | |
| 52 | + | let mark = if step.passed { "PASS" } else { "FAIL" }; | |
| 53 | + | writeln!(out, " {mark} {}", step.name).unwrap(); | |
| 54 | + | } | |
| 55 | + | if !run.passed { | |
| 56 | + | writeln!(out, "\nRaw output:\n{}", run.raw_output).unwrap(); | |
| 57 | + | } | |
| 58 | + | out | |
| 59 | + | } | |
| 60 | + | ||
| 61 | + | /// Format a single target's status block (health + latency + TLS + tests + staleness + incident) for CLI display. | |
| 62 | + | #[allow(clippy::too_many_arguments)] | |
| 63 | + | pub fn format_status_target( | |
| 64 | + | name: &str, | |
| 65 | + | label: &str, | |
| 66 | + | health: Option<&HealthSnapshot>, | |
| 67 | + | latency: Option<&LatencyStats>, | |
| 68 | + | tls: Option<&TlsCheckRow>, | |
| 69 | + | test: Option<&TestRun>, | |
| 70 | + | staleness: Option<&TestStaleness>, | |
| 71 | + | incident: Option<&IncidentRow>, | |
| 72 | + | ) -> String { | |
| 73 | + | let mut out = String::new(); | |
| 74 | + | writeln!(out, "=== {name} ({label}) ===").unwrap(); | |
| 75 | + | ||
| 76 | + | if let Some(h) = health { | |
| 77 | + | write!(out, " Health: [{}] {}", h.status.icon(), h.status).unwrap(); | |
| 78 | + | write!(out, " ({}ms)", h.response_time_ms).unwrap(); | |
| 79 | + | if let Some(d) = &h.details | |
| 80 | + | && let Some(v) = &d.version | |
| 81 | + | { | |
| 82 | + | write!(out, " v{v}").unwrap(); | |
| 83 | + | } | |
| 84 | + | writeln!(out).unwrap(); | |
| 85 | + | } else { | |
| 86 | + | writeln!(out, " Health: no data").unwrap(); | |
| 87 | + | } | |
| 88 | + | ||
| 89 | + | if let Some(l) = latency { | |
| 90 | + | writeln!( | |
| 91 | + | out, | |
| 92 | + | " Latency (24h): avg {:.0}ms, p95 {}ms, range {}-{}ms ({} samples)", | |
| 93 | + | l.avg_ms, l.p95_ms, l.min_ms, l.max_ms, l.sample_count | |
| 94 | + | ) | |
| 95 | + | .unwrap(); | |
| 96 | + | } | |
| 97 | + | ||
| 98 | + | if let Some(t) = tls { | |
| 99 | + | if let Some(ref err) = t.error { | |
| 100 | + | writeln!(out, " TLS: [ERR] {} \u{2014} {err}", t.host).unwrap(); | |
| 101 | + | } else if t.days_remaining <= 0 { | |
| 102 | + | writeln!(out, " TLS: [ERR] {} \u{2014} EXPIRED (expired {})", t.host, t.not_after).unwrap(); | |
| 103 | + | } else if t.days_remaining <= 14 { | |
| 104 | + | writeln!(out, " TLS: [WARN] {} \u{2014} {}d remaining (expires {})", t.host, t.days_remaining, t.not_after).unwrap(); | |
| 105 | + | } else { | |
| 106 | + | writeln!(out, " TLS: [OK] {} \u{2014} {}d remaining (expires {})", t.host, t.days_remaining, t.not_after).unwrap(); | |
| 107 | + | } | |
| 108 | + | } | |
| 109 | + | ||
| 110 | + | if let Some(t) = test { | |
| 111 | + | let result = if t.passed { "PASSED" } else { "FAILED" }; | |
| 112 | + | write!(out, " Tests: {result}").unwrap(); | |
| 113 | + | if let Some(d) = t.duration_secs { | |
| 114 | + | write!(out, " ({d}s)").unwrap(); | |
| 115 | + | } | |
| 116 | + | writeln!(out).unwrap(); | |
| 117 | + | if let (Some(p), Some(f)) = (t.summary.total_passed, t.summary.total_failed) { | |
| 118 | + | writeln!(out, " {p} passed, {f} failed").unwrap(); | |
| 119 | + | } | |
| 120 | + | } else { | |
| 121 | + | writeln!(out, " Tests: no data").unwrap(); | |
| 122 | + | } | |
| 123 | + | ||
| 124 | + | if let Some(s) = staleness | |
| 125 | + | && s.stale | |
| 126 | + | && let Some(reason) = &s.reason | |
| 127 | + | { | |
| 128 | + | writeln!(out, " Tests: STALE \u{2014} {reason}").unwrap(); | |
| 129 | + | } | |
| 130 | + | ||
| 131 | + | if let Some(inc) = incident { | |
| 132 | + | writeln!(out, " Incident: [ACTIVE] {} since {}", inc.to_status, inc.started_at).unwrap(); | |
| 133 | + | } | |
| 134 | + | ||
| 135 | + | writeln!(out).unwrap(); | |
| 136 | + | out | |
| 137 | + | } | |
| 138 | + | ||
| 139 | + | /// Format health check history for CLI display. | |
| 140 | + | pub fn format_health_history(history: &[HealthSnapshot]) -> String { | |
| 141 | + | if history.is_empty() { | |
| 142 | + | return "No health check history.\n".to_string(); | |
| 143 | + | } | |
| 144 | + | let mut out = String::new(); | |
| 145 | + | for h in history { | |
| 146 | + | writeln!( | |
| 147 | + | out, | |
| 148 | + | "[{}] {} \u{2014} {} ({}ms) {}", | |
| 149 | + | h.status.icon(), | |
| 150 | + | h.target, | |
| 151 | + | h.status, | |
| 152 | + | h.response_time_ms, | |
| 153 | + | h.checked_at | |
| 154 | + | ) | |
| 155 | + | .unwrap(); | |
| 156 | + | } | |
| 157 | + | out | |
| 158 | + | } | |
| 159 | + | ||
| 160 | + | /// Format test run history for CLI display. | |
| 161 | + | pub fn format_test_history(history: &[TestRun]) -> String { | |
| 162 | + | if history.is_empty() { | |
| 163 | + | return "No test run history.\n".to_string(); | |
| 164 | + | } | |
| 165 | + | let mut out = String::new(); | |
| 166 | + | for r in history { | |
| 167 | + | let result = if r.passed { "PASS" } else { "FAIL" }; | |
| 168 | + | write!(out, "[{result}] {}", r.target).unwrap(); | |
| 169 | + | if let Some(d) = r.duration_secs { | |
| 170 | + | write!(out, " ({d}s)").unwrap(); | |
| 171 | + | } | |
| 172 | + | write!(out, " {}", r.started_at).unwrap(); | |
| 173 | + | if let (Some(p), Some(f)) = (r.summary.total_passed, r.summary.total_failed) { | |
| 174 | + | write!(out, " \u{2014} {p} passed, {f} failed").unwrap(); | |
| 175 | + | } | |
| 176 | + | writeln!(out).unwrap(); | |
| 177 | + | } | |
| 178 | + | out | |
| 179 | + | } | |
| 180 | + | ||
/// Summarise how many records of each kind a prune pass removed.
///
/// Returns a single newline-terminated sentence listing the six counters in a
/// fixed order, followed by the age cutoff `days`.
pub fn format_prune(
    health_pruned: u64,
    test_pruned: u64,
    heartbeat_pruned: u64,
    alerts_pruned: u64,
    tls_pruned: u64,
    incidents_pruned: u64,
    days: i64,
) -> String {
    // One clause per record kind, joined with ", " to form the sentence body.
    let removed = [
        format!("{health_pruned} health checks"),
        format!("{test_pruned} test runs"),
        format!("{heartbeat_pruned} peer heartbeats"),
        format!("{alerts_pruned} alerts"),
        format!("{tls_pruned} TLS checks"),
        format!("{incidents_pruned} incidents"),
    ]
    .join(", ");

    format!("Pruned {removed} older than {days} days.\n")
}
| 185 | + | ||
| 186 | + | /// Format mesh data (from JSON) for human-readable CLI display. | |
| 187 | + | pub fn format_mesh(data: &serde_json::Value) -> String { | |
| 188 | + | let Some(instances) = data.get("instances").and_then(|v| v.as_object()) else { | |
| 189 | + | return "No mesh data available.\n".to_string(); | |
| 190 | + | }; | |
| 191 | + | ||
| 192 | + | let mut out = String::new(); | |
| 193 | + | for (name, instance_data) in instances { | |
| 194 | + | let instance = instance_data.get("instance"); | |
| 195 | + | let id = instance | |
| 196 | + | .and_then(|i| i.get("id")) | |
| 197 | + | .and_then(|v| v.as_str()) | |
| 198 | + | .unwrap_or("?"); | |
| 199 | + | let version = instance | |
| 200 | + | .and_then(|i| i.get("version")) | |
| 201 | + | .and_then(|v| v.as_str()) | |
| 202 | + | .unwrap_or("?"); | |
| 203 | + | ||
| 204 | + | writeln!(out, "=== {name} ===").unwrap(); | |
| 205 | + | writeln!(out, " ID: {id}").unwrap(); | |
| 206 | + | writeln!(out, " Version: {version}").unwrap(); | |
| 207 | + | ||
| 208 | + | // Targets | |
| 209 | + | if let Some(targets) = instance_data.get("targets").and_then(|v| v.as_object()) { | |
| 210 | + | for (target_name, target_data) in targets { | |
| 211 | + | let status = target_data | |
| 212 | + | .get("status") | |
| 213 | + | .and_then(|v| v.as_str()) | |
| 214 | + | .unwrap_or("?"); | |
| 215 | + | let ms = target_data | |
| 216 | + | .get("response_time_ms") | |
| 217 | + | .and_then(|v| v.as_i64()); | |
| 218 | + | let ms_str = ms.map(|m| format!(" ({m}ms)")).unwrap_or_default(); | |
| 219 | + | writeln!(out, " Target {target_name}: {status}{ms_str}").unwrap(); | |
| 220 | + | } | |
| 221 | + | } | |
| 222 | + | ||
| 223 | + | // Peers | |
| 224 | + | if let Some(peers) = instance_data.get("peers").and_then(|v| v.as_object()) { | |
| 225 | + | for (peer_name, peer_data) in peers { | |
| 226 | + | let status = peer_data | |
| 227 | + | .get("status") | |
| 228 | + | .and_then(|v| v.as_str()) | |
| 229 | + | .unwrap_or("?"); | |
| 230 | + | let latency = peer_data | |
| 231 | + | .get("latency_ms") | |
| 232 | + | .and_then(|v| v.as_u64()) | |
| 233 | + | .map(|ms| format!(" ({ms}ms)")) | |
| 234 | + | .unwrap_or_default(); | |
| 235 | + | writeln!(out, " Peer {peer_name}: {status}{latency}").unwrap(); | |
| 236 | + | } | |
| 237 | + | } | |
| 238 | + | ||
| 239 | + | // Error fallback | |
| 240 | + | if let Some(err) = instance_data.get("error").and_then(|v| v.as_str()) { | |
| 241 | + | writeln!(out, " ({err})").unwrap(); | |
| 242 | + | } | |
| 243 | + | ||
| 244 | + | writeln!(out).unwrap(); | |
| 245 | + | } | |
| 246 | + | out | |
| 247 | + | } | |
| 248 | + | ||
| 249 | + | #[cfg(test)] | |
| 250 | + | mod tests { | |
| 251 | + | use super::*; | |
| 252 | + | use crate::types::*; | |
| 253 | + | ||
| 254 | + | // --- format_health_snapshot --- | |
| 255 | + | ||
| 256 | + | #[test] | |
| 257 | + | fn health_snapshot_operational_with_details() { | |
| 258 | + | let s = HealthSnapshot { | |
| 259 | + | id: None, | |
| 260 | + | target: "mnw".to_string(), | |
| 261 | + | status: HealthStatus::Operational, | |
| 262 | + | checked_at: "2026-03-10T00:00:00Z".to_string(), | |
| 263 | + | response_time_ms: 95, | |
| 264 | + | details: Some(HealthDetails { | |
| 265 | + | version: Some("1.2.0".to_string()), | |
| 266 | + | uptime: Some("5d 3h".to_string()), | |
| 267 | + | checks: None, | |
| 268 | + | monitoring: None, | |
| 269 | + | }), | |
| 270 | + | error: None, | |
| 271 | + | }; | |
| 272 | + | let out = format_health_snapshot(&s); | |
| 273 | + | assert!(out.contains("[OK]")); | |
| 274 | + | assert!(out.contains("mnw")); | |
| 275 | + | assert!(out.contains("operational")); | |
| 276 | + | assert!(out.contains("(95ms)")); | |
| 277 | + | assert!(out.contains("v1.2.0")); | |
| 278 | + | assert!(out.contains("up 5d 3h")); | |
| 279 | + | } | |
| 280 | + | ||
| 281 | + | #[test] | |
| 282 | + | fn health_snapshot_unreachable_with_error() { | |
| 283 | + | let s = HealthSnapshot { | |
| 284 | + | id: None, | |
| 285 | + | target: "api".to_string(), | |
| 286 | + | status: HealthStatus::Unreachable, | |
| 287 | + | checked_at: "2026-03-10T00:00:00Z".to_string(), | |
| 288 | + | response_time_ms: 0, | |
| 289 | + | details: None, | |
| 290 | + | error: Some("connection refused".to_string()), | |
| 291 | + | }; | |
| 292 | + | let out = format_health_snapshot(&s); | |
| 293 | + | assert!(out.contains("[DOWN]")); | |
| 294 | + | assert!(out.contains("unreachable")); | |
| 295 | + | assert!(out.contains("connection refused")); | |
| 296 | + | } | |
| 297 | + | ||
| 298 | + | #[test] | |
| 299 | + | fn health_snapshot_degraded_no_details() { | |
| 300 | + | let s = HealthSnapshot { | |
| 301 | + | id: None, | |
| 302 | + | target: "svc".to_string(), | |
| 303 | + | status: HealthStatus::Degraded, | |
| 304 | + | checked_at: "2026-03-10T00:00:00Z".to_string(), | |
| 305 | + | response_time_ms: 2500, | |
| 306 | + | details: None, | |
| 307 | + | error: None, | |
| 308 | + | }; | |
| 309 | + | let out = format_health_snapshot(&s); | |
| 310 | + | assert!(out.contains("[WARN]")); | |
| 311 | + | assert!(out.contains("degraded")); | |
| 312 | + | assert!(out.contains("(2500ms)")); | |
| 313 | + | assert!(!out.contains("up ")); | |
| 314 | + | assert!(!out.contains(" v")); | |
| 315 | + | } | |
| 316 | + | ||
| 317 | + | #[test] | |
| 318 | + | fn health_snapshot_error_status() { | |
| 319 | + | let s = HealthSnapshot { | |
| 320 | + | id: None, | |
| 321 | + | target: "db".to_string(), | |
| 322 | + | status: HealthStatus::Error, | |
| 323 | + | checked_at: "2026-03-10T00:00:00Z".to_string(), | |
| 324 | + | response_time_ms: 500, | |
| 325 | + | details: None, | |
| 326 | + | error: Some("500 internal server error".to_string()), | |
| 327 | + | }; | |
| 328 | + | let out = format_health_snapshot(&s); | |
| 329 | + | assert!(out.contains("[ERR]")); | |
| 330 | + | assert!(out.contains("error")); | |
| 331 | + | assert!(out.contains("500 internal server error")); | |
| 332 | + | } | |
| 333 | + | ||
| 334 | + | #[test] | |
| 335 | + | fn health_snapshots_multiple() { | |
| 336 | + | let snapshots = vec![ | |
| 337 | + | HealthSnapshot { | |
| 338 | + | id: None, | |
| 339 | + | target: "a".to_string(), | |
| 340 | + | status: HealthStatus::Operational, | |
| 341 | + | checked_at: "2026-03-10T00:00:00Z".to_string(), | |
| 342 | + | response_time_ms: 50, | |
| 343 | + | details: None, | |
| 344 | + | error: None, | |
| 345 | + | }, | |
| 346 | + | HealthSnapshot { | |
| 347 | + | id: None, | |
| 348 | + | target: "b".to_string(), | |
| 349 | + | status: HealthStatus::Degraded, | |
| 350 | + | checked_at: "2026-03-10T00:00:00Z".to_string(), | |
| 351 | + | response_time_ms: 3000, | |
| 352 | + | details: None, | |
| 353 | + | error: None, | |
| 354 | + | }, | |
| 355 | + | ]; | |
| 356 | + | let out = format_health_snapshots(&snapshots); | |
| 357 | + | assert!(out.contains("[OK]")); | |
| 358 | + | assert!(out.contains("[WARN]")); | |
| 359 | + | assert!(out.contains("a")); | |
| 360 | + | assert!(out.contains("b")); | |
| 361 | + | } | |
| 362 | + | ||
| 363 | + | // --- format_test_result --- | |
| 364 | + | ||
| 365 | + | #[test] | |
| 366 | + | fn test_result_passed() { | |
| 367 | + | let run = TestRun { | |
| 368 | + | id: None, | |
| 369 | + | target: "mnw".to_string(), | |
| 370 | + | started_at: "2026-03-10T00:00:00Z".to_string(), | |
| 371 | + | finished_at: Some("2026-03-10T00:02:00Z".to_string()), | |
| 372 | + | duration_secs: Some(120), | |
| 373 | + | exit_code: Some(0), | |
| 374 | + | passed: true, | |
| 375 | + | summary: TestSummary { | |
| 376 | + | steps: vec![ | |
| 377 | + | StepResult { name: "cargo check".to_string(), passed: true }, | |
| 378 | + | StepResult { name: "cargo test".to_string(), passed: true }, | |
| 379 | + | ], | |
| 380 | + | total_passed: Some(759), | |
| 381 | + | total_failed: Some(0), | |
| 382 | + | }, | |
| 383 | + | raw_output: String::new(), | |
| 384 | + | filter: None, | |
| 385 | + | }; | |
| 386 | + | let out = format_test_result("mnw", &run); | |
| 387 | + | assert!(out.contains("mnw: PASSED")); | |
| 388 | + | assert!(out.contains("Duration: 120s")); | |
| 389 | + | assert!(out.contains("Tests: 759 passed, 0 failed")); | |
| 390 | + | assert!(out.contains("PASS cargo check")); | |
| 391 | + | assert!(out.contains("PASS cargo test")); | |
| 392 | + | assert!(!out.contains("Raw output")); | |
| 393 | + | } | |
| 394 | + | ||
| 395 | + | #[test] | |
| 396 | + | fn test_result_failed_shows_raw_output() { | |
| 397 | + | let run = TestRun { | |
| 398 | + | id: None, | |
| 399 | + | target: "mnw".to_string(), | |
| 400 | + | started_at: "2026-03-10T00:00:00Z".to_string(), | |
| 401 | + | finished_at: Some("2026-03-10T00:01:00Z".to_string()), | |
| 402 | + | duration_secs: Some(60), | |
| 403 | + | exit_code: Some(1), | |
| 404 | + | passed: false, | |
| 405 | + | summary: TestSummary { | |
| 406 | + | steps: vec![ | |
| 407 | + | StepResult { name: "cargo check".to_string(), passed: true }, | |
| 408 | + | StepResult { name: "cargo test".to_string(), passed: false }, | |
| 409 | + | ], | |
| 410 | + | total_passed: Some(750), | |
| 411 | + | total_failed: Some(9), | |
| 412 | + | }, | |
| 413 | + | raw_output: "thread 'test_foo' panicked at 'assertion failed'".to_string(), | |
| 414 | + | filter: None, | |
| 415 | + | }; | |
| 416 | + | let out = format_test_result("mnw", &run); | |
| 417 | + | assert!(out.contains("mnw: FAILED")); | |
| 418 | + | assert!(out.contains("PASS cargo check")); | |
| 419 | + | assert!(out.contains("FAIL cargo test")); | |
| 420 | + | assert!(out.contains("750 passed, 9 failed")); | |
| 421 | + | assert!(out.contains("Raw output:")); | |
| 422 | + | assert!(out.contains("assertion failed")); | |
| 423 | + | } | |
| 424 | + | ||
| 425 | + | #[test] | |
| 426 | + | fn test_result_no_duration_or_counts() { | |
| 427 | + | let run = TestRun { | |
| 428 | + | id: None, | |
| 429 | + | target: "svc".to_string(), | |
| 430 | + | started_at: "2026-03-10T00:00:00Z".to_string(), | |
| 431 | + | finished_at: None, | |
| 432 | + | duration_secs: None, | |
| 433 | + | exit_code: None, | |
| 434 | + | passed: true, | |
| 435 | + | summary: TestSummary { | |
| 436 | + | steps: vec![], | |
| 437 | + | total_passed: None, | |
| 438 | + | total_failed: None, | |
| 439 | + | }, | |
| 440 | + | raw_output: String::new(), | |
| 441 | + | filter: None, | |
| 442 | + | }; | |
| 443 | + | let out = format_test_result("svc", &run); | |
| 444 | + | assert!(out.contains("svc: PASSED")); | |
| 445 | + | assert!(!out.contains("Duration:")); | |
| 446 | + | assert!(!out.contains("Tests:")); | |
| 447 | + | } | |
| 448 | + | ||
| 449 | + | // --- format_status_target --- | |
| 450 | + | ||
| 451 | + | #[test] | |
| 452 | + | fn status_target_with_health_and_tests() { | |
| 453 | + | let health = HealthSnapshot { | |
| 454 | + | id: None, | |
| 455 | + | target: "mnw".to_string(), | |
| 456 | + | status: HealthStatus::Operational, | |
| 457 | + | checked_at: "2026-03-10T00:00:00Z".to_string(), | |
| 458 | + | response_time_ms: 95, | |
| 459 | + | details: Some(HealthDetails { | |
| 460 | + | version: Some("2.1.0".to_string()), | |
| 461 | + | uptime: None, | |
| 462 | + | checks: None, | |
| 463 | + | monitoring: None, | |
| 464 | + | }), | |
| 465 | + | error: None, | |
| 466 | + | }; | |
| 467 | + | let test = TestRun { | |
| 468 | + | id: None, | |
| 469 | + | target: "mnw".to_string(), | |
| 470 | + | started_at: "2026-03-10T00:00:00Z".to_string(), | |
| 471 | + | finished_at: Some("2026-03-10T00:01:00Z".to_string()), | |
| 472 | + | duration_secs: Some(60), | |
| 473 | + | exit_code: Some(0), | |
| 474 | + | passed: true, | |
| 475 | + | summary: TestSummary { | |
| 476 | + | steps: vec![], | |
| 477 | + | total_passed: Some(100), | |
| 478 | + | total_failed: Some(0), | |
| 479 | + | }, | |
| 480 | + | raw_output: String::new(), | |
| 481 | + | filter: None, | |
| 482 | + | }; | |
| 483 | + | let out = format_status_target("mnw", "MakeNotWork", Some(&health), None, None, Some(&test), None, None); | |
| 484 | + | assert!(out.contains("=== mnw (MakeNotWork) ===")); | |
| 485 | + | assert!(out.contains("Health: [OK] operational (95ms) v2.1.0")); | |
| 486 | + | assert!(out.contains("Tests: PASSED (60s)")); | |
| 487 | + | assert!(out.contains("100 passed, 0 failed")); | |
| 488 | + | } | |
| 489 | + | ||
| 490 | + | #[test] | |
| 491 | + | fn status_target_no_data() { | |
| 492 | + | let out = format_status_target("mnw", "MakeNotWork", None, None, None, None, None, None); | |
| 493 | + | assert!(out.contains("=== mnw (MakeNotWork) ===")); | |
| 494 | + | assert!(out.contains("Health: no data")); | |
| 495 | + | assert!(out.contains("Tests: no data")); | |
| 496 | + | } | |
| 497 | + | ||
| 498 | + | #[test] | |
| 499 | + | fn status_target_health_only() { | |
| 500 | + | let health = HealthSnapshot { |
Lines truncated
| @@ -0,0 +1,32 @@ | |||
| 1 | + | //! Typed error enum for PoM, replacing `Box<dyn Error>` throughout the crate. | |
| 2 | + | ||
| 3 | + | use thiserror::Error; | |
| 4 | + | ||
| 5 | + | #[derive(Debug, Error)] | |
| 6 | + | pub enum PomError { | |
| 7 | + | #[error(transparent)] | |
| 8 | + | Io(#[from] std::io::Error), | |
| 9 | + | ||
| 10 | + | #[error(transparent)] | |
| 11 | + | Db(#[from] sqlx::Error), | |
| 12 | + | ||
| 13 | + | #[error(transparent)] | |
| 14 | + | TomlParse(#[from] toml::de::Error), | |
| 15 | + | ||
| 16 | + | #[error(transparent)] | |
| 17 | + | Http(#[from] reqwest::Error), | |
| 18 | + | ||
| 19 | + | #[error(transparent)] | |
| 20 | + | Json(#[from] serde_json::Error), | |
| 21 | + | ||
| 22 | + | #[error(transparent)] | |
| 23 | + | LogFilter(#[from] tracing_subscriber::filter::ParseError), | |
| 24 | + | ||
| 25 | + | #[error(transparent)] | |
| 26 | + | Join(#[from] tokio::task::JoinError), | |
| 27 | + | ||
| 28 | + | #[error("{0}")] | |
| 29 | + | Config(String), | |
| 30 | + | } | |
| 31 | + | ||
| 32 | + | pub type Result<T> = std::result::Result<T, PomError>; |
| @@ -1,5 +1,14 @@ | |||
| 1 | + | //! PoM — Production Operations Monitor. | |
| 2 | + | //! | |
| 3 | + | //! Health checks, test orchestration, and peer mesh for monitoring deployed services. | |
| 4 | + | ||
| 5 | + | pub mod alerts; | |
| 6 | + | pub mod api; | |
| 1 | 7 | pub mod checks; | |
| 2 | 8 | pub mod config; | |
| 3 | 9 | pub mod db; | |
| 10 | + | pub mod display; | |
| 11 | + | pub mod error; | |
| 12 | + | pub mod peer; | |
| 4 | 13 | pub mod tools; | |
| 5 | 14 | pub mod types; |
| @@ -1,14 +1,17 @@ | |||
| 1 | + | //! PoM CLI entry point — parses subcommands and dispatches to handlers or MCP server. | |
| 2 | + | ||
| 1 | 3 | use clap::{Parser, Subcommand}; | |
| 2 | 4 | use rmcp::ServiceExt; | |
| 3 | 5 | use tokio::io::{stdin, stdout}; | |
| 4 | 6 | use tracing::info; | |
| 5 | 7 | use tracing_subscriber::{fmt, prelude::*, EnvFilter}; | |
| 6 | 8 | ||
| 7 | - | use pom::checks::{http, ssh}; | |
| 8 | 9 | use pom::config::{self, Config}; | |
| 9 | 10 | use pom::db; | |
| 11 | + | use pom::error::Result; | |
| 10 | 12 | use pom::tools::PomServer; | |
| 11 | - | use pom::types::HealthStatus; | |
| 13 | + | ||
| 14 | + | mod cli; | |
| 12 | 15 | ||
| 13 | 16 | #[derive(Parser)] | |
| 14 | 17 | #[command(name = "pom", about = "Peace of Mind — health checks and test orchestration")] | |
| @@ -51,7 +54,7 @@ enum Commands { | |||
| 51 | 54 | /// View history | |
| 52 | 55 | History { | |
| 53 | 56 | #[command(subcommand)] | |
| 54 | - | kind: HistoryKind, | |
| 57 | + | kind: cli::HistoryKind, | |
| 55 | 58 | }, | |
| 56 | 59 | /// Prune old records | |
| 57 | 60 | Prune { | |
| @@ -61,28 +64,8 @@ enum Commands { | |||
| 61 | 64 | }, | |
| 62 | 65 | /// Run as a daemon, checking health at intervals | |
| 63 | 66 | Serve, | |
| 64 | - | } | |
| 65 | - | ||
| 66 | - | #[derive(Subcommand)] | |
| 67 | - | enum HistoryKind { | |
| 68 | - | /// Health check history | |
| 69 | - | Health { | |
| 70 | - | /// Filter by target | |
| 71 | - | target: Option<String>, | |
| 72 | - | /// Number of results | |
| 73 | - | #[arg(short, default_value = "10")] | |
| 74 | - | n: i64, | |
| 75 | - | /// Output as JSON | |
| 76 | - | #[arg(long)] | |
| 77 | - | json: bool, | |
| 78 | - | }, | |
| 79 | - | /// Test run history | |
| 80 | - | Tests { | |
| 81 | - | /// Filter by target | |
| 82 | - | target: Option<String>, | |
| 83 | - | /// Number of results | |
| 84 | - | #[arg(short, default_value = "10")] | |
| 85 | - | n: i64, | |
| 67 | + | /// Show peer mesh status | |
| 68 | + | Mesh { | |
| 86 | 69 | /// Output as JSON | |
| 87 | 70 | #[arg(long)] | |
| 88 | 71 | json: bool, | |
| @@ -90,7 +73,12 @@ enum HistoryKind { | |||
| 90 | 73 | } | |
| 91 | 74 | ||
| 92 | 75 | #[tokio::main] | |
| 93 | - | async fn main() -> Result<(), Box<dyn std::error::Error + Send + Sync>> { | |
| 76 | + | async fn main() -> Result<()> { | |
| 77 | + | // Install the default rustls crypto provider before any TLS operations. | |
| 78 | + | // Both aws-lc-rs and ring are in the dependency tree (via reqwest and tokio-rustls), | |
| 79 | + | // so rustls can't auto-detect which to use. | |
| 80 | + | let _ = tokio_rustls::rustls::crypto::ring::default_provider().install_default(); | |
| 81 | + | ||
| 94 | 82 | let cli = Cli::parse(); | |
| 95 | 83 | ||
| 96 | 84 | let config_path = cli.config.as_deref(); | |
| @@ -102,7 +90,7 @@ async fn main() -> Result<(), Box<dyn std::error::Error + Send + Sync>> { | |||
| 102 | 90 | } | |
| 103 | 91 | } | |
| 104 | 92 | ||
| 105 | - | async fn run_mcp_server(config: Config) -> Result<(), Box<dyn std::error::Error + Send + Sync>> { | |
| 93 | + | async fn run_mcp_server(config: Config) -> Result<()> { | |
| 106 | 94 | tracing_subscriber::registry() | |
| 107 | 95 | .with(fmt::layer().with_writer(std::io::stderr)) | |
| 108 | 96 | .with(EnvFilter::from_default_env().add_directive("pom=info".parse()?)) | |
| @@ -128,7 +116,7 @@ async fn run_mcp_server(config: Config) -> Result<(), Box<dyn std::error::Error | |||
| 128 | 116 | async fn run_cli( | |
| 129 | 117 | cmd: Commands, | |
| 130 | 118 | config: Config, | |
| 131 | - | ) -> Result<(), Box<dyn std::error::Error + Send + Sync>> { | |
| 119 | + | ) -> Result<()> { | |
| 132 | 120 | let log_level = if matches!(cmd, Commands::Serve) { "pom=info" } else { "pom=warn" }; | |
| 133 | 121 | tracing_subscriber::registry() | |
| 134 | 122 | .with(fmt::layer().with_writer(std::io::stderr)) | |
| @@ -139,335 +127,12 @@ async fn run_cli( | |||
| 139 | 127 | let pool = db::connect(&db_path).await?; | |
| 140 | 128 | ||
| 141 | 129 | match cmd { | |
| 142 | - | Commands::Health { target, json } => cmd_health(&pool, &config, target.as_deref(), json).await, | |
| 143 | - | Commands::Test { target, filter, json } => cmd_test(&pool, &config, &target, filter.as_deref(), json).await, | |
| 144 | - | Commands::Status { json } => cmd_status(&pool, &config, json).await, | |
| 145 | - | Commands::History { kind } => cmd_history(&pool, kind).await, | |
| 146 | - | Commands::Prune { days } => cmd_prune(&pool, days).await, | |
| 147 | - | Commands::Serve => cmd_serve(&pool, &config).await, | |
| 148 | - | } | |
| 149 | - | } | |
| 150 | - | ||
| 151 | - | async fn cmd_health( | |
| 152 | - | pool: &sqlx::SqlitePool, | |
| 153 | - | config: &Config, | |
| 154 | - | target: Option<&str>, | |
| 155 | - | json: bool, | |
| 156 | - | ) -> Result<(), Box<dyn std::error::Error + Send + Sync>> { | |
| 157 | - | let targets: Vec<String> = match target { | |
| 158 | - | Some(t) => { | |
| 159 | - | if config.get_target(t).is_none() { | |
| 160 | - | eprintln!("Unknown target: {t}"); | |
| 161 | - | std::process::exit(1); | |
| 162 | - | } | |
| 163 | - | vec![t.to_string()] | |
| 164 | - | } | |
| 165 | - | None => config.target_names(), | |
| 166 | - | }; | |
| 167 | - | ||
| 168 | - | let mut snapshots = Vec::new(); | |
| 169 | - | ||
| 170 | - | for name in &targets { | |
| 171 | - | let target_config = config.get_target(name).unwrap(); | |
| 172 | - | if let Some(health_config) = &target_config.health { | |
| 173 | - | let snapshot = http::check_health(name, health_config).await; | |
| 174 | - | db::insert_health_check(pool, &snapshot).await?; | |
| 175 | - | snapshots.push(snapshot); | |
| 176 | - | } else { | |
| 177 | - | eprintln!("{name}: no health endpoint configured"); | |
| 178 | - | } | |
| 179 | - | } | |
| 180 | - | ||
| 181 | - | if json { | |
| 182 | - | println!("{}", serde_json::to_string_pretty(&snapshots)?); | |
| 183 | - | } else { | |
| 184 | - | for s in &snapshots { | |
| 185 | - | let icon = match s.status { | |
| 186 | - | HealthStatus::Operational => "OK", | |
| 187 | - | HealthStatus::Degraded => "WARN", | |
| 188 | - | HealthStatus::Error => "ERR", | |
| 189 | - | HealthStatus::Unreachable => "DOWN", | |
| 190 | - | }; | |
| 191 | - | print!("[{icon}] {} — {}", s.target, s.status); | |
| 192 | - | print!(" ({}ms)", s.response_time_ms); | |
| 193 | - | if let Some(details) = &s.details { | |
| 194 | - | if let Some(v) = &details.version { | |
| 195 | - | print!(" v{v}"); | |
| 196 | - | } | |
| 197 | - | if let Some(u) = &details.uptime { | |
| 198 | - | print!(" up {u}"); | |
| 199 | - | } | |
| 200 | - | } | |
| 201 | - | println!(); | |
| 202 | - | if let Some(err) = &s.error { | |
| 203 | - | println!(" {err}"); | |
| 204 | - | } | |
| 205 | - | } | |
| 206 | - | } | |
| 207 | - | ||
| 208 | - | Ok(()) | |
| 209 | - | } | |
| 210 | - | ||
| 211 | - | async fn cmd_test( | |
| 212 | - | pool: &sqlx::SqlitePool, | |
| 213 | - | config: &Config, | |
| 214 | - | target_name: &str, | |
| 215 | - | filter: Option<&str>, | |
| 216 | - | json: bool, | |
| 217 | - | ) -> Result<(), Box<dyn std::error::Error + Send + Sync>> { | |
| 218 | - | let target = config.get_target(target_name).ok_or_else(|| { | |
| 219 | - | format!("Unknown target: {target_name}") | |
| 220 | - | })?; | |
| 221 | - | let tests_config = target.tests.as_ref().ok_or_else(|| { | |
| 222 | - | format!("Target '{target_name}' has no test configuration") | |
| 223 | - | })?; | |
| 224 | - | ||
| 225 | - | eprintln!("Running tests on {target_name}..."); | |
| 226 | - | let run = ssh::run_tests(target_name, tests_config, filter).await; | |
| 227 | - | db::insert_test_run(pool, &run).await?; | |
| 228 | - | ||
| 229 | - | if json { | |
| 230 | - | let summary = serde_json::json!({ | |
| 231 | - | "target": run.target, | |
| 232 | - | "passed": run.passed, | |
| 233 | - | "exit_code": run.exit_code, | |
| 234 | - | "duration_secs": run.duration_secs, | |
| 235 | - | "started_at": run.started_at, | |
| 236 | - | "finished_at": run.finished_at, | |
| 237 | - | "filter": run.filter, | |
| 238 | - | "summary": run.summary, | |
| 239 | - | }); | |
| 240 | - | println!("{}", serde_json::to_string_pretty(&summary)?); | |
| 241 | - | } else { | |
| 242 | - | let result = if run.passed { "PASSED" } else { "FAILED" }; | |
| 243 | - | println!("{target_name}: {result}"); | |
| 244 | - | if let Some(d) = run.duration_secs { | |
| 245 | - | println!("Duration: {d}s"); | |
| 246 | - | } | |
| 247 | - | if let (Some(p), Some(f)) = (run.summary.total_passed, run.summary.total_failed) { | |
| 248 | - | println!("Tests: {p} passed, {f} failed"); | |
| 249 | - | } | |
| 250 | - | for step in &run.summary.steps { | |
| 251 | - | let mark = if step.passed { "PASS" } else { "FAIL" }; | |
| 252 | - | println!(" {mark} {}", step.name); | |
| 253 | - | } | |
| 254 | - | if !run.passed { | |
| 255 | - | println!("\nRaw output:\n{}", run.raw_output); | |
| 256 | - | } | |
| 130 | + | Commands::Health { target, json } => cli::cmd_health(&pool, &config, target.as_deref(), json).await, | |
| 131 | + | Commands::Test { target, filter, json } => cli::cmd_test(&pool, &config, &target, filter.as_deref(), json).await, | |
| 132 | + | Commands::Status { json } => cli::cmd_status(&pool, &config, json).await, | |
| 133 | + | Commands::History { kind } => cli::cmd_history(&pool, kind).await, | |
| 134 | + | Commands::Prune { days } => cli::cmd_prune(&pool, days).await, | |
| 135 | + | Commands::Serve => cli::cmd_serve(&pool, &config).await, | |
| 136 | + | Commands::Mesh { json } => cli::cmd_mesh(&config, json).await, | |
| 257 | 137 | } | |
| 258 | - | ||
| 259 | - | Ok(()) | |
| 260 | - | } | |
| 261 | - | ||
| 262 | - | async fn cmd_status( | |
| 263 | - | pool: &sqlx::SqlitePool, | |
| 264 | - | config: &Config, | |
| 265 | - | json: bool, | |
| 266 | - | ) -> Result<(), Box<dyn std::error::Error + Send + Sync>> { | |
| 267 | - | let mut target_statuses = Vec::new(); | |
| 268 | - | ||
| 269 | - | for name in config.target_names() { | |
| 270 | - | let target = config.get_target(&name).unwrap(); | |
| 271 | - | let health = db::get_latest_health(pool, &name).await?; | |
| 272 | - | let test = db::get_latest_test_run(pool, &name).await?; | |
| 273 | - | ||
| 274 | - | if json { | |
| 275 | - | target_statuses.push(serde_json::json!({ | |
| 276 | - | "target": name, | |
| 277 | - | "label": target.label, | |
| 278 | - | "health": health, | |
| 279 | - | "last_test": test.map(|t| serde_json::json!({ | |
| 280 | - | "passed": t.passed, | |
| 281 | - | "exit_code": t.exit_code, | |
| 282 | - | "duration_secs": t.duration_secs, | |
| 283 | - | "started_at": t.started_at, | |
| 284 | - | "summary": t.summary, | |
| 285 | - | })), | |
| 286 | - | })); | |
| 287 | - | } else { | |
| 288 | - | println!("=== {} ({}) ===", name, target.label); | |
| 289 | - | if let Some(h) = &health { | |
| 290 | - | let icon = match h.status { | |
| 291 | - | HealthStatus::Operational => "OK", | |
| 292 | - | HealthStatus::Degraded => "WARN", | |
| 293 | - | HealthStatus::Error => "ERR", | |
| 294 | - | HealthStatus::Unreachable => "DOWN", | |
| 295 | - | }; | |
| 296 | - | print!(" Health: [{icon}] {}", h.status); | |
| 297 | - | print!(" ({}ms)", h.response_time_ms); | |
| 298 | - | if let Some(d) = &h.details { | |
| 299 | - | if let Some(v) = &d.version { | |
| 300 | - | print!(" v{v}"); | |
| 301 | - | } | |
| 302 | - | } | |
| 303 | - | println!(); | |
| 304 | - | } else { | |
| 305 | - | println!(" Health: no data"); | |
| 306 | - | } | |
| 307 | - | ||
| 308 | - | if let Some(t) = &test { | |
| 309 | - | let result = if t.passed { "PASSED" } else { "FAILED" }; | |
| 310 | - | print!(" Tests: {result}"); | |
| 311 | - | if let Some(d) = t.duration_secs { | |
| 312 | - | print!(" ({d}s)"); | |
| 313 | - | } | |
| 314 | - | println!(); | |
| 315 | - | if let (Some(p), Some(f)) = (t.summary.total_passed, t.summary.total_failed) { | |
| 316 | - | println!(" {p} passed, {f} failed"); | |
| 317 | - | } | |
| 318 | - | } else { | |
| 319 | - | println!(" Tests: no data"); | |
| 320 | - | } | |
| 321 | - | println!(); | |
| 322 | - | } | |
| 323 | - | } | |
| 324 | - | ||
| 325 | - | if json { | |
| 326 | - | println!("{}", serde_json::to_string_pretty(&target_statuses)?); | |
| 327 | - | } | |
| 328 | - | ||
| 329 | - | Ok(()) | |
| 330 | - | } | |
| 331 | - | ||
| 332 | - | async fn cmd_history( | |
| 333 | - | pool: &sqlx::SqlitePool, | |
| 334 | - | kind: HistoryKind, | |
| 335 | - | ) -> Result<(), Box<dyn std::error::Error + Send + Sync>> { | |
| 336 | - | match kind { | |
| 337 | - | HistoryKind::Health { target, n, json } => { | |
| 338 | - | let history = db::get_health_history(pool, target.as_deref(), n).await?; | |
| 339 | - | if json { | |
| 340 | - | println!("{}", serde_json::to_string_pretty(&history)?); | |
| 341 | - | } else if history.is_empty() { | |
| 342 | - | println!("No health check history."); | |
| 343 | - | } else { | |
| 344 | - | for h in &history { | |
| 345 | - | let icon = match h.status { | |
| 346 | - | HealthStatus::Operational => "OK", | |
| 347 | - | HealthStatus::Degraded => "WARN", | |
| 348 | - | HealthStatus::Error => "ERR", | |
| 349 | - | HealthStatus::Unreachable => "DOWN", | |
| 350 | - | }; | |
| 351 | - | println!("[{icon}] {} — {} ({}ms) {}", h.target, h.status, h.response_time_ms, h.checked_at); | |
| 352 | - | } | |
| 353 | - | } | |
| 354 | - | } | |
| 355 | - | HistoryKind::Tests { target, n, json } => { | |
| 356 | - | let history = db::get_test_history(pool, target.as_deref(), n).await?; | |
| 357 | - | if json { | |
| 358 | - | let summaries: Vec<serde_json::Value> = history | |
| 359 | - | .iter() | |
| 360 | - | .map(|r| serde_json::json!({ | |
| 361 | - | "id": r.id, | |
| 362 | - | "target": r.target, | |
| 363 | - | "passed": r.passed, | |
| 364 | - | "exit_code": r.exit_code, | |
| 365 | - | "duration_secs": r.duration_secs, | |
| 366 | - | "started_at": r.started_at, | |
| 367 | - | "summary": r.summary, | |
| 368 | - | })) | |
| 369 | - | .collect(); | |
| 370 | - | println!("{}", serde_json::to_string_pretty(&summaries)?); | |
| 371 | - | } else if history.is_empty() { | |
| 372 | - | println!("No test run history."); | |
| 373 | - | } else { | |
| 374 | - | for r in &history { | |
| 375 | - | let result = if r.passed { "PASS" } else { "FAIL" }; | |
| 376 | - | print!("[{result}] {}", r.target); | |
| 377 | - | if let Some(d) = r.duration_secs { | |
| 378 | - | print!(" ({d}s)"); | |
| 379 | - | } | |
| 380 | - | print!(" {}", r.started_at); | |
| 381 | - | if let (Some(p), Some(f)) = (r.summary.total_passed, r.summary.total_failed) { | |
| 382 | - | print!(" — {p} passed, {f} failed"); | |
| 383 | - | } | |
| 384 | - | println!(); | |
| 385 | - | } | |
| 386 | - | } | |
| 387 | - | } | |
| 388 | - | } | |
| 389 | - | ||
| 390 | - | Ok(()) | |
| 391 | - | } | |
| 392 | - | ||
| 393 | - | async fn cmd_prune( | |
| 394 | - | pool: &sqlx::SqlitePool, | |
| 395 | - | days: i64, | |
| 396 | - | ) -> Result<(), Box<dyn std::error::Error + Send + Sync>> { | |
| 397 | - | let (health_pruned, test_pruned) = db::prune_old_records(pool, days).await?; | |
| 398 | - | println!("Pruned {health_pruned} health checks and {test_pruned} test runs older than {days} days."); | |
| 399 | - | Ok(()) | |
| 400 | - | } | |
| 401 | - | ||
| 402 | - | async fn cmd_serve( | |
| 403 | - | pool: &sqlx::SqlitePool, | |
| 404 | - | config: &Config, | |
| 405 | - | ) -> Result<(), Box<dyn std::error::Error + Send + Sync>> { | |
| 406 | - | let default_interval = config.serve.interval_secs; | |
| 407 | - | let prune_days = config.serve.prune_days; | |
| 408 | - | ||
| 409 | - | info!("Starting serve mode (default interval: {default_interval}s, prune: {prune_days}d)"); | |
| 410 | - | ||
| 411 | - | // Spawn a health check task per target | |
| 412 | - | let mut handles = Vec::new(); | |
| 413 | - | ||
| 414 | - | for name in config.target_names() { | |
| 415 | - | let target_config = config.get_target(&name).unwrap().clone(); | |
| 416 | - | if let Some(health_config) = target_config.health { | |
| 417 | - | let interval_secs = health_config.interval_secs.unwrap_or(default_interval); | |
| 418 | - | let pool = pool.clone(); | |
| 419 | - | let name = name.clone(); | |
| 420 | - | ||
| 421 | - | info!("{name}: health check every {interval_secs}s"); | |
| 422 | - | ||
| 423 | - | handles.push(tokio::spawn(async move { | |
| 424 | - | let mut interval = tokio::time::interval( | |
| 425 | - | std::time::Duration::from_secs(interval_secs), | |
| 426 | - | ); | |
| 427 | - | loop { | |
| 428 | - | interval.tick().await; | |
| 429 | - | let snapshot = http::check_health(&name, &health_config).await; | |
| 430 | - | info!("{}: {} ({}ms)", name, snapshot.status, snapshot.response_time_ms); | |
| 431 | - | if let Err(e) = db::insert_health_check(&pool, &snapshot).await { | |
| 432 | - | tracing::error!("{name}: failed to store health check: {e}"); | |
| 433 | - | } | |
| 434 | - | } | |
| 435 | - | })); | |
| 436 | - | } | |
| 437 | - | } | |
| 438 | - | ||
| 439 | - | // Spawn daily prune task | |
| 440 | - | let prune_pool = pool.clone(); | |
| 441 | - | handles.push(tokio::spawn(async move { | |
| 442 | - | let mut interval = tokio::time::interval( | |
| 443 | - | std::time::Duration::from_secs(86400), | |
| 444 | - | ); | |
| 445 | - | loop { | |
| 446 | - | interval.tick().await; | |
| 447 | - | match db::prune_old_records(&prune_pool, prune_days).await { | |
| 448 | - | Ok((h, t)) => info!("Pruned {h} health checks, {t} test runs"), | |
| 449 | - | Err(e) => tracing::error!("Prune failed: {e}"), | |
| 450 | - | } | |
| 451 | - | } | |
| 452 | - | })); | |
| 453 | - | ||
| 454 | - | // Wait for shutdown signal | |
| 455 | - | let mut sigterm = tokio::signal::unix::signal( | |
| 456 | - | tokio::signal::unix::SignalKind::terminate(), | |
| 457 | - | )?; | |
| 458 | - | ||
| 459 | - | tokio::select! { | |
| 460 | - | _ = tokio::signal::ctrl_c() => { | |
| 461 | - | info!("Received SIGINT, shutting down"); | |
| 462 | - | } | |
| 463 | - | _ = sigterm.recv() => { | |
| 464 | - | info!("Received SIGTERM, shutting down"); | |
| 465 | - | } | |
| 466 | - | } | |
| 467 | - | ||
| 468 | - | for handle in handles { | |
| 469 | - | handle.abort(); | |
| 470 | - | } | |
| 471 | - | ||
| 472 | - | Ok(()) | |
| 473 | 138 | } |
| @@ -0,0 +1,536 @@ | |||
| 1 | + | //! Peer mesh — identity, heartbeat monitoring, and mesh state aggregation. | |
| 2 | + | ||
| 3 | + | use std::collections::HashMap; | |
| 4 | + | use std::sync::Arc; | |
| 5 | + | use tokio::sync::RwLock; | |
| 6 | + | use serde::Serialize; | |
| 7 | + | ||
| 8 | + | use crate::alerts::Alerter; | |
| 9 | + | use crate::config::PeerConfig; | |
| 10 | + | use crate::error::{PomError, Result}; | |
| 11 | + | ||
| 12 | + | // --- Identity --- | |
| 13 | + | ||
| 14 | + | /// Load or create a persistent instance ID (UUID v4). | |
| 15 | + | /// Stored at `~/.local/share/pom/instance_id`, same directory as `pom.db`. | |
| 16 | + | pub fn load_or_create_instance_id( | |
| 17 | + | override_id: Option<&str>, | |
| 18 | + | ) -> Result<String> { | |
| 19 | + | if let Some(id) = override_id { | |
| 20 | + | return Ok(id.to_string()); | |
| 21 | + | } | |
| 22 | + | ||
| 23 | + | let data_dir = dirs::data_local_dir() | |
| 24 | + | .ok_or_else(|| PomError::Config("Could not determine data directory".into()))?; | |
| 25 | + | let pom_dir = data_dir.join("pom"); | |
| 26 | + | std::fs::create_dir_all(&pom_dir)?; | |
| 27 | + | let id_path = pom_dir.join("instance_id"); | |
| 28 | + | ||
| 29 | + | if id_path.exists() { | |
| 30 | + | let id = std::fs::read_to_string(&id_path)?.trim().to_string(); | |
| 31 | + | if !id.is_empty() { | |
| 32 | + | return Ok(id); | |
| 33 | + | } | |
| 34 | + | } | |
| 35 | + | ||
| 36 | + | let id = uuid::Uuid::new_v4().to_string(); | |
| 37 | + | std::fs::write(&id_path, &id)?; | |
| 38 | + | Ok(id) | |
| 39 | + | } | |
| 40 | + | ||
| 41 | + | // --- Types --- | |
| 42 | + | ||
| 43 | + | #[derive(Debug, Clone, Serialize, serde::Deserialize)] | |
| 44 | + | pub struct InstanceInfo { | |
| 45 | + | pub id: String, | |
| 46 | + | pub name: String, | |
| 47 | + | pub version: String, | |
| 48 | + | pub targets: Vec<String>, | |
| 49 | + | pub started_at: String, | |
| 50 | + | } | |
| 51 | + | ||
| 52 | + | #[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize)] | |
| 53 | + | #[serde(rename_all = "lowercase")] | |
| 54 | + | pub enum PeerStatus { | |
| 55 | + | Online, | |
| 56 | + | GracePeriod, | |
| 57 | + | Missing, | |
| 58 | + | Unknown, | |
| 59 | + | } | |
| 60 | + | ||
| 61 | + | impl std::fmt::Display for PeerStatus { | |
| 62 | + | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { | |
| 63 | + | match self { | |
| 64 | + | Self::Online => write!(f, "online"), | |
| 65 | + | Self::GracePeriod => write!(f, "grace_period"), | |
| 66 | + | Self::Missing => write!(f, "missing"), | |
| 67 | + | Self::Unknown => write!(f, "unknown"), | |
| 68 | + | } | |
| 69 | + | } | |
| 70 | + | } | |
| 71 | + | ||
| 72 | + | #[derive(Debug, Clone, Copy, Default, PartialEq, Eq, Serialize, serde::Deserialize)] | |
| 73 | + | #[serde(rename_all = "lowercase")] | |
| 74 | + | pub enum OnMissing { | |
| 75 | + | Alert, | |
| 76 | + | #[default] | |
| 77 | + | Log, | |
| 78 | + | Ignore, | |
| 79 | + | } | |
| 80 | + | ||
| 81 | + | #[derive(Debug, Clone, Serialize)] | |
| 82 | + | pub struct PeerState { | |
| 83 | + | pub address: String, | |
| 84 | + | pub on_missing: OnMissing, | |
| 85 | + | pub grace_count: u32, | |
| 86 | + | pub status: PeerStatus, | |
| 87 | + | pub info: Option<InstanceInfo>, | |
| 88 | + | pub last_seen: Option<String>, | |
| 89 | + | pub latency_ms: Option<u64>, | |
| 90 | + | pub consecutive_failures: u32, | |
| 91 | + | #[serde(skip)] | |
| 92 | + | pub known_id: Option<String>, | |
| 93 | + | /// Cached status data from the peer's /api/peer/status endpoint. | |
| 94 | + | #[serde(skip)] | |
| 95 | + | pub status_data: Option<serde_json::Value>, | |
| 96 | + | } | |
| 97 | + | ||
| 98 | + | // --- Mesh State --- | |
| 99 | + | ||
| 100 | + | #[derive(Debug)] | |
| 101 | + | pub struct MeshState { | |
| 102 | + | pub instance: InstanceInfo, | |
| 103 | + | pub peers: HashMap<String, PeerState>, | |
| 104 | + | } | |
| 105 | + | ||
| 106 | + | pub type SharedMeshState = Arc<RwLock<MeshState>>; | |
| 107 | + | ||
| 108 | + | pub fn new_mesh_state( | |
| 109 | + | instance: InstanceInfo, | |
| 110 | + | peer_configs: &HashMap<String, PeerConfig>, | |
| 111 | + | ) -> SharedMeshState { | |
| 112 | + | let mut peers = HashMap::new(); | |
| 113 | + | for (name, cfg) in peer_configs { | |
| 114 | + | peers.insert( | |
| 115 | + | name.clone(), | |
| 116 | + | PeerState { | |
| 117 | + | address: cfg.address.clone(), | |
| 118 | + | on_missing: cfg.on_missing, | |
| 119 | + | grace_count: cfg.grace_count.unwrap_or(3), | |
| 120 | + | status: PeerStatus::Unknown, | |
| 121 | + | info: None, | |
| 122 | + | last_seen: None, | |
| 123 | + | latency_ms: None, | |
| 124 | + | consecutive_failures: 0, | |
| 125 | + | known_id: None, | |
| 126 | + | status_data: None, | |
| 127 | + | }, | |
| 128 | + | ); | |
| 129 | + | } | |
| 130 | + | Arc::new(RwLock::new(MeshState { instance, peers })) | |
| 131 | + | } | |
| 132 | + | ||
| 133 | + | // --- Heartbeat --- | |
| 134 | + | ||
| 135 | + | pub async fn spawn_heartbeat_tasks( | |
| 136 | + | mesh: SharedMeshState, | |
| 137 | + | pool: sqlx::SqlitePool, | |
| 138 | + | interval_secs: u64, | |
| 139 | + | alerter: Option<Alerter>, | |
| 140 | + | ) -> Vec<tokio::task::JoinHandle<()>> { | |
| 141 | + | let peer_names: Vec<String> = { | |
| 142 | + | let mesh_guard = mesh.read().await; | |
| 143 | + | mesh_guard.peers.keys().cloned().collect() | |
| 144 | + | }; | |
| 145 | + | ||
| 146 | + | let mut handles = Vec::new(); | |
| 147 | + | for peer_name in peer_names { | |
| 148 | + | let mesh = Arc::clone(&mesh); | |
| 149 | + | let pool = pool.clone(); | |
| 150 | + | let alerter = alerter.clone(); | |
| 151 | + | handles.push(tokio::spawn(async move { | |
| 152 | + | heartbeat_loop(&peer_name, mesh, pool, interval_secs, alerter).await; | |
| 153 | + | })); | |
| 154 | + | } | |
| 155 | + | handles | |
| 156 | + | } | |
| 157 | + | ||
| 158 | + | async fn heartbeat_loop( | |
| 159 | + | peer_name: &str, | |
| 160 | + | mesh: SharedMeshState, | |
| 161 | + | pool: sqlx::SqlitePool, | |
| 162 | + | interval_secs: u64, | |
| 163 | + | alerter: Option<Alerter>, | |
| 164 | + | ) { | |
| 165 | + | let mut interval = tokio::time::interval(std::time::Duration::from_secs(interval_secs)); | |
| 166 | + | // Skip the first immediate tick — give peers time to start up | |
| 167 | + | interval.tick().await; | |
| 168 | + | ||
| 169 | + | let address = { | |
| 170 | + | let state = mesh.read().await; | |
| 171 | + | match state.peers.get(peer_name) { | |
| 172 | + | Some(p) => p.address.clone(), | |
| 173 | + | None => return, | |
| 174 | + | } | |
| 175 | + | }; | |
| 176 | + | ||
| 177 | + | let client = reqwest::Client::builder() | |
| 178 | + | .timeout(std::time::Duration::from_secs(10)) | |
| 179 | + | .build() | |
| 180 | + | .unwrap_or_default(); | |
| 181 | + | ||
| 182 | + | loop { | |
| 183 | + | interval.tick().await; | |
| 184 | + | ||
| 185 | + | let start = std::time::Instant::now(); | |
| 186 | + | let result = client | |
| 187 | + | .get(format!("http://{address}/api/peer/info")) | |
| 188 | + | .send() | |
| 189 | + | .await; | |
| 190 | + | let latency_ms = start.elapsed().as_millis() as u64; | |
| 191 | + | ||
| 192 | + | match result.and_then(|r| r.error_for_status()) { | |
| 193 | + | Ok(response) => { | |
| 194 | + | let info: Option<InstanceInfo> = response.json().await.ok(); | |
| 195 | + | handle_heartbeat_success(peer_name, &mesh, &pool, info, latency_ms, &alerter).await; | |
| 196 | + | } | |
| 197 | + | Err(_e) => { | |
| 198 | + | handle_heartbeat_failure(peer_name, &mesh, &pool, latency_ms, &alerter).await; | |
| 199 | + | } | |
| 200 | + | } | |
| 201 | + | ||
| 202 | + | // Also fetch /api/peer/status for mesh aggregation | |
| 203 | + | let status_result = client | |
| 204 | + | .get(format!("http://{address}/api/peer/status")) | |
| 205 | + | .send() | |
| 206 | + | .await; | |
| 207 | + | match status_result { | |
| 208 | + | Ok(resp) => { | |
| 209 | + | if let Ok(data) = resp.json::<serde_json::Value>().await { | |
| 210 | + | let mut state = mesh.write().await; | |
| 211 | + | if let Some(peer) = state.peers.get_mut(peer_name) { | |
| 212 | + | peer.status_data = Some(data); | |
| 213 | + | } | |
| 214 | + | } | |
| 215 | + | } | |
| 216 | + | Err(e) => { | |
| 217 | + | tracing::debug!("{peer_name}: failed to fetch /api/peer/status: {e}"); | |
| 218 | + | } | |
| 219 | + | } | |
| 220 | + | } | |
| 221 | + | } | |
| 222 | + | ||
| 223 | + | async fn handle_heartbeat_success( | |
| 224 | + | peer_name: &str, | |
| 225 | + | mesh: &SharedMeshState, | |
| 226 | + | pool: &sqlx::SqlitePool, | |
| 227 | + | info: Option<InstanceInfo>, | |
| 228 | + | latency_ms: u64, | |
| 229 | + | alerter: &Option<Alerter>, | |
| 230 | + | ) { | |
| 231 | + | let now = chrono::Utc::now().to_rfc3339(); | |
| 232 | + | ||
| 233 | + | // Update in-memory state under lock, collect data for DB writes | |
| 234 | + | let (first_contact_id, recovery_info) = { | |
| 235 | + | let mut state = mesh.write().await; | |
| 236 | + | let Some(peer) = state.peers.get_mut(peer_name) else { | |
| 237 | + | return; | |
| 238 | + | }; | |
| 239 | + | ||
| 240 | + | let was_missing = peer.status == PeerStatus::Missing || peer.status == PeerStatus::GracePeriod; | |
| 241 | + | ||
| 242 | + | // Check UUID consistency | |
| 243 | + | let mut first_contact = None; | |
| 244 | + | if let Some(ref info) = info { | |
| 245 | + | match &peer.known_id { | |
| 246 | + | None => { | |
| 247 | + | peer.known_id = Some(info.id.clone()); | |
| 248 | + | first_contact = Some(info.id.clone()); | |
| 249 | + | tracing::info!("{peer_name}: first contact, id={}", info.id); | |
| 250 | + | } | |
| 251 | + | Some(known) if known != &info.id => { | |
| 252 | + | tracing::warn!( | |
| 253 | + | "{peer_name}: UUID mismatch! expected={known}, got={}. Possible impersonation.", | |
| 254 | + | info.id | |
| 255 | + | ); | |
| 256 | + | } | |
| 257 | + | _ => {} | |
| 258 | + | } | |
| 259 | + | } | |
| 260 | + | ||
| 261 | + | // Collect recovery data before mutating state | |
| 262 | + | let recovery = if was_missing && peer.on_missing == OnMissing::Alert { | |
| 263 | + | Some(peer.address.clone()) | |
| 264 | + | } else { | |
| 265 | + | None | |
| 266 | + | }; | |
| 267 | + | ||
| 268 | + | if was_missing { | |
| 269 | + | tracing::info!("{peer_name}: recovered (was {:?})", peer.status); | |
| 270 | + | } | |
| 271 | + | ||
| 272 | + | peer.status = PeerStatus::Online; | |
| 273 | + | peer.info = info; | |
| 274 | + | peer.last_seen = Some(now); | |
| 275 | + | peer.latency_ms = Some(latency_ms); | |
| 276 | + | peer.consecutive_failures = 0; | |
| 277 | + | ||
| 278 | + | (first_contact, recovery) | |
| 279 | + | }; | |
| 280 | + | // Lock dropped — DB writes and alerts happen without holding mesh lock | |
| 281 | + | ||
| 282 | + | if let Some(id) = first_contact_id { | |
| 283 | + | let _ = crate::db::store_peer_identity(pool, peer_name, &id).await; | |
| 284 | + | } | |
| 285 | + | let _ = crate::db::insert_peer_heartbeat(pool, peer_name, "online", latency_ms as i64).await; | |
| 286 | + | ||
| 287 | + | if let (Some(address), Some(alerter)) = (recovery_info, alerter) { | |
| 288 | + | alerter.send_peer_recovery(peer_name, &address).await; | |
| 289 | + | } | |
| 290 | + | } | |
| 291 | + | ||
| 292 | + | async fn handle_heartbeat_failure( | |
| 293 | + | peer_name: &str, | |
| 294 | + | mesh: &SharedMeshState, | |
| 295 | + | pool: &sqlx::SqlitePool, | |
| 296 | + | latency_ms: u64, | |
| 297 | + | alerter: &Option<Alerter>, | |
| 298 | + | ) { | |
| 299 | + | // Update in-memory state under lock, collect data for alert after lock drop | |
| 300 | + | let (new_status, alert_info) = { | |
| 301 | + | let mut state = mesh.write().await; | |
| 302 | + | let Some(peer) = state.peers.get_mut(peer_name) else { | |
| 303 | + | return; | |
| 304 | + | }; | |
| 305 | + | ||
| 306 | + | peer.consecutive_failures += 1; | |
| 307 | + | ||
| 308 | + | let new_status = match peer.status { | |
| 309 | + | PeerStatus::Online | PeerStatus::Unknown | PeerStatus::GracePeriod => { | |
| 310 | + | if peer.consecutive_failures >= peer.grace_count { | |
| 311 | + | PeerStatus::Missing | |
| 312 | + | } else { | |
| 313 | + | PeerStatus::GracePeriod | |
| 314 | + | } | |
| 315 | + | } | |
| 316 | + | PeerStatus::Missing => PeerStatus::Missing, | |
| 317 | + | }; | |
| 318 | + | ||
| 319 | + | let transitioned_to_missing = new_status == PeerStatus::Missing && peer.status != PeerStatus::Missing; | |
| 320 | + | ||
| 321 | + | // Collect alert data before mutating state | |
| 322 | + | let alert_info = if transitioned_to_missing { | |
| 323 | + | match peer.on_missing { | |
| 324 | + | OnMissing::Alert => { | |
| 325 | + | tracing::warn!( | |
| 326 | + | "{peer_name}: MISSING after {} consecutive failures (action: alert)", | |
| 327 | + | peer.consecutive_failures | |
| 328 | + | ); | |
| 329 | + | Some((peer.address.clone(), peer.consecutive_failures)) | |
| 330 | + | } | |
| 331 | + | OnMissing::Log => { | |
| 332 | + | tracing::info!( | |
| 333 | + | "{peer_name}: missing after {} consecutive failures", | |
| 334 | + | peer.consecutive_failures | |
| 335 | + | ); | |
| 336 | + | None | |
| 337 | + | } | |
| 338 | + | OnMissing::Ignore => None, | |
| 339 | + | } | |
| 340 | + | } else { | |
| 341 | + | None | |
| 342 | + | }; | |
| 343 | + | ||
| 344 | + | peer.status = new_status; | |
| 345 | + | ||
| 346 | + | (new_status, alert_info) | |
| 347 | + | }; | |
| 348 | + | // Lock dropped — DB write and alerts happen without holding mesh lock | |
| 349 | + | ||
| 350 | + | let status_str = new_status.to_string(); | |
| 351 | + | let _ = crate::db::insert_peer_heartbeat(pool, peer_name, &status_str, latency_ms as i64).await; | |
| 352 | + | ||
| 353 | + | if let (Some((address, failures)), Some(alerter)) = (alert_info, alerter) { | |
| 354 | + | alerter.send_peer_missing(peer_name, &address, failures).await; | |
| 355 | + | } | |
| 356 | + | } | |
| 357 | + | ||
| 358 | + | #[cfg(test)] | |
| 359 | + | mod tests { | |
| 360 | + | use super::*; | |
| 361 | + | ||
| 362 | + | #[test] | |
| 363 | + | fn override_id_takes_precedence() { | |
| 364 | + | let id = load_or_create_instance_id(Some("override-id")).unwrap(); | |
| 365 | + | assert_eq!(id, "override-id"); | |
| 366 | + | } | |
| 367 | + | ||
| 368 | + | #[test] | |
| 369 | + | fn auto_id_is_valid_uuid() { | |
| 370 | + | let id = load_or_create_instance_id(None).unwrap(); | |
| 371 | + | assert!(uuid::Uuid::parse_str(&id).is_ok()); | |
| 372 | + | } | |
| 373 | + | ||
| 374 | + | #[test] | |
| 375 | + | fn on_missing_deserialize() { | |
| 376 | + | #[derive(serde::Deserialize)] | |
| 377 | + | struct Wrapper { | |
| 378 | + | #[serde(default)] | |
| 379 | + | on_missing: OnMissing, | |
| 380 | + | } | |
| 381 | + | ||
| 382 | + | let w: Wrapper = toml::from_str(r#"on_missing = "alert""#).unwrap(); | |
| 383 | + | assert_eq!(w.on_missing, OnMissing::Alert); | |
| 384 | + | ||
| 385 | + | let w: Wrapper = toml::from_str(r#"on_missing = "log""#).unwrap(); | |
| 386 | + | assert_eq!(w.on_missing, OnMissing::Log); | |
| 387 | + | ||
| 388 | + | let w: Wrapper = toml::from_str(r#"on_missing = "ignore""#).unwrap(); | |
| 389 | + | assert_eq!(w.on_missing, OnMissing::Ignore); | |
| 390 | + | ||
| 391 | + | // Default is Log | |
| 392 | + | let w: Wrapper = toml::from_str("").unwrap(); | |
| 393 | + | assert_eq!(w.on_missing, OnMissing::Log); | |
| 394 | + | } | |
| 395 | + | ||
| 396 | + | fn test_instance_info() -> InstanceInfo { | |
| 397 | + | InstanceInfo { | |
| 398 | + | id: "test-id".to_string(), | |
| 399 | + | name: "test".to_string(), | |
| 400 | + | version: "0.1.0".to_string(), | |
| 401 | + | targets: vec![], | |
| 402 | + | started_at: "2026-03-10T00:00:00Z".to_string(), | |
| 403 | + | } | |
| 404 | + | } | |
| 405 | + | ||
| 406 | + | fn test_mesh_with_peer(grace_count: u32) -> SharedMeshState { | |
| 407 | + | let mut peer_configs = HashMap::new(); | |
| 408 | + | peer_configs.insert( | |
| 409 | + | "peer1".to_string(), | |
| 410 | + | PeerConfig { | |
| 411 | + | address: "10.0.0.1:9100".to_string(), | |
| 412 | + | on_missing: OnMissing::Alert, | |
| 413 | + | grace_count: Some(grace_count), | |
| 414 | + | }, | |
| 415 | + | ); | |
| 416 | + | new_mesh_state(test_instance_info(), &peer_configs) | |
| 417 | + | } | |
| 418 | + | ||
| 419 | + | #[tokio::test] | |
| 420 | + | async fn heartbeat_failure_transitions_through_grace_to_missing() { | |
| 421 | + | let pool = crate::db::connect_in_memory().await.unwrap(); | |
| 422 | + | let mesh = test_mesh_with_peer(3); | |
| 423 | + | ||
| 424 | + | // Start at Unknown | |
| 425 | + | assert_eq!(mesh.read().await.peers["peer1"].status, PeerStatus::Unknown); | |
| 426 | + | ||
| 427 | + | // First failure → GracePeriod | |
| 428 | + | handle_heartbeat_failure("peer1", &mesh, &pool, 0, &None).await; | |
| 429 | + | assert_eq!(mesh.read().await.peers["peer1"].status, PeerStatus::GracePeriod); | |
| 430 | + | ||
| 431 | + | // Second failure → still GracePeriod | |
| 432 | + | handle_heartbeat_failure("peer1", &mesh, &pool, 0, &None).await; | |
| 433 | + | assert_eq!(mesh.read().await.peers["peer1"].status, PeerStatus::GracePeriod); | |
| 434 | + | ||
| 435 | + | // Third failure (= grace_count) → Missing | |
| 436 | + | handle_heartbeat_failure("peer1", &mesh, &pool, 0, &None).await; | |
| 437 | + | assert_eq!(mesh.read().await.peers["peer1"].status, PeerStatus::Missing); | |
| 438 | + | ||
| 439 | + | // Fourth failure → stays Missing | |
| 440 | + | handle_heartbeat_failure("peer1", &mesh, &pool, 0, &None).await; | |
| 441 | + | assert_eq!(mesh.read().await.peers["peer1"].status, PeerStatus::Missing); | |
| 442 | + | } | |
| 443 | + | ||
| 444 | + | #[tokio::test] | |
| 445 | + | async fn heartbeat_success_recovers_from_missing() { | |
| 446 | + | let pool = crate::db::connect_in_memory().await.unwrap(); | |
| 447 | + | let mesh = test_mesh_with_peer(1); | |
| 448 | + | ||
| 449 | + | // Drive to Missing | |
| 450 | + | handle_heartbeat_failure("peer1", &mesh, &pool, 0, &None).await; | |
| 451 | + | assert_eq!(mesh.read().await.peers["peer1"].status, PeerStatus::Missing); | |
| 452 | + | ||
| 453 | + | // Success → Online | |
| 454 | + | let info = InstanceInfo { | |
| 455 | + | id: "remote-id".to_string(), | |
| 456 | + | name: "remote".to_string(), | |
| 457 | + | version: "0.1.0".to_string(), | |
| 458 | + | targets: vec![], | |
| 459 | + | started_at: "2026-03-10T00:00:00Z".to_string(), | |
| 460 | + | }; | |
| 461 | + | handle_heartbeat_success("peer1", &mesh, &pool, Some(info), 42, &None).await; | |
| 462 | + | ||
| 463 | + | let state = mesh.read().await; | |
| 464 | + | let peer = &state.peers["peer1"]; | |
| 465 | + | assert_eq!(peer.status, PeerStatus::Online); | |
| 466 | + | assert_eq!(peer.consecutive_failures, 0); | |
| 467 | + | assert_eq!(peer.latency_ms, Some(42)); | |
| 468 | + | assert_eq!(peer.known_id.as_deref(), Some("remote-id")); | |
| 469 | + | } | |
| 470 | + | ||
| 471 | + | #[tokio::test] | |
| 472 | + | async fn heartbeat_success_detects_uuid_stored_on_first_contact() { | |
| 473 | + | let pool = crate::db::connect_in_memory().await.unwrap(); | |
| 474 | + | let mesh = test_mesh_with_peer(3); | |
| 475 | + | ||
| 476 | + | let info = InstanceInfo { | |
| 477 | + | id: "uuid-abc".to_string(), | |
| 478 | + | name: "remote".to_string(), | |
| 479 | + | version: "0.1.0".to_string(), | |
| 480 | + | targets: vec![], | |
| 481 | + | started_at: "2026-03-10T00:00:00Z".to_string(), | |
| 482 | + | }; | |
| 483 | + | handle_heartbeat_success("peer1", &mesh, &pool, Some(info), 10, &None).await; | |
| 484 | + | ||
| 485 | + | // UUID should be persisted in DB | |
| 486 | + | let stored = crate::db::get_peer_identity(&pool, "peer1").await.unwrap(); | |
| 487 | + | assert_eq!(stored, Some("uuid-abc".to_string())); | |
| 488 | + | } | |
| 489 | + | ||
| 490 | + | #[tokio::test] | |
| 491 | + | async fn heartbeat_records_to_db() { | |
| 492 | + | let pool = crate::db::connect_in_memory().await.unwrap(); | |
| 493 | + | let mesh = test_mesh_with_peer(3); | |
| 494 | + | ||
| 495 | + | handle_heartbeat_failure("peer1", &mesh, &pool, 0, &None).await; | |
| 496 | + | handle_heartbeat_success("peer1", &mesh, &pool, None, 55, &None).await; | |
| 497 | + | ||
| 498 | + | let history = crate::db::get_peer_heartbeat_history(&pool, "peer1", 10).await.unwrap(); | |
| 499 | + | assert_eq!(history.len(), 2); | |
| 500 | + | // Most recent first |
Lines truncated