max / makenotwork
101 files changed,
+4097 insertions,
-3186 deletions
| @@ -1870,7 +1870,7 @@ dependencies = [ | |||
| 1870 | 1870 | ||
| 1871 | 1871 | [[package]] | |
| 1872 | 1872 | name = "mnw-cli" | |
| 1873 | - | version = "0.1.0" | |
| 1873 | + | version = "0.1.1" | |
| 1874 | 1874 | dependencies = [ | |
| 1875 | 1875 | "anyhow", | |
| 1876 | 1876 | "bytes", |
| @@ -17,6 +17,9 @@ set -e | |||
| 17 | 17 | ||
| 18 | 18 | # Configuration | |
| 19 | 19 | SERVER="root@100.120.174.96" | |
| 20 | + | SSH_PORT=2200 | |
| 21 | + | SSH_OPTS="-p $SSH_PORT" | |
| 22 | + | SCP_OPTS="-P $SSH_PORT" | |
| 20 | 23 | REMOTE_DIR="/opt/mnw-cli" | |
| 21 | 24 | STAGING_DIR="/var/lib/mnw-cli/staging" | |
| 22 | 25 | BINARY_NAME="mnw-cli" | |
| @@ -31,12 +34,12 @@ fi | |||
| 31 | 34 | ||
| 32 | 35 | upload_config() { | |
| 33 | 36 | echo "[config] Uploading configuration files..." | |
| 34 | - | scp $DEPLOY_DIR/mnw-cli.service $SERVER:/etc/systemd/system/mnw-cli.service | |
| 35 | - | ssh $SERVER "systemctl daemon-reload" | |
| 37 | + | scp $SCP_OPTS $DEPLOY_DIR/mnw-cli.service $SERVER:/etc/systemd/system/mnw-cli.service | |
| 38 | + | ssh $SSH_OPTS $SERVER "systemctl daemon-reload" | |
| 36 | 39 | ||
| 37 | 40 | # Ensure directories exist | |
| 38 | - | ssh $SERVER "mkdir -p $REMOTE_DIR $STAGING_DIR" | |
| 39 | - | ssh $SERVER "chown mnw-cli:mnw-cli $STAGING_DIR" | |
| 41 | + | ssh $SSH_OPTS $SERVER "mkdir -p $REMOTE_DIR $STAGING_DIR" | |
| 42 | + | ssh $SSH_OPTS $SERVER "chown mnw-cli:mnw-cli $STAGING_DIR" | |
| 40 | 43 | ||
| 41 | 44 | echo "[config] Done" | |
| 42 | 45 | } | |
| @@ -50,18 +53,18 @@ build_binary() { | |||
| 50 | 53 | ||
| 51 | 54 | upload_binary() { | |
| 52 | 55 | echo "[upload] Stopping service and uploading binary..." | |
| 53 | - | ssh $SERVER "systemctl stop mnw-cli || true" | |
| 54 | - | scp target/$TARGET/release/$BINARY_NAME $SERVER:$REMOTE_DIR/$BINARY_NAME | |
| 55 | - | ssh $SERVER "chmod +x $REMOTE_DIR/$BINARY_NAME" | |
| 56 | + | ssh $SSH_OPTS $SERVER "systemctl stop mnw-cli || true" | |
| 57 | + | scp $SCP_OPTS target/$TARGET/release/$BINARY_NAME $SERVER:$REMOTE_DIR/$BINARY_NAME | |
| 58 | + | ssh $SSH_OPTS $SERVER "chmod +x $REMOTE_DIR/$BINARY_NAME" | |
| 56 | 59 | echo "[upload] Done" | |
| 57 | 60 | } | |
| 58 | 61 | ||
| 59 | 62 | restart_app() { | |
| 60 | 63 | echo "[restart] Restarting mnw-cli..." | |
| 61 | - | ssh $SERVER "systemctl restart mnw-cli" | |
| 64 | + | ssh $SSH_OPTS $SERVER "systemctl restart mnw-cli" | |
| 62 | 65 | sleep 1 | |
| 63 | 66 | echo "" | |
| 64 | - | ssh $SERVER "systemctl status mnw-cli --no-pager" | |
| 67 | + | ssh $SSH_OPTS $SERVER "systemctl status mnw-cli --no-pager" | |
| 65 | 68 | } | |
| 66 | 69 | ||
| 67 | 70 | case "${1:-full}" in |
| @@ -7,16 +7,19 @@ Done: Phases 1-8, Git proxy A-C. Active: None. Next: Deploy. | |||
| 7 | 7 | ||
| 8 | 8 | ## Git Proxy — Part D: Server Configuration | |
| 9 | 9 | ||
| 10 | - | - [ ] D1: Move sshd to port 2200, `ListenAddress 100.120.174.96` (Tailscale only) | |
| 11 | - | - [ ] D2: Update mnw-cli .env (`SSH_PORT=22`, `GIT_SUDO_USER=git`) | |
| 12 | - | - [ ] D3: Sudoers rule (`/etc/sudoers.d/mnw-cli-git` — git-upload-pack, git-receive-pack, git-upload-archive) | |
| 13 | - | - [ ] D4: Firewall — `ufw delete allow 2222/tcp` (port 22 already open) | |
| 14 | - | - [ ] D5: DNS — `cli.makenot.work` A record -> `5.78.144.244`, proxy OFF | |
| 15 | - | - [ ] D6: Restart sequence (sshd -> verify admin SSH -> mnw-cli -> verify TUI + git clone) | |
| 10 | + | - [x] D1: Move sshd to port 2200, `ListenAddress 100.120.174.96` (Tailscale only) — done 2026-04-22 | |
| 11 | + | - [x] D2: Update mnw-cli .env (`SSH_PORT=22`, `GIT_SUDO_USER=git`) — done 2026-04-22 | |
| 12 | + | - [x] D3: Sudoers rule (`/etc/sudoers.d/mnw-cli-git` — git-upload-pack, git-receive-pack, git-upload-archive) — done 2026-04-22 | |
| 13 | + | - [x] D4: Firewall — removed 2222/tcp, added 2200/tcp — done 2026-04-22 | |
| 14 | + | - [ ] D5: DNS — `cli.makenot.work` A record -> `5.78.144.244`, proxy OFF (needs Cloudflare dashboard) | |
| 15 | + | - [x] D6: Restart sequence verified — admin SSH on 2200, mnw-cli on 22, both running — done 2026-04-22 | |
| 16 | 16 | ||
| 17 | 17 | ## Deploy | |
| 18 | 18 | - [ ] Test on astra (full TUI + SFTP + git push/pull) | |
| 19 | - | - [ ] Deploy to hetzner (cross-compile via deploy.sh) | |
| 19 | + | - [x] Deploy to hetzner (cross-compile via deploy.sh) — done 2026-04-22 | |
| 20 | + | - [x] Verified: SSH auth, TUI launch, git ls-remote, git clone all working — 2026-04-22 | |
| 21 | + | - [x] Fixed: NoNewPrivileges blocking sudo for git ops — 2026-04-22 | |
| 22 | + | - [x] Fixed: Tailscale SSH intercepting port 22 — disabled on hetzner — 2026-04-22 | |
| 20 | 23 | - [ ] Add PoM health check for mnw-cli (port 22 SSH banner check) | |
| 21 | 24 | ||
| 22 | 25 | ## Remaining Features (from design doc) |
| @@ -1,31 +1,31 @@ | |||
| 1 | 1 | # Multithreaded -- Code Audit Review | |
| 2 | 2 | ||
| 3 | - | **Last audited:** 2026-03-28 (seventh formal audit, Run 12 cross-project) | |
| 4 | - | **Previous audit:** 2026-03-22 (sixth audit, coverage expansion) | |
| 3 | + | **Last audited:** 2026-04-18 (ninth audit, Run 15 cross-project) | |
| 4 | + | **Previous audit:** 2026-04-15 (eighth audit, Run 14 cross-project) | |
| 5 | 5 | ||
| 6 | 6 | ## Overall Grade: A | |
| 7 | 7 | ||
| 8 | - | Run 12 cross-project audit. 225 tests (35 unit lib + 190 integration). 0 clippy warnings. v0.3.2. Grade stable at A. Internal API improvements (MNW category provisioning). Link preview fix. New dep advisories: aws-lc-sys (HIGH 7.4), rustls-webpki. | |
| 8 | + | Run 15 cross-project audit (updated 2026-04-22). 232 tests (all pass). 0 clippy warnings. v0.3.4. ~9,752 LOC. Health endpoint now verifies DB connectivity. OG image URL finding was incorrect (code never fetches/stores og:image -- only og:title and og:description). MNW API retry logic was already implemented (token exchange + userinfo both retry with backoff). | |
| 9 | 9 | ||
| 10 | 10 | ## Scorecard | |
| 11 | 11 | ||
| 12 | 12 | | Dimension | Grade | Notes | | |
| 13 | 13 | |-----------|:-----:|-------| | |
| 14 | - | | Code Quality | A | Zero clippy warnings. Consistent `map_err` + `tracing::error!` error handling. Mod log failures logged (not silently dropped). Dead code removed. | | |
| 15 | - | | Architecture | A | Clean 3-crate workspace: mt-core (time formatting), mt-db (queries/mutations), main app (routes, auth, templates). Route module properly split into forum/moderation/settings/admin. Template layer uses view-model structs. | | |
| 16 | - | | Testing | A | 225 tests (35 unit main + 190 integration) at ~32 tests/KLOC. Integration tests use real PostgreSQL with per-test database isolation. Coverage on CRUD, permissions, bans, mute/unban/unmute handlers, CSRF, pagination, rate limiting, category edit/reorder, expired ban behavior, endorsements, footnotes, verified quoting, mentions, link previews, profiles, auth flow (PKCE/state), admin routes, mutations (bans, categories, endorsements, flags, images, link previews, mentions, users). | | |
| 17 | - | | Security | A | All SQL parameterized. CSRF with constant-time comparison. OAuth PKCE with state nonce. Markdown via docengine (URL scheme allowlist + HTML sanitization, defense-in-depth). Fail-closed access checks on all ban/mute/suspension queries. Session cookie Secure flag configurable. | | |
| 18 | - | | Performance | A- | Proper indexes on all query patterns (composite indexes for category listing, thread ordering, ban lookup, mod log). Partial index on ban expiration. No N+1 queries. Per-IP rate limiting on write endpoints (tower-governor). | | |
| 19 | - | | Documentation | A- | `//!` module docs on all source files. `.env.example` documents all 9 environment variables. | | |
| 20 | - | | Dependencies | A | Minimal deps, all justified. Rust 2024 edition. Dead deps removed (thiserror, serde, serde_json from mt-core; mt-core, thiserror from mt-db). Workspace dependency management. | | |
| 21 | - | | Frontend | A | HTMX for dynamic interactions. Askama autoescaping on all template variables. CSRF auto-injected for forms and HTMX. Toast uses `textContent`. `body_html` sanitized by docengine (defense-in-depth). Client-side maxlength on all inputs. | | |
| 22 | - | | Type Safety | A- | Query layer uses focused `FromRow` projections. Dead domain types removed. | | |
| 23 | - | | Observability | A | 86 `#[instrument(skip_all)]` annotations across all route handlers and DB functions. `tracing-subscriber` with EnvFilter. | | |
| 24 | - | | Concurrency | A- | Async throughout with tokio. Graceful shutdown (SIGINT + SIGTERM). reqwest timeouts (15s + 5s). `swap_category_order` uses transaction. Per-IP rate limiting (burst 10, 2/sec). | | |
| 25 | - | | Resilience | A- | Graceful shutdown. HTTP client timeouts. Error logging without panics. Mod log failures logged. Rate limiting on writes. Gap: no retry on MNW API calls. | | |
| 26 | - | | API Consistency | A- | Consistent redirect-with-toast pattern. Proper status codes (403/404/422). Health endpoint returns JSON. | | |
| 27 | - | | Migration Safety | A- | SQLx `migrate!()` with sequential numbering (001-020). All additive. No destructive operations. | | |
| 28 | - | | Codebase Size | A | Lean codebase for full forum with OAuth, CSRF, markdown, moderation, admin, pagination, soft-delete, and settings. | | |
| 14 | + | | Code Quality | A | Zero clippy warnings. Consistent `map_err` + `tracing::error!` error handling. Mod log failures logged. | | |
| 15 | + | | Architecture | A+ | Clean 3-crate workspace: mt-core (time formatting), mt-db (queries/mutations), main app (routes, auth, templates). Route module properly split. Template layer uses view-model structs. | | |
| 16 | + | | Testing | A+ | 231 tests (all pass) at ~24 tests/KLOC. Integration tests use real PostgreSQL with per-test database isolation. Coverage on CRUD, permissions, bans, mute/unban/unmute, CSRF, pagination, rate limiting, category edit/reorder, endorsements, footnotes, verified quoting, mentions, link previews, profiles, auth flow (PKCE/state), admin routes. | | |
| 17 | + | | Security | A | All SQL parameterized. CSRF with constant-time comparison. OAuth PKCE with state nonce. Markdown via docengine (URL scheme allowlist + HTML sanitization). Fail-closed access checks. Link previews only store title/description, not og:image. | | |
| 18 | + | | Performance | A- | Proper indexes on all query patterns. Partial index on ban expiration. No N+1 queries. Per-IP rate limiting. | | |
| 19 | + | | Documentation | B+ | `//!` module docs on all source files. `.env.example` documents all env vars. Some gaps in architecture docs. | | |
| 20 | + | | Dependencies | A | Minimal deps, all justified. Rust 2024 edition. Dead deps removed. Workspace dependency management. | | |
| 21 | + | | Frontend | A | HTMX for dynamic interactions. Askama autoescaping. CSRF auto-injected. Toast uses `textContent`. `body_html` sanitized by docengine. | | |
| 22 | + | | Type Safety | A+ | Query layer uses focused `FromRow` projections. Clean domain types. | | |
| 23 | + | | Observability | B+ | 86 `#[instrument(skip_all)]` annotations. `tracing-subscriber` with EnvFilter. Gaps in some newer code paths. | | |
| 24 | + | | Concurrency | A | Async throughout with tokio. Graceful shutdown. reqwest timeouts. `swap_category_order` uses transaction. Per-IP rate limiting. | | |
| 25 | + | | Resilience | A | Graceful shutdown. HTTP client timeouts. Error logging without panics. Rate limiting. MNW API calls retry with backoff on network/5xx errors. | | |
| 26 | + | | API Consistency | A | Consistent redirect-with-toast pattern. Proper status codes (403/404/422). Health endpoint returns JSON. | | |
| 27 | + | | Migration Safety | A+ | SQLx `migrate!()` with sequential numbering. All additive. No destructive operations. | | |
| 28 | + | | Codebase Size | B+ | ~9,752 LOC -- lean for full forum with OAuth, CSRF, markdown, moderation, admin, pagination, soft-delete, and settings. | | |
| 29 | 29 | ||
| 30 | 30 | ## Module Heatmap | |
| 31 | 31 | ||
| @@ -49,45 +49,58 @@ Run 12 cross-project audit. 225 tests (35 unit lib + 190 integration). 0 clippy | |||
| 49 | 49 | ||
| 50 | 50 | ### Cold Spots | |
| 51 | 51 | ||
| 52 | - | None — all previous cold spots resolved. | |
| 52 | + | 1. ~~**Health endpoint**~~ -- Fixed 2026-04-22. Now checks DB with `SELECT 1` and returns `"degraded"` status if unreachable. | |
| 53 | + | 2. ~~**Link preview OG image URL**~~ -- Incorrect finding. Code only extracts og:title and og:description. No og:image URLs are fetched or stored. | |
| 54 | + | 3. ~~**MNW API retry logic**~~ -- Already implemented. Token exchange and userinfo both retry with 500ms/1000ms backoff on network/5xx errors. | |
| 53 | 55 | ||
| 54 | 56 | ## Strengths | |
| 55 | 57 | ||
| 56 | 58 | - **Clean architecture.** 3-crate workspace with proper separation. Route module split into focused files. Template layer uses view-model structs. | |
| 57 | 59 | - **Comprehensive CSRF.** Synchronizer token with constant-time comparison, auto-injected via JS for all forms and HTMX requests. | |
| 58 | - | - **Solid test infrastructure.** Full Axum app with real PostgreSQL per test. Cookie-aware client with automatic CSRF token extraction. | |
| 60 | + | - **Solid test infrastructure.** Full Axum app with real PostgreSQL per test. Cookie-aware client with automatic CSRF token extraction. 231 tests all passing. | |
| 59 | 61 | - **Authorization hierarchy.** Owner > mod > member correctly enforced. Owners cannot be banned. Only owners can ban mods. | |
| 60 | 62 | - **Input validation.** Length limits on all user content. Slug format validation. UUID parsing validated. Sort/order whitelisted. | |
| 61 | 63 | - **SQL safety.** All 40+ queries parameterized. Dynamic ORDER BY uses whitelist match. | |
| 62 | - | - **Efficient codebase.** 4,808 LOC for full forum functionality. | |
| 63 | 64 | ||
| 64 | 65 | ## Weaknesses | |
| 65 | 66 | ||
| 66 | - | - **No retry on MNW API calls.** OAuth token exchange and userinfo fetch have no retry logic. | |
| 67 | + | - ~~**No retry on MNW API calls.**~~ Already implemented (auth.rs:210-316). Both token exchange and userinfo retry with 500ms/1000ms backoff. | |
| 68 | + | - ~~**Health endpoint doesn't verify DB.**~~ Fixed 2026-04-22. Now runs `SELECT 1` and reports degraded status on failure. | |
| 69 | + | - ~~**OG image URL not validated.**~~ Incorrect finding. fetch_og_metadata only returns og:title and og:description. No image URLs stored or rendered. | |
| 67 | 70 | ||
| 68 | 71 | ## Mandatory Surprise | |
| 69 | 72 | ||
| 70 | - | **`CoreError` is a fully-defined typed error enum with 5 variants (NotFound, Unauthorized, Forbidden, Validation, Internal) that is never used anywhere in the application.** | |
| 73 | + | **~~Link preview image URL vulnerability~~ (Incorrect finding)** | |
| 71 | 74 | ||
| 72 | - | Route handlers use `map_err(|e| { tracing::error!(...); StatusCode::INTERNAL_SERVER_ERROR })` directly. Domain model structs in mt-core (User, Community, Category, Thread, Post, Membership, Role) are similarly defined but bypassed -- queries.rs defines its own `FromRow` structs inline. | |
| 75 | + | The original Run 15 audit claimed og:image URLs were fetched and rendered without validation. This was incorrect -- `fetch_og_metadata` only extracts `og:title` and `og:description`. The `LinkPreviewViewRow` struct has `url` (the original post link, already validated by `validate_url`), `title`, and `description` -- no image field. Verified 2026-04-22 by reading link_preview.rs and templates/public.rs. | |
| 73 | 76 | ||
| 74 | - | **Verdict: Genuine issue.** Good architectural intent that was never integrated. **Resolved:** Dead code removed (error.rs, models.rs, pool.rs deleted; unused deps cleaned from mt-core and mt-db). | |
| 77 | + | **Verdict:** No issue. | |
| 78 | + | ||
| 79 | + | ### Previous Surprise (Run 12) | |
| 80 | + | ||
| 81 | + | **`CoreError` dead code** -- fully-defined typed error enum with 5 variants was never used anywhere. **Resolved:** Dead code removed. | |
| 75 | 82 | ||
| 76 | 83 | ## Action Items | |
| 77 | 84 | ||
| 78 | 85 | Filed in `docs/mnw/mt/todo.md`. | |
| 79 | 86 | ||
| 80 | - | 1. ~~**[HIGH]** Sanitize URL schemes in markdown rendering~~ -- Done. Allowlist (http, https, mailto, ftp), 7 tests added. | |
| 81 | - | 2. ~~**[MEDIUM]** Add `#[instrument(skip_all)]` to all route handlers and DB functions~~ -- Done. 86 annotations. | |
| 82 | - | 3. ~~**[MEDIUM]** Make session cookie `Secure` flag configurable~~ -- Done. `COOKIE_SECURE` env var. | |
| 87 | + | ### All resolved (previous audits) | |
| 88 | + | 1. ~~**[HIGH]** Sanitize URL schemes in markdown rendering~~ -- Done. | |
| 89 | + | 2. ~~**[MEDIUM]** Add `#[instrument(skip_all)]` to all route handlers and DB functions~~ -- Done. | |
| 90 | + | 3. ~~**[MEDIUM]** Make session cookie `Secure` flag configurable~~ -- Done. | |
| 83 | 91 | 4. ~~**[MEDIUM]** Wrap `swap_category_order` in transaction~~ -- Done. | |
| 84 | 92 | 5. ~~**[MEDIUM]** Change fail-open access checks to fail-closed~~ -- Done. | |
| 85 | - | 6. ~~**[SMALL]** Add `//!` module docs~~ -- Done. All source files documented. | |
| 86 | - | 7. ~~**[SMALL]** Remove dead code~~ -- Done. error.rs, models.rs, pool.rs deleted. Deps cleaned. | |
| 87 | - | 8. ~~**[SMALL]** Log mod log insert failures~~ -- Done. 15 locations across 4 files. | |
| 88 | - | 9. ~~**[SMALL]** Expand `.env.example`~~ -- Done. All 9 env vars documented. | |
| 93 | + | 6. ~~**[SMALL]** Add `//!` module docs~~ -- Done. | |
| 94 | + | 7. ~~**[SMALL]** Remove dead code~~ -- Done. | |
| 95 | + | 8. ~~**[SMALL]** Log mod log insert failures~~ -- Done. | |
| 96 | + | 9. ~~**[SMALL]** Expand `.env.example`~~ -- Done. | |
| 89 | 97 | 10. ~~**[SMALL]** Initial git commit + configure remotes~~ -- Done. | |
| 90 | 98 | ||
| 99 | + | ### Run 15 (2026-04-18, corrected 2026-04-22) | |
| 100 | + | 11. ~~**[MEDIUM]** Validate og:image URL scheme in link preview~~ -- Incorrect finding. Code never fetches og:image. | |
| 101 | + | 12. ~~**[MEDIUM]** Add DB connectivity check to health endpoint~~ -- Fixed 2026-04-22. | |
| 102 | + | 13. ~~**[LOW]** Add retry logic to MNW OAuth API calls~~ -- Already implemented (auth.rs:210-316). | |
| 103 | + | ||
| 91 | 104 | ## Metrics Over Time | |
| 92 | 105 | ||
| 93 | 106 | | Audit Date | LOC | Rust Files | Tests | Tests/KLOC | Clippy Warnings | Cold Spots | Overall | | |
| @@ -103,6 +116,9 @@ Filed in `docs/mnw/mt/todo.md`. | |||
| 103 | 116 | | 2026-03-18 (Run 9) | ~7,000 | ~38| 222 | ~32 | 0 | 0 | A | | |
| 104 | 117 | | 2026-03-22 (coverage) | ~7,000 | ~39| 249 | ~36 | 0 | 0 | A | | |
| 105 | 118 | | 2026-03-28 (Run 12) | ~7,200 | ~39| 225+ | ~32 | 0 | 0 | A | | |
| 119 | + | | 2026-04-15 (Run 14) | ~9,752 | -- | 231 | ~24 | 0 | 0 | A | | |
| 120 | + | | 2026-04-18 (Run 15) | ~9,752 | -- | 231 | ~24 | 0 | 3 | A- | | |
| 121 | + | | 2026-04-22 (Run 15 corrected) | ~9,752 | -- | 232 | ~24 | 0 | 0 | A | | |
| 106 | 122 | ||
| 107 | 123 | --- | |
| 108 | 124 |
| @@ -249,12 +249,22 @@ pub(super) struct DeleteTagForm { | |||
| 249 | 249 | // Handlers | |
| 250 | 250 | // ============================================================================ | |
| 251 | 251 | ||
| 252 | - | /// Health check — proves the service is responding. | |
| 252 | + | /// Health check — proves the service is responding and the database is reachable. | |
| 253 | 253 | #[tracing::instrument(skip_all)] | |
| 254 | - | async fn health() -> Json<serde_json::Value> { | |
| 254 | + | async fn health( | |
| 255 | + | axum::extract::State(state): axum::extract::State<AppState>, | |
| 256 | + | ) -> Json<serde_json::Value> { | |
| 257 | + | let db_ok = sqlx::query_scalar::<_, i32>("SELECT 1") | |
| 258 | + | .fetch_one(&state.db) | |
| 259 | + | .await | |
| 260 | + | .is_ok(); | |
| 261 | + | ||
| 262 | + | let status = if db_ok { "operational" } else { "degraded" }; | |
| 263 | + | ||
| 255 | 264 | Json(serde_json::json!({ | |
| 256 | - | "status": "operational", | |
| 265 | + | "status": status, | |
| 257 | 266 | "version": env!("CARGO_PKG_VERSION"), | |
| 267 | + | "database": db_ok, | |
| 258 | 268 | })) | |
| 259 | 269 | } | |
| 260 | 270 |
| @@ -0,0 +1,12 @@ | |||
| 1 | + | use crate::harness::TestHarness; | |
| 2 | + | ||
| 3 | + | #[tokio::test] | |
| 4 | + | async fn health_endpoint_returns_operational_with_db() { | |
| 5 | + | let mut h = TestHarness::new().await; | |
| 6 | + | let resp = h.client.get("/api/health").await; | |
| 7 | + | assert_eq!(resp.status, 200); | |
| 8 | + | let body: serde_json::Value = resp.json(); | |
| 9 | + | assert_eq!(body["status"], "operational"); | |
| 10 | + | assert_eq!(body["database"], true); | |
| 11 | + | assert!(body["version"].as_str().is_some()); | |
| 12 | + | } |
| @@ -6,6 +6,7 @@ mod crud; | |||
| 6 | 6 | mod csrf; | |
| 7 | 7 | mod endorsements; | |
| 8 | 8 | mod flagging; | |
| 9 | + | mod health; | |
| 9 | 10 | mod internal_api; | |
| 10 | 11 | mod link_previews; | |
| 11 | 12 | mod mentions; |
| @@ -9,7 +9,7 @@ PROJECT_DIR="$(dirname "$SCRIPT_DIR")" | |||
| 9 | 9 | ||
| 10 | 10 | deploy_target() { | |
| 11 | 11 | local name="$1" | |
| 12 | - | local host target sudo_prefix="" | |
| 12 | + | local host target sudo_prefix="" ssh_opts="" scp_opts="" | |
| 13 | 13 | ||
| 14 | 14 | case "$name" in | |
| 15 | 15 | astra) | |
| @@ -20,6 +20,8 @@ deploy_target() { | |||
| 20 | 20 | hetzner) | |
| 21 | 21 | host="$HETZNER_HOST" | |
| 22 | 22 | target="x86_64-unknown-linux-gnu" | |
| 23 | + | ssh_opts="-p 2200" | |
| 24 | + | scp_opts="-P 2200" | |
| 23 | 25 | ;; | |
| 24 | 26 | *) | |
| 25 | 27 | echo "Unknown target: $name (use astra, hetzner, or all)" | |
| @@ -39,20 +41,20 @@ deploy_target() { | |||
| 39 | 41 | fi | |
| 40 | 42 | ||
| 41 | 43 | echo "=== Deploying to $name ($host) ===" | |
| 42 | - | ssh "$host" "$sudo_prefix mkdir -p /etc/pom" | |
| 43 | - | scp "$binary" "$host:/tmp/pom" | |
| 44 | - | scp "$config_file" "$host:/tmp/pom.toml" | |
| 45 | - | scp "$SCRIPT_DIR/pom.service" "$host:/tmp/pom.service" | |
| 44 | + | ssh $ssh_opts "$host" "$sudo_prefix mkdir -p /etc/pom" | |
| 45 | + | scp $scp_opts "$binary" "$host:/tmp/pom" | |
| 46 | + | scp $scp_opts "$config_file" "$host:/tmp/pom.toml" | |
| 47 | + | scp $scp_opts "$SCRIPT_DIR/pom.service" "$host:/tmp/pom.service" | |
| 46 | 48 | ||
| 47 | - | ssh "$host" "$sudo_prefix mv /tmp/pom /usr/local/bin/pom && $sudo_prefix chmod +x /usr/local/bin/pom && $sudo_prefix mv /tmp/pom.toml /etc/pom/pom.toml && $sudo_prefix mv /tmp/pom.service /etc/systemd/system/pom.service" | |
| 49 | + | ssh $ssh_opts "$host" "$sudo_prefix mv /tmp/pom /usr/local/bin/pom && $sudo_prefix chmod +x /usr/local/bin/pom && $sudo_prefix mv /tmp/pom.toml /etc/pom/pom.toml && $sudo_prefix mv /tmp/pom.service /etc/systemd/system/pom.service" | |
| 48 | 50 | ||
| 49 | 51 | # Create env file with Postmark token if it doesn't exist | |
| 50 | - | ssh "$host" "if [ ! -f /etc/pom/env ]; then echo 'POM_POSTMARK_TOKEN=SET_ME' | $sudo_prefix tee /etc/pom/env > /dev/null && $sudo_prefix chmod 600 /etc/pom/env; fi" | |
| 52 | + | ssh $ssh_opts "$host" "if [ ! -f /etc/pom/env ]; then echo 'POM_POSTMARK_TOKEN=SET_ME' | $sudo_prefix tee /etc/pom/env > /dev/null && $sudo_prefix chmod 600 /etc/pom/env; fi" | |
| 51 | 53 | ||
| 52 | - | ssh "$host" "$sudo_prefix systemctl daemon-reload && $sudo_prefix systemctl enable pom && $sudo_prefix systemctl restart pom" | |
| 54 | + | ssh $ssh_opts "$host" "$sudo_prefix systemctl daemon-reload && $sudo_prefix systemctl enable pom && $sudo_prefix systemctl restart pom" | |
| 53 | 55 | ||
| 54 | 56 | echo "=== $name: deployed ===" | |
| 55 | - | ssh "$host" "$sudo_prefix systemctl status pom --no-pager" | |
| 57 | + | ssh $ssh_opts "$host" "$sudo_prefix systemctl status pom --no-pager" | |
| 56 | 58 | } | |
| 57 | 59 | ||
| 58 | 60 | if [ $# -eq 0 ]; then |
| @@ -44,7 +44,7 @@ timeout_secs = 10 | |||
| 44 | 44 | ||
| 45 | 45 | [targets.mnw.health.expect] | |
| 46 | 46 | status_code = 200 | |
| 47 | - | json_fields = { "status" = "operational" } | |
| 47 | + | json_fields = { "status" = "operational", "checks.database" = "true", "checks.git_storage" = "true" } | |
| 48 | 48 | ||
| 49 | 49 | [targets.mnw.health.trending] | |
| 50 | 50 | baseline_window_hours = 168 | |
| @@ -65,6 +65,14 @@ databases = ["makenotwork", "multithreaded"] | |||
| 65 | 65 | max_age_hours = 25 | |
| 66 | 66 | interval_secs = 3600 | |
| 67 | 67 | ||
| 68 | + | [targets.mnw-cli] | |
| 69 | + | label = "MNW CLI SSH Server" | |
| 70 | + | ||
| 71 | + | [targets.mnw-cli.ssh_banner] | |
| 72 | + | host = "127.0.0.1" | |
| 73 | + | port = 22 | |
| 74 | + | timeout_secs = 5 | |
| 75 | + | ||
| 68 | 76 | [targets.mt] | |
| 69 | 77 | label = "Multithreaded Forum" | |
| 70 | 78 | expected_routes = ["/"] | |
| @@ -75,7 +83,7 @@ timeout_secs = 5 | |||
| 75 | 83 | ||
| 76 | 84 | [targets.mt.health.expect] | |
| 77 | 85 | status_code = 200 | |
| 78 | - | json_fields = { "status" = "operational" } | |
| 86 | + | json_fields = { "status" = "operational", "database" = "true" } | |
| 79 | 87 | ||
| 80 | 88 | [targets.mt.tls] | |
| 81 | 89 | host = "forums.makenot.work" |
| @@ -1,30 +1,30 @@ | |||
| 1 | 1 | # PoM (Peace of Mind) -- Audit Review | |
| 2 | 2 | ||
| 3 | - | **Last audited:** 2026-03-28 (tenth audit, Run 12 cross-project) | |
| 4 | - | **Previous audit:** 2026-03-18 (eighth audit, Run 9 cross-project) | |
| 3 | + | **Last audited:** 2026-04-18 (twelfth audit, Run 15 cross-project) | |
| 4 | + | **Previous audit:** 2026-04-15 (eleventh audit, Run 14 cross-project) | |
| 5 | 5 | ||
| 6 | 6 | ## Overall Grade: A | |
| 7 | 7 | ||
| 8 | - | Run 12 cross-project audit. 359 tests (222 unit + 8 cli + 129 integration). 0 clippy warnings. v0.3.2. Grade stable at A. CORS monitoring added. DNS/route stale data fix deployed. New dep advisories: aws-lc-sys (HIGH 7.4), rustls-webpki. | |
| 8 | + | Run 15 cross-project audit (updated 2026-04-22). 364 tests (all pass). 0 clippy warnings. v0.3.5. ~11,496 LOC. Migration idempotency finding was incorrect -- all 9 migrations have IF NOT EXISTS guards. Only remaining issue: 2 critical CVEs in aws-lc-sys transitive dependency (blocked on upstream, no direct exposure). | |
| 9 | 9 | ||
| 10 | 10 | ## Scorecard | |
| 11 | 11 | ||
| 12 | 12 | | Dimension | Grade | Notes | | |
| 13 | 13 | |-----------|:-----:|-------| | |
| 14 | - | | Code Quality | A | Zero clippy warnings. Clean error handling with typed PomError enum (thiserror). No unnecessary complexity. | | |
| 15 | - | | Architecture | A | Well-structured single crate. main.rs is thin (130 LOC), CLI handlers extracted to cli.rs. Module boundaries clean and purposeful. | | |
| 16 | - | | Testing | A | 359 tests (222 unit + 8 cli + 129 integration) for ~10K LOC = ~36 tests/KLOC. Coverage comprehensive — DB, API, MCP tools, parsing, peer state machine, check_health (mock servers), check_tls (self-signed cert), rate limiter, CORS monitoring all tested. | | |
| 17 | - | | Security | A | All SQL parameterized. SSH hardened with BatchMode + `--` option separator. Bearer token auth on API + peer mesh. Rate limiting (60 req/min). HTTP response size limits (10MB). Shell-escape validation on SSH filter. | | |
| 18 | - | | Performance | A | WAL mode, proper indexes, async throughout. Appropriate for monitoring workload. | | |
| 19 | - | | Documentation | A | Module-level `//!` docs on all modules. All struct fields documented. description.md rewritten, architecture.md created (191 lines), README created (62 lines). Tool descriptions are excellent. | | |
| 20 | - | | Dependencies | A | 17 direct dependencies, all justified. No unused deps. rustls-tls avoids OpenSSL. Semver ranges. tokio-util for CancellationToken. | | |
| 21 | - | | Type Safety | A | Domain enums (HealthStatus, PeerStatus, OnMissing) with proper serde. No stringly-typed fields in application logic. | | |
| 22 | - | | Observability | A | 57 `#[instrument(skip_all)]` annotations across all modules. Debug-level logging for non-critical peer status fetch failures. Watchdog logging for silent task panics. | | |
| 23 | - | | Concurrency | A | Clean RwLock discipline. Lock-then-release-then-DB pattern in peer module. CancellationToken for cooperative shutdown. | | |
| 24 | - | | Resilience | A | CancellationToken + `tokio::select!` in all task loops. `with_graceful_shutdown` on API server. 5s grace period. Watchdog for task panic detection. Grace periods for peer detection. Error logging without panics. | | |
| 25 | - | | API Consistency | A | Consistent JSON shapes. Proper HTTP status codes. Public `/api/health` endpoint. MCP tool descriptions well-written. | | |
| 26 | - | | Migration Safety | A- | Numbered migration system (schema_version table). `IF NOT EXISTS` guards on all DDL. 5 migrations tracked. | | |
| 27 | - | | Codebase Size | A | ~3,500 LOC for a full monitoring tool with CLI, MCP server, HTTP API, SQLite persistence, SSH test runner, peer mesh, and self-monitoring. Extremely efficient. | | |
| 14 | + | | Code Quality | A | Zero clippy warnings. Clean error handling with typed PomError enum. All migrations have IF NOT EXISTS guards. | | |
| 15 | + | | Architecture | A | Well-structured single crate. main.rs is thin, CLI handlers extracted to cli.rs. Module boundaries clean. | | |
| 16 | + | | Testing | A | 364 tests (all pass) for ~11.5K LOC = ~32 tests/KLOC. Comprehensive coverage -- DB, API, MCP tools, parsing, peer state machine, check_health, check_tls, rate limiter, CORS monitoring. | | |
| 17 | + | | Security | A | All SQL parameterized. SSH hardened. Bearer token auth. Rate limiting. HTTP response size limits. aws-lc-sys CVEs are transitive with no direct exposure (blocked on upstream). | | |
| 18 | + | | Performance | A- | WAL mode, proper indexes, async throughout. Appropriate for monitoring workload. | | |
| 19 | + | | Documentation | A | Module-level `//!` docs on all modules. All struct fields documented. architecture.md, README created. | | |
| 20 | + | | Dependencies | A- | 17 direct deps, all justified. 2 critical CVEs in aws-lc-sys transitive dependency (no direct exposure, blocked on upstream). | | |
| 21 | + | | Type Safety | A | Domain enums with proper serde. No stringly-typed fields. | | |
| 22 | + | | Observability | A- | 57 `#[instrument(skip_all)]` annotations. Debug-level logging for non-critical failures. Watchdog logging. | | |
| 23 | + | | Concurrency | A | Clean RwLock discipline. Lock-then-release-then-DB pattern. CancellationToken for cooperative shutdown. | | |
| 24 | + | | Resilience | A | CancellationToken + `tokio::select!` in all task loops. Graceful shutdown. Watchdog for task panic detection. | | |
| 25 | + | | API Consistency | A | Consistent JSON shapes. Proper HTTP status codes. Public `/api/health` endpoint. | | |
| 26 | + | | Migration Safety | A | All 9 migrations use IF NOT EXISTS guards. Idempotent and safe to re-run. | | |
| 27 | + | | Codebase Size | A- | ~11,496 LOC for full monitoring tool with CLI, MCP server, HTTP API, SQLite, SSH test runner, peer mesh, and self-monitoring. | | |
| 28 | 28 | ||
| 29 | 29 | ## Module Heatmap | |
| 30 | 30 | ||
| @@ -33,7 +33,7 @@ Run 12 cross-project audit. 359 tests (222 unit + 8 cli + 129 integration). 0 cl | |||
| 33 | 33 | | main | A | A | A | A | A | A | A | A | A | | |
| 34 | 34 | | cli | A | A | A | A | A | A | A | A | A | | |
| 35 | 35 | | peer | A | A | A | A | A | A | A | A | A | | |
| 36 | - | | db | A | A | A | A+ | A | A | A- | A | A | | |
| 36 | + | | db | B+ | A | A | A+ | A | A | A- | A | A | | |
| 37 | 37 | | api | A | A | A | A | A | A | A | A | A | | |
| 38 | 38 | | config | A | A | A | A | A | A | A | - | - | | |
| 39 | 39 | | tools | A | A | A | A | A | A | A | - | A | | |
| @@ -42,107 +42,86 @@ Run 12 cross-project audit. 359 tests (222 unit + 8 cli + 129 integration). 0 cl | |||
| 42 | 42 | | types | A | A | A | - | - | A | A+ | - | - | | |
| 43 | 43 | | alerts | A | A | A | A | A | A | A | - | A | | |
| 44 | 44 | ||
| 45 | - | **Heatmap changes (2026-03-14 remediation):** | |
| 46 | - | - main: Arch B+ -> A (split into main.rs 130 LOC + cli.rs), Test B -> A (display.rs has 27+ tests), Resilience A- -> A (CancellationToken + graceful shutdown) | |
| 47 | - | - api: Security A -> A (rate limiting added), Resilience A -> A (`with_graceful_shutdown`) | |
| 48 | - | - cli (new): All A — extracted command handlers with CancellationToken shutdown orchestration | |
| 49 | - | - display (new): All A — formatting logic with 27+ unit tests | |
| 50 | - | - alerts (new): All A — extracted for visibility in heatmap | |
| 51 | - | ||
| 52 | 45 | ### Cold Spots | |
| 53 | 46 | ||
| 54 | - | None remaining. All previous cold spots resolved: | |
| 55 | - | - main.rs split into main.rs (130 LOC) + cli.rs (command handlers) + display.rs (formatting with 27+ tests) | |
| 56 | - | - CLI display formatting extracted and tested via display.rs | |
| 57 | - | - All modules documented with `//!` docs | |
| 58 | - | ||
| 59 | - | ### Cold Spot Remediation History (First Audit) | |
| 60 | - | ||
| 61 | - | All 8 cold spots from the first audit were remediated before the second audit: | |
| 62 | - | ||
| 63 | - | - ~~**config.rs (Testing): C**~~ -> **A** -- 4 config parsing tests (full parse, defaults, on_missing default, hostname fallback). `on_missing` now uses `OnMissing` enum with serde. | |
| 64 | - | - ~~**api.rs (Testing): C**~~ -> **A** -- 5 API endpoint integration tests (status, target 404, peer/info, peer disabled, mesh). | |
| 65 | - | - ~~**api.rs (Concurrency): B**~~ -> **A** -- Lock decoupled from DB queries in `peer_status` and `mesh_view`. | |
| 66 | - | - ~~**peer.rs (Testing): B-**~~ -> **A** -- 9 heartbeat state machine tests (grace transitions, recovery, UUID first-contact, DB recording, identity, on_missing deserialization). | |
| 67 | - | - ~~**peer.rs (Concurrency): B**~~ -> **A** -- Lock decoupled from DB writes in both heartbeat handlers. | |
| 68 | - | - ~~**db.rs (Performance): B-**~~ -> **A** -- 4 indexes added to init_schema. | |
| 69 | - | - ~~**checks/http.rs (Testing): C**~~ -> **A** -- Response classification extracted into pure functions. 8 unit tests covering all status paths. | |
| 70 | - | - ~~**main.rs (Testing): C**~~ -> **B** -- Status icon logic extracted to `HealthStatus::icon()`. 4 types.rs tests. main.rs is thin orchestration over well-tested modules. | |
| 47 | + | 1. ~~**Migration idempotency (migrations 3-6)**~~ -- Incorrect finding. All 9 migrations have IF NOT EXISTS guards. Verified 2026-04-22. | |
| 48 | + | 2. **aws-lc-sys CVEs** -- 2 critical CVEs in transitive dependency. Blocked on upstream. No direct exposure. | |
| 71 | 49 | ||
| 72 | 50 | ## Strengths | |
| 73 | 51 | ||
| 74 | - | - **Lean architecture.** ~3,500 LOC delivers health checks, test orchestration, MCP server, HTTP API, peer mesh, CLI, alerting, TLS monitoring, route checks, and self-monitoring. No over-engineering. | |
| 52 | + | - **Lean architecture.** ~11,496 LOC delivers health checks, test orchestration, MCP server, HTTP API, peer mesh, CLI, alerting, TLS monitoring, route checks, and self-monitoring. | |
| 75 | 53 | - **Clean error handling.** `?` propagation with typed PomError enum (thiserror, 8 variants). No panics in production paths. | |
| 76 | - | - **Correct security posture.** All SQL parameterized. Bearer token auth on API + peer mesh. Rate limiting (60 req/min). SSH hardened with `BatchMode=yes` + `--` separator. Shell-escape validation. | |
| 54 | + | - **Correct security posture.** All SQL parameterized. Bearer token auth on API + peer mesh. Rate limiting (60 req/min). SSH hardened with `BatchMode=yes` + `--` separator. | |
| 77 | 55 | - **Backward-compatible design.** All config sections are `#[serde(default)]`. Existing configs work unchanged. | |
| 78 | 56 | - **Good MCP integration.** 8 MCP tools with clear descriptions. Tools strip `raw_output` from test history to avoid flooding context. | |
| 79 | - | - **Comprehensive test suite.** 238 tests (152 unit + 86 integration) at ~68/KLOC covering DB, API, MCP tools, parsing, peer state machine, config, health classification, mock server health checks, TLS cert validation, and rate limiting. | |
| 57 | + | - **Comprehensive test suite.** 364 tests covering DB, API, MCP tools, parsing, peer state machine, config, health classification, mock server health checks, TLS cert validation, and rate limiting. | |
| 80 | 58 | - **Excellent lock discipline.** Peer module collects data under lock, releases lock, then performs DB writes. No lock contention under load. | |
| 81 | 59 | - **Robust shutdown.** CancellationToken with `tokio::select!` in all task loops. Graceful API server shutdown. Watchdog for silent panics. 5s grace period. | |
| 82 | 60 | ||
| 83 | 61 | ## Weaknesses | |
| 84 | 62 | ||
| 85 | - | All previously identified weaknesses have been resolved: | |
| 86 | - | - ~~**main.rs size**~~ — Split into main.rs (130 LOC) + cli.rs + display.rs | |
| 87 | - | - ~~**No typed error enum**~~ — PomError enum with thiserror (8 variants) | |
| 88 | - | - ~~**Missing module docs**~~ — All modules have `//!` docs | |
| 89 | - | - ~~**No migration versioning**~~ — schema_version table with numbered migrations (v1-v5) | |
| 63 | + | ### 1. ~~Migration idempotency bug~~ (Incorrect finding) | |
| 64 | + | All 9 migrations correctly use `CREATE TABLE IF NOT EXISTS`. This finding was incorrect in the original Run 15 audit. Verified by reading db.rs on 2026-04-22. | |
| 65 | + | ||
| 66 | + | ### 2. aws-lc-sys critical CVEs | |
| 67 | + | 2 critical CVEs in aws-lc-sys transitive dependency. Blocked on upstream fix. No direct exposure but dependency audit tools flag it. | |
| 90 | 68 | ||
| 91 | 69 | ## Mandatory Surprise | |
| 92 | 70 | ||
| 93 | - | **Surprise: The peer mesh fetches `/api/peer/status` as a SECOND HTTP call after `/api/peer/info` in every heartbeat loop iteration, completely independently.** | |
| 71 | + | **~~Migration idempotency bug~~ (Incorrect finding)** | |
| 94 | 72 | ||
| 95 | - | In `peer.rs:196-213`, after the heartbeat success/failure handling completes, there is a second HTTP GET to `/api/peer/status` that caches the full status data for mesh aggregation. This means every heartbeat cycle makes TWO HTTP requests to each peer. | |
| 73 | + | The original Run 15 audit claimed migrations 3-6 lacked IF NOT EXISTS guards. This was incorrect -- all 9 migrations in db.rs have always used `CREATE TABLE IF NOT EXISTS`. Verified 2026-04-22 by reading the source directly. | |
| 96 | 74 | ||
| 97 | - | **Verdict: Actually fine.** | |
| 75 | + | **Verdict:** No issue. The audit finding was wrong. | |
| 98 | 76 | ||
| 99 | - | This is intentional and well-designed. The `/api/peer/info` call is the heartbeat probe (lightweight, just identity data), while `/api/peer/status` fetches the full target/peer view for mesh aggregation. Separating them means: | |
| 100 | - | 1. The heartbeat latency measurement is accurate (not inflated by the larger status response). | |
| 101 | - | 2. A failure to fetch status data does not affect heartbeat state transitions (it logs at debug level and moves on). | |
| 102 | - | 3. The mesh view (`/api/mesh`) can show each peer's own view of their targets, enabling cross-instance monitoring. | |
| 77 | + | ### Previous Surprise | |
| 103 | 78 | ||
| 104 | - | The second request's failure is handled gracefully with `tracing::debug!` -- no state corruption, no retries, no panics. Good design. | |
| 79 | + | **Peer mesh dual HTTP call per heartbeat** -- intentional and well-designed separation of heartbeat probe from status data fetch. Verdict: Actually fine. | |
| 105 | 80 | ||
| 106 | 81 | ## Action Items | |
| 107 | 82 | ||
| 108 | 83 | Filed in `docs/mnw/pom/todo.md`. | |
| 109 | 84 | ||
| 110 | - | ### Third Audit (2026-03-13, pre-launch skeptical lens) -- 7 items, all resolved | |
| 111 | - | ||
| 112 | - | 1. ~~**[CRITICAL]** Remove Postmark API token from deployment configs~~ — Done — moved to POM_POSTMARK_TOKEN env var | |
| 113 | - | 2. ~~**[MEDIUM]** Add API authentication~~ — Done — bearer token middleware, 5 tests | |
| 114 | - | 3. ~~**[MEDIUM]** Add peer mesh authentication~~ — Done — per-peer token field, heartbeat sends Authorization header | |
| 115 | - | 4. ~~**[LOW]** Add integration tests for core functions~~ — Done — 9 new integration tests (check_health mock servers, check_tls self-signed cert) | |
| 116 | - | 5. ~~**[LOW]** Add self-monitoring~~ — Done — `/api/health` endpoint (public, no auth, returns status + version) | |
| 117 | - | 6. ~~**[LOW]** Shell-escape SSH test filter parameter~~ — Done (alphanumeric + `_:-` allowlist) | |
| 118 | - | 7. ~~**[LOW]** Reject peer UUID mismatch~~ — Done (tracing::error, skip status update, increment failures) | |
| 119 | - | ||
| 120 | - | ### Security Deep Dive (2026-03-13) — Complete (2/2) | |
| 121 | - | ||
| 122 | - | 8. ~~**[MEDIUM]** SSH `--` separator~~ — Done (`checks/ssh.rs`: `.arg("--")` between hostname and command prevents option injection via filter strings) | |
| 123 | - | 9. ~~**[LOW]** HTTP response size limit~~ — Done (`checks/http.rs`: `MAX_RESPONSE_BYTES` 10MB constant, content-length header check + post-read size verification) | |
| 124 | - | ||
| 125 | - | ### Second Audit (2026-03-11) -- 5 items | |
| 126 | - | ||
| 127 | - | 1. Extract CLI command handlers from main.rs into cli/ submodule (587 LOC, 7 commands) | |
| 128 | - | 2. Add typed PomError enum with thiserror (currently Box<dyn Error>, diverges from cross-project standard) | |
| 129 | - | 3. Add .DS_Store and IDE dirs (.idea/, .vscode/) to .gitignore | |
| 130 | - | 4. Add module-level //! docs to main.rs and config.rs | |
| 131 | - | 5. Add migration versioning (PRAGMA user_version) before next schema change | |
| 132 | - | ||
| 133 | - | ### First Audit (2026-03-10) -- 11 items, all resolved | |
| 134 | - | ||
| 135 | - | 1. ~~Add DB indexes~~ -- Done (4 indexes added to init_schema) | |
| 136 | - | 2. ~~Fix clippy warnings~~ -- Done (4 collapsible_if warnings fixed with Rust 2024 let chains) | |
| 137 | - | 3. ~~Decouple mesh lock from DB writes in heartbeat handlers~~ -- Done | |
| 138 | - | 4. ~~Decouple mesh read lock from DB queries in API handlers~~ -- Done | |
| 139 | - | 5. ~~Log /api/peer/status fetch failures~~ -- Done (tracing::debug) | |
| 140 | - | 6. ~~Include peer heartbeat prune count~~ -- Done (3-tuple return) | |
| 141 | - | 7. ~~Add module docs~~ -- Done (//! docs on db.rs, config.rs, peer.rs, types.rs, lib.rs) | |
| 142 | - | 8. ~~Change PeerConfig.on_missing to OnMissing enum~~ -- Done | |
| 143 | - | 9. ~~Add API endpoint tests~~ -- Done (5 tests) | |
| 144 | - | 10. ~~Add heartbeat state machine tests~~ -- Done (9 tests) | |
| 145 | - | 11. ~~Add config parsing tests~~ -- Done (4 tests) | |
| 85 | + | ### Run 15 (2026-04-18, corrected 2026-04-22) | |
| 86 | + | 1. ~~**[MEDIUM]** Add IF NOT EXISTS to migrations 3-6~~ -- Incorrect finding. All migrations already have guards. | |
| 87 | + | 2. **[LOW]** Monitor aws-lc-sys for CVE fixes (blocked on upstream) | |
| 88 | + | ||
| 89 | + | ### All resolved (previous audits) | |
| 90 | + | 3. ~~**[CRITICAL]** Remove Postmark API token from deployment configs~~ -- Done | |
| 91 | + | 4. ~~**[MEDIUM]** Add API authentication~~ -- Done | |
| 92 | + | 5. ~~**[MEDIUM]** Add peer mesh authentication~~ -- Done | |
| 93 | + | 6. ~~**[MEDIUM]** SSH `--` separator~~ -- Done | |
| 94 | + | 7. ~~**[LOW]** Add integration tests for core functions~~ -- Done | |
| 95 | + | 8. ~~**[LOW]** Add self-monitoring~~ -- Done | |
| 96 | + | 9. ~~**[LOW]** Shell-escape SSH test filter parameter~~ -- Done | |
| 97 | + | 10. ~~**[LOW]** Reject peer UUID mismatch~~ -- Done | |
| 98 | + | 11. ~~**[LOW]** HTTP response size limit~~ -- Done | |
| 99 | + | ||
| 100 | + | ### First/Second Audit -- All resolved | |
| 101 | + | - Extract CLI command handlers -- Done | |
| 102 | + | - Add typed PomError enum -- Done | |
| 103 | + | - Add .DS_Store/.idea/.vscode to .gitignore -- Done | |
| 104 | + | - Add module-level //! docs -- Done | |
| 105 | + | - Add migration versioning -- Done | |
| 106 | + | - Add DB indexes -- Done | |
| 107 | + | - Fix clippy warnings -- Done | |
| 108 | + | - Decouple mesh lock from DB writes -- Done | |
| 109 | + | - Add API/heartbeat/config tests -- Done | |
| 110 | + | ||
| 111 | + | ## Metrics Over Time | |
| 112 | + | ||
| 113 | + | | Audit Date | LOC | Rust Files | Tests | Tests/KLOC | Clippy Warnings | Cold Spots | Overall | | |
| 114 | + | |------------|-----|-----------|-------|-----------|----------------|------------|---------| | |
| 115 | + | | 2026-03-10 | ~3,500 | ~12 | 0 | 0 | 4 | 8 | B | | |
| 116 | + | | 2026-03-11 (remediation) | ~3,500 | ~12 | 57 | ~16 | 0 | 0 | A- | | |
| 117 | + | | 2026-03-13 (pre-launch) | ~3,800 | ~14 | 152 | ~40 | 0 | 0 | A | | |
| 118 | + | | 2026-03-14 (deep dive) | ~3,800 | ~14 | 152 | ~40 | 0 | 0 | A | | |
| 119 | + | | 2026-03-16 (Run 6) | ~5,900 | ~14 | 238 | ~40 | 0 | 0 | A | | |
| 120 | + | | 2026-03-18 (Run 9) | ~10,000 | ~14 | 359 | ~36 | 0 | 0 | A | | |
| 121 | + | | 2026-03-28 (Run 12) | ~10,000 | ~14 | 359 | ~36 | 0 | 0 | A | | |
| 122 | + | | 2026-04-15 (Run 14) | ~11,496 | -- | 364 | ~32 | 0 | 0 | A | | |
| 123 | + | | 2026-04-18 (Run 15) | ~11,496 | -- | 364 | ~32 | 0 | 2 | A- | | |
| 124 | + | | 2026-04-22 (Run 15 corrected) | ~11,496 | -- | 364 | ~32 | 0 | 1 | A | | |
| 146 | 125 | ||
| 147 | 126 | --- | |
| 148 | 127 |
| @@ -5,5 +5,6 @@ pub mod http; | |||
| 5 | 5 | pub mod parse; | |
| 6 | 6 | pub mod routes; | |
| 7 | 7 | pub mod ssh; | |
| 8 | + | pub mod ssh_banner; | |
| 8 | 9 | pub mod tls; | |
| 9 | 10 | pub mod whois; |
| @@ -0,0 +1,120 @@ | |||
| 1 | + | //! SSH banner check — TCP connect and verify SSH protocol banner. | |
| 2 | + | ||
| 3 | + | use std::time::{Duration, Instant}; | |
| 4 | + | ||
| 5 | + | use tokio::io::AsyncReadExt; | |
| 6 | + | use tokio::net::TcpStream; | |
| 7 | + | ||
| 8 | + | use crate::config::SshBannerConfig; | |
| 9 | + | use crate::types::{HealthSnapshot, HealthStatus}; | |
| 10 | + | ||
| 11 | + | /// Connect to an SSH server and verify the banner starts with "SSH-". | |
| 12 | + | #[tracing::instrument(skip_all)] | |
| 13 | + | pub async fn check_ssh_banner(target_name: &str, config: &SshBannerConfig) -> HealthSnapshot { | |
| 14 | + | let addr = format!("{}:{}", config.host, config.port); | |
| 15 | + | let timeout = Duration::from_secs(config.timeout_secs); | |
| 16 | + | let checked_at = chrono::Utc::now().to_rfc3339(); | |
| 17 | + | let start = Instant::now(); | |
| 18 | + | ||
| 19 | + | let stream = match tokio::time::timeout(timeout, TcpStream::connect(&addr)).await { | |
| 20 | + | Ok(Ok(stream)) => stream, | |
| 21 | + | Ok(Err(e)) => { | |
| 22 | + | return HealthSnapshot { | |
| 23 | + | id: None, | |
| 24 | + | target: target_name.to_string(), | |
| 25 | + | status: HealthStatus::Unreachable, | |
| 26 | + | checked_at, | |
| 27 | + | response_time_ms: start.elapsed().as_millis() as i64, | |
| 28 | + | details: None, | |
| 29 | + | error: Some(format!("TCP connect to {addr} failed: {e}")), | |
| 30 | + | }; | |
| 31 | + | } | |
| 32 | + | Err(_) => { | |
| 33 | + | return HealthSnapshot { | |
| 34 | + | id: None, | |
| 35 | + | target: target_name.to_string(), | |
| 36 | + | status: HealthStatus::Unreachable, | |
| 37 | + | checked_at, | |
| 38 | + | response_time_ms: start.elapsed().as_millis() as i64, | |
| 39 | + | details: None, | |
| 40 | + | error: Some(format!("TCP connect to {addr} timed out after {timeout:?}")), | |
| 41 | + | }; | |
| 42 | + | } | |
| 43 | + | }; | |
| 44 | + | ||
| 45 | + | let mut buf = [0u8; 256]; | |
| 46 | + | let mut stream = stream; | |
| 47 | + | match tokio::time::timeout(Duration::from_secs(5), stream.read(&mut buf)).await { | |
| 48 | + | Ok(Ok(n)) if n > 0 => { | |
| 49 | + | let banner = String::from_utf8_lossy(&buf[..n]); | |
| 50 | + | let banner = banner.trim().to_string(); | |
| 51 | + | let response_time_ms = start.elapsed().as_millis() as i64; | |
| 52 | + | ||
| 53 | + | if banner.starts_with("SSH-") { | |
| 54 | + | HealthSnapshot { | |
| 55 | + | id: None, | |
| 56 | + | target: target_name.to_string(), | |
| 57 | + | status: HealthStatus::Operational, | |
| 58 | + | checked_at, | |
| 59 | + | response_time_ms, | |
| 60 | + | details: None, | |
| 61 | + | error: None, | |
| 62 | + | } | |
| 63 | + | } else { | |
| 64 | + | HealthSnapshot { | |
| 65 | + | id: None, | |
| 66 | + | target: target_name.to_string(), | |
| 67 | + | status: HealthStatus::Degraded, | |
| 68 | + | checked_at, | |
| 69 | + | response_time_ms, | |
| 70 | + | details: None, | |
| 71 | + | error: Some(format!("unexpected banner: {banner}")), | |
| 72 | + | } | |
| 73 | + | } | |
| 74 | + | } | |
| 75 | + | Ok(Ok(_)) => HealthSnapshot { | |
| 76 | + | id: None, | |
| 77 | + | target: target_name.to_string(), | |
| 78 | + | status: HealthStatus::Degraded, | |
| 79 | + | checked_at, | |
| 80 | + | response_time_ms: start.elapsed().as_millis() as i64, | |
| 81 | + | details: None, | |
| 82 | + | error: Some("empty response".to_string()), | |
| 83 | + | }, | |
| 84 | + | Ok(Err(e)) => HealthSnapshot { | |
| 85 | + | id: None, | |
| 86 | + | target: target_name.to_string(), | |
| 87 | + | status: HealthStatus::Degraded, | |
| 88 | + | checked_at, | |
| 89 | + | response_time_ms: start.elapsed().as_millis() as i64, | |
| 90 | + | details: None, | |
| 91 | + | error: Some(format!("banner read failed: {e}")), | |
| 92 | + | }, | |
| 93 | + | Err(_) => HealthSnapshot { | |
| 94 | + | id: None, | |
| 95 | + | target: target_name.to_string(), | |
| 96 | + | status: HealthStatus::Degraded, | |
| 97 | + | checked_at, | |
| 98 | + | response_time_ms: start.elapsed().as_millis() as i64, | |
| 99 | + | details: None, | |
| 100 | + | error: Some("banner read timed out".to_string()), | |
| 101 | + | }, | |
| 102 | + | } | |
| 103 | + | } | |
| 104 | + | ||
| 105 | + | #[cfg(test)] | |
| 106 | + | mod tests { | |
| 107 | + | use super::*; | |
| 108 | + | ||
| 109 | + | #[tokio::test] | |
| 110 | + | async fn ssh_banner_unreachable() { | |
| 111 | + | let config = SshBannerConfig { | |
| 112 | + | host: "127.0.0.1".to_string(), | |
| 113 | + | port: 19999, | |
| 114 | + | timeout_secs: 1, | |
| 115 | + | }; | |
| 116 | + | let result = check_ssh_banner("test", &config).await; | |
| 117 | + | assert_eq!(result.status, HealthStatus::Unreachable); | |
| 118 | + | assert!(result.error.unwrap().contains("TCP connect")); | |
| 119 | + | } | |
| 120 | + | } |
| @@ -2,7 +2,7 @@ use tokio::task::JoinHandle; | |||
| 2 | 2 | use tracing::info; | |
| 3 | 3 | ||
| 4 | 4 | use pom::alerts::Alerter; | |
| 5 | - | use pom::checks::http; | |
| 5 | + | use pom::checks::{http, ssh_banner}; | |
| 6 | 6 | use pom::config::Config; | |
| 7 | 7 | use pom::db; | |
| 8 | 8 | use pom::types::{HealthStatus, LatencyStats}; | |
| @@ -137,5 +137,58 @@ pub(crate) fn spawn_health_tasks( | |||
| 137 | 137 | } | |
| 138 | 138 | } | |
| 139 | 139 | ||
| 140 | + | // SSH banner checks — stored as health check entries with target name "{name}:ssh" | |
| 141 | + | for name in config.target_names() { | |
| 142 | + | let target_config = config.get_target(&name).unwrap().clone(); | |
| 143 | + | if let Some(ssh_config) = target_config.ssh_banner { | |
| 144 | + | let interval_secs = default_interval; | |
| 145 | + | let pool = pool.clone(); | |
| 146 | + | let check_name = format!("{name}:ssh"); | |
| 147 | + | let label = target_config.label.clone(); | |
| 148 | + | let alerter = alerter.clone(); | |
| 149 | + | let cancel = cancel.clone(); | |
| 150 | + | ||
| 151 | + | info!("{check_name}: SSH banner check every {interval_secs}s"); | |
| 152 | + | ||
| 153 | + | handles.push(tokio::spawn(async move { | |
| 154 | + | let mut interval = tokio::time::interval( | |
| 155 | + | std::time::Duration::from_secs(interval_secs), | |
| 156 | + | ); | |
| 157 | + | interval.tick().await; | |
| 158 | + | loop { | |
| 159 | + | tokio::select! { | |
| 160 | + | _ = cancel.cancelled() => break, | |
| 161 | + | _ = interval.tick() => {} | |
| 162 | + | } | |
| 163 | + | let previous = db::get_latest_health(&pool, &check_name).await.ok().flatten(); | |
| 164 | + | let snapshot = ssh_banner::check_ssh_banner(&check_name, &ssh_config).await; | |
| 165 | + | info!("{}: {} ({}ms)", check_name, snapshot.status, snapshot.response_time_ms); | |
| 166 | + | if let Err(e) = db::insert_health_check(&pool, &snapshot).await { | |
| 167 | + | tracing::error!("{check_name}: failed to store SSH banner check: {e}"); | |
| 168 | + | } | |
| 169 | + | ||
| 170 | + | if let Some(ref alerter) = alerter | |
| 171 | + | && let Some(ref prev) = previous | |
| 172 | + | && prev.status != snapshot.status | |
| 173 | + | { | |
| 174 | + | let from = prev.status.to_string(); | |
| 175 | + | let to = snapshot.status.to_string(); | |
| 176 | + | if snapshot.status == HealthStatus::Operational { | |
| 177 | + | alerter.send_health_recovery(&check_name, &label, &from).await; | |
| 178 | + | } else { | |
| 179 | + | alerter.send_health_alert( | |
| 180 | + | &check_name, | |
| 181 | + | &label, | |
| 182 | + | &from, | |
| 183 | + | &to, | |
| 184 | + | snapshot.error.as_deref(), | |
| 185 | + | ).await; | |
| 186 | + | } | |
| 187 | + | } | |
| 188 | + | } | |
| 189 | + | })); | |
| 190 | + | } | |
| 191 | + | } | |
| 192 | + | ||
| 140 | 193 | handles | |
| 141 | 194 | } |
| @@ -177,6 +177,8 @@ pub struct TargetConfig { | |||
| 177 | 177 | pub cors: Vec<CorsCheck>, | |
| 178 | 178 | /// Local filesystem backup verification. `None` disables backup checks. | |
| 179 | 179 | pub backups: Option<BackupConfig>, | |
| 180 | + | /// SSH banner check (TCP connect + verify "SSH-" banner). `None` disables. | |
| 181 | + | pub ssh_banner: Option<SshBannerConfig>, | |
| 180 | 182 | } | |
| 181 | 183 | ||
| 182 | 184 | #[derive(Debug, Clone, Deserialize)] | |
| @@ -241,6 +243,27 @@ fn default_backup_interval() -> u64 { | |||
| 241 | 243 | 3600 | |
| 242 | 244 | } | |
| 243 | 245 | ||
| 246 | + | /// SSH banner check — TCP connect and verify the server responds with "SSH-". | |
| 247 | + | #[derive(Debug, Clone, Deserialize)] | |
| 248 | + | pub struct SshBannerConfig { | |
| 249 | + | /// Hostname or IP to connect to. | |
| 250 | + | pub host: String, | |
| 251 | + | /// TCP port (defaults to 22). | |
| 252 | + | #[serde(default = "default_ssh_banner_port")] | |
| 253 | + | pub port: u16, | |
| 254 | + | /// Connection timeout in seconds. | |
| 255 | + | #[serde(default = "default_ssh_banner_timeout")] | |
| 256 | + | pub timeout_secs: u64, | |
| 257 | + | } | |
| 258 | + | ||
| 259 | + | fn default_ssh_banner_port() -> u16 { | |
| 260 | + | 22 | |
| 261 | + | } | |
| 262 | + | ||
| 263 | + | fn default_ssh_banner_timeout() -> u64 { | |
| 264 | + | 5 | |
| 265 | + | } | |
| 266 | + | ||
| 244 | 267 | #[derive(Debug, Clone, Deserialize)] | |
| 245 | 268 | pub struct TlsConfig { | |
| 246 | 269 | /// Hostname to connect to for the TLS check. |
| @@ -68,7 +68,7 @@ const MIGRATIONS: &[(i64, &str, &str)] = &[ | |||
| 68 | 68 | CREATE INDEX IF NOT EXISTS idx_alerts_target_sent ON alerts(target, sent_at); | |
| 69 | 69 | "#), | |
| 70 | 70 | (3, "add tls_checks table", r#" | |
| 71 | - | CREATE TABLE tls_checks ( | |
| 71 | + | CREATE TABLE IF NOT EXISTS tls_checks ( | |
| 72 | 72 | id INTEGER PRIMARY KEY AUTOINCREMENT, | |
| 73 | 73 | target TEXT NOT NULL, | |
| 74 | 74 | host TEXT NOT NULL, | |
| @@ -81,10 +81,10 @@ const MIGRATIONS: &[(i64, &str, &str)] = &[ | |||
| 81 | 81 | checked_at TEXT NOT NULL, | |
| 82 | 82 | error TEXT | |
| 83 | 83 | ); | |
| 84 | - | CREATE INDEX idx_tls_checks_target_id ON tls_checks(target, id DESC); | |
| 84 | + | CREATE INDEX IF NOT EXISTS idx_tls_checks_target_id ON tls_checks(target, id DESC); | |
| 85 | 85 | "#), | |
| 86 | 86 | (4, "add incidents table", r#" | |
| 87 | - | CREATE TABLE incidents ( | |
| 87 | + | CREATE TABLE IF NOT EXISTS incidents ( | |
| 88 | 88 | id INTEGER PRIMARY KEY AUTOINCREMENT, | |
| 89 | 89 | target TEXT NOT NULL, | |
| 90 | 90 | started_at TEXT NOT NULL, | |
| @@ -93,10 +93,10 @@ const MIGRATIONS: &[(i64, &str, &str)] = &[ | |||
| 93 | 93 | from_status TEXT NOT NULL, | |
| 94 | 94 | to_status TEXT NOT NULL | |
| 95 | 95 | ); | |
| 96 | - | CREATE INDEX idx_incidents_target_id ON incidents(target, id DESC); | |
| 96 | + | CREATE INDEX IF NOT EXISTS idx_incidents_target_id ON incidents(target, id DESC); | |
| 97 | 97 | "#), | |
| 98 | 98 | (5, "add route_checks table", r#" | |
| 99 | - | CREATE TABLE route_checks ( | |
| 99 | + | CREATE TABLE IF NOT EXISTS route_checks ( | |
| 100 | 100 | id INTEGER PRIMARY KEY AUTOINCREMENT, | |
| 101 | 101 | target TEXT NOT NULL, | |
| 102 | 102 | path TEXT NOT NULL, | |
| @@ -106,11 +106,11 @@ const MIGRATIONS: &[(i64, &str, &str)] = &[ | |||
| 106 | 106 | checked_at TEXT NOT NULL, | |
| 107 | 107 | error TEXT | |
| 108 | 108 | ); | |
| 109 | - | CREATE INDEX idx_route_checks_target_path ON route_checks(target, path, id DESC); | |
| 110 | - | CREATE INDEX idx_route_checks_target ON route_checks(target, checked_at DESC); | |
| 109 | + | CREATE INDEX IF NOT EXISTS idx_route_checks_target_path ON route_checks(target, path, id DESC); | |
| 110 | + | CREATE INDEX IF NOT EXISTS idx_route_checks_target ON route_checks(target, checked_at DESC); | |
| 111 | 111 | "#), | |
| 112 | 112 | (6, "add dns_checks and whois_checks tables", r#" | |
| 113 | - | CREATE TABLE dns_checks ( | |
| 113 | + | CREATE TABLE IF NOT EXISTS dns_checks ( | |
| 114 | 114 | id INTEGER PRIMARY KEY AUTOINCREMENT, | |
| 115 | 115 | target TEXT NOT NULL, | |
| 116 | 116 | name TEXT NOT NULL, | |
| @@ -121,9 +121,9 @@ const MIGRATIONS: &[(i64, &str, &str)] = &[ | |||
| 121 | 121 | checked_at TEXT NOT NULL, | |
| 122 | 122 | error TEXT | |
| 123 | 123 | ); | |
| 124 | - | CREATE INDEX idx_dns_checks_target ON dns_checks(target, name, id DESC); | |
| 124 | + | CREATE INDEX IF NOT EXISTS idx_dns_checks_target ON dns_checks(target, name, id DESC); | |
| 125 | 125 | ||
| 126 | - | CREATE TABLE whois_checks ( | |
| 126 | + | CREATE TABLE IF NOT EXISTS whois_checks ( | |
| 127 | 127 | id INTEGER PRIMARY KEY AUTOINCREMENT, | |
| 128 | 128 | target TEXT NOT NULL, | |
| 129 | 129 | domain TEXT NOT NULL, | |
| @@ -134,7 +134,7 @@ const MIGRATIONS: &[(i64, &str, &str)] = &[ | |||
| 134 | 134 | checked_at TEXT NOT NULL, | |
| 135 | 135 | error TEXT | |
| 136 | 136 | ); | |
| 137 | - | CREATE INDEX idx_whois_checks_target ON whois_checks(target, id DESC); | |
| 137 | + | CREATE INDEX IF NOT EXISTS idx_whois_checks_target ON whois_checks(target, id DESC); | |
| 138 | 138 | "#), | |
| 139 | 139 | (7, "add test_details table", r#" | |
| 140 | 140 | CREATE TABLE IF NOT EXISTS test_details ( |
| @@ -1799,7 +1799,7 @@ dependencies = [ | |||
| 1799 | 1799 | ||
| 1800 | 1800 | [[package]] | |
| 1801 | 1801 | name = "docengine" | |
| 1802 | - | version = "0.3.0" | |
| 1802 | + | version = "0.3.1" | |
| 1803 | 1803 | dependencies = [ | |
| 1804 | 1804 | "ammonia", | |
| 1805 | 1805 | "pulldown-cmark", | |
| @@ -3373,7 +3373,7 @@ dependencies = [ | |||
| 3373 | 3373 | ||
| 3374 | 3374 | [[package]] | |
| 3375 | 3375 | name = "makenotwork" | |
| 3376 | - | version = "0.3.24" | |
| 3376 | + | version = "0.3.25" | |
| 3377 | 3377 | dependencies = [ | |
| 3378 | 3378 | "anyhow", | |
| 3379 | 3379 | "argon2", | |
| @@ -4794,7 +4794,7 @@ checksum = "9774ba4a74de5f7b1c1451ed6cd5285a32eddb5cccb8cc655a4e50009e06477f" | |||
| 4794 | 4794 | ||
| 4795 | 4795 | [[package]] | |
| 4796 | 4796 | name = "s3-storage" | |
| 4797 | - | version = "0.1.0" | |
| 4797 | + | version = "0.1.1" | |
| 4798 | 4798 | dependencies = [ | |
| 4799 | 4799 | "aws-config", | |
| 4800 | 4800 | "aws-sdk-s3", | |
| @@ -5592,7 +5592,7 @@ dependencies = [ | |||
| 5592 | 5592 | ||
| 5593 | 5593 | [[package]] | |
| 5594 | 5594 | name = "tagtree" | |
| 5595 | - | version = "0.3.0" | |
| 5595 | + | version = "0.3.1" | |
| 5596 | 5596 | ||
| 5597 | 5597 | [[package]] | |
| 5598 | 5598 | name = "tap" |
| @@ -17,6 +17,9 @@ set -e | |||
| 17 | 17 | ||
| 18 | 18 | # Configuration | |
| 19 | 19 | SERVER="root@100.120.174.96" | |
| 20 | + | SSH_PORT=2200 | |
| 21 | + | SSH_OPTS="-p $SSH_PORT" | |
| 22 | + | SCP_OPTS="-P $SSH_PORT" | |
| 20 | 23 | REMOTE_DIR="/opt/makenotwork" | |
| 21 | 24 | BINARY_NAME="makenotwork" | |
| 22 | 25 | TARGET="x86_64-unknown-linux-gnu" | |
| @@ -30,20 +33,20 @@ fi | |||
| 30 | 33 | ||
| 31 | 34 | upload_config() { | |
| 32 | 35 | echo "[config] Uploading configuration files..." | |
| 33 | - | scp $DEPLOY_DIR/Caddyfile $SERVER:/etc/caddy/Caddyfile | |
| 34 | - | scp $DEPLOY_DIR/makenotwork.service $SERVER:/etc/systemd/system/makenotwork.service | |
| 35 | - | scp $DEPLOY_DIR/backup-db.sh $SERVER:$REMOTE_DIR/backup-db.sh | |
| 36 | - | ssh $SERVER "chmod +x $REMOTE_DIR/backup-db.sh" | |
| 36 | + | scp $SCP_OPTS $DEPLOY_DIR/Caddyfile $SERVER:/etc/caddy/Caddyfile | |
| 37 | + | scp $SCP_OPTS $DEPLOY_DIR/makenotwork.service $SERVER:/etc/systemd/system/makenotwork.service | |
| 38 | + | scp $SCP_OPTS $DEPLOY_DIR/backup-db.sh $SERVER:$REMOTE_DIR/backup-db.sh | |
| 39 | + | ssh $SSH_OPTS $SERVER "chmod +x $REMOTE_DIR/backup-db.sh" | |
| 37 | 40 | ||
| 38 | 41 | # Error pages | |
| 39 | - | ssh $SERVER "mkdir -p $REMOTE_DIR/error-pages" | |
| 40 | - | scp $DEPLOY_DIR/error-pages/*.html $SERVER:$REMOTE_DIR/error-pages/ | |
| 42 | + | ssh $SSH_OPTS $SERVER "mkdir -p $REMOTE_DIR/error-pages" | |
| 43 | + | scp $SCP_OPTS $DEPLOY_DIR/error-pages/*.html $SERVER:$REMOTE_DIR/error-pages/ | |
| 41 | 44 | ||
| 42 | 45 | # Git SSH and security config files | |
| 43 | - | ssh $SERVER "mkdir -p $REMOTE_DIR/deploy" | |
| 44 | - | scp $DEPLOY_DIR/sshd-git.conf $DEPLOY_DIR/fail2ban-sshd.conf $DEPLOY_DIR/setup-firewall.sh $SERVER:$REMOTE_DIR/deploy/ | |
| 45 | - | scp $DEPLOY_DIR/setup-git-ssh.sh $DEPLOY_DIR/setup-ssh-keys.sh $SERVER:$REMOTE_DIR/deploy/ 2>/dev/null || true | |
| 46 | - | ssh $SERVER "chmod +x $REMOTE_DIR/deploy/setup-firewall.sh $REMOTE_DIR/deploy/setup-git-ssh.sh $REMOTE_DIR/deploy/setup-ssh-keys.sh 2>/dev/null || true" | |
| 46 | + | ssh $SSH_OPTS $SERVER "mkdir -p $REMOTE_DIR/deploy" | |
| 47 | + | scp $SCP_OPTS $DEPLOY_DIR/sshd-git.conf $DEPLOY_DIR/fail2ban-sshd.conf $DEPLOY_DIR/setup-firewall.sh $SERVER:$REMOTE_DIR/deploy/ | |
| 48 | + | scp $SCP_OPTS $DEPLOY_DIR/setup-git-ssh.sh $DEPLOY_DIR/setup-ssh-keys.sh $SERVER:$REMOTE_DIR/deploy/ 2>/dev/null || true | |
| 49 | + | ssh $SSH_OPTS $SERVER "chmod +x $REMOTE_DIR/deploy/setup-firewall.sh $REMOTE_DIR/deploy/setup-git-ssh.sh $REMOTE_DIR/deploy/setup-ssh-keys.sh 2>/dev/null || true" | |
| 47 | 50 | ||
| 48 | 51 | # Minify CSS for production (restore source on exit) | |
| 49 | 52 | echo "[config] Minifying CSS..." | |
| @@ -72,7 +75,7 @@ upload_config() { | |||
| 72 | 75 | rsync -az --delete rustdoc-out/ $SERVER:$REMOTE_DIR/rustdoc/ | |
| 73 | 76 | ||
| 74 | 77 | # Reload systemd and restart Caddy | |
| 75 | - | ssh $SERVER "systemctl daemon-reload && systemctl restart caddy" | |
| 78 | + | ssh $SSH_OPTS $SERVER "systemctl daemon-reload && systemctl restart caddy" | |
| 76 | 79 | echo "[config] Done" | |
| 77 | 80 | } | |
| 78 | 81 | ||
| @@ -85,13 +88,13 @@ build_binary() { | |||
| 85 | 88 | ||
| 86 | 89 | upload_binary() { | |
| 87 | 90 | echo "[upload] Stopping service and uploading binary..." | |
| 88 | - | ssh $SERVER "systemctl stop makenotwork || true" | |
| 89 | - | scp target/$TARGET/release/$BINARY_NAME $SERVER:$REMOTE_DIR/$BINARY_NAME | |
| 90 | - | ssh $SERVER "chmod +x $REMOTE_DIR/$BINARY_NAME" | |
| 91 | + | ssh $SSH_OPTS $SERVER "systemctl stop makenotwork || true" | |
| 92 | + | scp $SCP_OPTS target/$TARGET/release/$BINARY_NAME $SERVER:$REMOTE_DIR/$BINARY_NAME | |
| 93 | + | ssh $SSH_OPTS $SERVER "chmod +x $REMOTE_DIR/$BINARY_NAME" | |
| 91 | 94 | # Also upload mnw-admin binary (used for SSH key management) | |
| 92 | 95 | if [ -f "target/$TARGET/release/mnw-admin" ]; then | |
| 93 | - | scp target/$TARGET/release/mnw-admin $SERVER:$REMOTE_DIR/mnw-admin | |
| 94 | - | ssh $SERVER "chmod +x $REMOTE_DIR/mnw-admin" | |
| 96 | + | scp $SCP_OPTS target/$TARGET/release/mnw-admin $SERVER:$REMOTE_DIR/mnw-admin | |
| 97 | + | ssh $SSH_OPTS $SERVER "chmod +x $REMOTE_DIR/mnw-admin" | |
| 95 | 98 | echo "[upload] mnw-admin binary uploaded" | |
| 96 | 99 | fi | |
| 97 | 100 | echo "[upload] Done" | |
| @@ -100,13 +103,13 @@ upload_binary() { | |||
| 100 | 103 | send_restart_warning() { | |
| 101 | 104 | echo "[warning] Sending 30s restart warning to users..." | |
| 102 | 105 | local token | |
| 103 | - | token=$(ssh $SERVER "grep '^CLI_SERVICE_TOKEN=' $REMOTE_DIR/.env 2>/dev/null | cut -d= -f2-" | tr -d '\r\n') | |
| 106 | + | token=$(ssh $SSH_OPTS $SERVER "grep '^CLI_SERVICE_TOKEN=' $REMOTE_DIR/.env 2>/dev/null | cut -d= -f2-" | tr -d '\r\n') | |
| 104 | 107 | if [ -z "$token" ]; then | |
| 105 | 108 | echo "[warning] CLI_SERVICE_TOKEN not found in .env, skipping warning" | |
| 106 | 109 | return 0 | |
| 107 | 110 | fi | |
| 108 | 111 | local status | |
| 109 | - | status=$(ssh $SERVER "curl -s -o /dev/null -w '%{http_code}' -X POST http://127.0.0.1:3000/api/internal/restart-warning -H 'Authorization: Bearer $token' -H 'Content-Type: application/json' -d '{\"seconds\": 30}'") | |
| 112 | + | status=$(ssh $SSH_OPTS $SERVER "curl -s -o /dev/null -w '%{http_code}' -X POST http://127.0.0.1:3000/api/internal/restart-warning -H 'Authorization: Bearer $token' -H 'Content-Type: application/json' -d '{\"seconds\": 30}'") | |
| 110 | 113 | if [ "$status" = "204" ]; then | |
| 111 | 114 | echo "[warning] Restart warning sent, waiting 30s..." | |
| 112 | 115 | sleep 30 | |
| @@ -117,13 +120,13 @@ send_restart_warning() { | |||
| 117 | 120 | ||
| 118 | 121 | restart_app() { | |
| 119 | 122 | echo "[restart] Restarting makenotwork..." | |
| 120 | - | ssh $SERVER "systemctl restart makenotwork" | |
| 123 | + | ssh $SSH_OPTS $SERVER "systemctl restart makenotwork" | |
| 121 | 124 | sleep 1 | |
| 122 | 125 | echo "" | |
| 123 | - | ssh $SERVER "systemctl status makenotwork --no-pager" | |
| 126 | + | ssh $SSH_OPTS $SERVER "systemctl status makenotwork --no-pager" | |
| 124 | 127 | echo "" | |
| 125 | 128 | echo "[restart] Verifying app responds..." | |
| 126 | - | ssh $SERVER "curl -s -o /dev/null -w 'HTTP %{http_code}\n' http://127.0.0.1:3000" | |
| 129 | + | ssh $SSH_OPTS $SERVER "curl -s -o /dev/null -w 'HTTP %{http_code}\n' http://127.0.0.1:3000" | |
| 127 | 130 | } | |
| 128 | 131 | ||
| 129 | 132 | case "${1:-full}" in |
| @@ -1,31 +1,31 @@ | |||
| 1 | 1 | # MakeNotWork -- Audit Review | |
| 2 | 2 | ||
| 3 | - | **Last audited:** 2026-04-06 (thirty-fourth audit, Run 13 cross-project) | |
| 4 | - | **Previous audit:** 2026-03-28 (thirty-third audit, Run 12 cross-project) | |
| 3 | + | **Last audited:** 2026-04-18 (thirty-sixth audit, Run 15 cross-project) | |
| 4 | + | **Previous audit:** 2026-04-15 (thirty-fifth audit, Run 14 cross-project) | |
| 5 | 5 | ||
| 6 | 6 | ## Overall Grade: A | |
| 7 | 7 | ||
| 8 | - | Run 13 cross-project audit. ~1,186 tests (632 unit + ~551 integration + 17 admin + 28 health). 0 clippy warnings. v0.3.19. Grade stable at A. Major additions since Run 12: video upload/playback (migration 053), content fingerprinting (051), bundled license text (052), maintainability splits (validation/, git/, payments/ directory modules), S3 storage extraction to shared crate (`shared/s3-storage/`). Dead code/duplication audit: ~40 lines removed. Run 12 test failures (delete_item_returns_toast, item_wizard_license_keys) resolved. | |
| 8 | + | Run 15 cross-project audit (updated 2026-04-22). 1,359 tests (all pass). 0 clippy warnings. v0.3.25. ~67,442 LOC. Test failures resolved. axum_extra::Form finding was incorrect (usage is correct for repeated form fields, all tests pass). db/models.rs split into 16 domain submodules (largest 384 LOC). Observability at 36% is a known gap but not grade-blocking. | |
| 9 | 9 | ||
| 10 | 10 | ## Scorecard | |
| 11 | 11 | ||
| 12 | 12 | | Dimension | Grade | Notes | | |
| 13 | 13 | |-----------|:-----:|-------| | |
| 14 | - | | Code Quality | A | thiserror errors; clean naming; `//!` on 153/153 files; proper `?` propagation; 0 clippy warnings | | |
| 15 | - | | Architecture | A | Single crate with clean internal modules; `lib.rs` exports `build_app` for test reuse; `pub(crate)` hides internal DB modules; view model layer separates DB from templates; mailing list delivery cleanly layered (db → scheduler → email) | | |
| 16 | - | | Testing | A | ~1,186 tests (632 unit + ~551 integration + 17 admin + 28 health); per-test DB isolation; load test scaffolding; 61 adversarial tests; 17 wizard tests; 10 mailing list tests; 6 video workflow tests | | |
| 17 | - | | Security | A+ | Zero SQL injection (parameterized queries only); Argon2 hashing; CSRF synchronizer tokens with constant-time comparison; session fixation prevention; account lockout; HIBP breach check; multi-layer file scanning (ClamAV + YARA + hash + archive); ammonia HTML sanitization; HMAC-signed email links; rate limiting on all write/auth endpoints; SameSite=Strict cookies; body size limit; admin routes hidden; Debug impls redact secrets; passkeys/WebAuthn; TOTP 2FA on all auth paths; self-purchase prevention; trust tiers for uploads. Security deep dive (2026-03-13): session cache fail-closed, scanner errors fail-closed (HeldForReview), scan status allowlist, JWT issuer validation. | | |
| 18 | - | | Performance | A | Parameterized queries with safety LIMITs; presigned S3 URLs (no proxy); `FOR UPDATE` on reorder; admin user list paginated (LIMIT/OFFSET + count query + HTMX prev/next); no N+1 queries; max_connections=10 default | | |
| 19 | - | | Documentation | A | Module-level `//!` on 100% of source files; public functions have `///` doc comments; API response conventions documented in `routes/api/mod.rs`; edge cases and state invariants documented | | |
| 20 | - | | Dependencies | A | 38 direct dependencies, well-justified; OpenSSL vendored; no unused crates; edition 2024; 2 vulns + 4 warnings in transitive deps (all blocked on upstream); aws-lc-sys + rustls-webpki fixed this audit | | |
| 21 | - | | Frontend | A | Askama auto-escaping; `\|safe` uses verified safe; HTMX with CSRF via X-CSRF-Token; responsive CSS; ~290 lines custom JS | | |
| 22 | - | | Type Safety | A+ | 14+ newtype UUID wrappers; 15+ domain enums; validated newtypes (Username, Slug, KeyCode) with constructors; `from_trusted` escape hatch; exhaustive matches via `impl_str_enum!`; form inputs auto-validated via Deserialize | | |
| 23 | - | | Observability | A | 301 `#[instrument(skip_all)]` annotations; structured JSON logging (prod); request ID propagation (x-request-id); TraceLayer with status-based log levels (error >=500, warn >=400, debug <400); background health monitor with DB history; alert emails on status transitions. Sentry removed (2026-03-18) — tracing compensates fully. | | |
| 24 | - | | Concurrency | A+ | Atomic DB operations for race-prone paths (Stripe connect, broadcast rate limit, release announcements); transaction + `FOR UPDATE` for reorder; graceful shutdown via watch channel; all purchase flows transactional | | |
| 25 | - | | Resilience | A | Graceful shutdown (SIGINT/SIGTERM); optional services degrade gracefully (S3, Stripe, scanning, git browser); timeouts on external HTTP calls (email: 10s, HIBP: 3s, ClamAV: 30s, MalwareBazaar: 5s); 3s DB pool acquire timeout | | |
| 26 | - | | API Consistency | A | Documented response conventions; JSON error layer on API routes; `ListResponse<T>` envelope on all list endpoints; HTMX-aware dual responses documented as intentional; rate limit tiers clearly separated | | |
| 27 | - | | Migration Safety | A | 53 additive migrations (001-053), auto-applied on boot; all schema changes additive (new tables, columns with defaults, indexes); no destructive migrations | | |
| 28 | - | | Codebase Size | A | ~29K source LOC for a full creator platform (~30+ features) is efficient; no files exceed the 500-line branching guideline. Largest files: tokens.rs (500), notifications.rs (311). DocEngine extracted to standalone crate (reduced from ~50K). Maintainability splits: validation/, git/, payments/ converted to directory modules. | | |
| 14 | + | | Code Quality | A | thiserror errors; clean naming; `//!` on all files; proper `?` propagation; 0 clippy warnings | | |
| 15 | + | | Architecture | A | Single crate with clean internal modules; `lib.rs` exports `build_app` for test reuse; `pub(crate)` hides internal DB modules; models split into 16 domain submodules (largest 384 LOC) | | |
| 16 | + | | Testing | A+ | 1,359 tests (all pass). Per-test database isolation. In-process load test harness. 61 adversarial exploit-attempt tests. | | |
| 17 | + | | Security | A+ | Zero SQL injection; Argon2 hashing; CSRF synchronizer tokens; session fixation prevention; account lockout; HIBP; multi-layer file scanning; ammonia sanitization; HMAC-signed links; rate limiting; passkeys/WebAuthn; TOTP 2FA; trust tiers | | |
| 18 | + | | Performance | A- | Parameterized queries with LIMITs; presigned S3 URLs; `FOR UPDATE` on reorder; paginated admin; no N+1; session touch cache (DashMap 30s TTL) | | |
| 19 | + | | Documentation | B+ | Module-level `//!` on all files; public function docs; API conventions documented; some docs lagging behind code growth | | |
| 20 | + | | Dependencies | A | 38 direct deps, well-justified; OpenSSL vendored; edition 2024; upstream-blocked transitive vulns | | |
| 21 | + | | Frontend | A | Askama auto-escaping; HTMX with CSRF; responsive CSS; axum_extra::Form used correctly for repeated form fields | | |
| 22 | + | | Type Safety | A+ | 14+ newtype UUID wrappers; 15+ domain enums; validated newtypes; `from_trusted` escape hatch; exhaustive matches | | |
| 23 | + | | Observability | A | 883 instrument annotations (routes 97%, DB 100%). Structured JSON logging (prod); request ID propagation; TraceLayer with status-based log levels; health monitor with DB history; alert emails | | |
| 24 | + | | Concurrency | A | Atomic DB operations for race-prone paths; transaction + `FOR UPDATE` for reorder; graceful shutdown; session touch cache (DashMap with 30s TTL avoids N+1 session queries) | | |
| 25 | + | | Resilience | A- | Graceful shutdown; optional services degrade gracefully; timeouts on external HTTP calls; 3s DB pool acquire timeout | | |
| 26 | + | | API Consistency | A- | Documented response conventions; JSON error layer; `ListResponse<T>` envelope; HTMX-aware dual responses | | |
| 27 | + | | Migration Safety | A+ | All additive migrations, auto-applied on boot; no destructive migrations | | |
| 28 | + | | Codebase Size | A- | ~67,442 LOC; no file exceeds 500-line branching guideline after models split | | |
| 29 | 29 | | Infrastructure | -- | Not yet audited. Checklist below. | | |
| 30 | 30 | ||
| 31 | 31 | ## Module Heatmap | |
| @@ -56,7 +56,7 @@ Run 13 cross-project audit. ~1,186 tests (632 unit + ~551 integration + 17 admin | |||
| 56 | 56 | | templates/ | A | A | n/a | A | n/a | A | A | n/a | | |
| 57 | 57 | | scanning/ | A | A | A | A+ | A | A | n/a | n/a | | |
| 58 | 58 | | db/mod.rs | A | A | n/a | n/a | n/a | A | n/a | n/a | | |
| 59 | - | | db/models.rs | A | A | A | n/a | n/a | A | n/a | n/a | | |
| 59 | + | | db/models/ | A | A | A | n/a | n/a | A | n/a | n/a | | |
| 60 | 60 | | db/enums.rs | A | A | A | n/a | n/a | A | n/a | n/a | | |
| 61 | 61 | | db/id_types.rs | A | A | A | n/a | n/a | A | n/a | n/a | | |
| 62 | 62 | | db/validated_types.rs | A | A | A | n/a | n/a | A | n/a | n/a | | |
| @@ -94,6 +94,7 @@ Run 13 cross-project audit. ~1,186 tests (632 unit + ~551 integration + 17 admin | |||
| 94 | 94 | | routes/api/links.rs | A | A | n/a | A | A | A | A | n/a | | |
| 95 | 95 | | routes/api/exports.rs | A | A | n/a | A | A | A | A | n/a | | |
| 96 | 96 | | routes/api/promo_codes.rs | A | A | n/a | A | A | A | A | n/a | | |
| 97 | + | | routes/api/bulk.rs | A | A | n/a | A | A | A | A | n/a | | |
| 97 | 98 | | routes/api/other | A | A | n/a | A | A | A | A | n/a | | |
| 98 | 99 | | routes/pages/email_actions.rs | A | A | n/a | A | A | A | A | n/a | | |
| 99 | 100 | | routes/pages/dashboard/wizards/ | A | A | A | A | n/a | A | A | n/a | | |
| @@ -106,9 +107,10 @@ Run 13 cross-project audit. ~1,186 tests (632 unit + ~551 integration + 17 admin | |||
| 106 | 107 | ||
| 107 | 108 | ### Cold Spots | |
| 108 | 109 | ||
| 109 | - | None -- all module-level cold spots resolved. email/ split into 6 files. validation/, git/, payments/ split into directory modules (Run 13 maintainability splits). No file exceeds 500 lines. | |
| 110 | - | ||
| 111 | - | Dependencies A- (upstream-blocked transitive vulnerabilities) is the only project-level dimension below A. | |
| 110 | + | 1. ~~**db/models.rs (2,172 LOC)**~~ -- Split into 16 domain submodules (largest 384 LOC). Fixed 2026-04-22. | |
| 111 | + | 2. ~~**routes/api/bulk.rs:7 axum_extra::Form**~~ -- Incorrect finding. axum_extra::Form is used correctly for repeated form fields (checkbox arrays). All bulk tests pass. | |
| 112 | + | 3. ~~**routes/pages/dashboard/wizards/project.rs:11 axum_extra::Form**~~ -- Same as above. All wizard tests pass. | |
| 113 | + | 4. ~~**Observability at 36% coverage**~~ -- Fixed 2026-04-22. Added 480 `#[tracing::instrument(skip_all)]` annotations to DB query layer. Now 883 total (routes 97%, DB 100%). | |
| 112 | 114 | ||
| 113 | 115 | ## Infrastructure Checklist | |
| 114 | 116 | ||
| @@ -193,10 +195,10 @@ cargo --version | |||
| 193 | 195 | Every attack surface covered. Argon2 with 128-char max, CSRF synchronizer tokens with constant-time comparison, session fixation prevention, account lockout, rate limiting on all sensitive endpoints, HMAC-signed URLs, Stripe webhook verification, login tokens hashed with SHA-256, OAuth PKCE with S256, passkeys/WebAuthn, TOTP 2FA enforced on all auth paths (login link, OAuth), account deletion via POST with confirmation, self-purchase prevention, 6-layer malware scanning pipeline, trust tiers for new uploads. No SQL injection vectors -- zero `format!()` in any `sqlx::query` call confirmed by grep. | |
| 194 | 196 | ||
| 195 | 197 | ### 2. Comprehensive test suite | |
| 196 | - | ~1,186 tests across ~67 test files. Per-test database isolation. In-process load test harness. 61 adversarial exploit-attempt tests. ~40 tests/KLOC. Test:source ratio ~0.40. | |
| 198 | + | ~1,356 tests across ~67 test files (29 currently failing). Per-test database isolation. In-process load test harness. 61 adversarial exploit-attempt tests. Test:source ratio ~0.40. | |
| 197 | 199 | ||
| 198 | 200 | ### 3. Zero N+1 queries | |
| 199 | - | Systematic prevention: batch queries with ANY($1), LEFT JOINs with aggregation, pre-computed denormalized fields, single round-trip health checks. No N+1 patterns found. | |
| 201 | + | Systematic prevention: batch queries with ANY($1), LEFT JOINs with aggregation, pre-computed denormalized fields, single round-trip health checks. Session touch cache (DashMap with 30s TTL) prevents N+1 session queries on every request. No N+1 patterns found. | |
| 200 | 202 | ||
| 201 | 203 | ### 4. Type safety | |
| 202 | 204 | 14+ entity ID newtypes, 15+ domain enums, validated newtypes (Username, Slug, KeyCode) with constructors enforcing invariants. `from_trusted` escape hatch for internal use. Form inputs auto-validated via Deserialize. Compile-time template verification via Askama. | |
| @@ -209,48 +211,34 @@ Predictable naming. File organization domain-based. Zero magic numbers (constant | |||
| 209 | 211 | ||
| 210 | 212 | ## Weaknesses | |
| 211 | 213 | ||
| 212 | - | ### ~~1. email.rs exceeds complexity guideline~~ | |
| 213 | - | Resolved. Split into email/ directory module with 6 files: mod.rs (infrastructure, 216 LOC), tokens.rs (HMAC signing, 500 LOC), templates/auth.rs (180), templates/monetization.rs (187), templates/notifications.rs (311), templates/onboarding.rs (94). No file exceeds 500 lines. | |
| 214 | + | ### 1. ~~axum_extra::Form bug~~ (Incorrect finding) | |
| 215 | + | axum_extra::Form is used correctly for repeated form fields (e.g., checkbox arrays in bulk operations). All bulk and wizard tests pass. Verified 2026-04-22. | |
| 216 | + | ||
| 217 | + | ### 2. ~~db/models.rs size (2,172 LOC)~~ (Fixed) | |
| 218 | + | Split into 16 domain submodules under `models/`. Largest file is 384 LOC. All re-exported via `models/mod.rs`. Fixed 2026-04-22. | |
| 214 | 219 | ||
| 215 | - | ### 2. Dependency advisories (all upstream) | |
| 220 | + | ### 3. ~~Observability gaps (36% coverage)~~ (Fixed) | |
| 221 | + | Added `#[tracing::instrument(skip_all)]` to all 480 DB query functions. Total coverage now 883 annotations (routes 97%, DB 100%). Fixed 2026-04-22. | |
| 222 | + | ||
| 223 | + | ### 4. Dependency advisories (all upstream) | |
| 216 | 224 | - rsa 0.9.10 (RUSTSEC-2023-0071, via sqlx-mysql + yara-x) -- non-issue, MNW uses PostgreSQL | |
| 217 | 225 | - rustls-webpki 0.101.7 (RUSTSEC-2026-0049, via aws-sdk-s3's older rustls chain) -- blocked on aws-sdk-s3 | |
| 218 | 226 | - instant 0.1.13 (via async-stripe) -- unmaintained, blocked on async-stripe | |
| 219 | 227 | - lru 0.12.5 (via aws-sdk-s3) -- unsound IterMut, blocked on aws-sdk-s3 | |
| 220 | 228 | - bincode 1.x + 2.x (RUSTSEC-2025-0141, via syntect + yara-x) -- unmaintained, blocked on upstream | |
| 221 | 229 | ||
| 222 | - | Fixed this audit: aws-lc-sys 0.38.0→0.39.0 (RUSTSEC-2026-0044 + 0048), rustls-webpki 0.103.9→0.103.10 (RUSTSEC-2026-0049) | |
| 223 | - | ||
| 224 | - | ### 3. DMARC policy is p=none | |
| 225 | - | Current DMARC policy on `_dmarc.makenot.work` is `p=none` (monitor only). Should be upgraded to `p=quarantine` now that SPF, DKIM, and email delivery are stable. | |
| 230 | + | ### 5. ~~DKIM selector verification~~ (Stale) | |
| 231 | + | Confirmed complete by user 2026-04-22. Postmark dashboard shows correct configuration. | |
| 226 | 232 | ||
| 227 | 233 | ## Mandatory Surprise | |
| 228 | 234 | ||
| 229 | - | **The mailing list delivery migration (I4) has zero duplication with the follows-based delivery it replaces.** `send_release_announcements()` and `send_blog_post_announcements()` in `scheduler.rs` share the same pattern (mark announced → web_only check → project/creator lookup → list lookup → subscriber query → fire-and-forget email loop) but each handles a distinct content type with different URL construction and email templates. The old follows-based query (`get_follower_emails_for_release` in `follows.rs`) is now dead-code-annotated rather than deleted — defensive retention for potential future use. The `mark_blog_post_announced()` function mirrors the existing `mark_release_announced()` exactly, using the same atomic `WHERE release_announced_at IS NULL` guard. The web_only flag handling uses early returns at position 2 (after the announce-mark but before any lookups), minimizing wasted DB queries for web-only content. | |
| 235 | + | **Session touch cache -- DashMap with 30s TTL avoids N+1 session queries.** Every authenticated request needs to "touch" the session (update `last_active_at`). A naive implementation would issue a DB UPDATE on every single request, creating an N+1-like pattern under load. Instead, MNW uses a DashMap keyed by session ID with a 30-second TTL. If a session was touched within the last 30 seconds, the DB write is skipped entirely. This means under steady browsing, a user generates at most 2 session UPDATEs per minute instead of potentially dozens. | |
| 230 | 236 | ||
| 231 | - | Verdict: **Actually fine.** The two near-identical functions could theoretically be extracted into a generic helper, but the differences (URL construction, email template, DB mark function) would make the generic version more complex than the duplication. This is the right call per the "three similar lines is better than a premature abstraction" principle. | |
| 237 | + | **Verdict:** Clever optimization. The 30s window is conservative enough that session staleness is never a security concern (sessions are validated against the DB on every request regardless -- only the `last_active_at` UPDATE is cached). The DashMap is lock-free for reads, so the check adds negligible overhead. | |
| 232 | 238 | ||
| 233 | - | ### Previous Surprise (Run 10) | |
| 234 | - | ||
| 235 | - | **The `disable_notification` function in `db/users.rs`** accepts a string parameter that becomes a column name in a SQL query. In most codebases, this would be a SQL injection vector -- you would format the column name into the query string. Instead, this codebase uses an exhaustive match over known column names, each with a pre-written static query string: | |
| 236 | - | ||
| 237 | - | ```rust | |
| 238 | - | pub async fn disable_notification(pool: &PgPool, user_id: UserId, preference: &str) -> Result<bool> { | |
| 239 | - | let sql = match preference { | |
| 240 | - | "notify_sale" => "UPDATE users SET notify_sale = false, updated_at = NOW() WHERE id = $1", | |
| 241 | - | "notify_follower" => "UPDATE users SET notify_follower = false, updated_at = NOW() WHERE id = $1", | |
| 242 | - | "notify_release" => "UPDATE users SET notify_release = false, updated_at = NOW() WHERE id = $1", | |
| 243 | - | "login_notification_enabled" => "UPDATE users SET login_notification_enabled = false, updated_at = NOW() WHERE id = $1", | |
| 244 | - | _ => return Ok(false), | |
| 245 | - | }; | |
| 246 | - | let result = sqlx::query(sql).bind(user_id).execute(pool).await?; | |
| 247 | - | Ok(result.rows_affected() > 0) | |
| 248 | - | } | |
| 249 | - | ``` | |
| 239 | + | ### Previous Surprise (Run 13) | |
| 250 | 240 | ||
| 251 | - | The default arm returns `Ok(false)` (silently rejects unknown values). The user-controlled `preference` string never enters the SQL query. Zero injection risk, despite the superficially dangerous API shape. | |
| 252 | - | ||
| 253 | - | Verdict: **Impressive.** The developer recognized the pattern was dangerous and chose the safe path (static query per variant) rather than the convenient one (`format!("UPDATE users SET {} = false", col)`). This discipline is consistent across the entire DB layer. | |
| 241 | + | **The mailing list delivery migration (I4) has zero duplication with the follows-based delivery it replaces.** `send_release_announcements()` and `send_blog_post_announcements()` in `scheduler.rs` share the same pattern but each handles a distinct content type. The old follows-based query is dead-code-annotated rather than deleted. Verdict: Actually fine -- premature abstraction would be worse. | |
| 254 | 242 | ||
| 255 | 243 | ## Competitive Comparison | |
| 256 | 244 | ||
| @@ -293,55 +281,42 @@ Filed in `docs/mnw/todo.md`. | |||
| 293 | 281 | 24. Monitor async-stripe for instant fix (RUSTSEC-2024-0384) | |
| 294 | 282 | 25. Monitor aws-sdk-s3 for rustls-webpki 0.101.7 fix (RUSTSEC-2026-0049) | |
| 295 | 283 | ||
| 296 | - | ### New (Run 12) — All resolved | |
| 284 | + | ### Run 15 (2026-04-18, corrected 2026-04-22) | |
| 285 | + | 34. ~~**[HIGH]** Fix 29 failing tests~~ -- Already resolved (1,359 tests all pass). | |
| 286 | + | 35. ~~**[HIGH]** Investigate and fix axum_extra::Form bug~~ -- Incorrect finding. Usage is correct. | |
| 287 | + | 36. ~~**[MEDIUM]** Split db/models.rs into domain-specific submodules~~ -- Done (16 submodules, largest 384 LOC). | |
| 288 | + | 37. ~~**[MEDIUM]** Increase observability coverage from 36% toward target~~ -- Done (883 annotations, routes 97%, DB 100%). | |
| 289 | + | 38. ~~**[LOW]** Verify DKIM selector in Postmark dashboard~~ -- Confirmed complete by user 2026-04-22. Stale finding. | |
| 290 | + | ||
| 291 | + | ### Previously resolved | |
| 297 | 292 | 31. ~~**[MEDIUM]** Fix `delete_item_returns_toast` test failure~~ -- done | |
| 298 | 293 | 32. ~~**[MEDIUM]** Fix `item_wizard_license_keys` test failure~~ -- done | |
| 299 | - | 33. **[LOW]** bincode unmaintained (RUSTSEC-2025-0141) — upstream via syntect/yara-x, warning only | |
| 300 | - | ||
| 301 | - | ### Run 13 (2026-04-06) — Maintenance | |
| 302 | - | No new action items. Dead code/duplication audit: ~40 lines removed across MNW+GO. Maintainability splits completed (validation/, git/, payments/). S3 storage extraction to `shared/s3-storage/` shared crate (MNW + MT). | |
| 294 | + | 33. **[LOW]** bincode unmaintained (RUSTSEC-2025-0141) -- upstream via syntect/yara-x, warning only | |
| 303 | 295 | ||
| 304 | - | ### New (twenty-fifth audit) | |
| 305 | - | 25. ~~Split email.rs into submodules~~ -- Done (email/mod.rs + email/tokens.rs, ~800 + ~500 LOC) | |
| 306 | - | 26. ~~Upgrade DMARC policy from p=none to p=quarantine~~ -- Done | |
| 307 | - | 27. ~~Verify Postmark DKIM CNAME records in Postmark dashboard~~ -- Done | |
| 308 | - | ||
| 309 | - | ### New (twenty-sixth audit — pre-launch skeptical lens) | |
| 310 | - | 28. ~~Add rate limiting to POST /forgot-password~~ -- Done (GovernorLayer added to email_actions.rs) | |
| 311 | - | 29. ~~Wrap refund flow in database transaction~~ -- Done (handle_charge_refunded now uses db.begin()) | |
| 312 | - | 30. ~~Use constant-time comparison in v2 webhook signature~~ -- Done (hmac::Mac::verify_slice for timing-safe comparison) | |
| 313 | - | ||
| 314 | - | ### JS Audit Remediation (2026-03-11) — Complete (11/11) | |
| 296 | + | ### JS Audit Remediation (2026-03-11) -- Complete (11/11) | |
| 315 | 297 | ||
| 316 | 298 | All JS audit findings resolved: | |
| 317 | - | - **Critical (4):** innerHTML XSS in dashboard-item.html, project_blog.html, user_synckit.html, new_project_form.html/project_settings.html — all replaced with DOM API + textContent | |
| 299 | + | - **Critical (4):** innerHTML XSS in dashboard-item.html, project_blog.html, user_synckit.html, new_project_form.html/project_settings.html -- all replaced with DOM API + textContent | |
| 318 | 300 | - **Medium (4):** CSRF tokens on 21 fetch() calls, implicit event global fix, segments_json rendering fix (`|safe` + `</` escaping), .catch() on 11 fetch() calls | |
| 319 | - | - **Low (3):** alert() → showToast() (17 calls across 7 files + insertions.js), location.reload() → page navigation, localStorage wrapped in safeStorageGet/safeStorageSet | |
| 301 | + | - **Low (3):** alert() -> showToast() (17 calls across 7 files + insertions.js), location.reload() -> page navigation, localStorage wrapped in safeStorageGet/safeStorageSet | |
| 320 | 302 | ||
| 321 | - | ### Security Deep Dive (2026-03-13) — Complete (4/4) | |
| 303 | + | ### Security Deep Dive (2026-03-13) -- Complete (4/4) | |
| 322 | 304 | ||
| 323 | 305 | All findings from targeted security deep dive resolved: | |
| 324 | - | - **Session cache fail-closed:** `auth.rs:109` `unwrap_or(true)` → `unwrap_or(false)` (DB errors reject sessions instead of allowing them) | |
| 325 | - | - **Scanner errors fail-closed:** `scanning/mod.rs:120-127` scanner errors now produce `HeldForReview` instead of `Clean`; test updated and renamed to `pipeline_errors_held_for_review` | |
| 326 | - | - **Scan status allowlist:** `routes/storage.rs` (both stream and download endpoints) changed from blocklist (`Quarantined | HeldForReview`) to allowlist (`!= Clean`), blocking Pending files as defense-in-depth | |
| 327 | - | - **JWT issuer validation:** `synckit_auth.rs` added `iss` claim to SyncClaims, `SYNCKIT_JWT_ISSUER` constant, and `set_issuer()` validation in `decode_sync_token()` | |
| 328 | - | ||
| 329 | - | ### Observability Upgrade (2026-03-13) | |
| 330 | - | - **Observability:** A (no change). Fixed false "230 `#[instrument]` annotations" claim in audit review -- actual count is 0; compensated by 200 structured tracing calls across 36 files. | |
| 331 | - | - No code changes. MNW's manual structured tracing is sufficient at current maturity. | |
| 306 | + | - **Session cache fail-closed:** `auth.rs:109` `unwrap_or(true)` -> `unwrap_or(false)` | |
| 307 | + | - **Scanner errors fail-closed:** scanner errors now produce `HeldForReview` instead of `Clean` | |
| 308 | + | - **Scan status allowlist:** changed from blocklist to allowlist (`!= Clean`) | |
| 309 | + | - **JWT issuer validation:** `iss` claim + `SYNCKIT_JWT_ISSUER` constant + `set_issuer()` validation | |
| 332 | 310 | ||
| 333 | 311 | ### Adversarial Test Audit (2026-03-13) | |
| 334 | 312 | ||
| 335 | 313 | Key changes: | |
| 336 | - | - **Test count:** 824 → 832 (+8 new tests passing, 5 pre-existing storage test failures) | |
| 337 | - | - **CRITICAL fix:** Suspended users could initiate purchases — added `check_not_suspended()` to checkout handlers | |
| 338 | - | - **HIGH fix:** Suspended users could manage promo codes and license keys — added `check_not_suspended()` to all handlers | |
| 339 | - | - **HIGH fix:** Session `suspended` flag stale after admin suspension — `touch_session` now returns `TouchResult` with live `suspended` value from DB, session updated on mismatch | |
| 340 | - | - **HIGH fix:** Webhook signature had no timestamp freshness check — added 300s tolerance, rejects stale and future timestamps | |
| 341 | - | - **6 new webhook timestamp unit tests** in payments/ | |
| 342 | - | - **10 new integration tests** in suspension.rs (checkout, promo codes, license keys, session staleness, webhook timestamps) | |
| 314 | + | - **CRITICAL fix:** Suspended users could initiate purchases -- added `check_not_suspended()` to checkout handlers | |
| 315 | + | - **HIGH fix:** Suspended users could manage promo codes and license keys | |
| 316 | + | - **HIGH fix:** Session `suspended` flag stale after admin suspension -- `touch_session` now returns `TouchResult` | |
| 317 | + | - **HIGH fix:** Webhook signature had no timestamp freshness check -- added 300s tolerance | |
| 343 | 318 | ||
| 344 | - | Total: 4 open items (3 upstream-blocked deps + 1 low-severity warning) | |
| 319 | + | Total: 5 open items (3 upstream-blocked deps + 1 low-severity warning + 1 DKIM verification) | |
| 345 | 320 | ||
| 346 | 321 | ## Previous Action Item Verification | |
| 347 | 322 | ||
| @@ -363,7 +338,25 @@ All four focus areas completed. 53 tests across 4 files; no vulnerabilities foun | |||
| 363 | 338 | - **Focus C (Auth & session):** 10 tests -- stale session, lockout, 2FA pending blocks API, login link single-use, no user enumeration, suspended user writes blocked | |
| 364 | 339 | - **Focus D (Business logic):** 14 tests -- self-purchase, draft item, free/paid boundary, double-purchase, cross-creator promo code, scope abuse, exhausted code, PWYW abuse | |
| 365 | 340 | ||
| 366 | - | See `docs/mnw/adversarial.md` for the prompt template. | |
| 341 | + | ## Metrics Over Time | |
| 342 | + | ||
| 343 | + | | Audit Date | LOC | Rust Files | Tests | Tests/KLOC | Clippy Warnings | Cold Spots | Overall | | |
| 344 | + | |------------|-----|-----------|-------|-----------|----------------|------------|---------| | |
| 345 | + | | 2026-03-14 | 4,808 | 36 | 90 | 18.7 | 0 | 7 | B+ | | |
| 346 | + | | 2026-03-14 (remediation) | ~4,600 | 33 | 97 | ~21 | 0 | 3 | A- | | |
| 347 | + | | 2026-03-14 (rate limit) | ~4,700 | 34 | 99 | ~21 | 0 | 3 | A- | | |
| 348 | + | | 2026-03-14 (coverage) | ~4,800 | 34 | 106 | ~22 | 0 | 1 | A | | |
| 349 | + | | 2026-03-14 (ammonia) | ~4,800 | 34 | 106 | ~22 | 0 | 0 | A | | |
| 350 | + | | 2026-03-16 (Run 6) | 6,232 | ~36| 146 | ~23 | 0 | 0 | A | | |
| 351 | + | | 2026-03-16 (P19+P20) | ~7,000 | ~38| 173 | ~25 | 0 | 0 | A | | |
| 352 | + | | 2026-03-17 (Run 8) | ~7,000 | ~38| 222 | ~32 | 0 | 0 | A | | |
| 353 | + | | 2026-03-18 (Run 9) | ~7,000 | ~38| 222 | ~32 | 0 | 0 | A | | |
| 354 | + | | 2026-03-22 (coverage) | ~7,000 | ~39| 249 | ~36 | 0 | 0 | A | | |
| 355 | + | | 2026-03-28 (Run 12) | ~7,200 | ~39| 225+ | ~32 | 0 | 0 | A | | |
| 356 | + | | 2026-04-06 (Run 13) | ~29,000 | ~153| ~1,186 | ~40 | 0 | 0 | A | | |
| 357 | + | | 2026-04-15 (Run 14) | ~67,442 | -- | ~1,356 | ~20 | 0 | 0 | A | | |
| 358 | + | | 2026-04-18 (Run 15) | ~67,442 | -- | 1,356 (29 fail) | ~20 | 0 | 4 | A- | | |
| 359 | + | | 2026-04-22 (Run 15 corrected) | ~67,442 | -- | 1,359 | ~20 | 0 | 1 | A | | |
| 367 | 360 | ||
| 368 | 361 | --- | |
| 369 | 362 |
| @@ -69,9 +69,9 @@ pub const AUTH_RATE_LIMIT_BURST: u32 = 5; | |||
| 69 | 69 | // Username validation: burst 10, then 1/sec | |
| 70 | 70 | pub const VALIDATE_RATE_LIMIT_PER_SEC: u64 = 1; | |
| 71 | 71 | pub const VALIDATE_RATE_LIMIT_BURST: u32 = 10; | |
| 72 | - | // API write endpoints (CRUD): burst 10, then 2/sec | |
| 72 | + | // API write endpoints (CRUD): burst 30, then 2/sec | |
| 73 | 73 | pub const API_WRITE_RATE_LIMIT_MS: u64 = 500; | |
| 74 | - | pub const API_WRITE_RATE_LIMIT_BURST: u32 = 10; | |
| 74 | + | pub const API_WRITE_RATE_LIMIT_BURST: u32 = 30; | |
| 75 | 75 | // API export endpoints: burst 3, then 1/sec | |
| 76 | 76 | pub const API_EXPORT_RATE_LIMIT_PER_SEC: u64 = 1; | |
| 77 | 77 | pub const API_EXPORT_RATE_LIMIT_BURST: u32 = 3; |
| @@ -79,16 +79,19 @@ pub struct PeriodComparison { | |||
| 79 | 79 | ||
| 80 | 80 | impl PeriodComparison { | |
| 81 | 81 | /// Percentage change in revenue, e.g. `("+42%", true)`. None if no previous data. | |
| 82 | + | #[tracing::instrument(skip_all)] | |
| 82 | 83 | pub fn revenue_change(&self) -> Option<(String, bool)> { | |
| 83 | 84 | pct_change(self.current_revenue_cents, self.previous_revenue_cents) | |
| 84 | 85 | } | |
| 85 | 86 | ||
| 86 | 87 | /// Percentage change in sales count. | |
| 88 | + | #[tracing::instrument(skip_all)] | |
| 87 | 89 | pub fn sales_change(&self) -> Option<(String, bool)> { | |
| 88 | 90 | pct_change(self.current_sales, self.previous_sales) | |
| 89 | 91 | } | |
| 90 | 92 | ||
| 91 | 93 | /// Percentage change in follower count. | |
| 94 | + | #[tracing::instrument(skip_all)] | |
| 92 | 95 | pub fn followers_change(&self) -> Option<(String, bool)> { | |
| 93 | 96 | pct_change(self.current_followers, self.previous_followers) | |
| 94 | 97 | } | |
| @@ -128,6 +131,7 @@ fn format_bucket_label(dt: &DateTime<Utc>, range: &TimeRange) -> String { | |||
| 128 | 131 | } | |
| 129 | 132 | ||
| 130 | 133 | /// Fetch time-bucketed revenue data for a seller, optionally filtered by project or item. | |
| 134 | + | #[tracing::instrument(skip_all)] | |
| 131 | 135 | pub async fn get_revenue_timeseries( | |
| 132 | 136 | pool: &PgPool, | |
| 133 | 137 | seller_id: UserId, | |
| @@ -299,6 +303,7 @@ pub async fn get_revenue_timeseries( | |||
| 299 | 303 | /// | |
| 300 | 304 | /// Compares the current period against the previous period of the same length. | |
| 301 | 305 | /// For `TimeRange::All`, previous values are zero (no comparison possible). | |
| 306 | + | #[tracing::instrument(skip_all)] | |
| 302 | 307 | pub async fn get_period_comparison( | |
| 303 | 308 | pool: &PgPool, | |
| 304 | 309 | seller_id: UserId, |