max / makenotwork

launch-eve audit pass: Ultra Fuzz Runs #8-9 + cross-cutting sweeps server/ Run #8 (earlier today): all 5 axes brought to A-; new src/background.rs bounded mpsc + semaphore queue replaces 22 per-request tokio::spawn sites; cart min_price_cents/chain-break MEDs fixed; item-wizard pricing_model silent fallback fixed; inline-JS copy-link migrated to delegated handler; cart free-claim N+1 closed; commit_rescan helper extends chronic-disease seal to admin paths; new migrations 123-130, 133 (backup-code prefix, completion_effects, scan_status index, sync_apps loopback, dead-letter table, ip index, non-negative duration); 7-wave backlog sweep closed 24/26 carried items. Run #9 (this session): launch-eve deep pass surfaced + fixed - UX-CRITICAL: signup TOCTOU race 23505 -> 500 + form loss (join_wizard.rs catches 23505 with constraint-name routing) - Sec-SERIOUS: delete_all_sessions_for_user non-atomic JWT bump (sessions.rs wrapped in pool.begin() / tx.commit()) - Sec-SERIOUS: 2FA login-email IP spoofable via bare x-forwarded-for (two_factor.rs uses helpers::extract_client_ip) - Pay-SERIOUS: webhook dual-failure 503 short-circuited Stripe retry (webhook/mod.rs calls unmark_event_processed before 503) Deferred with rationale in docs/audit_review.md + todo.md: 1 SERIOUS (subscription webhook ordering), 3 HIGH (dead-letter unused, reqwest per-request x5, unbounded cleanup spawn), 7 MED, 8 LOW. §1.1 public surface: OG/Twitter meta in base.html, static/manifest.json, error.html contact link, sitemap.rs with in-memory cache. info@ email pin across 8 files. doc-fuzz/exorcise/nitpick/security-review passes complete. sando/ daemon build + main updates, deploy systemd unit + config example, post-receive hook, BOM doc edits. Launchplan §1.5 A- bar holds across all 5 axes.

Author: Max Johnson <me@maxj.phd> · 2026-06-01 00:37 UTC

Commit: 3dc8dca7a120c483dd2fcef8c9f34ccad19847a6

Parent: eee96a7

155 files changed, +3743 insertions, -1100 deletions

M sando/daemon/src/build.rs +8

			@@ -38,6 +38,8 @@ pub async fn run(
38	38		let version = read_pkg_version(&server_dir.join("Cargo.toml")).await
39	39		.with_context(\|\| format!("reading version from {}/Cargo.toml", server_dir.display()))?;
40	40
	41	+	tracing::info!(sha = %sha, version = %version, dir = %server_dir.display(), "cargo build --release start");
	42	+	let started = std::time::Instant::now();
41	43		let out = Command::new("cargo")
42	44		.arg("build")
43	45		.arg("--release")
			@@ -45,6 +47,12 @@ pub async fn run(
45	47		.output()
46	48		.await
47	49		.context("spawning cargo build")?;
	50	+	let elapsed_s = started.elapsed().as_secs();
	51	+	if !out.status.success() {
	52	+	tracing::error!(sha = %sha, version = %version, elapsed_s, "cargo build --release failed");
	53	+	} else {
	54	+	tracing::info!(sha = %sha, version = %version, elapsed_s, "cargo build --release ok");
	55	+	}
48	56		anyhow::ensure!(
49	57		out.status.success(),
50	58		"cargo build --release failed:\n{}",

M sando/daemon/src/main.rs +7 -1

			@@ -20,9 +20,15 @@ mod topology;
20	20		#[tokio::main]
21	21		async fn main() -> Result<()> {
22	22		tracing_subscriber::fmt()
	23	+	// stdout is block-buffered under systemd (no TTY) so events never
	24	+	// reach journald until the buffer fills or the process exits. stderr
	25	+	// is unbuffered, so each event is written out immediately — what we want for a long-running service.
	26	+	.with_writer(std::io::stderr)
23	27		.with_env_filter(
24	28		tracing_subscriber::EnvFilter::try_from_default_env()
25		-	.unwrap_or_else(\|_\| "sando_daemon=info,tower_http=info".into()),
	29	+	// bin target name is `sandod`, NOT the package name `sando-daemon` —
	30	+	// `module_path!()` uses the binary's crate name, so events come from `sandod::*`.
	31	+	.unwrap_or_else(\|_\| "sandod=info,tower_http=info".into()),
26	32		)
27	33		.init();
28	34

A sando/deploy/sando-daemon.toml.example +9

		@@ -0,0 +1,9 @@
1	+	# Sando daemon config (production).
2	+	# Install at /etc/sando/sando-daemon.toml on the Sando host.
3	+
4	+	listen = "100.103.89.95:7766" # pop-os tailnet IP; bind tailnet-only, not 0.0.0.0
5	+	db_path = "/srv/sando/state/sando.db"
6	+	topology_path = "/etc/sando/sando.toml"
7	+	workdir = "/srv/sando/work"
8	+	release_root = "/srv/sando/releases"
9	+	scratch_db_url = "postgres:///sando_scratch?host=/var/run/postgresql"

A sando/deploy/sandod.service +55

		@@ -0,0 +1,55 @@
1	+	# Sando daemon systemd service
2	+	# Place at /etc/systemd/system/sandod.service on the Sando host (pop-os).
3	+	#
4	+	# Commands:
5	+	# sudo systemctl daemon-reload
6	+	# sudo systemctl enable sandod
7	+	# sudo systemctl start sandod
8	+	# sudo systemctl status sandod
9	+	# journalctl -u sandod -f
10	+
11	+	[Unit]
12	+	Description=Sando deploy controller
13	+	Documentation=https://github.com/maxjmath/MNW
14	+	After=network.target postgresql.service
15	+	Wants=postgresql.service
16	+
17	+	[Service]
18	+	Type=simple
19	+	User=sando
20	+	Group=sando
21	+	WorkingDirectory=/srv/sando
22	+	ExecStart=/usr/local/bin/sandod
23	+	Restart=on-failure
24	+	RestartSec=5
25	+
26	+	Environment=SANDO_CONFIG=/etc/sando/sando-daemon.toml
27	+	Environment=PATH=/srv/sando/.cargo/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin
28	+	Environment=HOME=/srv/sando
29	+	EnvironmentFile=-/etc/sando/sando.env
30	+
31	+	# Security hardening. Sando needs ssh outbound, git over fs, postgres over
32	+	# unix socket, and read/write on its own state dirs.
33	+	NoNewPrivileges=true
34	+	ProtectSystem=strict
35	+	ProtectHome=true
36	+	PrivateTmp=true
37	+	ReadWritePaths=/srv/sando
38	+	RestrictAddressFamilies=AF_UNIX AF_INET AF_INET6
39	+	RestrictNamespaces=true
40	+	RestrictRealtime=true
41	+	RestrictSUIDSGID=true
42	+	LockPersonality=true
43	+	ProtectKernelTunables=true
44	+	ProtectKernelModules=true
45	+	ProtectControlGroups=true
46	+	SystemCallArchitectures=native
47	+
48	+	LimitNOFILE=65535
49	+
50	+	StandardOutput=journal
51	+	StandardError=journal
52	+	SyslogIdentifier=sandod
53	+
54	+	[Install]
55	+	WantedBy=multi-user.target

M sando/hooks/post-receive +10

			@@ -8,6 +8,16 @@
8	8
9	9		set -euo pipefail
10	10
	11	+	# Pick up SANDO_DAEMON / SANDO_BRANCH from the daemon's env file when present
	12	+	# (the same file systemd's EnvironmentFile= points at). Lets the deployed hook
	13	+	# reach a non-loopback listen address without changing the hook source.
	14	+	if [[ -r /etc/sando/sando.env ]]; then
	15	+	set -a
	16	+	# shellcheck disable=SC1091
	17	+	source /etc/sando/sando.env
	18	+	set +a
	19	+	fi
	20	+
11	21		DAEMON_URL="${SANDO_DAEMON:-http://127.0.0.1:7766}"
12	22		DEPLOY_BRANCH="${SANDO_BRANCH:-main}"
13	23

M sando/plans/mm-hardware-bom.md +1 -1

			@@ -1,6 +1,6 @@
1	1		# MakeMachine Hardware BOM
2	2
3		-	Settled 2026-05-23. Top-of-line host platform; GPUs are fungible and live on the EveryCycle GPU thread (see `~/hardware/everycycle/docs/roadmap.md`).
	3	+	Settled 2026-05-23. Top-of-line host platform; GPUs are fungible and live on the EveryCycle GPU thread (see `~/Code/everycycle/docs/roadmap.md`).
4	4
5	5		The substrate is built once and kept stable; GPU experimentation happens above it without revisiting motherboard, CPU, or RAM.
6	6

M sando/sando.toml +9 -5

			@@ -4,9 +4,13 @@
4	4		# unlock promotion to the next tier, the nodes it ships to, and the canary
5	5		# policy for shipping within the tier.
6	6		#
7		-	# Day-one wiring: MM (local) -> A (testnot.work) -> B (prod-1). C is declared
8		-	# but not provisioned; adding the second prod node later is a config edit
9		-	# (set provisioned = true, fill in [[tier.node]]).
	7	+	# Day-one wiring: host (pop-os, local) -> A (testnot.work) -> B (prod-1). C is
	8	+	# declared but not provisioned; adding the second prod node later is a config
	9	+	# edit (set provisioned = true, fill in [[tier.node]]).
	10	+	#
	11	+	# Note: the host tier is named "mm" for legacy reasons (string identifier baked
	12	+	# into the schema + code). It refers to whatever machine sandod runs on —
	13	+	# currently pop-os, not a MakeMachine. Rename is a follow-up cleanup.
10	14
11	15		[repo]
12	16		bare_path = "/srv/sando/mnw.git"
			@@ -18,7 +22,7 @@ branch = "main"
18	22		source = "rsync://astra/var/backups/mnw/latest.sql.gz"
19	23		local_path = "/srv/sando/backups/latest.sql.gz"
20	24
21		-	# ---- MM: local pre-staging gate ----
	25	+	# ---- host: pop-os local pre-staging gate ----
22	26		[[tier]]
23	27		name = "mm"
24	28		provisioned = true
			@@ -28,7 +32,7 @@ gates = [
28	32		{ kind = "migration_dry_run" },
29	33		{ kind = "boot_smoke" },
30	34		]
31		-	# MM is the daemon's own host; no remote node row.
	35	+	# Host is the daemon's own machine (pop-os); no remote node row.
32	36
33	37		# ---- A: testnot.work staging ----
34	38		[[tier]]

M sando/todo.md +25 -33

			@@ -4,7 +4,9 @@ Open work only. Completed items move to `todo_done.md` (sibling file) when one e
4	4
5	5		Format rule: every actionable line is a `- [ ]` checkbox. Headings group phases and themes; do not put status updates in them.
6	6
7		-	Roadmap target: replace `server/deploy/deploy.sh` and astra-hosted `server/deploy/run-ci.sh` with Sando running on the MakeMachine, gating Hetzner prod through testnot.work.
	7	+	Roadmap target: replace `server/deploy/deploy.sh` and astra-hosted `server/deploy/run-ci.sh` with Sando running on pop-os, gating Hetzner prod through testnot.work.
	8	+
	9	+	Host decision: Sando runs on pop-os (x86_64 Ubuntu-derived, systemd). Architecturally closest to Hetzner prod, no cross-compile, no init-system split. MakeMachine and EveryCycle are now a separate project — not Sando's concern.
8	10
9	11		Phases are ordered for execution. Phase 0 must finish before Phase 1 is meaningful. Phases 5+ are post-cutover hardening.
10	12
			@@ -13,11 +15,11 @@ Phases are ordered for execution. Phase 0 must finish before Phase 1 is meaningf
13	15		Read these to orient before working on Sando:
14	16
15	17		- `README.md` — quickstart, API surface, v0 limitations
16		-	- `sando.toml` — current topology (MM → A → B; C declared, not provisioned)
	18	+	- `sando.toml` — current topology (host → A → B; C declared, not provisioned)
17	19		- `daemon/src/main.rs` — startup sequence (config → topology → migrate → sync → bare-repo bootstrap → serve)
18	20		- `daemon/src/routes.rs` — `/state`, `/promote`, `/rollback`, `/rebuild`, `/backup/fetch`, `/events`
19	21		- `daemon/src/gates.rs` — gate runners; the load-bearing logic
20		-	- `daemon/src/build.rs` — `build_and_run_mm` is the MM-tier pipeline
	22	+	- `daemon/src/build.rs` — host-tier build pipeline
21	23		- `daemon/src/deploy.rs` — `deploy_local`; remote SSH stub
22	24		- `daemon/migrations/001_init.sql` — schema (tiers/nodes as rows)
23	25		- `server/deploy/deploy.sh` — current cross-compile + push-to-Hetzner script (what we are replacing)
			@@ -26,33 +28,23 @@ Read these to orient before working on Sando:
26	28
27	29		---
28	30
29		-	## Phase 0 — MakeMachine bootstrap
30		-
31		-	Hardware and base provisioning. None of the remote-deploy work below matters until MM exists.
32		-
33		-	Platform decision: MM runs Mountaineer. MM is the first real Mountaineer deployment and Sando is its first real sysop helper (principle 14). Hetzner prod stays on its current distro for now; the Mountaineer-for-prod question is deferred at least a year. If MM-on-Mountaineer ever blocks an MNW deploy for more than a day, fall back to Ubuntu on MM — capture the trigger in `plans/mm-platform-fallback.md` before flipping the install.
	31	+	## Phase 0 — pop-os bootstrap
34	32
35		-	- [ ] Purchase MakeMachine hardware per `plans/mm-hardware-bom.md` (Threadripper Pro 7975WX + WRX90D8-2L/2T + 512 GB ECC RDIMM + 2× 4 TB Gen5 NVMe; ~$10.5K including A1 GPU). Dual-use as Sando host + EveryCycle dev box — see `~/hardware/everycycle/docs/roadmap.md` for the EveryCycle side.
36		-	- [ ] Install Mountaineer (ZFS root, s6+s6-rc init, nushell, podman). Use the latest Dull Edge build available, or hand-roll from `side_projects/mountaineer/` if no release has shipped yet.
37		-	- [ ] Write `plans/mm-platform-fallback.md`: explicit trigger conditions for re-imaging MM with Ubuntu, plus the swap-in procedure (which env files, which binaries, which directories to preserve).
38		-	- [ ] Join MM to tailnet; allocate a stable hostname and record in `_meta/infra_tailnet.md`.
39		-	- [ ] Provision `sando` system user; lock down the home dir; set up scoped SSH keys for outbound deploys.
40		-	- [ ] Install scratch Postgres locally on MM (via apk); create the `sando_scratch` role + DB used by `migration_dry_run`.
41		-	- [ ] Write Sando's s6-rc service definition (`sandod` long-run service, dependency on tailscale and postgres, restart on failure, env from `/etc/sando/sando.env`). Contribute upstream to Alpine if the definition turns out general enough — see Mountaineer principle on giving back.
42		-	- [ ] Install `sandod` binary at `/usr/local/bin/sandod`; bring up the s6 service.
43		-	- [ ] Write the production `sando.toml`; bare repo path under `/srv/sando/mnw.git`; A node `testnot.work`; B node Hetzner prod. Use `node.init = "systemd"` for the Hetzner nodes (see Phase 1).
44		-	- [ ] Verify MNW server builds reproducibly on Mountaineer (musl libc vs glibc — sqlx/tokio/axum should be fine but confirm before relying on it). Capture any musl-specific surprises in `plans/mm-build-notes.md`.
	33	+	- [x] Provision `sando` system user on pop-os; lock down home dir; generate SSH keypair at `/srv/sando/.ssh/id_ed25519` for outbound deploys.
	34	+	- [x] Install scratch Postgres locally on pop-os; create `sando_scratch` role + DB used by `migration_dry_run`. (Owner of own DB; non-superuser.)
	35	+	- [x] Write systemd unit for `sandod` (long-run service, restart on failure, env from `/etc/sando/sando.env`). Installed at `/etc/systemd/system/sandod.service`.
	36	+	- [x] Write the production `sando.toml`; bare repo path under `/srv/sando/mnw.git`. Installed at `/etc/sando/sando.toml`; daemon config at `/etc/sando/sando-daemon.toml`.
	37	+	- [x] Install `sandod` binary at `/usr/local/bin/sandod`; enable + start the service. Live on `100.103.89.95:7766`; bare repo auto-bootstrapped at `/srv/sando/mnw.git`.
	38	+	- [ ] Verify MNW server builds reproducibly on pop-os.
	39	+	- [ ] Register sando pubkey with Hetzner prod (`deploy@alpha-west-1`) and testnot.work once that node exists. Pubkey: `ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIEK+vhpr1V8VnsEemN9x6tAA2S05kmv/mQ3eVgSXSkJ8 sando@pop-os`.
45	40
46	41		## Phase 1 — Remote deploy
47	42
48		-	The MVP only deploys to `ssh_target=local`. Production needs real SSH/rsync, and the init-system split (MM on s6, Hetzner on systemd) needs a backend abstraction from day one.
	43	+	The MVP only deploys to `ssh_target=local`. Production needs real SSH/rsync.
49	44
50		-	- [ ] Add `node.init` field to `sando.toml`: `"systemd" \| "s6" \| "local"`. Default `"systemd"` for backwards-compat. Every node declares its init explicitly so a future Hetzner-on-Mountaineer move is a TOML edit, not a Sando code change.
51		-	- [ ] Refactor `deploy.rs` around an `InitBackend` trait with `reload_or_restart(unit_name) -> Result<()>` and `unit_path(release_root, version) -> PathBuf`. Two impls: `Systemd` (shells `systemctl reload-or-restart`) and `S6` (shells `s6-svc -r` against the service dir). `Local` impl is a no-op restart for dev.
52		-	- [ ] Implement `deploy::deploy_node` remote path: rsync the staged binary to `<ssh_target>:<release_root>/releases/<version>/server`, then `ssh <ssh_target>` runs `ln -sfn releases/<version> current` plus the init-backend-appropriate reload.
53		-	- [ ] Settle service-name convention. Current MNW server systemd unit is `makenotwork.service`; on s6 it would be `/etc/s6-rc/sv/mnw-server/`. Capture both names + the migration plan in `plans/service-names.md` before changing anything live.
54		-	- [ ] Add `node.service_name` field to `sando.toml` (default derives from tier+role) so the convention is explicit per-node and backend-agnostic.
55		-	- [ ] Bootstrap script for adding a fresh node: creates `<release_root>`, installs the init-backend-appropriate service definition pointing at `<release_root>/current/server`, adds the sando SSH key to `authorized_keys`. Idempotent. One script per backend, or one script that branches on init kind.
	45	+	- [ ] Implement `deploy::deploy_node` remote path: rsync staged binary to `<ssh_target>:<release_root>/releases/<version>/server`, then `ssh <ssh_target>` runs `ln -sfn releases/<version> current` plus `systemctl reload-or-restart <service>`.
	46	+	- [ ] Add `node.service_name` to `sando.toml` (default `makenotwork.service`).
	47	+	- [ ] Bootstrap script for adding a fresh node: creates `<release_root>`, installs the systemd unit pointing at `<release_root>/current/server`, adds the sando SSH key to `authorized_keys`. Idempotent.
56	48		- [ ] Garbage-collect old releases on the remote: keep last N (configurable, default 5) per node. Run at end of each successful deploy.
57	49		- [ ] Handle `rsync` failure mid-deploy: leave the previous `current` symlink intact; mark `deploys.outcome = 'failed'`; do not advance `tier_state`.
58	50
			@@ -62,7 +54,7 @@ The MVP only deploys to `ssh_target=local`. Production needs real SSH/rsync, and
62	54
63	55		- [ ] Confirm astra's offsite replica (per `sync-backup-offsite.sh`) writes a deterministic latest-link path Sando can rsync from. If not, add one.
64	56		- [ ] Wire the production `sando.toml` `backup.source` to the astra rsync URL.
65		-	- [ ] Schedule a daily `POST /backup/fetch` (cron or systemd timer on MM) so a fresh backup is always within 24h of any promote attempt.
	57	+	- [ ] Schedule a daily `POST /backup/fetch` (systemd timer on pop-os) so a fresh backup is always within 24h of any promote attempt.
66	58		- [ ] First end-to-end `migration_dry_run` against a real prod backup; confirm it catches the 2026-05-22 incident class (drop+recreate column migration sequence).
67	59		- [ ] Document the failure modes: what does the operator see in `/state` when the dry-run fails? Capture in `plans/migration-dryrun-failures.md`.
68	60		- [ ] Decide retention on `backups` table — prune rows older than N days so SQLite doesn't grow forever.
			@@ -77,7 +69,6 @@ Sando currently only ships the binary. `deploy.sh` does more. Inventory each pie
77	69		- [ ] Error pages — static HTML in `server/deploy/error-pages/`. Either bake into the binary (preferred — versions with code) or ship as a `releases/<version>/error-pages/` sibling. Capture decision.
78	70		- [ ] Security configs — `sshd-git.conf`, `fail2ban-sshd.conf`, `setup-firewall.sh`. Move to node-bootstrap.
79	71		- [ ] Restart warning — `deploy.sh send_restart_warning` posts a banner before restart. Decide whether Sando emits this and through what surface (probably the existing in-app banner mechanism).
80		-	- [ ] Cross-compile from macOS — `deploy.sh` builds on the dev laptop via `cargo-zigbuild`. Sando builds natively on MM (x86_64 Linux). Verify the resulting binaries are byte-identical or at least behavior-equivalent across one full sprint before retiring `deploy.sh`.
81	72		- [ ] Prod migrations — today, who runs `sqlx migrate run` against prod? `deploy.sh` doesn't (verify). Sando should run prod migrations as part of `POST /promote/{tier}` for the prod tiers, OR there should be an explicit `POST /migrate/{tier}` operator action. Decide.
82	73
83	74		## Phase 4 — Cutover
			@@ -99,15 +90,15 @@ The TUI polls. The MVP requires you to hand-insert a row for `manual_confirm`. B
99	90		- [ ] TUI: actions pane. `p` for promote (prompts for version + tier), `R` for rollback, `b` for backup fetch, `c` for manual_confirm.
100	91		- [ ] `POST /confirm/{tier}` endpoint that inserts a `gate_runs` row with `passed=1, gate_kind='manual_confirm'` for the current pending version. Replaces the hand-SQL workaround.
101	92		- [ ] TUI live log pane that follows the most recent build / gate run; backed by `WS /events`.
102		-	- [ ] `POST /promote` body should accept `version` as optional; default to the current MM version when target is A, predecessor's current when target is B+. Reduces ceremony.
	93	+	- [ ] `POST /promote` body should accept `version` as optional; default to the current host version when target is A, predecessor's current when target is B+. Reduces ceremony.
103	94
104	95		## Phase 6 — Monitoring + alerting
105	96
106		-	- [ ] Wire MM's `/metrics` endpoint into the existing MNW Prometheus scrape config; record where the scrape config lives in `_meta/` or wherever monitoring already runs.
	97	+	- [ ] Wire pop-os `/metrics` endpoint into the existing MNW Prometheus scrape config; record where the scrape config lives in `_meta/` or wherever monitoring already runs.
107	98		- [ ] Add counters: `sando_builds_total{outcome}`, `sando_gates_total{tier,kind,outcome}`, `sando_deploys_total{tier,outcome}`, `sando_burn_in_remaining_hours{tier}`.
108	99		- [ ] Alert: build failed. Page on first failure (not flap-protected — builds are infrequent).
109	100		- [ ] Alert: migration_dry_run failed. Page immediately. This is the 2026-05-22-class signal.
110		-	- [ ] Alert: a tier has had `current_version` unchanged for > N days while MM is green. (Operator forgot to promote.)
	101	+	- [ ] Alert: a tier has had `current_version` unchanged for > N days while host is green. (Operator forgot to promote.)
111	102
112	103		## Phase 7 — Multi-node B+C
113	104
			@@ -125,7 +116,7 @@ Move Postgres off the prod app node so B+C become truly interchangeable.
125	116
126	117		- [ ] Provision Postgres-only machine D (modest spec; reliability over performance).
127	118		- [ ] Migrate the prod DB from Hetzner app node to D. Capture procedure in `plans/postgres-d-migration.md`.
128		-	- [ ] Update `server` `DATABASE_URL` everywhere (env files on B+C, scratch URL on MM stays local).
	119	+	- [ ] Update `server` `DATABASE_URL` everywhere (env files on B+C, scratch URL on pop-os stays local).
129	120		- [ ] Replica/HA story stays deferred; D is SPOF for now (per `_meta/preclear/.../decisions.md`).
130	121
131	122		## Phase 9 — Hardening
			@@ -135,10 +126,11 @@ Pick up after cutover is stable.
135	126		- [ ] Tailnet ACL audit: confirm only the laptop can reach `sandod:7766`. Document the ACL.
136	127		- [ ] Decide if v0.2 needs token auth on `sandod` endpoints (revisit assumption from `decisions.md` once there's a real second operator).
137	128		- [ ] Sando self-deploy: Sando builds and deploys itself through its own pipeline. Bootstraps the bootstrap. Closes the chicken-and-egg loop and is satisfying.
138		-	- [ ] Backup-of-Sando-state: nightly SQLite snapshot to astra. The state DB tracks 6 months of deploys; losing it on a MM disk failure would be annoying.
	129	+	- [ ] Backup-of-Sando-state: nightly SQLite snapshot to astra. The state DB tracks 6 months of deploys; losing it on a pop-os disk failure would be annoying.
139	130
140	131		## Notes / non-checkbox
141	132
142		-	- WS `/events` and the operator-UX work in Phase 5 can run in parallel with Phase 1-3 once MM exists. They are sequenced after for review clarity, not because they block anything.
	133	+	- WS `/events` and the operator-UX work in Phase 5 can run in parallel with Phase 1-3 once Phase 0 is done. They are sequenced after for review clarity, not because they block anything.
143	134		- "Hotfix override" and `reset_burn_in` flag are already implemented end-to-end (see `decisions.md`); not on this list because there's nothing left to do until prod uses them.
144	135		- C tier exists in the schema as a `provisioned=false` row from day one — adding C in Phase 7 is a TOML edit, not a migration.
	136	+	- MakeMachine + EveryCycle are now a separate project. The hardware BOM (`plans/mm-hardware-bom.md`) should move there when that project gets its own repo.

M server/docs/architecture.md +3 -3

			@@ -97,7 +97,7 @@ A `json_error_layer` middleware converts HTML error responses to `{"error": "...
97	97
98	98		## Database Layer
99	99
100		-	PostgreSQL via sqlx with compile-time checked queries. 50 migrations (auto-applied on boot). Connection pool: 25 max connections, 3-second acquire timeout.
	100	+	PostgreSQL via sqlx with compile-time checked queries. Numbered migrations in `migrations/`, auto-applied on boot; the directory is the source of truth. Connection pool: 25 max connections, 3-second acquire timeout.
101	101
102	102		### DB Modules
103	103
			@@ -229,7 +229,7 @@ HTTP client (`mt_client`) for the Multithreaded forum instance. HMAC-signed inte
229	229
230	230		- Passwords: Argon2id with random salt per hash
231	231		- Sessions: `tower-sessions` with PostgreSQL-backed store, ID regeneration on login (prevents fixation), 7-day expiry on inactivity
232		-	- Session cache: DashMap caches recent session validations (30-second TTL) to skip per-request DB touch
	232	+	- Session cache: DashMap caches recent session validations (TTL from `constants::SESSION_TOUCH_CACHE_SECS`, currently 5s) to skip per-request DB touch
233	233		- 2FA: TOTP (totp-rs, 6-digit, 30-second step, +/-1 skew) + WebAuthn passkeys (webauthn-rs)
234	234		- Account lockout: 5 failed attempts triggers 15-minute lockout
235	235		- New-device notifications: Email alert on login from unrecognized session
			@@ -359,7 +359,7 @@ Two spawned Tokio tasks, coordinated via `watch::channel` for graceful shutdown:
359	359		\| Scheduler \| `src/scheduler.rs` \|
360	360		\| Shared types \| `src/types/` \|
361	361		\| Askama templates \| `templates/` \|
362		-	\| Migrations \| `migrations/` (001-050) \|
	362	+	\| Migrations \| `migrations/` (numbered, applied in order) \|
363	363		\| Static assets \| `static/` \|
364	364		\| Integration tests \| `tests/` \|
365	365		\| Deploy scripts \| `deploy/` \|

A server/docs/audit_review.md +500

		@@ -0,0 +1,1192 @@
1	+	# Ultra Fuzz Report — MNW Server (Run #9 — launch eve)
2	+
3	+	Run date: 2026-05-31 (evening)
4	+	Run number: 9 (launchplan_final.md §1.5 referred to it as "Run #5" — stale; this is the 9th)
5	+	Trigger: launchplan §1.5 pre-launch pass
6	+
7	+	## Run #9 headline
8	+
9	+	Run #8 closed with "BAR MET — ALL FIVE AXES A-". Run #9 went deeper and surfaced 1 CRITICAL + 4 SERIOUS + several MED/HIGH items the prior 8 runs missed. The four launch-critical items (the CRITICAL and three of the four SERIOUS) were fixed in-session; the remaining items (one SERIOUS plus the HIGH/MED/LOW findings) are deferred with rationale below.
10	+
11	+	\| Axis \| Run #8 \| Run #9 \| Direction \|
12	+	\|------\|--------\|--------\|-----------\|
13	+	\| Payments \| A- \| A- \| flat — 2 new SERIOUS surfaced; 1 fixed (webhook unmark on dual-failure 503), 1 deferred (subscription out-of-order webhook) \|
14	+	\| Storage \| A- \| A- \| flat — 1 new HIGH (migration 129 dead-letter table unused) + 2 MEDs (is_s3_key_live unindexed full-scan, LIKE-suffix false-positive); deferred \|
15	+	\| UX Wiring \| A- → B- → A- \| A- \| dipped on grade-cap for signup TOCTOU CRITICAL, restored after fix \|
16	+	\| Security \| A- \| A- \| flat — 2 new SERIOUS, both fixed (JWT-bump non-atomic, 2FA email IP spoofable) \|
17	+	\| Performance \| A- \| A- \| flat — 2 new HIGH (per-request reqwest::Client::new in 5 hot paths, unbounded spawn in expired-account cleanup); deferred to post-launch \|
18	+
19	+	Net Run #9 (post-fix): 0 CRITICAL · 1 SERIOUS open (Payments subscription ordering — documented deferral) · 3 HIGH open (deferred) · 7 MED open (deferred). Launchplan §1.5 A- bar holds.
20	+
21	+	## Run #9 — CRITICAL fixed in-session
22	+
23	+	### UX-CRITICAL — Signup TOCTOU: race → 500 + form loss → FIXED 2026-05-31
24	+
25	+	`src/routes/pages/public/join_wizard.rs:99-139`. The wizard ran separate `get_user_by_username` / `get_user_by_email` checks before `create_user`. A concurrent signup with the same username or email slipping between SELECT and INSERT raised a 23505 unique violation that bubbled to `AppError::Database` → 500 "Something went wrong" — and the user's entire typed-in form was lost. On a public alpha-launch surge this is the highest-traffic public endpoint; the wrong page to be returning 500s on.
26	+
27	+	Fix landed: `create_user` call site now matches `AppError::Database(sqlx::Error::Database(_))` with code 23505, inspects the constraint name (`users_username_key` / `users_email_key`), and routes through `return_error(..)` with a friendly message — same flow as the explicit pre-check branches. Same shape as the existing 23505 handling in `db/license_keys.rs`, `db/builds.rs`, `routes/api/guest_checkout.rs`.
28	+
29	+	Known follow-up (not blocking): the form-reload still loses typed values on the error swap; `return_error` renders `LoginErrorTemplate` (message-only). Preserving field values would require threading them through the template — file a separate Phase 4 polish item.
30	+
31	+	## Run #9 — SERIOUS fixed in-session
32	+
33	+	### Sec-SERIOUS — `delete_all_sessions_for_user` non-atomic JWT bump → FIXED 2026-05-31
34	+
35	+	`src/db/sessions.rs:247-263`. The function ran `DELETE FROM user_sessions` then a separate `UPDATE users SET jwt_invalidated_at = NOW()` on independent connections. If the UPDATE dropped (pool timeout, conn drop, postgres restart), session cookies were dead but every outstanding SyncKit JWT survived until natural expiry — exactly the leak this function exists to prevent. The in-code comment ("a session row deleted without a JWT bump is harmless, the converse would leak access") inverted reality.
36	+
37	+	Fix landed: both writes wrapped in `pool.begin()` / `tx.commit()`. Comment updated.
38	+
39	+	### Sec-SERIOUS — 2FA login-notification email uses spoofable IP → FIXED 2026-05-31
40	+
41	+	`src/routes/pages/public/two_factor.rs:308-312`. The 2FA-completion path read `x-forwarded-for` raw (first-comma-split) for the new-login email's IP field. Every other login surface (`routes/auth.rs:242`, `auth.rs:486`, `auth.rs:528`) routes through `crate::helpers::extract_client_ip` which prioritizes `CF-Connecting-IP`. An attacker who already captured a password could pre-set `X-Forwarded-For: 1.2.3.4` on the verify-2fa POST so the "new login from <city>" email lied about origin — the exact email users are told to trust for compromise detection.
42	+
43	+	Fix landed: swapped to `crate::helpers::extract_client_ip(&headers)`. One-line change, parity restored.
44	+
45	+	### Pay-SERIOUS — Webhook dual-failure dropped events silently → FIXED 2026-05-31
46	+
47	+	`src/routes/stripe/webhook/mod.rs:73-89`. Dedup row was marked processed before handler dispatch (correct for at-least-once). On `(handler_err, insert_failed_event_err)` dual failure, code returned 503 to trigger Stripe redelivery — but Stripe's redelivery would short-circuit at the dedup check (line 50) and 200 the event without ever processing it. The code's own comment acknowledged the bug; the right tool (`unmark_event_processed`, defined 30 lines away in `db/webhook_events.rs:40`) was never called.
48	+
49	+	Fix landed: call `db::webhook_events::unmark_event_processed(&state.db, &event_id)` before returning 503, with logged-error best-effort if even that fails (same scenario where 503 was already wrong).
50	+
51	+	## Run #9 — DEFERRED with rationale (above A- bar)
52	+
53	+	### Pay-SERIOUS — Subscription webhook out-of-order events resurrect `active`
54	+
55	+	`src/routes/stripe/webhook/subscriptions.rs:90, 116, 140`. Handlers blindly overwrite `subscriptions.status` and `period_end` from the webhook payload. Stripe does NOT guarantee delivery order. If a stale `active` event is delivered after a newer `past_due` event (observed state sequence `active → past_due → active(stale)`), the handler overwrites the legitimate `past_due` with the stale `active` — restoring access for a user who hasn't paid.
56	+
57	+	Deferral rationale: worst case is restored access for a few minutes until the next webhook arrives. Fix requires re-extracting Stripe's top-level `created` from `UntypedEvent` (currently dropped) and adding `WHERE last_event_at IS NULL OR last_event_at <= $created` guards on every status/period write across Fan+, creator-tier, and synckit code paths — non-trivial cross-cutting change. Post-launch fix in Phase 4; tracked in todo.md.
58	+
59	+	### Sto-HIGH — Migration 129 dead-letter table never written
60	+
61	+	`migrations/129_pending_s3_deletions_dead_letter.sql` creates `pending_s3_deletions_dead_letter` and documents it as "operator-visible parking lot... require manual triage." `src/scheduler/cleanup.rs:453-457` on `attempts >= 10` only logs `tracing::error!` then removes the row — never inserts into the dead-letter table. Permanently-failing keys have zero operator visibility.
62	+
63	+	Deferral rationale: operational, not runtime. No user impact; only operators lose triage signal. One-INSERT fix; bundle into Phase 4.
64	+
65	+	### Perf-HIGH — Per-request `reqwest::Client::new()` in 5 hot paths
66	+
67	+	`routes/pages/dashboard/main.rs:118`, `routes/pages/public/landing.rs:284`, `routes/api/internal/cli_features.rs:440`, `routes/api/domains.rs:319`, `auth.rs:559`. Each call builds a fresh TCP pool, TLS context, DNS resolver — no keep-alive across requests. `MtClient` in `AppState` already keeps a pooled client; the dashboard bypasses it.
68	+
69	+	Deferral rationale: real but matters at scale. Private alpha launch traffic well below where this becomes a tail-latency contributor. 30-min refactor; bundle into Phase 4 once launch traffic settles.
70	+
71	+	### Perf-HIGH — Unbounded `tokio::spawn` in expired-account cleanup
72	+
73	+	`src/scheduler/cleanup.rs:215-220` (`spawn_expired_account_cleanups`). Daily tick spawns one task per expired account, no governor. `cleanup_sandbox_accounts` (same file, ~100 lines above) correctly caps at `CLEANUP_PARALLELISM=4` via `JoinSet`; the terminated/content-removal variants don't. A backlog of 200 expired accounts fans out into 200 concurrent S3 prefix listings racing for the 25-conn pool at midnight.
74	+
75	+	Deferral rationale: runs once daily; current expired-account count is small (private alpha). Trivial fix (lift the existing JoinSet pattern); not launch-blocking. Bundle with Phase 4.
76	+
77	+	## Run #9 — MED/LOW deferred (read-only carry-forward, in todo.md)
78	+
79	+	- Pay-MED: `pricing.rs::parse_dollars_to_cents` misinterprets European decimal comma (`1,23` → 12300¢). User-controlled input; fixable in a single regex.
80	+	- Pay-MED: SyncKit app-sub checkout silently defaults `storage_limit_bytes` to 0 if metadata missing.
81	+	- Pay-MED: Guest checkout email falls back to `"unknown@guest"` sentinel; collisions possible.
82	+	- Sto-MED: `is_s3_key_live` runs 7 EXISTS subqueries on unindexed `items.audio_s3_key` / `cover_s3_key` / `video_s3_key` / `versions.s3_key` etc — sequential scans per retry.
83	+	- Sto-MED: `is_s3_key_live` LIKE-suffix pattern `'%' \|\| s3_key` false-positives on neighboring keys (key `abc/file.png` matches `xabc/file.png`) — skips a legitimate delete → S3 object leaks.
84	+	- UX-MED: "Log in" return_to query param in `purchase.html:145` is dead-wired — login handler always redirects `/dashboard`. Lost purchase intent.
85	+	- UX-MED: Admin user filter buttons (`admin-users.html:35-44`) use `class="primary"` / `class="secondary"` instead of `btn-primary` / `btn-secondary` — renders unstyled.
86	+	- UX-LOW: Pagination links in `git/issues.html:72,76` don't URL-encode `search`; `&page=99` in search query corrupts pagination.
87	+	- UX-LOW: 5 sites do `.render().unwrap_or_default()` on Askama templates (blank UI on render failure, no log).
88	+	- UX-LOW: `slugify` in `formatting.rs` produces `"post"` for any non-ASCII title; international creators get opaque URLs.
89	+	- Sec-MINOR: `csrf.rs:176-185` `validate_token_consuming` doesn't consume — name promises stronger property than implementation.
90	+	- Sec-MINOR: `routes/oauth.rs:101-111` `is_localhost_redirect` allows any port on localhost regardless of registered URI.
91	+	- Sec-MINOR: `routes/pages/public/two_factor.rs::pending_2fa_started_at` reads `i64` via session.get; type mismatch silently → None → instantly-expired.
92	+	- Sec-MINOR: `scanning/archive.rs:124` path-traversal check misses lone `..` segment (no trailing separator).
93	+	- Perf-LOW: `scheduler/announcements.rs` linear walk through subscriber list in a single spawned task; no checkpointing.
94	+	- Perf-LOW: `db/page_views.rs` `pending` HashMap has no max-cardinality cap (crawler hitting 100k unique target_ids before tick).
95	+	- Perf-LOW: `build_runner.rs:441` local artifact tmpfile leaks if process crashes between SCP and `remove_file`.
96	+
97	+	## Run #9 — mandatory surprises
98	+
99	+	- Payments: `routes/stripe/webhook/mod.rs:82-89` literally documents the bug it ships ("the dedup row was already marked processed... Stripe won't retry") and then chooses 503 anyway. The fix (`unmark_event_processed`) sat 30 lines away in the same crate, never called. Scar-tissue-comment-without-the-fix is a recognizable pattern across the codebase.
100	+	- Storage: `routes/storage/mod.rs::commit_upload` sealed-helper pattern (Run #7 fix for the chronic disease) is the strongest piece of structural engineering in the repo — turned an enum into a witness type. But the neighbor file `migrations/129_pending_s3_deletions_dead_letter.sql` shows the opposite: migration written with detailed prose explaining the operator's parking lot, and the actual INSERT never wired up. Two adjacent fixes from the same audit-cycle, one structural and load-bearing, one ceremonial and silently broken.
101	+	- UX: `csrf.rs` `PostureMethodRouter` + sealed `CsrfManuallyValidated` witness make registering a mutation route without an explicit posture declaration uncompilable. A+ engineering. The contrast with the signup wizard's TOCTOU-and-500-with-lost-form is jarring — defensive depth on CSRF, none on the front door.
102	+	- Security: `routes/auth.rs:128-130` malformed-email branch skips the DUMMY_HASH timing equalizer that was added explicitly to prevent timing-side-channel user enumeration. ~2 orders of magnitude faster than every other failure path. The equalizer exists; this one path bypasses it.
103	+	- Performance: `db/projects.rs::get_project_ids_for_user` is the only `fetch_all` in `projects.rs` without a `LIMIT`. Its neighbor `get_projects_by_user` caps at 500 with a documented safety comment. Cyber-squatter with 10k projects + account expiry → 10k S3 prefix-deletes in one spawned task. Asymmetric defense within the same module.
104	+
105	+	## Run #9 — stress-tested OK
106	+
107	+	Verified attacks the code survived (high-confidence positives):
108	+
109	+	- Stripe webhook signature replay (HMAC constant-time, multi-secret rotation, timestamp tolerance both directions)
110	+	- Promo code concurrent over-use (single atomic UPDATE with max_uses + expires_at + starts_at)
111	+	- Cart race past pre-check (23505 fallback aborts cleanly without charging)
112	+	- License key prediction (6 wordlist × CSPRNG ≈ 66 bits)
113	+	- Pre-signed URL Content-Length binding (S3 rejects mismatch at protocol level)
114	+	- Storage cap atomicity (`try_replace_storage` single UPDATE)
115	+	- Build claim race (partial unique index + 23505 backstop)
116	+	- Idempotent re-confirms in all 4 upload confirm handlers (reaper-deletes-live-object closed)
117	+	- Session row + JWT atomicity (post-fix verified above)
118	+	- TOTP replay across skew window (matched-step tracked + strict `>` gate)
119	+	- OAuth PKCE downgrade (S256 pinned at authorize + token-exchange)
120	+	- CSRF body bypass via textarea-smuggled token (proper form parser)
121	+	- Git diff/blame XSS (HTML-escaped in attacker-controlled spots)
122	+	- Internal error leakage (tests assert no PG host, no S3 bucket, no sqlx variant leaks)
123	+
124	+	## Run #9 confidence per axis
125	+
126	+	- Payments HIGH (~70% LoC read this pass; Phase 4 backlog visible)
127	+	- Storage HIGH (full module read; cleanup.rs upper half only — MEDIUM there)
128	+	- UX Wiring HIGH for CSRF/error/validation; MEDIUM for wizard step partials, embed routes, dashboard CSV import
129	+	- Security HIGH for auth/CSRF/session; MEDIUM for scanning (YARA rule content unread), API key scoping
130	+	- Performance HIGH for scan worker, scheduler, storage, build_runner; MEDIUM for SyncKit, postmark, import pipeline
131	+
132	+	## Run #9 bug counts
133	+
134	+	\| Severity \| Payments \| Storage \| UX \| Security \| Perf \| Total \|
135	+	\|---\|---\|---\|---\|---\|---\|---\|
136	+	\| CRITICAL \| — \| — \| 1 (FIXED) \| — \| — \| 1 \|
137	+	\| SERIOUS \| 2 (1 FIXED, 1 deferred) \| — \| — \| 2 (FIXED) \| — \| 4 \|
138	+	\| HIGH \| — \| 1 (deferred) \| — \| — \| 2 (deferred) \| 3 \|
139	+	\| MED \| 3 (deferred) \| 2 (deferred) \| 2 (deferred) \| — \| — \| 7 \|
140	+	\| LOW/NOTE \| 2 \| — \| 3 \| 4 \| 3 \| 12 \|
141	+
142	+	## Run #9 delta vs Run #8
143	+
144	+	- 1 CRITICAL surfaced + fixed (signup TOCTOU); class missed by prior 8 runs because no agent explicitly probed the public-signup race window
145	+	- 4 SERIOUS surfaced; 3 fixed in-session, 1 deferred with rationale
146	+	- Run #8 "BAR MET" claim was correct for the surfaces it audited but under-scoped: this pass added explicit attack-vector probing for cross-conn atomicity, IP spoof parity across auth surfaces, and webhook dedup edge paths — none of which were in prior runs' scope
147	+	- All previously closed Run #8 fixes verified intact (commit_upload seal, S1 tx atomicity, background.rs queue, cart MEDs)
148	+
149	+	---
150	+
151	+	# Ultra Fuzz Report — MNW Server (Run #8 — historical)
152	+
153	+	Run date: 2026-05-31
154	+	Run number: 8
155	+
156	+	## Run #8 Headline
157	+
158	+	\| Axis \| Run #5 \| Run #6 \| Run #7 \| Run #8 \| Direction \|
159	+	\|------\|--------\|--------\|--------\|--------\|-----------\|
160	+	\| Payments \| B \| B+ \| A- \| A- \| flat — H2 still deferred; 2 new MEDs surfaced (cart `min_price_cents` bypass, cart-all chain-break on all-free first seller) \|
161	+	\| Storage \| B- \| A- \| B+ \| A- \| ↑ H1 + S1 fixes verified closed; commit_upload seal intact across all 7 confirm handlers; genericization clean at every caller including synckit/blobs.rs \|
162	+	\| UX Wiring \| B \| A- \| A- \| A- \| flat — 1 new MED (item-wizard `pricing_model` silent fallback to "free" — same disease class fixed in project wizard at Run #6, not propagated) \|
163	+	\| Security \| A- \| A- \| A- \| A- \| flat — only diff in scope (username availability fail-closed) is a net improvement; MED backlog identical to Run #5/#6/#7 \|
164	+	\| Performance \| B- \| A- \| A- \| A- \| flat with 1 new SERIOUS — webhook `checkout_helpers.rs` unbounded `tokio::spawn` (send_purchase_emails / mailing_list / tip_email) competes with request handlers for the 25-slot pool under burst \|
165	+
166	+	Net Run #8: 0 CRITICAL · 1 SERIOUS new (Perf webhook spawn) — FIXED 2026-05-31 · 5 new MED — ALL FIXED 2026-05-31 · 1 SERIOUS previously-deferred (Payments H2 `claim_free_project` soft race) — FIXED 2026-05-31.
167	+
168	+	Post-Run #8 status (2026-05-31 end-of-day): 0 CRITICAL · 0 SERIOUS · 0 MED open from any prior run. All five axes A-, all above-MED items closed, all Run #8 MEDs closed, prior-deferred SERIOUS closed. Launchplan §1.5 bar fully cleared.
169	+
170	+	2026-05-31 post-Run-#8 backlog sweep (7 waves): 24 of 26 carried MED/LOW/NOTE items closed across Storage (5), Security (8), Performance (3), UX (2), Payments (2), Auth (4). Two deferred with rationale: `build_runner.rs` serial targets (LOW, builds run rarely, refactor touches denominator) and `scheduler/mod.rs` advisory-lock granularity (multi-replica concern, single-process today). New schema migration `133_items_duration_seconds_nonnegative.sql` pins the negative-duration invariant in the DB. New `commit_rescan` helper extends the chronic-disease commit_upload seal to admin paths. Tests: 1655 / 0.
171	+
172	+	Launchplan §1.5 bar: ALL 5 AXES AT A- — BAR MET. The new Perf SERIOUS is axis-internal and the agent kept Perf at A- (machinery wins outweigh; same shape as previously-closed `record_view` per-request spawn — apply mpsc + drainer pattern). New Payments MEDs and UX MED are launch-quality items worth addressing or documenting before ship; none are A- blockers.
173	+
174	+	## Run #8 — new findings above MED
175	+
176	+	### P-SERIOUS — Webhook hot-path unbounded `tokio::spawn` (Performance) — FIXED 2026-05-31
177	+	`src/routes/stripe/webhook/checkout_helpers.rs:58, 96, 124, 290` + `src/routes/stripe/webhook/checkout.rs:618`. `send_purchase_emails`, `subscribe_buyer_to_mailing_list`, `send_tip_email`, `send_guest_sale_notification`, guest-purchase-confirmation each `tokio::spawn` from the webhook handler. Multi-item cart fires N spawns per webhook; each task acquires 1-2 pool conns + a Postmark call. No JoinSet, no cap. Under burst, hundreds of detached tasks competed with request handlers for the 25-slot pool. Same shape as the Run #4 `record_view` per-request spawn (fixed via mpsc + drainer).
178	+
179	+	Fix landed: new generic `src/background.rs` module — `BackgroundTx` + `spawn_pool()` with bounded mpsc (capacity 1024) + semaphore-bounded concurrent execution (8 workers, well below `DB_POOL_MAX_CONNECTIONS=25`). `state.bg.spawn(name, fut)` is non-blocking; queue overflow logs a warning and drops the task. The `spawn_email!` macro was refactored to use the bg queue (covers 17 callers across auth/admin/follows/library/two_factor/stripe webhook/login flows). The 5 manual webhook `tokio::spawn` sites were also migrated. Per-request email sends from postmark issue replies (×2), guest-claim email, and join-wizard signup (×2) were migrated in the same pass — same disease, same fix.
180	+
181	+	Out of scope for this fix (different bug shapes; defer to Phase 4 polish or own remediation): import pipeline (long-running, needs own bound), MT community creation (single outbound HTTP, minor pool pressure), creator departure notification + status broadcast (broadcast-class — use `broadcast.rs` JoinSet pattern), idempotency-store post-response (trivial DB write), build_runner (already gated by claim flow), scheduler/monitor/scanning/page_views (background workers, not per-request).
182	+
183	+	### Payments MED — Cart `min_price_cents` bypass — FIXED 2026-05-31
184	+	Both cart paths (`process_seller_checkout` and `create_cart_checkout`) now check `pc.min_price_cents` for non-platform Discount codes before applying the discount. Cart skips the ineligible item (others may still qualify) rather than rejecting the whole cart — matches the existing scope-skip pattern.
185	+
186	+	### Payments MED — Cart-all chain-break on all-free first seller — FIXED 2026-05-31
187	+	`process_seller_checkout` signature changed `Result<String>` → `Result<Option<String>>`; all-free path now returns `Ok(None)` instead of `Err(BadRequest)`. New `drain_to_paid` helper loops through the queued sellers until a paid one is reached (returns URL) or queue exhausted (returns `Ok(None)` → library redirect). Both callers (`create_cart_checkout_all` and `checkout_success`) updated to use it.
188	+
189	+	### UX MED — Item wizard `pricing_model` silent fallback — FIXED 2026-05-31
190	+	`save_pricing` now rejects missing pricing_model with `AppError::validation("Select a pricing model")` and rejects unknown values with `format!("Unknown pricing model: {other}")`. Same shape as the project wizard Run #6 fix.
191	+
192	+	### UX MED — Inline-JS template duplication — FIXED 2026-05-31
193	+	Added delegated `data-copy-link` click handler to `static/mnw.js` with proper `.catch()` (falls back to `window.prompt` in non-secure contexts — better than the silent-no-op the inline snippets shipped with). 8 templates migrated from `onclick="navigator.clipboard.writeText(...).then(...)"` to `<a href="..." data-copy-link>Copy link</a>` (audio_player, blog_post, collection, item, project, text_reader, user, video_player). `href` is the real URL so middle-click / no-JS / share menus still work. Cache-bust query bumped to `v=0531`.
194	+
195	+	### Perf MED — Cart free-claim N+1 — FIXED 2026-05-31
196	+	Extended `CartItem` with `enable_license_keys` + `default_max_activations` (both cart queries pull them through). Three free-claim loops (single-seller paid path, discount-zeroed promo path, chain-flow path) drop the per-item `get_item_by_id` and replace per-item `remove_from_cart` DELETE with a single bulk `remove_from_cart_bulk(..., ANY($2))` at the end of each loop. Per-item tx for `claim_free_item` stays (the per-item claim-vs-already-purchased return value is load-bearing for sales-count increment). Roundtrips per free item dropped from ~5-7 to ~3-4; per-loop DELETEs from N to 1.
197	+
198	+	## Run #8 — verified standing (storage fixes from session)
199	+
200	+	- H1 (`uploads.rs::confirm_upload` L295-337) — three-arm match correct. Zero-rows arm rolls back (replace path = `try_replace_storage` swap-back with `i64::MAX` cap; fresh-upload path = `decrement_storage_used`), then `enqueue_s3_orphan(new_key)`, returns BadRequest "Item was modified concurrently." Returns BEFORE `commit_upload` and BEFORE `remove_pending_upload` — pending_uploads row left as reaper second-line defense.
201	+	- S1 (`media.rs::media_confirm` L241-293) — single `state.db.begin()` wraps storage credit + pending_uploads clear + media_files INSERT. S3 IO entirely outside tx. tx drop → Postgres ROLLBACK → all three writes reverted atomically. 23505 detection via typed `AppError::Database(sqlx::Error::Database(...))` pattern works post-rollback. S3 cleanup fires on every tx-failure branch.
202	+	- Genericization — `pending_uploads::remove_pending_upload` and `media_files::create` now `impl PgExecutor<'e>`. All 12 callers (including `synckit/blobs.rs:157`) still compile and execute correctly.
203	+	- Pool pressure delta from S1 tx — neutral-to-better. Prior code grabbed 3 separate conns serially; new code grabs 1 conn for ~3× the duration. Users-row write lock held ~ms. Per-user serialization for sub-second uploads acceptable.
204	+
205	+	## Run #8 — mandatory surprises
206	+
207	+	- Payments: `compute_splits` more careful than its comment promises — remainder-distribution loop constrained by `expected_total = amount * raw_total_pct.min(100) / 100`, so under-100% splits keep the owner's share AND distribute floor-rounding remainders up to bound. Proptest-style invariant tests fully fence it.
208	+	- Storage: `try_increment_storage_on` inside the tx holds a row-level lock on `users` for the duration of the tx. Not a bug (sub-ms hold; cap can't be over-shot via WHERE re-evaluation under READ COMMITTED). But every media confirm now serializes per-user against every other storage write.
209	+	- UX: Copy-link button is a chimera. Nine templates copy the same inline `onclick` that calls `navigator.clipboard.writeText`, mutates `this.textContent` to `"Copied!"` — silently broken in any tab loaded over plain HTTP, in iframes, or with restrictive CSP. No `.catch()` → no fallback, no error.
210	+	- Security: `routes/auth.rs:128-130` malformed-email branch skips DUMMY_HASH timing equalizer. ~2 orders of magnitude faster than every other failure path — distinguishes "you submitted an invalid-email-shaped string" from "valid email, unknown account." Real timing oracle a few lines above the equalizer that was deliberately added to prevent exactly this.
211	+	- Performance: `metrics::idempotency_middleware` does a DB SELECT on EVERY POST/PUT with an `Idempotency-Key` header BEFORE the handler runs. No bloom filter, no negative cache. ~1 extra ms per POST already doing 2-5 DB queries — free 20%+ on POST p50 available by adding an in-memory `seen` set.
212	+
213	+	## Run #8 bug counts
214	+
215	+	\| Severity \| Payments \| Storage \| UX \| Security \| Perf \| Total \|
216	+	\|---\|---\|---\|---\|---\|---\|---\|
217	+	\| CRITICAL \| — \| — \| — \| — \| — \| 0 \|
218	+	\| SERIOUS \| 1 (deferred) \| — \| — \| — \| 1 (new) \| 2 \|
219	+	\| MED \| 2 (new) \| 7 \| 5 \| 8 \| 5 \| 27 \|
220	+	\| LOW/NOTE \| 5 \| 3 \| 4 \| 3 \| 2 \| 17 \|
221	+
222	+	## Run #8 confidence per axis
223	+
224	+	- Payments HIGH (~70% LoC read)
225	+	- Storage HIGH (full)
226	+	- UX HIGH
227	+	- Security HIGH (scoped); MEDIUM for storage-route auth side-effects
228	+	- Performance HIGH
229	+
230	+	## Run #8 delta vs Run #7
231	+
232	+	- Storage B+ → A-. H1 + S1 fixes verified closed. Genericization clean.
233	+	- Payments A- flat. 2 new MEDs (cart `min_price_cents` bypass, cart-all chain-break) surfaced via expanded coverage; H2 deferred unchanged.
234	+	- UX A- flat. 1 new MED (item-wizard `pricing_model` silent fallback) — same disease class as project wizard fix from Run #6, not propagated.
235	+	- Security A- flat. Net improvement (username fail-closed). MED backlog identical.
236	+	- Performance A- flat. 1 new SERIOUS (webhook unbounded spawn) — same shape as Run #4 `record_view` fix. Cart free-flow N+1 (MED) — Run #5 fix covered paid only.
237	+
238	+	---
239	+
240	+	# Ultra Fuzz Report — MNW Server (Run #7 — historical)
241	+
242	+	Run date: 2026-05-31
243	+	Run number: 7 (+ S1 + Storage code-fuzz fixes confirmed in Run #8)
244	+
245	+	## Headline
246	+
247	+	\| Axis \| Run #5 \| Run #6 \| Run #7 \| Direction \|
248	+	\|------\|--------\|--------\|--------\|-----------\|
249	+	\| Payments \| B \| B+ \| A- \| ↑↑ Phase 2 + Run #6 + Run #7 fixes all landed; S1 cart 23505 swallow fixed post-Run #7; H2 claim_free_project soft race deferred \|
250	+	\| Storage \| B- \| A- \| B+ → A- pending Run #8 \| ↑/↓ commit_upload structural fix is excellent; Run #6 idempotency fix introduced HIGH-1 (pending_uploads leak in 4 sites) + HIGH-2 (missing rollback on update_*_url) — both fixed post-Run #7. Storage code-fuzz 2026-05-31 surfaced H1 (confirm_upload silent zero-rows + side-effects-already-fired) and reopened S1 media_confirm tx atomicity — both fixed in same session \|
251	+	\| UX Wiring \| B \| A- \| A- \| ↑ field-aware deletion + parse_dollars_to_cents shared; pricing_model silent fallback HIGH found and fixed post-Run #7 \|
252	+	\| Security \| A- \| A- \| (unchanged) \| flat — no security-touching changes in Runs #6/#7 \|
253	+	\| Performance \| B- \| A- \| (unchanged) \| flat — no perf-touching changes in Runs #6/#7 \|
254	+
255	+	## Post-Run #7 Storage code-fuzz (2026-05-31)
256	+
257	+	Targeted code-fuzz scoped to the Storage axis to verify A- before triggering full Run #8. Two findings above MED, both fixed in-session:
258	+
259	+	- H1 (HIGH) — `routes/storage/uploads.rs::confirm_upload` silent `rows_affected = 0`. Same shape as the just-closed HIGH-2 (`update_*_url`), one step further along the same handler family. UPDATE at L295 uses ownership-filter `WHERE id = $1 AND project_id IN (SELECT id FROM projects WHERE user_id = $4)`; `rows_affected()` was never checked. If the item was deleted between `get_item_owner` (L156) and the UPDATE, storage credit stayed incremented, `pending_uploads` got cleared a few lines down, and `commit_upload` enqueued a scan job against a ghost target — permanent S3 leak + over-charged counter. Fix: three-arm match on the UPDATE result; zero-rows case rolls back storage and routes the new S3 key through `enqueue_s3_orphan` so the reaper still cleans it, then returns BadRequest "Item was modified concurrently."
260	+	- S1 (SERIOUS, Run #5 plan #12 reopened) — `routes/storage/media.rs::media_confirm` three-write atomicity. Run #5 called for wrapping `try_increment_storage` → `remove_pending_upload` → `media_files::create` in a transaction; Run #7's in-process compensation only covered in-process errors. Process interruption (panic, OOM kill, container restart) between any two writes still leaked. Fix: all three writes now in a single tx; tx drop rolls back storage + pending_uploads + media_files atomically. Only the S3 object needs explicit cleanup (single `delete_object` after rollback). Supporting DB-layer changes: `creator_tiers::try_increment_storage_on(&mut PgConnection)` tx-friendly variant; `pending_uploads::remove_pending_upload` and `media_files::create` signatures genericized to `impl PgExecutor<'e>` (backwards compatible).
261	+
262	+	Remaining storage MED/LOW (below launchplan §1.5 A- bar; ride into Phase 4 polish or document deferral):
263	+	- MED — `update_project_image_url` / `update_item_cover` ignore `rows_affected()` (same shape as H1; mitigated for current callers because the only follow-on side-effect is `bump_cache_generation`).
264	+	- MED — `downloads.rs:120` `((duration as u64) * 2).max(3600)` with no DB `CHECK (duration_seconds >= 0)`. Negative duration → multi-decade presigned URL. Exploitability requires creator-controlled negative duration; ffprobe doesn't produce them. Cap in code + add CHECK migration.
265	+	- MED — Admin rescan paths (`routes/admin/uploads.rs:347, 390`) call `db::scan_jobs::enqueue` directly, bypassing the `commit_upload` structural seal. Ordering is correct so no live bug; demote `db::scan_jobs::enqueue` to `pub(crate)` and expose `commit_rescan(target, ...)` to close the chronic-disease finding for real.
266	+	- MED — `enqueue_s3_orphan` single-policy doc in `routes/storage/mod.rs:24-30` overstates the discipline; many `s3.delete_object(...).await.ok()` direct calls remain at pre-storage-credit rejection paths. Tighten the doc or migrate the post-storage-credit sites.
267	+	- MED — `is_s3_key_live` doesn't enumerate project image URLs (project cover keys live in a distinct prefix so no current bug; surface is fragile if future code paths queue project image keys).
268	+	- LOW — `scanning/worker.rs:251` inline `UPDATE media_files SET scan_status` instead of `db::scanning::update_media_file_scan_status` helper.
269	+	- LOW — `routes/pages/dashboard/wizards/item/save.rs:95` `update_item_cover_image_url` updates only `cover_image_url` (not s3_key/size); client-side hidden-field abuse can desync.
270	+	- LOW — `db/pending_uploads.rs::remove_pending_upload` deletes by s3_key alone (per-handler prefix validation makes cross-user collision unreachable, but the function signature is broader than it needs to be).
271	+
272	+	Chronic disease status (5th run): The invariant-in-prose / sibling-not-swept pattern that recurred across Runs #2–#6 was structurally addressed in Run #7 via two helpers:
273	+	- `routes/storage/mod.rs::commit_upload(target: CommitTarget, ...)` — sealed `enqueue_scan_for` to module-private; the helper is now the only handler-reachable path for scan enqueue + scan_status flip after a DB write. Bug shapes 1–3 from prior runs are now structurally impossible to introduce in a new sibling.
274	+	- `crate::pricing::parse_dollars_to_cents` + `validate_dollars_f64` — canonical dollar-to-cents conversion; bypassing has historically introduced NaN→$0 and saturating-overflow silent bugs.
275	+
276	+	Net after Run #7 + S1 fix: 0 CRITICAL · 0 HIGH/SERIOUS · 1 SERIOUS deferred (Payments H2 soft race on `claim_free_project`) · a handful of MED/LOW polish items.
277	+
278	+	---
279	+
280	+	# Ultra Fuzz Report — MNW Server (Run #5 — historical)
281	+
282	+	Run date: 2026-05-30
283	+	Run number: 5
284	+
285	+	## Headline
286	+
287	+	\| Axis \| Run #4 \| Run #5 \| Direction \|
288	+	\|------\|--------\|--------\|-----------\|
289	+	\| Payments \| A- \| B \| ↓ (Run #4 plan items closed; 4 new SERIOUS surfaces previously unaudited: NULL item_id refund, splits >100% overflow, tip project authorization, cart unlisted bypass) \|
290	+	\| Storage \| A- \| B- \| ↓ (Run #4 `images.rs` ordering bug closed; same disease reappeared in `uploads.rs` route gate ordering — file-type rejection runs AFTER scan enqueue) \|
291	+	\| UX Wiring \| C+ \| B \| ↑ (Run #4 CSRF patchwork + creator-tier token fixed and structurally enforced; new CRIT: field-aware validation API is dead code at template boundary) \|
292	+	\| Security \| B+ \| A- \| ↑ (Run #4 git-shell validation, lockout email flood, CSRF policy all verified; no new CRIT/HIGH; remaining gaps are operational/MED) \|
293	+	\| Performance \| B \| B- \| ↓ (Run #4 scan_jobs retention + pool permit + broadcast bounding verified; new HIGHs in previously unaudited cart checkout + page-view paths + scheduler integrity scan) \|
294	+
295	+	Net: 3 CRITICAL (vs Run #4: 4), 13 HIGH/SERIOUS (vs Run #4: 10), 11 MED, 9 MINOR/LOW. Two axes regressed because Run #5 reached previously-unaudited territory (Payments tip/cart/refund edges; Performance hot-path request loops) while Run #4 plan items themselves were correctly closed. The Storage regression is a recurrence of the same shape in a sibling handler — the chronic invariant-in-prose disease, fourth consecutive run.
296	+
297	+	## Critical / High Findings (fix before launch)
298	+
299	+	1. [Storage — CRITICAL] `routes/storage/uploads.rs:204-237` — `confirm_upload` calls `enqueue_scan_for(...)` and `update_item_scan_status(... Pending)` BEFORE the match arm rejects `Download`/`Insertion`/`MediaImage`/`MediaVideo` with `BadRequest`. A misrouted-but-valid `item_id` confirm flips that item's scan status to Pending, blocks `stream_url` for every fan, and leaks a scan-job row for an S3 key that's then deleted.
300	+	2. [UX — CRITICAL] `error.rs:216-264` + `templates/error.html` — `AppError::validation_fields(summary, [(field, msg), ...])` is consumed only by unit tests. `ErrorTemplate` has no `fields:` member; no template renders per-field highlights. Every non-HTMX validation failure degrades to the global "Go Home / Go Back" page and wipes submitted form input. Handler authors are misled into thinking their carefully-tagged field errors reach the UI.
301	+	3. [Perf — CRITICAL] `build_runner.rs:175-180` — Partial-failure error message reports `("{}/{} succeeded", artifact_keys.len(), artifact_keys.len() + 1)`. Denominator is always `succeeded + 1`, regardless of how many targets actually ran. Three targets, one succeeded, two failed → reports "1/2" (should be 1/3). Failed-target count is never tracked.
302	+
303	+	### HIGH / SERIOUS
304	+
305	+	4. [Payments — SERIOUS] `db/transactions.rs:699-716` — `refund_transaction_by_payment_intent` returns `Vec<(TransactionId, ItemId)>` (non-Optional). Project-level transactions store `item_id IS NULL` (`routes/stripe/checkout/project.rs:135`). On `charge.refunded` for a project-level purchase, sqlx fails to decode NULL → `ItemId`; webhook handler 5xx's; Stripe retries forever.
306	+	5. [Payments — SERIOUS] `routes/stripe/webhook/checkout_helpers.rs:240-269` — `compute_splits` comment says "Defensive clamp: a misconfigured project_members row could sum past 100%" but the loop only adds remainder pennies and never subtracts. With two members at 60% each on a $10 sale, each is credited $6 — $12 credited against $10 of revenue. Clamp only affects `expected_total`, never the already-computed per-member amounts. Tests cover ≤100% only.
307	+	6. [Payments — SERIOUS] `routes/stripe/checkout/tips.rs:104-106` — `TipForm.project_id` is taken verbatim from the form. The webhook later calls `record_tip_splits(tip.id, tip.project_id, ...)` and credits THAT project's members. An attacker tipping creator A can pass project B's UUID; B's members get split obligations credited against A's tip. Stripe money flows correctly; on-platform `tip_splits` records and any downstream reporting are corrupted.
308	+	7. [Payments — SERIOUS] `db/cart.rs:94-123` + `routes/stripe/checkout/cart.rs` — `item.rs:47-49` enforces "Unlisted items can only be obtained through their bundle" via `if !item.listed`. `toggle_cart_preflight` and `get_cart_items` check `is_public` but NOT `listed`. An attacker who knows an unlisted item's UUID can POST to `/api/cart/{id}/toggle` and check out via the cart flow, fully bypassing the bundle-only gate.
309	+	8. [Payments — SERIOUS] `routes/stripe/webhook/subscriptions.rs:117-121, 67-69, 95-96` — `status_str.parse::<SubscriptionStatus>()` returns BadRequest for any status not in `enums.rs:183-198` (Stripe's `paused` is new). Webhook handler returns Err; scheduler retries forever until status changes.
310	+	9. [Payments — SERIOUS] `payments/webhooks.rs:294-308` — `is_full_refund` returns true when `amount_refunded >= amount` and both are zero (Stripe sometimes emits these for $0 verification charges). Triggers `refund_transaction_by_payment_intent` with default `unknown` intent ID. Test at lines 517-525 pins the behavior.
311	+	10. [Storage — HIGH] `routes/storage/versions.rs:159-174` — `version_confirm_upload` enqueues scan and flips `scan_status` to Pending BEFORE the `version.s3_key == req.s3_key` idempotency check at line 172. Duplicate retry of an already-confirmed upload knocks a Clean version back to Pending, breaking downloads.
312	+	11. [Storage — HIGH] `routes/storage/images.rs:179-208` — `project_image_confirm` replace branch is gated on `Ok(Some(old_size))` from `s3.object_size(&old_key)`. On `Err` (S3 hiccup) or `Ok(None)` (URL with no object behind it) it falls into the "no old image" branch and runs `try_increment_storage` without decrementing. Permanent storage over-count. Also: `update_project_image_url` runs AFTER `enqueue_deletions` of the old key, with no rollback path.
313	+	12. [Storage — HIGH] `routes/storage/media.rs:236-293` — `media_confirm` does three separate writes (`try_increment_storage`, `remove_pending_upload`, `media_files::create`) outside a transaction. Interruption between steps leaves S3 object orphaned with storage credit consumed and no DB row.
314	+	13. [UX — HIGH] `routes/pages/dashboard/wizards/item/save.rs:183-185, 214-227` — `let price_cents = (price_dollars * 100.0).round() as i32; if price_cents > 0 { validate_price_cents(price_cents)?; }`. Guard skips validation for 0 and negative values; value goes through `PriceCents::from_db` (no validation) into `update_item`. Submitting `price=-5` writes `-500` cents. Same pattern on PWYW: no `min <= suggested` check.
315	+	14. [UX — HIGH] `routes/pages/dashboard/wizards/item/save.rs:179-183` + `routes/api/items/bulk.rs:136-139` + `routes/pages/dashboard/wizards/project.rs:264-298` — `price_dollars: f64 = …parse()…unwrap_or(0.0)`. `"NaN".parse::<f64>()` succeeds; `NaN as i32 == 0` (silent Free). `1e20` saturates to `i32::MAX`. Bulk path catches via `PriceCents::new` cap; `save.rs` does not — persists raw.
316	+	15. [UX — HIGH] `routes/auth.rs:356-361` — `let is_taken = db::users::get_user_by_username(...).await.map(\|u\| u.is_some()).unwrap_or(false);`. Transient DB error during signup live-check returns "available", misleading the user; subsequent signup races whatever real state the DB is in.
317	+	16. [Perf — HIGH] `routes/stripe/checkout/cart.rs:68-248` — Per cart item: sequential `has_purchased_item`, optional `remove_from_cart`, per-free-item `begin tx → claim_free_item → increment_sales_count → commit`, `get_item_by_id`, second `remove_from_cart`. 20-item cart ≈ 80 sequential roundtrips, ~20 separate transactions, 20 distinct pool acquisitions in series.
318	+	17. [Perf — HIGH] `db/page_views.rs:18-32` — `record_view` spawned per public request, takes a pool connection to UPSERT. With `DB_POOL_MAX_CONNECTIONS = 25`, a viral item link spawns unbounded tasks, eats the pool, times out real request handlers at acquire. No batching, no per-(target,session) debounce.
319	+	18. [Perf — HIGH] `scheduler/integrity.rs:53-73` — `check_sales_count_drift`: `SELECT i.id, i.sales_count, COUNT(t.id) FROM items LEFT JOIN transactions ... GROUP BY i.id HAVING i.sales_count != COUNT(t.id) LIMIT 50`. `HAVING` filters after aggregation, so Postgres scans every row in `items` and joins every completed transaction in history before filtering. `LIMIT 50` doesn't cap the work. Weekly multi-minute query holding a pool connection.
320	+
321	+	## Scorecard
322	+
323	+	### Axis Summary Grades
324	+
325	+	\| Axis \| Overall \| Cold Spots \| Mandatory Surprise \|
326	+	\|------\|---------\|------------\|--------------------\|
327	+	\| Payments \| B \| `routes/stripe/checkout/cart.rs` (B-), `routes/stripe/checkout/tips.rs` (B-), `db/transactions.rs` (B-), `routes/stripe/webhook/checkout_helpers.rs` (B-), `routes/stripe/webhook/subscriptions.rs` (B) \| `compute_splits` carries a "Defensive clamp" comment that explicitly anticipates the >100% case and then fails to defend against it — only `expected_total` is clamped, the already-computed per-member splits go unchanged. Treat as evidence the defensive-comment culture is itself unreliable; comments and code drift independently. \|
328	+	\| Storage \| B- \| `routes/storage/uploads.rs` (C+), `routes/storage/images.rs` (C+), `routes/storage/versions.rs` (C+), `routes/storage/media.rs` (B-), `db/mod.rs::check_sandbox_cap` (C+) \| `stream_url` (`downloads.rs:119-122`) computes presigned expiry as `((duration as u64) * 2).max(3600)` where `duration: i32` and no DB CHECK ≥ 0 exists on `duration_seconds`. A negative value becomes near-`u64::MAX` expiry — a centuries-long presigned URL. The cast width and missing CHECK are independent latent bugs that compose into a multi-decade credential leak. \|
329	+	\| UX Wiring \| B \| `routes/pages/dashboard/wizards/item/save.rs` (B-), `error.rs` (B-), `routes/pages/public/discover.rs` (B) \| `update_item` takes ~13 positional `Option`s; call sites are unreadable and error-prone. The negative-price bug (HIGH #13) is born from this signature: anyone calling it has no compiler help distinguishing `Some(-500)` (bug) from `Some(500)` (intent). \|
330	+	\| Security \| A- \| `helpers.rs` (B+), `scanning/clamav.rs` (B), `scanning/yara.rs` (B), `rate_limit.rs` (B+) \| The "11 layer" scan pipeline test gives a false sense of coverage. ClamAV is `FailOpen` by explicit policy (`scanning/clamav.rs:19`), YARA silently skips rule files that fail to compile (`scanning/yara.rs:54-67`), and there is no startup assertion that any real AV layer is live. A misconfigured deploy can pass EICAR as Clean while the test suite is green. \|
331	+	\| Performance \| B- \| `routes/stripe/checkout/cart.rs` (C), `scheduler/announcements.rs` (C+), `scheduler/integrity.rs` (C+), `scheduler/cleanup.rs` (B-), `build_runner.rs` (B-), `db/page_views.rs` (C+), `db/pending_s3_deletions.rs` (B) \| The biggest scaling cliff is a 1-line `tokio::spawn` on the page-view path, not anything that "looks expensive". Hot-path response shipped its tail-latency problem to the same pool that serves it. \|
332	+
333	+	## Bug Counts by Severity
334	+
335	+	\| Severity \| Payments \| Storage \| UX \| Security \| Perf \| Total \|
336	+	\|---\|---\|---\|---\|---\|---\|---\|
337	+	\| CRITICAL \| — \| 1 \| 1 \| — \| 1 \| 3 \|
338	+	\| HIGH/SERIOUS \| 5 \| 3 \| 3 \| — \| 3 \| 14 \|
339	+	\| MED \| 2 \| 3 \| 2 \| 4 \| 2 \| 13 \|
340	+	\| MINOR/LOW \| 2 \| 2 \| 2 \| 3 \| 1 \| 10 \|
341	+
342	+	## Cross-Cutting Concerns
343	+
344	+	1. Side-effects-before-validation pattern. Storage (uploads/versions/images route gates run after scan enqueue), Payments (tip `project_id` accepted before authorization, cart `listed` not checked before checkout), UX (price `from_db` after a guard that skips zero/negative). Four files, three axes, same shape: persist first, validate later.
345	+	2. Invariant-in-prose, fourth consecutive run. Run #2→#3 was MaybeUser; Run #3→#4 was scan_status ordering comments-vs-code; Run #4 partial fix landed (`images.rs`) but the same disease moved up a layer to `uploads.rs` (the route-level file-type gate now runs after scan enqueue). The Payments "defensive clamp" comment in `compute_splits` is the same shape on a different organ. No type-level constructive impossibility has yet been applied to any of these.
346	+	3. Optional positional args as bug carriers. `update_item`'s ~13 positional `Option`s let the wizard pass a negative-price `Option<PriceCents::from_db>` past the validator. Same pattern is implicated in the UX field-error finding — `ErrorTemplate`'s struct literal is missing a `fields:` field at every callsite and the compiler doesn't care.
347	+	4. Hot-path pool pressure from fire-and-forget writes. `record_view` per pageview, `tokio::spawn` per cart line, scheduler advisory-lock conn pinned across S3. The 25-connection pool is sized for a quiet box; three independent fan-out patterns can each saturate it.
348	+	5. FailOpen with no liveness assertion. ClamAV FailOpen + YARA optional + no startup gate = a green test suite can coexist with zero real AV coverage. Same shape as the Performance "spawned task accumulates without bound" pattern — both are silent degradations the operator never sees.
349	+
350	+	## Components Successfully Stress-Tested
351	+
352	+	- All Run #4 Phase 1 closures verified standing (CSRF creator-tier token, `images.rs` scan_status ordering structural fix, git-shell validation, lockout `=` predicate, promo dedupe, scanner streaming + pool permit, broadcast bounded fan-out, scan_jobs retention).
353	+	- Stripe HMAC: multi-secret `v1=` rotation now accepts on any match (Run #4 polish landed).
354	+	- Promo `try_increment_use_count` race-free via atomic single-row UPDATE; release path uses detach for no-double-decrement; proptest-covered.
355	+	- License keys: 66-bit entropy, DB UNIQUE, `FOR UPDATE` activation, full recount on revoke (display lag only — finding #M).
356	+	- CSRF posture: `CsrfRouter<S>` newtype prevents a bare `Router::route(path, post(...))` from compiling in mutation-bearing files. Verified.
357	+	- Argon2id parameters + `DUMMY_HASH` timing equalization on user-not-found (login, OAuth, SyncKit).
358	+	- PKCE-S256 pinned at both authorize and token endpoints; OAuth code atomic single-use consume.
359	+	- JWT future-iat rejection + `jwt_invalidated_at` second-equal `<=` semantics; password change bumps `jwt_invalidated_at` via `update_user_password`.
360	+	- SSE shard-guard drop-before-remove; cross-process advisory locks for scheduler ticks.
361	+	- ZIP bomb: decompressed-bytes counted (not claimed); ratio + depth caps; nested magic-byte detection.
362	+	- `try_increment_storage` cap-predicate UPDATE; concurrent uploads cannot both squeeze past cap.
363	+
364	+	## Confidence Per Axis
365	+
366	+	- Payments HIGH — read 22 of 23 listed files end-to-end with targeted attacks per surface; all SERIOUS findings reproducible by line-tracing.
367	+	- Storage HIGH — CRITICAL and all three HIGHs mechanically reproducible; mandatory surprise composes two latent bugs via line-by-line read.
368	+	- UX Wiring HIGH — full read of `csrf.rs`, `error.rs`, `markdown.rs`, `formatting.rs`, `validation/mod.rs`; spot-checked 20+ templates for CSRF pattern; CRITICAL field-aware-validation finding cross-checked by grepping `validation_fields_ref` callers.
369	+	- Security MEDIUM — auth/CSRF/OAuth/scanning surfaces walked thoroughly; admin/moderation/reports/ssh_keys API/totp routes only sampled. ClamAV FailOpen is policy not bug; flagged as architectural risk.
370	+	- Performance MEDIUM-HIGH — spot-checked DB call patterns across 15+ files; exhaustive route-level N+1 sweep deferred; stripe/webhook code shows similar `for x in &xs` loops at `checkout.rs:149,167,198,452` that were not deep-audited.
371	+
372	+	## Metrics
373	+
374	+	- Modules audited: ~80
375	+	- Cold spots (≤ B): 18
376	+	- Bugs: 3 CRITICAL, 14 HIGH/SERIOUS, 13 MED, 10 MINOR/LOW
377	+	- Axes at A- or above: 1/5 (Security)
378	+
379	+	## Delta Since Run #4
380	+
381	+	FIXED (Run #4 items not surfaced this run):
382	+	- All 10 Run #4 Phase 1 items verified closed (CSRF creator-tier, `images.rs` ordering, git-shell validation, lockout email flood, cancel_pending CSRF, promo dedupe, scanner streaming + pool permit, scan_jobs retention, broadcast bounding).
383	+	- All 7 Run #4 Phase 2 items verified closed (cart template price math, media reupload race, pending_uploads reaper bump, TOTP step-replay, delete_other_sessions cache eviction, `/login` CSRF, OAuth fetch_optional).
384	+	- All 5 Run #4 Phase 3 items verified closed (claim_pending_build partial index, build status reaper race, `extract_s3_key_from_url` host pinning, TOTP `pending_2fa` tracking row, KNOWN_SYNC_APPS removed entirely).
385	+	- All Phase 4 polish items verified closed.
386	+
387	+	NEW CRITICAL/HIGH in Run #5 (previously unaudited or regressed):
388	+	- Storage: `uploads.rs` route-level file-type gate runs after scan enqueue (CRIT).
389	+	- UX: `validation_fields` plumbing is dead code at template boundary (CRIT).
390	+	- Perf: `build_runner.rs` partial-failure denominator nonsense (CRIT).
391	+	- Payments: NULL `item_id` decode bomb on project-level refunds (SERIOUS).
392	+	- Payments: `compute_splits` over-credits when project_members sum >100% (SERIOUS).
393	+	- Payments: tip `project_id` not validated vs recipient (SERIOUS).
394	+	- Payments: cart bypasses item `listed` gate (SERIOUS).
395	+	- Payments: unknown subscription status retry storm (SERIOUS).
396	+	- Storage: `version_confirm_upload` scan enqueue before idempotency check (HIGH).
397	+	- Storage: `project_image_confirm` mis-accounts on S3 probe failure + no rollback (HIGH).
398	+	- Storage: `media_confirm` non-atomic three-write sequence (HIGH).
399	+	- UX: negative/NaN price acceptance via `PriceCents::from_db` after permissive guard (HIGH).
400	+	- UX: username availability check fails open on DB error (HIGH).
401	+	- Perf: cart checkout 80 sequential roundtrips (HIGH).
402	+	- Perf: `record_view` unbounded spawn per public request (HIGH).
403	+	- Perf: `check_sales_count_drift` full-table aggregate (HIGH).
404	+
405	+	CHRONIC (across Run #3 → Run #4 → Run #5):
406	+	- Invariant-in-prose / policy-not-in-types — FOURTH consecutive run. Run #4 partially fixed the scan_status ordering inside `images.rs` (and the CSRF policy via `CsrfRouter` structurally), but the same disease moved up a layer: in `uploads.rs` the route-level file-type gate now runs after scan enqueue. The constructive-impossibility shape needed: extract a `commit_upload(file_type, ...)` higher-level operation that validates the file_type before doing any scan/credit side effects, then make `enqueue_scan_for` + `update_*_scan_status` `pub(crate)` so handlers cannot call them directly. The Payments `compute_splits` "Defensive clamp" comment + the UX `validation_fields_ref` orphan plumbing are the same disease in different organs.
407	+
408	+	REGRESSED:
409	+	- Payments (A- → B) — four new SERIOUS bugs surfaced in previously-unaudited tip/cart/refund/subscription-status corners. Not a regression in fixed code; a regression in audit coverage.
410	+	- Storage (A- → B-) — invariant-in-prose recurrence (chronic above).
411	+	- Performance (B → B-) — hot-path request loops audited for the first time.
412	+
413	+	---
414	+
415	+	# Plan: Restore Every Axis to A- or Higher (Run #5)
416	+
417	+	Target grades: Payments A · Storage A · UX A- · Security A- · Performance A-.
418	+
419	+	User priority for the launch window: resolve every CRITICAL/SERIOUS/HIGH before re-running. Iterate until audits surface only small new errors.
420	+
421	+	## Phase 1 — CRITICAL (fix today)
422	+
423	+	1. Storage CRIT — `uploads.rs` file-type gate ordering. `routes/storage/uploads.rs:204-237`. Move the match arm that rejects `Download`/`Insertion`/`MediaImage`/`MediaVideo` BEFORE `enqueue_scan_for` and `update_item_scan_status`. Then make `enqueue_scan_for` + `update_*_scan_status` `pub(crate)` and expose a `commit_upload(file_type, item_id, s3_key)` higher-level op that performs validation → credit → row insert → status flip in the correct order. The same constructor must serve `versions.rs` and `images.rs`. This closes the chronic invariant-in-prose finding.
424	+	2. UX CRIT — Field-aware validation reaches the UI. `error.rs:216-264` + `templates/error.html` + `templates/partials/form_errors.html` (new). Either (a) add `fields: Vec<(String, String)>` to `ErrorTemplate` and a `{% for f in fields %}` block in `error.html` + per-input markup; or (b) delete `validation_fields*` API entirely and replace handler callsites with `validation(summary)`. Choose (a) for non-HTMX forms that need to preserve user input; choose (b) only if every existing callsite is HTMX-only and uses OOB swaps for inline errors. Audit all `validation_fields` callers and pick a path.
425	+	3. Perf CRIT — `build_runner.rs` partial-failure denominator. `build_runner.rs:175-180`. Track `failed_count` alongside `artifact_keys`; report `succeeded/(succeeded+failed)`. Add a test that runs 3 targets with 2 failures and asserts "1/3" in the error string.
426	+
427	+	## Phase 2 — SERIOUS / HIGH (fix this weekend)
428	+
429	+	4. Payments SERIOUS — NULL item_id refund decode. `db/transactions.rs:699-716`. Change return to `Vec<(TransactionId, Option<ItemId>)>`; `refund_transaction_by_payment_intent` caller skips `decrement_sales_count`/`revoke_keys_by_transaction` when `item_id is None`. Add a fixture-based test against a project-level transaction.
430	+	5. Payments SERIOUS — `compute_splits` over-credit. `routes/stripe/webhook/checkout_helpers.rs:240-269`. Reject `total_split_pct > 100` at the project_members write site (DB CHECK or validation). Defensively, scale each split proportionally when sum > 100, OR clamp each split against remaining `expected_total` budget in the loop. Add a test at 60%+60%.
431	+	6. Payments SERIOUS — Tip project authorization. `routes/stripe/checkout/tips.rs:104-106`. After accepting `TipForm`, fetch the project and assert `project.user_id == recipient_id`; return 400 otherwise.
432	+	7. Payments SERIOUS — Cart bypasses `listed` gate. `db/cart.rs:94-123` and `get_cart_items`/`get_cart_items_for_seller`. Add `AND i.listed = true` to all three queries. Add a check in the per-seller checkout path. Add a regression test that toggles an unlisted item into the cart and asserts rejection.
433	+	8. Payments SERIOUS — Unknown subscription status. `routes/stripe/webhook/subscriptions.rs:117-121`. Replace `?` with a match: known statuses dispatch; unknown statuses `tracing::warn!` and return `StatusCode::OK` so Stripe stops retrying.
434	+	9. Payments SERIOUS — `is_full_refund` zero-amount. `payments/webhooks.rs:294-308`. Predicate becomes `amount > 0 && amount_refunded >= amount`. Update the test at line 517-525 to invert (zero-amount must NOT be treated as full refund).
435	+	10. Storage HIGH — `versions.rs` enqueue-before-idempotency. `routes/storage/versions.rs:159-174`. Move idempotency `version.s3_key == req.s3_key` check BEFORE `enqueue_scan_for`. Apply the Phase 1 `commit_upload` helper here.
436	+	11. Storage HIGH — `project_image_confirm` probe-failure + no rollback. `routes/storage/images.rs:179-208`. (a) On `Err` or `Ok(None)` from `s3.object_size`, fall back to the row's recorded size (add a `project_image_bytes` column if not present) rather than the "no old image" branch. (b) Move `enqueue_deletions` to AFTER `update_project_image_url` success, or wrap both in a tx with the enqueue inside.
437	+	12. Storage HIGH — `media_confirm` non-atomic three-write. `routes/storage/media.rs:236-293`. Wrap `try_increment_storage` → `remove_pending_upload` → `media_files::create` in a transaction. The storage credit refund must fire on any failure path.
438	+	13. UX HIGH — Negative/NaN prices via `from_db`. `routes/pages/dashboard/wizards/item/save.rs:183-185, 214-227`. Use `PriceCents::new(price_cents)?` unconditionally; drop the `> 0` guard. Add `min <= suggested` check on PWYW.
439	+	14. UX HIGH — f64 price parsing accepts NaN. Same file + `routes/api/items/bulk.rs:136-139` + `routes/pages/dashboard/wizards/project.rs:264-298`. Parse as decimal cents directly (or `Decimal::from_str_exact` from the `rust_decimal` crate already in `Cargo.lock`); reject NaN/Inf; reject negative/saturating values before cast.
440	+	15. UX HIGH — Username live-check fails open. `routes/auth.rs:356-361`. Propagate the DB error or treat it as "unavailable, try again" — never "available" by default.
441	+	16. Perf HIGH — Cart checkout sequential roundtrips. `routes/stripe/checkout/cart.rs:68-248`. Bulk-load `has_purchased_item` once with `WHERE item_id = ANY($1)`. Batch `get_item_by_id` lookups. Claim free items in a single transaction with batched inserts. Aim for ≤ 5 roundtrips for any cart size.
442	+	17. Perf HIGH — `record_view` unbounded spawn. `db/page_views.rs:18-32`. Replace per-request spawn with an `mpsc` channel; one background task drains every 250ms and flushes one bulk `INSERT … ON CONFLICT … DO UPDATE SET view_count = page_view_daily.view_count + EXCLUDED.view_count`.
443	+	18. Perf HIGH — Sales drift full-table aggregate. `scheduler/integrity.rs:53-73`. Maintain trigger-updated `transactions_completed_count` per item, or run the check off-pool against a snapshot. Short term: add `WHERE i.sales_count > 0 OR EXISTS (SELECT 1 FROM transactions WHERE item_id = i.id LIMIT 1)` to drop the LEFT JOIN's all-zero rows from the aggregate.
444	+
445	+	## Phase 3 — MED (fix before re-run if cheap)
446	+
447	+	- Storage: advisory-lock leak in `check_sandbox_cap` (`db/mod.rs:92-128`) → `pg_advisory_xact_lock` or RAII guard.
448	+	- Storage: `is_s3_key_live` missing tables (`db/pending_s3_deletions.rs:67-82`) → audit all s3_key-bearing columns; consider normalized `s3_objects` table.
449	+	- Storage: `delete_version` owner SELECT outside tx + post-commit S3 enqueue (`db/versions.rs:267-315`) → owner SELECT inside tx; enqueue inside tx.
450	+	- Security: ClamAV `FailOpen` startup assertion (`scanning/clamav.rs:19` + `scanning/mod.rs:151-164`) → refuse boot if scan configured but no AV layer live; emit `tracing::error!` after N consecutive ClamAV errors.
451	+	- Security: `helpers.rs:44-50` `DefaultHasher` for advisory lock keys → stable hasher (`sha2` first 8 bytes, or `xxh3` with constant seed).
452	+	- Security: OAuth `state` size cap (`routes/oauth.rs:379-386`) → reject `form.state.len() > 1024`; cap `code_challenge` at 44 base64url chars.
453	+	- Security: `extract_client_ip` non-Cloudflare fallback warning (`helpers.rs:33-40`) → emit one-shot `tracing::warn!` at startup if no `CF-Connecting-IP` seen after N requests.
454	+	- UX: pagination offset overflow (`routes/pages/public/discover.rs:85-87`, `routes/admin/users.rs:37-39`) → clamp `page` to `total_pages.max(1)` before arithmetic.
455	+	- UX: forms render without `_csrf` when handler forgets to populate `csrf_token` → make `csrf_token` non-optional in form-bearing templates (compile-time error) or render an inline "refresh and try again" notice.
456	+	- UX: `validate_username` byte-length check (`routes/auth.rs:322`) → `chars().count()`, or reorder ASCII filter before length.
457	+	- Perf: scheduler advisory-lock connection pinned across S3 (`scheduler/mod.rs:92-279`) → dedicated `PgPoolOptions::new().max_connections(1)` outside the main pool.
458	+	- Perf: cleanup S3 deletes serialized inside scheduler tick (`scheduler/cleanup.rs:77-100`) → `for_each_concurrent(8, ...)`; better, move user-deletion off the scheduler tick.
459	+
460	+	## Phase 4 — Polish (after re-run shows axes ≥ A-)
461	+
462	+	- Payments: `has_active_subscription_to_item` period-end clause mirroring (`db/subscriptions.rs:464-470`).
463	+	- Payments: `get_active_creator_tier` + `sync_user_creator_tier` period-end defense (`db/creator_tiers.rs:91-103, 181-194`).
464	+	- Payments: `release_use_count` race messaging (`db/promo_codes.rs:184-200`).
465	+	- Payments: License key `activation_count` recount on revoke (`db/license_keys.rs:343-382`).
466	+	- Payments: Subscription minimum-charge check (`payments/checkout.rs:283-317`).
467	+	- Payments: Webhook v1/v2 unmark-on-failure parity (`routes/stripe/webhook/mod.rs:48-86`).
468	+	- Storage: `media_files.list_folders` scan filter (`db/media_files.rs:73-82`).
469	+	- Storage: `pending_uploads.record_pending_upload` silent user-mismatch (`db/pending_uploads.rs:23-33`).
470	+	- Storage: `append_log_bounded` non-atomic size cap (`build_runner.rs:516-534`).
471	+	- Storage: `downloads.rs:119-122` presigned-URL expiry: cap `duration_seconds` at i64 + add DB CHECK ≥ 0.
472	+	- Security: `validate_token_consuming` for OAuth POST (`routes/oauth.rs:206`).
473	+	- Security: `parse_repo_path` rejects lone-dot entries (`git_ssh.rs:162`).
474	+	- Security: ClamAV INSTREAM 16K cap → treat truncation as fail-closed (`scanning/clamav.rs:101-108`).
475	+	- UX: validation error messages stop reflecting user input (`wizards/item/mod.rs:176-179`).
476	+	- UX: CSRF body extraction stops using `from_utf8_lossy` (`csrf.rs:528-543`).
477	+	- Perf: scan-pipeline 400 MiB worst-case capacity-plan note (`constants.rs:156-157`).
478	+	- Perf: announcement fan-out persistence + resume (`scheduler/announcements.rs:59-89, 147-177`).
479	+	- Perf: build log per-line DB roundtrip (`build_runner.rs:516-534`) → in-process running total.
480	+
481	+	## Phase 5 — Chronic (must land in Run #6 or this audit cycle has failed)
482	+
483	+	Invariant-in-prose / policy-not-in-types, fourth consecutive run. The Phase 1 #1 fix (constructive `commit_upload` helper sealing the lower-level ops) is the only acceptable resolution. Memory notes, comments warning future authors, and renamed-helper approaches have been tried in three prior runs and recurred each time. After Phase 1 lands, audit `compute_splits` and `ErrorTemplate` for the same shape and apply the same treatment.
484	+
485	+	---
486	+
487	+
488	+
489	+	## Headline
490	+
491	+	\| Axis \| Run #3 \| Run #4 \| Direction \|
492	+	\|------\|--------\|--------\|-----------\|
493	+	\| Payments \| A- \| A- \| flat (1 new SERIOUS: promo over-release on cart cleanup) \|
494	+	\| Storage \| B+ \| A- \| ↑ (Run #3 image-confirm rollback/race-guard fixes verified; one residual CRIT in same file) \|
495	+	\| UX Wiring \| B+ \| C+ \| ↓ (CSRF policy patchwork: missing tokens + undocumented mutation in exempt prefix) \|
496	+	\| Security \| B+ \| B+ \| flat (different HIGHs: git-shell repo-name validation + lockout DoS) \|
497	+	\| Performance \| B- \| B \| ↑ (Run #3 sync-FS-in-async + DashMap shard-lock + monitor split all verified; new unbounded scan_jobs/broadcast/pool-permit findings) \|
498	+
499	+	Net: 4 CRITICALs (vs Run #3: 2), 10 HIGH/SERIOUS (vs Run #3: 10), 22 MED, 23 MINOR/LOW. Ship-blockers are concentrated in two structural rots — CSRF policy and scan_jobs growth — not in net-new logic mistakes.
500	+

Lines truncated

M server/docs/frontend.md +2 -10

			@@ -76,7 +76,7 @@ Single file: `static/style.css` (~3100 lines). No preprocessor, no minification
76	76		2. Variables (`:root` custom properties)
77	77		3. Reset + Base (global element styles)
78	78		4. Layout (`.padded-page`, `.centered-page`, `.container`)
79		-	5. Buttons (`.primary`, `.secondary`, `.danger`, `.small`)
	79	+	5. Buttons (`.btn-primary`, `.btn-secondary`, `.btn-danger`, `.small` modifier)
80	80		6. Forms (`.form-group`, `.form-section`, `.checkbox-group`)
81	81		7. Tables (`.data-table`, `.compact-table`)
82	82		8. Utilities (`.text-sm`, `.muted`, `.scroll-x`, etc.)
			@@ -110,15 +110,7 @@ Single file: `static/style.css` (~3100 lines). No preprocessor, no minification
110	110
111	111		### Button Variants
112	112
113		-	\| Class \| Background \| Text \| Usage \|
114		-	\|-------\|-----------\|------\|-------\|
115		-	\| `button` (default) \| `--light-background` \| `--detail` \| Generic actions \|
116		-	\| `.primary` \| `--primary-dark` (black) \| White \| Main CTAs \|
117		-	\| `.secondary` \| `--surface-muted` \| `--detail` \| Alternative actions \|
118		-	\| `.danger` \| `--danger` (red) \| White \| Destructive actions \|
119		-	\| `.small` \| (modifier) \| (modifier) \| Compact size for table cells \|
120		-
121		-	Combine: `class="primary small"`, `class="danger small"`.
	113	+	Buttons use the `.btn-*` family. See `design-system.md` § Buttons for the current class names, colors, and modifiers. The bare `.primary` / `.secondary` / `.danger` classes were retired; use `.btn-primary`, `.btn-secondary`, `.btn-danger` instead.
122	114
123	115		## HTMX Patterns
124	116

M server/docs/schema.md +4 -11

			@@ -1,6 +1,6 @@
1	1		# Schema — MNW Server
2	2
3		-	PostgreSQL database. 57 migrations in `migrations/`, auto-applied on boot via sqlx. Extension: `pg_trgm` (trigram fuzzy search).
	3	+	PostgreSQL database. Migrations live under `migrations/`, numbered and auto-applied on boot via sqlx; the directory is the source of truth. Extension: `pg_trgm` (trigram fuzzy search).
4	4
5	5		## Domain Map
6	6
			@@ -20,7 +20,6 @@ PostgreSQL database. 57 migrations in `migrations/`, auto-applied on boot via sq
20	20		\| Custom Domains \| 1 \| Creator vanity domains \|
21	21		\| OAuth \| 1 \| PKCE authorization codes \|
22	22		\| Waitlist & Invites \| 3 \| Creator waves, waitlist, invite codes \|
23		-	\| Content Security \| 2 \| Download fingerprints, streaming sessions \|
24	23		\| Admin \| 1 \| Abuse reports \|
25	24		\| Media \| 1 \| User media library (images for markdown) \|
26	25		\| Import \| 1 \| Bulk import jobs (Patreon, Ko-fi, Gumroad) \|
			@@ -47,7 +46,7 @@ Core accounts. Every user has one row; creator features are gated by `can_create
47	46		\| stripe_account_id \| TEXT \| Stripe Connect account \|
48	47		\| stripe_onboarding_complete \| BOOL \| \|
49	48		\| can_create_projects \| BOOL \| Creator gate \|
50		-	\| creator_tier \| TEXT \| 'basic', 'small_files', 'big_files', 'streaming' \|
	49	+	\| creator_tier \| TEXT \| 'basic', 'small_files', 'big_files', 'everything' \|
51	50		\| storage_used_bytes \| BIGINT \| Computed from versions + insertions \|
52	51		\| max_file_override_bytes \| BIGINT \| Per-user override \|
53	52		\| grandfathered_until \| TIMESTAMPTZ \| Grace period for existing creators \|
			@@ -130,7 +129,7 @@ Products/content within projects. The central commerce entity — holds pricing,
130	129		\| enable_license_keys \| BOOL \| DRM gate \|
131	130		\| custom_license_text \| TEXT \| License shown on download \|
132	131		\| sales_count / play_count / download_count \| INT \| Denormalized counters \|
133		-	\| web_only \| BOOL \| Prevents download (streaming only) \|
	132	+	\| web_only \| BOOL \| Publish without emailing mailing-list subscribers \|
134	133
135	134		Indexes: project_id, is_public, sales_count, tsvector search (title+description+body), title trigram, desc trigram, (project_id, slug).
136	135		Trigger: `update_items_updated_at`.
			@@ -463,12 +462,6 @@ Creator onboarding pipeline: waves (batches), waitlist (applications), invite co
463	462		- creator_waitlist: user_id → users CASCADE (UNIQUE), wave_id → creator_waves SET NULL
464	463		- invite_codes: creator_id → users CASCADE, code UNIQUE
465	464
466		-	### download_fingerprints
467		-	Watermark tracking for paid downloads. Records fingerprint ID, IP, UA per download.
468		-
469		-	### streaming_sessions
470		-	Active streaming sessions with IP binding and concurrency tracking.
471		-
472	465		### reports
473	466		User abuse reports. Status: open → resolved/dismissed.
474	467
			@@ -507,6 +500,6 @@ All trigram indexes use `gin_trgm_ops` from the `pg_trgm` extension.
507	500
508	501		## Key Paths
509	502
510		-	- `migrations/` — all 57 migration files (numbered, applied in order)
	503	+	- `migrations/` — numbered SQL files, applied in order
511	504		- `src/db/` — query functions grouped by domain
512	505		- `src/models/` — Rust structs matching table schemas

A server/migrations/123_backup_code_prefix.sql +15

		@@ -0,0 +1,15 @@
1	+	-- Add a non-secret lookup prefix to backup_codes so verification is O(1)
2	+	-- instead of one Argon2 hash per unused row.
3	+	--
4	+	-- The prefix is the first 4 chars of the (now 16-char) code; the full code is
5	+	-- still Argon2-hashed in code_hash, so leaking code_prefix only narrows the
6	+	-- offline brute-force space from 36^16 to 36^12 (~62 bits remaining secret).
7	+	--
8	+	-- Legacy 8-char codes have code_prefix = NULL; verify falls back to the
9	+	-- iterate-all path for those rows until they're regenerated.
10	+
11	+	ALTER TABLE backup_codes ADD COLUMN code_prefix VARCHAR(8) NULL;
12	+
13	+	CREATE INDEX idx_backup_codes_user_prefix
14	+	ON backup_codes (user_id, code_prefix)
15	+	WHERE used_at IS NULL AND code_prefix IS NOT NULL;

A server/migrations/124_completion_effects.sql +72

		@@ -0,0 +1,72 @@
1	+	-- Outbox table for transaction-completion side effects.
2	+	--
3	+	-- The Stripe webhook handler flips the underlying row (transaction, tip,
4	+	-- subscription) to "completed" inside one DB transaction; it ALSO inserts one
5	+	-- `completion_effects` row per side-effect (bundle grant, license key, revenue
6	+	-- splits, emails, etc.) in that same transaction. A background worker drains
7	+	-- the outbox with FOR UPDATE SKIP LOCKED, retrying each effect independently.
8	+	--
9	+	-- This decouples "the sale is recorded" (atomic, exactly-once) from "all
10	+	-- derived effects have run" (eventually consistent, individually retryable).
11	+	-- Before this table existed, a retry of a completed webhook silently
12	+	-- abandoned every effect because `complete_transaction` returned `Ok(None)`
13	+	-- on the second call and the handler short-circuited.
14	+
15	+	CREATE TABLE completion_effects (
16	+	id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
17	+
18	+	-- Polymorphic parent: exactly one of these is set per row.
19	+	transaction_id UUID REFERENCES transactions(id) ON DELETE CASCADE,
20	+	tip_id UUID REFERENCES tips(id) ON DELETE CASCADE,
21	+	subscription_id UUID REFERENCES subscriptions(id) ON DELETE CASCADE,
22	+
23	+	-- Discriminator for the dispatcher's switch.
24	+	kind VARCHAR(64) NOT NULL,
25	+
26	+	-- JSON-serialised inputs needed to re-execute the effect.
27	+	payload JSONB NOT NULL DEFAULT '{}'::jsonb,
28	+
29	+	-- pending | succeeded | failed (terminal after max_attempts)
30	+	status VARCHAR(16) NOT NULL DEFAULT 'pending',
31	+	attempt INT NOT NULL DEFAULT 0,
32	+	last_error TEXT,
33	+
34	+	scheduled_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
35	+	completed_at TIMESTAMPTZ,
36	+	created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
37	+
38	+	CHECK (
39	+	(transaction_id IS NOT NULL)::int +
40	+	(tip_id IS NOT NULL)::int +
41	+	(subscription_id IS NOT NULL)::int = 1
42	+	)
43	+	);
44	+
45	+	CREATE INDEX idx_completion_effects_pending
46	+	ON completion_effects (scheduled_at)
47	+	WHERE status = 'pending';
48	+
49	+	CREATE INDEX idx_completion_effects_transaction
50	+	ON completion_effects (transaction_id)
51	+	WHERE transaction_id IS NOT NULL;
52	+
53	+	CREATE INDEX idx_completion_effects_tip
54	+	ON completion_effects (tip_id)
55	+	WHERE tip_id IS NOT NULL;
56	+
57	+	CREATE INDEX idx_completion_effects_subscription
58	+	ON completion_effects (subscription_id)
59	+	WHERE subscription_id IS NOT NULL;
60	+
61	+	-- Effect-level idempotency: don't double-enqueue the same effect for the same
62	+	-- parent (e.g. a duplicate webhook delivery races the first one between the
63	+	-- "row already completed?" check and the outbox INSERT).
64	+	CREATE UNIQUE INDEX uq_completion_effects_transaction_kind
65	+	ON completion_effects (transaction_id, kind)
66	+	WHERE transaction_id IS NOT NULL;
67	+	CREATE UNIQUE INDEX uq_completion_effects_tip_kind
68	+	ON completion_effects (tip_id, kind)
69	+	WHERE tip_id IS NOT NULL;
70	+	CREATE UNIQUE INDEX uq_completion_effects_subscription_kind
71	+	ON completion_effects (subscription_id, kind)
72	+	WHERE subscription_id IS NOT NULL;

A server/migrations/125_completion_effects_more_parents.sql +40

		@@ -0,0 +1,40 @@
1	+	-- Extend the completion_effects outbox to accept fan_plus and creator-tier
2	+	-- subscriptions as parents. These are separate tables from `subscriptions`
3	+	-- (content-creator subscriptions); the original migration only modeled the
4	+	-- three parent types known at the time. Each new parent gets the same
5	+	-- (parent_id, kind) unique-index treatment for at-most-once enqueue.
6	+
7	+	ALTER TABLE completion_effects
8	+	ADD COLUMN fan_plus_subscription_id UUID
9	+	REFERENCES fan_plus_subscriptions(id) ON DELETE CASCADE;
10	+
11	+	ALTER TABLE completion_effects
12	+	ADD COLUMN creator_subscription_id UUID
13	+	REFERENCES creator_subscriptions(id) ON DELETE CASCADE;
14	+
15	+	-- Replace the original 3-way exactly-one CHECK with a 5-way version.
16	+	ALTER TABLE completion_effects DROP CONSTRAINT completion_effects_check;
17	+	ALTER TABLE completion_effects ADD CONSTRAINT completion_effects_check
18	+	CHECK (
19	+	(transaction_id IS NOT NULL)::int +
20	+	(tip_id IS NOT NULL)::int +
21	+	(subscription_id IS NOT NULL)::int +
22	+	(fan_plus_subscription_id IS NOT NULL)::int +
23	+	(creator_subscription_id IS NOT NULL)::int = 1
24	+	);
25	+
26	+	CREATE INDEX idx_completion_effects_fan_plus_subscription
27	+	ON completion_effects (fan_plus_subscription_id)
28	+	WHERE fan_plus_subscription_id IS NOT NULL;
29	+
30	+	CREATE INDEX idx_completion_effects_creator_subscription
31	+	ON completion_effects (creator_subscription_id)
32	+	WHERE creator_subscription_id IS NOT NULL;
33	+
34	+	CREATE UNIQUE INDEX uq_completion_effects_fan_plus_kind
35	+	ON completion_effects (fan_plus_subscription_id, kind)
36	+	WHERE fan_plus_subscription_id IS NOT NULL;
37	+
38	+	CREATE UNIQUE INDEX uq_completion_effects_creator_subscription_kind
39	+	ON completion_effects (creator_subscription_id, kind)
40	+	WHERE creator_subscription_id IS NOT NULL;

A server/migrations/126_project_cover_image_size.sql +11

		@@ -0,0 +1,11 @@
1	+	-- Track the byte size of each project's cover image in the database so we
2	+	-- can correctly decrement storage on replace/clear without doing an S3 HEAD.
3	+	-- The HEAD-based path was racy: between two replace requests, the S3 object
4	+	-- size could change underfoot (or the HEAD could fail entirely), producing
5	+	-- a wrong decrement and silently corrupting `creator_tiers.storage_used`.
6	+	--
7	+	-- NULL means "size not recorded" — pre-migration rows fall back to a
8	+	-- best-effort S3 HEAD until the next replace stores a real value.
9	+
10	+	ALTER TABLE projects
11	+	ADD COLUMN cover_image_size_bytes BIGINT;

A server/migrations/127_media_files_scan_status_index.sql +9

		@@ -0,0 +1,9 @@
1	+	-- Composite index for the `list user's media filtered by scan_status` query
2	+	-- path. Without it, listing a creator's clean media files scans the full
3	+	-- (user_id) partition and filters in memory, which becomes a hotspot on
4	+	-- accounts with thousands of media rows.
5	+	--
6	+	-- Run #2 audit, medium-priority.
7	+
8	+	CREATE INDEX IF NOT EXISTS idx_media_files_user_scan_status
9	+	ON media_files (user_id, scan_status);

A server/migrations/128_sync_apps_allow_loopback.sql +24

		@@ -0,0 +1,24 @@
1	+	-- Per-app opt-in for the RFC 8252 loopback redirect wildcard.
2	+	--
3	+	-- Background: OAuth `redirect_uri` validation accepts ANY http://127.0.0.1:PORT/,
4	+	-- [::1]:PORT/, or localhost:PORT/ regardless of an app's registered
5	+	-- redirect_uris list. That's correct for native (desktop/mobile) apps that
6	+	-- can't reserve known ports, but it widens the attack surface for any
7	+	-- web-only app — a phishing URL pointing /oauth/authorize at the attacker's
8	+	-- loopback can hijack the auth flow even with PKCE in play (attacker
9	+	-- initiates and supplies their own code_verifier).
10	+	--
11	+	-- This column gates the wildcard per app:
12	+	-- - existing rows default `true` to preserve SyncKit (today's only OAuth
13	+	-- consumer, all native).
14	+	-- - new rows registered via the developer UI default `false`; the app
15	+	-- creator must explicitly opt in by checking "This is a desktop or
16	+	-- mobile app."
17	+	--
18	+	-- See `routes/oauth.rs::is_localhost_redirect` and the Run #2 audit entry.
19	+
20	+	ALTER TABLE sync_apps ADD COLUMN allow_loopback BOOLEAN NOT NULL DEFAULT true;
21	+
22	+	-- Future-proof: change the column-level default to `false` so newly
23	+	-- INSERTed rows that don't explicitly set it land safe.
24	+	ALTER TABLE sync_apps ALTER COLUMN allow_loopback SET DEFAULT false;

A server/migrations/129_pending_s3_deletions_dead_letter.sql +22

		@@ -0,0 +1,22 @@
1	+	-- Migration 129: pending_s3_deletions dead-letter table
2	+	--
3	+	-- Audit Run #3 (Storage SERIOUS): `pending_s3_deletions.attempts` was
4	+	-- incremented forever on a permanently-failing key (malformed s3_key, gone
5	+	-- bucket, ACL gap), generating retry noise and crowding out real deletions.
6	+	-- The dead-letter table moves rows that exceed MAX_ATTEMPTS off the hot
7	+	-- queue into an operator-visible parking lot so the worker can keep making
8	+	-- progress on the legitimate backlog. Rows here require manual triage.
9	+
10	+	CREATE TABLE IF NOT EXISTS pending_s3_deletions_dead_letter (
11	+	id UUID PRIMARY KEY,
12	+	s3_key TEXT NOT NULL,
13	+	bucket TEXT NOT NULL,
14	+	source TEXT NOT NULL,
15	+	created_at TIMESTAMPTZ NOT NULL,
16	+	attempts INT NOT NULL,
17	+	last_attempted_at TIMESTAMPTZ,
18	+	dead_lettered_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
19	+	);
20	+
21	+	CREATE INDEX IF NOT EXISTS idx_pending_s3_deletions_dead_letter_dead_lettered_at
22	+	ON pending_s3_deletions_dead_letter(dead_lettered_at);

A server/migrations/130_user_sessions_ip_address_index.sql +11

		@@ -0,0 +1,11 @@
1	+	-- Migration 130: index user_sessions(ip_address)
2	+	--
3	+	-- Audit Run #3 (Perf LOW): `count_active_sandboxes_for_ip` (db/mod.rs)
4	+	-- filters `user_sessions.ip_address = $1` without an index, so the abuse
5	+	-- prevention cap that fires on every sandbox signup table-scans
6	+	-- user_sessions. Partial index over the non-null subset since ip_address
7	+	-- is nullable (legacy sessions predating session tracking can carry NULL).
8	+
9	+	CREATE INDEX IF NOT EXISTS idx_user_sessions_ip_address
10	+	ON user_sessions(ip_address)
11	+	WHERE ip_address IS NOT NULL;

A server/migrations/133_items_duration_seconds_nonnegative.sql +13

M server/site-docs/public/about/compare.md +2 -2

M server/site-docs/public/about/roadmap.md +2 -2

M server/site-docs/public/guide/fan-plus.md +10 -2

M server/site-docs/public/guide/payouts.md +2 -2

M server/site-docs/public/guide/splits.md +4

M server/site-docs/public/guide/stripe.md +3 -3

M server/site-docs/public/guide/tiers.md +28 -24

M server/site-docs/public/html/contact.html +3 -3

M server/site-docs/public/legal/appeals.md +1 -1

M server/site-docs/public/legal/copyright.md +2

M server/site-docs/public/support/contact.md +3 -3

M server/site-docs/public/support/faq.md +2 -2

M server/site-docs/public/tech/architecture.md +1 -1

M server/site-docs/public/tech/content-scanning.md +14 -2

M server/site-docs/public/tech/security.md +2

M server/src/auth.rs +1 -1

A server/src/background.rs +75

M server/src/build_runner.rs +68 -26

M server/src/constants.rs +13

M server/src/db/cart.rs +34 -4

M server/src/db/creator_tiers.rs +39

M server/src/db/items/media.rs +9 -3

M server/src/db/media_files.rs +3 -3

M server/src/db/mod.rs +1 -1

M server/src/db/page_views.rs +84 -2

M server/src/db/pending_s3_deletions.rs +15

M server/src/db/pending_uploads.rs +13 -4

M server/src/db/project_members.rs +31 -3

M server/src/db/projects.rs +9 -3

M server/src/db/scanning.rs +16

M server/src/db/sessions.rs +9 -5

M server/src/db/transactions.rs +68 -6

M server/src/db/users.rs +12

M server/src/db/validated_types.rs +1 -1

M server/src/email/mod.rs +68

M server/src/email/templates/notifications.rs +1 -1

M server/src/error.rs +7 -94

M server/src/git_ssh.rs +8

M server/src/helpers.rs +50 -13

M server/src/lib.rs +10

M server/src/main.rs +15 -5

M server/src/metrics.rs +36 -1

M server/src/monitor.rs +27 -3

M server/src/payments/webhooks.rs +10 -3

M server/src/pricing.rs +117

M server/src/routes/admin/moderation.rs +2 -2

M server/src/routes/admin/uploads.rs +19 -9

M server/src/routes/admin/users.rs +11 -3

M server/src/routes/api/cart.rs +8

M server/src/routes/api/content_insertions.rs +7 -9

M server/src/routes/api/follows.rs +2 -2

M server/src/routes/api/guest_checkout.rs +11 -4

M server/src/routes/api/internal/uploads.rs +28 -24

M server/src/routes/api/items/bulk.rs +6 -4

M server/src/routes/api/projects.rs +7 -8

M server/src/routes/api/users/broadcast.rs +12

M server/src/routes/api/users/library.rs +1 -1

M server/src/routes/api/users/profile.rs +2 -18

M server/src/routes/api/users/support.rs +1 -1

M server/src/routes/auth.rs +32 -11

M server/src/routes/oauth.rs +17 -4

M server/src/routes/pages/dashboard/mod.rs -1

M server/src/routes/pages/dashboard/tabs/item.rs -18

M server/src/routes/pages/dashboard/tabs/mod.rs +1 -1

M server/src/routes/pages/dashboard/wizards/item/save.rs +35 -32

M server/src/routes/pages/dashboard/wizards/project.rs +24 -28

M server/src/routes/pages/email_actions/account.rs +1 -20

M server/src/routes/pages/email_actions/links.rs +1 -1

M server/src/routes/pages/public/content/library.rs +1 -1

M server/src/routes/pages/public/content/mod.rs +6 -8

M server/src/routes/pages/public/content/project.rs +1 -1

M server/src/routes/pages/public/join_wizard.rs +33 -53

M server/src/routes/pages/public/mod.rs +3

A server/src/routes/pages/public/sitemap.rs +167

M server/src/routes/pages/public/two_factor.rs +2 -6

M server/src/routes/postmark/issues.rs +2 -2

M server/src/routes/storage/downloads.rs +9 -1

M server/src/routes/storage/images.rs +181 -63

M server/src/routes/storage/media.rs +51 -62

M server/src/routes/storage/mod.rs +155 -19

M server/src/routes/storage/uploads.rs +61 -30

M server/src/routes/storage/versions.rs +19 -16

M server/src/routes/stripe/checkout/cart.rs +234 -77

M server/src/routes/stripe/checkout/item.rs +25 -15

M server/src/routes/stripe/checkout/mod.rs +8 -2

M server/src/routes/stripe/checkout/project.rs +14 -2

M server/src/routes/stripe/checkout/tips.rs +20 -3

M server/src/routes/stripe/webhook/billing.rs +7 -5

M server/src/routes/stripe/webhook/checkout.rs +3 -3

M server/src/routes/stripe/webhook/checkout_helpers.rs +29 -10

M server/src/routes/stripe/webhook/mod.rs +10 -1

M server/src/routes/stripe/webhook/subscriptions.rs +27 -12

M server/src/routes/synckit/blobs.rs +1 -1

M server/src/scanning/clamav.rs +61

M server/src/scanning/mod.rs +39

M server/src/scanning/worker.rs +6 -6

M server/src/scheduler/cleanup.rs +77 -31

M server/src/scheduler/integrity.rs +6

M server/src/synckit_auth.rs +5 -2

M server/src/templates/mod.rs -1

M server/src/templates/partials.rs -7

M server/src/validation/items.rs +2 -1

A server/static/manifest.json +22

M server/static/mnw.js +41

M server/templates/base.html +13 -1

M server/templates/dashboards/admin-scan-audit.html +1 -1

M server/templates/dashboards/admin-uploads.html +1 -1

M server/templates/pages/audio_player.html +1 -1

M server/templates/pages/blog_post.html +1 -1

M server/templates/pages/cart.html +33 -1

M server/templates/pages/collection.html +1 -1

M server/templates/pages/error.html +3 -1

M server/templates/pages/index.html +1 -1

M server/templates/pages/item.html +1 -1

M server/templates/pages/policy.html +12

M server/templates/pages/project.html +1 -1

M server/templates/pages/text_reader.html +1 -1

M server/templates/pages/user.html +1 -1

M server/templates/pages/video_player.html +1 -1

A server/templates/partials/backup_codes.html +4

A server/templates/partials/domain_verify_instructions.html +7

M server/templates/partials/tabs/item_details.html +3 -1

D server/templates/partials/tabs/item_settings.html -53

M server/templates/partials/tabs/library_collections.html +2 -2

M server/templates/partials/tabs/library_purchases.html +2 -2

D server/templates/partials/tabs/library_subscriptions.html -32

D server/templates/partials/tabs/library_wishlists.html -44

M server/templates/partials/tabs/project_content.html +5 -5

M server/templates/partials/tabs/user_account.html +6 -6

M server/templates/wizards/steps/project/basics.html +3 -3

M server/templates/wizards/wizard_project.html +2 -2

M server/tests/harness/mod.rs +2

M server/tests/load/runner.rs +2

M server/todo.md +298 -1