max / makenotwork

sando: daily testnot staging-mirror refresh Add the testnot.work refresh job (M8): a systemd timer on the Sando host that reloads testnot's database from the prod backup sandod-backup-fetch already pulls, keeping the staging mirror ~daily-fresh. The restore runs as the postgres superuser over Tailscale SSH (handles extension/owner lines without granting the app role superuser) and recreates the public schema owned by the app role so boot migrations can create. The app applies any newer migrations on restart. Runs at 05:00 UTC, after the 04:00 backup fetch. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>

Author: Max Johnson <me@maxj.phd> · 2026-06-07 21:07 UTC

Commit: 132431d57d8489af019289dd746db1a90104ed7f

Parent: 9cd5e49

3 files changed, +103 insertions, -0 deletions

A sando/deploy/mnw-testnot-refresh.service +19

		@@ -0,0 +1,19 @@
1	+	# One-shot: refresh the testnot.work staging mirror from the latest prod backup.
2	+	# Paired with mnw-testnot-refresh.timer for daily execution.
3	+	#
4	+	# Runs after sandod-backup-fetch so it uses a fresh dump. Needs root on fw13:
5	+	# reads /srv/sando/backups (sando-owned) and uses Tailscale SSH (node identity)
6	+	# to reach testnot as root.
7	+	#
8	+	# Place at /etc/systemd/system/mnw-testnot-refresh.service on the Sando host.
9	+
10	+	[Unit]
11	+	Description=MNW: refresh testnot staging mirror from prod backup
12	+	After=sandod-backup-fetch.service network-online.target
13	+	Wants=network-online.target
14	+
15	+	[Service]
16	+	Type=oneshot
17	+	ExecStart=/usr/local/bin/mnw-testnot-refresh.sh
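18	+	# No Restart= on purpose: a failed refresh is not retried here; the next daily
19	+	# cycle retries. Don't hammer. The restore is not atomic, so a mid-run failure can leave testnot stopped and/or partially restored until the next successful run.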

A sando/deploy/mnw-testnot-refresh.sh +67

		@@ -0,0 +1,67 @@
#!/usr/bin/env bash
# Refresh the testnot.work staging mirror from the latest production backup.
#
# testnot is a read-only mirror of prod, gated app-side to Fan+/creator accounts
# (ACCESS_GATE). This job reloads its database from the prod backup that
# sandod-backup-fetch already pulls to fw13, so the mirror tracks live ~daily.
#
# Runs on fw13 (the Sando host, where the backup lives and which has tailnet
# root on testnot via Tailscale SSH). The restore runs as the postgres
# superuser on testnot — streamed over Tailscale SSH — so extension/owner lines
# in the dump apply cleanly without granting the app role superuser. The app
# applies any newer migrations on the next boot (MNW migrates on startup), so a
# prod dump a few migrations behind the deployed binary self-heals on restart.
#
# Idempotent and safe to re-run: it stops the app, resets the schema, restores,
# and starts the app. testnot holds no durable state of its own (it's a mirror),
# so a wiped/refreshed DB each run is the intended behavior.
#
# Environment overrides:
#   TESTNOT_BACKUP  path to the gzipped SQL dump (default: /srv/sando/backups/latest.sql.gz)
#   TESTNOT_SSH     Tailscale SSH target            (default: root@testnot)
#   TESTNOT_DB      database name; also used as the role that owns the
#                   recreated public schema         (default: makenotwork)
#
# Exit status: 0 once the app answers 200 on /health; non-zero on any failed
# step or if the app never becomes healthy.
set -euo pipefail

readonly BACKUP="${TESTNOT_BACKUP:-/srv/sando/backups/latest.sql.gz}"
readonly SSH_TARGET="${TESTNOT_SSH:-root@testnot}"
readonly DB="${TESTNOT_DB:-makenotwork}"
readonly SERVICE="makenotwork.service"

# Timestamped (UTC) progress line on stdout.
log() { printf '[%s] %s\n' "$(date -u +%H:%M:%S)" "$*"; }

# Run a command on testnot over Tailscale SSH; stdin is passed through so SQL
# can be streamed to a remote psql.
ts_ssh() { tailscale ssh "$SSH_TARGET" "$@"; }

# Set to 1 from the moment we ask the app to stop until it has been started
# again, so the EXIT trap knows whether a failure left the app down.
app_stopped=0

# EXIT trap: if the script is dying with the app stopped, start it again
# (best-effort) so a failed refresh does not leave testnot down until the next
# daily run. The script's original exit status is preserved.
restart_app_on_failure() {
  local rc=$?
  if (( rc != 0 && app_stopped == 1 )); then
    echo "refresh failed (exit $rc); starting $SERVICE so testnot is not left down" >&2
    ts_ssh "systemctl start $SERVICE" </dev/null \
      || echo "could not start $SERVICE after failed refresh" >&2
  fi
}
trap restart_app_on_failure EXIT

[[ -r "$BACKUP" ]] || { echo "backup not readable: $BACKUP" >&2; exit 1; }
# Verify the archive BEFORE anything destructive: a truncated/corrupt dump
# would otherwise only be noticed mid-restore, after the schema was dropped.
gzip -t -- "$BACKUP" || { echo "backup is not a valid gzip: $BACKUP" >&2; exit 1; }
log "backup: $BACKUP ($(stat -c %s "$BACKUP") bytes)"

log "stopping $SERVICE on testnot"
app_stopped=1
ts_ssh "systemctl stop $SERVICE"

# Drop every non-system schema (mirrors sandod reset_scratch — migrations create
# custom schemas like tower_sessions that survive DROP SCHEMA public CASCADE).
# Recreate public OWNED BY the app role: on PG15+ a postgres-owned public grants
# no CREATE to other roles, so boot migrations would fail with "no schema has
# been selected to create in" (same gotcha as the sando scratch DB).
#
# The heredoc is unquoted on purpose: $DB is expanded locally (the app role is
# assumed to share the database's name), while \$\$ reaches psql as the
# dollar-quote delimiter.
log "resetting schema"
ts_ssh "sudo -u postgres psql -v ON_ERROR_STOP=1 -d $DB" <<SQL
DO \$\$
DECLARE s text;
BEGIN
  FOR s IN
    SELECT nspname FROM pg_namespace
    WHERE nspname NOT LIKE 'pg_%' AND nspname <> 'information_schema'
  LOOP
    EXECUTE format('DROP SCHEMA IF EXISTS %I CASCADE', s);
  END LOOP;
  EXECUTE 'CREATE SCHEMA public AUTHORIZATION $DB';
END \$\$;
SQL

# Not wrapped in a single transaction: if this fails part-way the mirror is
# left partially restored and the next successful run replaces it.
log "restoring prod dump"
gunzip -c "$BACKUP" \
  | ts_ssh "sudo -u postgres psql -q -v ON_ERROR_STOP=1 -d $DB" >/dev/null

log "starting $SERVICE (applies any newer migrations on boot)"
ts_ssh "systemctl start $SERVICE"
app_stopped=0

# Boot smoke: the app must come back healthy after migrating.
# Up to 20 probes, 3 s apart; an unreachable probe counts as status 000.
for (( attempt = 1; attempt <= 20; attempt++ )); do
  code=$(ts_ssh "curl -s -o /dev/null -w '%{http_code}' http://127.0.0.1:8080/health") \
    || code=000
  if [[ "$code" == "200" ]]; then
    log "health OK"
    exit 0
  fi
  sleep 3
done
echo "testnot did not return healthy after refresh" >&2
exit 1

A sando/deploy/mnw-testnot-refresh.timer +17

		@@ -0,0 +1,17 @@
1	+	# Daily trigger for the testnot mirror refresh. sandod-backup-fetch runs at
2	+	# 04:00 UTC; we refresh at 05:00 UTC to leave headroom for the fetch to land.
3	+	#
4	+	# Place at /etc/systemd/system/mnw-testnot-refresh.timer on the Sando host.
5	+	# Enable: systemctl enable --now mnw-testnot-refresh.timer
6	+
7	+	[Unit]
8	+	Description=MNW: daily testnot staging-mirror refresh
9	+
10	+	[Timer]
11	+	OnCalendar=*-*-* 05:00:00 UTC
12	+	# If the box was off when the timer fired, run on next boot.
13	+	Persistent=true
14	+	Unit=mnw-testnot-refresh.service
15	+
16	+	[Install]
17	+	WantedBy=timers.target