Skip to main content

max / makenotwork

9.7 KB · 237 lines History Blame Raw
1 #!/usr/bin/env bash
2 # Idempotent bootstrap for a fresh Sando host (the machine running sandod).
3 #
4 # Captures the three PG footguns + system user + systemd unit + scratch DB +
5 # .ssh setup + known_hosts seeding that fw13 accumulated by hand over the
6 # 2026-06-02 hardening session. Re-run any time the sando host is rebuilt.
7 #
8 # Run as root on the new host. The script is safe to run repeatedly — every
9 # step checks current state and skips if already satisfied.
10 #
# What it does:
#   1. base packages + postgresql
#   2. `sando` system user (login shell, /srv/sando home)
#   3. /srv/sando dirs (state/, work/, releases/, logs/, backups/)
#   4. postgres role `sando` with CREATEDB
#   5. `sando_scratch` database owned by `sando`
#   6. ALTER SCHEMA public OWNER TO sando inside sando_scratch
#      (must be set explicitly — PG15+ no longer grants public to db owner)
#   7. sando's ed25519 SSH key (generated if missing)
#   8. /srv/sando/.ssh/config — declares port 2200 for alpha-west-1
#   9. known_hosts seeded for tailnet targets (testnot, alpha-west-1, etc.)
#  10. /etc/sando/{sando-daemon.toml,sando.toml,sando.env}
#  11. /etc/systemd/system/sandod.service + sandod-backup-fetch.{service,timer}
#  12. /usr/local/bin/sandod (rebuilt from the local checkout on every run
#      unless BUILD_SANDOD is set to something other than 1)
#  13. /srv/sando/mnw.git bare repo (initial; operator pushes the working tree)
26 #
27 # What this does NOT do (operator's job):
28 # - tailscale up (auth)
29 # - Authorize sando's pubkey on each deploy target's `deploy` user
30 # (bootstrap-node.sh on the target consumes $SANDO_PUBKEY)
31 # - Populate /etc/sando/sando.env with anything beyond SANDO_DAEMON if
32 # additional secrets are needed
33 # - Push the MNW working tree to /srv/sando/mnw.git
34 # - Fix `mnw_test_template` ownership — that template gets re-created by
35 # each `cargo test` run; ownership resets when a *different* user runs
36 # tests on the host. Out of scope for one-shot bootstrap; manage by
37 # keeping the host single-user or by re-`ALTER TABLE ... OWNER TO sando`
38 # before sandod's cargo_test gate.
39
# Strict mode: stop on any failing command, on use of an unset variable, and
# when any stage of a pipeline fails.
set -o errexit -o nounset -o pipefail

# Refuse to continue unless the effective UID is root.
if (( EUID != 0 )); then
  echo "must run as root" >&2
  exit 1
fi
46
# Operator-overridable settings. A non-empty value already present in the
# environment wins; otherwise the default (matching the live fw13 install)
# is assigned.
: "${SANDO_USER:=sando}"
: "${SANDO_HOME:=/srv/sando}"
: "${SANDO_DAEMON_URL:=http://127.0.0.1:7766}"
: "${INSTALL_POSTGRES:=1}"
: "${BUILD_SANDOD:=1}"

# Whitespace-separated tailnet targets to pre-seed in sando's known_hosts.
# Each entry is "name[:port]"; the port defaults to 22.
: "${SEED_HOSTS:=testnot alpha-west-1:2200}"

# Absolute directory holding this script, so sibling unit/config files can be
# copied regardless of cwd. Layout is `<SANDO_REPO>/deploy/this-script.sh`,
# which makes SANDO_REPO the parent directory.
SCRIPT_DIR="$(dirname "${BASH_SOURCE[0]}")"
SCRIPT_DIR="$(cd "$SCRIPT_DIR" && pwd)"
SANDO_REPO="$(cd "${SCRIPT_DIR}/.." && pwd)"

DEBIAN_FRONTEND=noninteractive
export DEBIAN_FRONTEND
66
# Print a progress line on stdout, prefixed with the script tag.
# Arguments: message words; they are joined via "$*" (a space with default IFS).
log() {
  printf '%s\n' "[bootstrap-sandod] $*"
}
68
log "1/13 base packages"
apt-get update -qq
# `sudo` is part of the base set because the later steps of this script run
# commands through `sudo -u <user>`; a minimal image may not ship it, and the
# script would otherwise abort at the first such call.
base_packages=(
  curl ca-certificates rsync openssh-client git
  build-essential pkg-config libssl-dev
  sudo
)
apt-get install -y -qq "${base_packages[@]}" > /dev/null

# PostgreSQL is installed from the distribution package unless the operator
# opted out with INSTALL_POSTGRES set to anything other than 1.
if [[ "$INSTALL_POSTGRES" == "1" ]]; then
  log "2/13 postgresql"
  apt-get install -y -qq postgresql > /dev/null
else
  log "2/13 skipping postgresql"
fi
79
log "3/13 sando system user (home: $SANDO_HOME)"
# Create the account only when `id` cannot find it.
id "$SANDO_USER" &>/dev/null \
  || useradd -m -d "$SANDO_HOME" -s /bin/bash "$SANDO_USER"
# Owner and mode of the home directory are re-applied on every run, in case a
# prior partial run left it root-owned.
install -d -o "$SANDO_USER" -g "$SANDO_USER" -m 0750 "$SANDO_HOME"

log "4/13 /srv/sando subdirs"
# One install call creates (or re-asserts owner/mode on) all working dirs.
install -d -o "$SANDO_USER" -g "$SANDO_USER" -m 0750 \
  "$SANDO_HOME"/{state,work,releases,logs,backups}
91
log "5/13 postgres role + scratch db"
# Every statement is executed as the `postgres` OS user against the local
# cluster. The role is handled inside a DO block so a re-run is harmless:
# it is created when absent, otherwise CREATEDB is (re)granted.
sudo -u postgres psql --set ON_ERROR_STOP=1 <<SQL
DO \$\$
BEGIN
  IF NOT EXISTS (SELECT 1 FROM pg_roles WHERE rolname = '$SANDO_USER') THEN
    EXECUTE format('CREATE ROLE %I LOGIN CREATEDB', '$SANDO_USER');
  ELSE
    EXECUTE format('ALTER ROLE %I CREATEDB', '$SANDO_USER');
  END IF;
END
\$\$;
SQL

# CREATE DATABASE is not allowed inside a DO block, so existence is probed
# with a catalog query and the database is created from the shell if needed.
if sudo -u postgres psql --tuples-only --no-align --command \
     "SELECT 1 FROM pg_database WHERE datname = 'sando_scratch'" \
   | grep -qx '1'; then
  : # sando_scratch already exists; nothing to do
else
  sudo -u postgres createdb --owner "$SANDO_USER" sando_scratch
fi
114
log "6/13 sando_scratch public schema owner = $SANDO_USER"
# PG15+ no longer grants public to the DB owner automatically. Without this,
# reset_scratch (sando/daemon/src/gates.rs::reset_scratch) silently fails
# every rebuild because the DROP SCHEMA public CASCADE happens but the
# CREATE SCHEMA public lands as postgres, not sando, owning it.
#
# The role name is passed as a double-quoted SQL identifier, with embedded
# double quotes doubled. The role itself was created via format('%I'), so a
# SANDO_USER override containing e.g. a hyphen or uppercase letters exists
# under its exact spelling; an unquoted name here would be a syntax error or
# get case-folded to a different role.
dq='"'
schema_owner="${dq}${SANDO_USER//$dq/$dq$dq}${dq}"
sudo -u postgres psql -v ON_ERROR_STOP=1 sando_scratch -c \
  "ALTER SCHEMA public OWNER TO $schema_owner" >/dev/null
122
log "7/13 sando ssh key (ed25519)"
install -d -o "$SANDO_USER" -g "$SANDO_USER" -m 0700 "$SANDO_HOME/.ssh"
# An existing private key is left untouched; a passphrase-less ed25519 key is
# generated as the sando user only when the key file is absent.
[[ -f "$SANDO_HOME/.ssh/id_ed25519" ]] \
  || sudo -u "$SANDO_USER" ssh-keygen -t ed25519 -N "" \
       -f "$SANDO_HOME/.ssh/id_ed25519" \
       -C "sando@$(hostname -s)"
130
log "8/13 /srv/sando/.ssh/config"
# alpha-west-1 listens on port 2200 (prod sshd convention). The remote
# authorized_keys is not managed here: bootstrap-node.sh on each deploy
# target accepts SANDO_PUBKEY for that.
ssh_config="$SANDO_HOME/.ssh/config"
if grep -q "^Host alpha-west-1" "$ssh_config" 2>/dev/null; then
  : # stanza already present (grep also fails, quietly, when the file is missing)
else
  printf '%s\n' 'Host alpha-west-1' '    Port 2200' '' >> "$ssh_config"
fi
# The append above runs as root, so ownership and mode are fixed up afterwards.
chown "$SANDO_USER:$SANDO_USER" "$ssh_config"
chmod 0600 "$ssh_config"
145
log "9/13 known_hosts seeding ($SEED_HOSTS)"
# Strict-host-key-check failures on first contact would block sandod's deploy
# step, so a host key is pre-seeded for every declared target. Re-running
# appends keys that may already be present; the `sort -u` at the end removes
# the duplicates.
known="$SANDO_HOME/.ssh/known_hosts"
touch "$known"
chown "$SANDO_USER:$SANDO_USER" "$known"
chmod 0600 "$known"
# SEED_HOSTS is expanded unquoted on purpose: it is a whitespace-separated
# list of "name[:port]" entries.
for entry in $SEED_HOSTS; do
  if [[ "$entry" == *:* ]]; then
    host="${entry%%:*}"
    port="${entry#*:}"
    port="${port:-22}"   # tolerate a trailing colon ("name:")
  else
    host="$entry"
    port=22
  fi
  # ssh-keyscan may exit 0 even when the host is unreachable, so its exit
  # status cannot drive the warning. Capture the output and test for
  # emptiness instead; `|| true` keeps a non-zero exit from aborting the
  # script, since an unreachable target is tolerated (the operator can re-run
  # once it is up).
  scanned="$(sudo -u "$SANDO_USER" ssh-keyscan -p "$port" -T 5 "$host" 2>/dev/null || true)"
  if [[ -n "$scanned" ]]; then
    printf '%s\n' "$scanned" >> "$known"
  else
    log " warn: ssh-keyscan $host:$port returned nothing"
  fi
done
# De-dup in place. `sort -o` may name its own input as the output file, and
# running it as the sando user keeps the file under that user's ownership.
sudo -u "$SANDO_USER" sort -u "$known" -o "$known"
166
log "10/13 /etc/sando configs"
install -d -m 0755 /etc/sando
# sando-daemon.toml.example is the canonical production config. The operator
# is expected to edit the installed copy (the listen address, when binding to
# a non-fw13 tailnet IP), so it is installed only when no config exists yet;
# re-installing on every run would silently revert that edit. When an
# existing file differs from the shipped example, a note is logged so the
# operator can merge changes by hand.
daemon_conf_src="$SCRIPT_DIR/sando-daemon.toml.example"
daemon_conf=/etc/sando/sando-daemon.toml
if [[ ! -f "$daemon_conf" ]]; then
  install -m 0644 -o root -g root "$daemon_conf_src" "$daemon_conf"
elif ! cmp -s "$daemon_conf_src" "$daemon_conf"; then
  log " note: keeping existing $daemon_conf (differs from $daemon_conf_src)"
fi
# sando.toml is refreshed from the repo checkout on every run.
install -m 0644 -o root -g root \
  "$SANDO_REPO/sando.toml" \
  /etc/sando/sando.toml
# sando.env carries non-secret operator settings consumed by sandod and the
# backup-fetch timer. Only write if missing — operator may have customized.
if [[ ! -f /etc/sando/sando.env ]]; then
  echo "SANDO_DAEMON=$SANDO_DAEMON_URL" > /etc/sando/sando.env
  chown root:"$SANDO_USER" /etc/sando/sando.env
  chmod 0640 /etc/sando/sando.env
fi
185
log "11/13 systemd units"
# Copy each unit file that sits next to this script into the system unit
# directory (root-owned, mode 0644), then have systemd re-read its units.
for unit in sandod.service sandod-backup-fetch.service sandod-backup-fetch.timer; do
  install -m 0644 -o root -g root \
    "$SCRIPT_DIR/$unit" \
    "/etc/systemd/system/$unit"
done
systemctl daemon-reload
197
# Build and install the daemon binary unless the operator opted out.
if [[ "$BUILD_SANDOD" != "1" ]]; then
  log "12/13 skipping sandod build (BUILD_SANDOD=0)"
else
  log "12/13 sandod binary (cargo build --release → /usr/local/bin/sandod)"
  daemon_dir="$SANDO_REPO/daemon"
  if [[ -d "$daemon_dir" ]]; then
    # The build runs in a subshell so the cwd change does not leak into the
    # rest of the script; the installed binary gets mode 755.
    (
      cd "$daemon_dir" && cargo build --release --quiet
    )
    install -m 0755 "$daemon_dir/target/release/sandod" /usr/local/bin/sandod
  else
    log " warn: cannot locate sando/daemon source at $daemon_dir; skipping build"
  fi
fi
211
log "13/13 bare mnw.git + post-receive hook"
# Initialise the bare repository (as the sando user, default branch `main`)
# only when the directory does not exist yet.
[[ -d "$SANDO_HOME/mnw.git" ]] \
  || sudo -u "$SANDO_USER" git init --bare --initial-branch=main "$SANDO_HOME/mnw.git" >/dev/null
# The post-receive hook (which POSTs to sandod on push) is copied from the
# repo on every run, so hook updates propagate with the next bootstrap.
install -m 0755 -o "$SANDO_USER" -g "$SANDO_USER" \
  "$SCRIPT_DIR/post-receive" \
  "$SANDO_HOME/mnw.git/hooks/post-receive"
221
# Enable services last so a partial bootstrap doesn't leave a service trying
# to start against an incomplete environment. Enabling stays best-effort (a
# failure does not abort the bootstrap), but a failure is reported together
# with systemctl's output instead of being discarded, so the operator is not
# told "Done" while a unit silently failed to enable.
for unit in sandod.service sandod-backup-fetch.timer; do
  if ! enable_out="$(systemctl enable "$unit" 2>&1)"; then
    log " warn: systemctl enable $unit failed: $enable_out"
  fi
done
226
# Closing summary: a blank line, the tagged "Done" line, then the manual
# follow-up steps printed from one here-document. Only the `$(cat …)` in the
# SANDO_PUBKEY line is escaped so it is shown literally; the SANDO_* settings
# are expanded.
echo
log "Done. Next steps for the operator:"
cat <<EOF
 - tailscale up (auth this node onto the tailnet)
 - on each deploy target, run bootstrap-node.sh with:
 SANDO_PUBKEY="\$(cat $SANDO_HOME/.ssh/id_ed25519.pub)"
 - push the MNW working tree to $SANDO_HOME/mnw.git:
 git remote add sando $SANDO_USER@<host>:$SANDO_HOME/mnw.git
 git push sando main
 - sudo systemctl start sandod
 - sudo systemctl start sandod-backup-fetch.timer
EOF
237