Skip to main content

max / makenotwork

Release sando v0.2.1: controller self-update path. sandod can now deploy its own new code via POST /self-update {sha} (bearer-gated). It runs unprivileged (User=sando, NoNewPrivileges, ProtectSystem=strict) so it cannot install a binary or restart its own service; it only triggers the root oneshot sando-update@<sha>.service, which the sando user is authorized to start (and nothing else) by a scoped polkit rule. That unit builds sando/daemon as the sando user in a dedicated checkout, installs the binary, and restarts sandod. - error.rs: Error::BadRequest -> 400 (malformed sha). - routes.rs: /self-update handler (sha validation, in-flight-build guard, systemctl trigger) + sandod_version in /state for post-restart verification. - deploy/: sando-update@.service, sando-self-update.sh, 10-sando-update.rules, README self-update section. - Tests: self_update_unit_maps_sha_to_instance, self_update_rejects_bad_sha_with_400. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
Author: Max Johnson <me@maxj.phd> · 2026-06-12 21:57 UTC
Commit: 20cf471f318ba2bfc05df24b5a36e507eff0ef24
Parent: a27ad4d
10 files changed, +250 insertions, -6 deletions
@@ -1637,7 +1637,7 @@ checksum = "9774ba4a74de5f7b1c1451ed6cd5285a32eddb5cccb8cc655a4e50009e06477f"
1637 1637
1638 1638 [[package]]
1639 1639 name = "sando-daemon"
1640 - version = "0.2.0"
1640 + version = "0.2.1"
1641 1641 dependencies = [
1642 1642 "anyhow",
1643 1643 "async-trait",
@@ -1,6 +1,6 @@
1 1 [package]
2 2 name = "sando-daemon"
3 - version = "0.2.0"
3 + version = "0.2.1"
4 4 edition = "2024"
5 5 license = "MIT"
6 6
@@ -5,6 +5,8 @@ use axum::response::{IntoResponse, Response};
5 5 pub enum Error {
6 6 #[error("not found")]
7 7 NotFound,
8 + #[error("bad request: {0}")]
9 + BadRequest(String),
8 10 #[error("gate not satisfied: {0}")]
9 11 GateBlocked(String),
10 12 #[error(transparent)]
@@ -17,6 +19,7 @@ impl IntoResponse for Error {
17 19 fn into_response(self) -> Response {
18 20 let status = match &self {
19 21 Error::NotFound => StatusCode::NOT_FOUND,
22 + Error::BadRequest(_) => StatusCode::BAD_REQUEST,
20 23 Error::GateBlocked(_) => StatusCode::CONFLICT,
21 24 _ => StatusCode::INTERNAL_SERVER_ERROR,
22 25 };
@@ -18,6 +18,7 @@ pub fn router(state: AppState) -> Router {
18 18 .route("/promote/{tier}", post(promote))
19 19 .route("/rollback/{tier}", post(rollback))
20 20 .route("/rebuild", post(rebuild))
21 + .route("/self-update", post(self_update))
21 22 .route("/confirm/{tier}", post(confirm))
22 23 .route("/backup/fetch", post(backup_fetch))
23 24 .route_layer(axum::middleware::from_fn(move |req, next| {
@@ -78,6 +79,10 @@ fn ct_eq(a: &str, b: &str) -> bool {
78 79
79 80 #[derive(Serialize)]
80 81 struct StateView {
82 + /// The running sandod's own package version. Lets a self-update caller
83 + /// confirm the new binary is live after the restart (the tier versions are
84 + /// the *deployed product*, not the controller).
85 + sandod_version: &'static str,
81 86 tiers: Vec<TierView>,
82 87 }
83 88
@@ -193,7 +198,7 @@ async fn get_state(State(s): State<AppState>) -> Result<Json<StateView>> {
193 198 });
194 199 }
195 200
196 - Ok(Json(StateView { tiers }))
201 + Ok(Json(StateView { sandod_version: env!("CARGO_PKG_VERSION"), tiers }))
197 202 }
198 203
199 204 #[derive(Deserialize, Default)]
@@ -798,6 +803,63 @@ async fn rebuild(
798 803 Ok(Json(serde_json::json!({ "accepted": true, "sha": sha_response })))
799 804 }
800 805
806 + #[derive(Deserialize)]
807 + struct SelfUpdateBody {
808 + /// The commit to rebuild sandod from. Must already be on the canonical
809 + /// remote (the updater `git fetch`es it).
810 + sha: String,
811 + }
812 +
813 + /// The privileged updater unit instance for `sha`. A git sha is hex-only, so it
814 + /// is a safe systemd instance name with no escaping needed.
815 + fn self_update_unit(sha: &crate::domain::GitSha) -> String {
816 + format!("sando-update@{sha}.service")
817 + }
818 +
819 + /// Trigger a rebuild + restart of sandod *itself* to `sha`. sandod runs
820 + /// unprivileged (User=sando, NoNewPrivileges, ProtectSystem=strict) and cannot
821 + /// write `/usr/local/bin/sandod` or restart its own service — so it only
822 + /// *triggers* the root oneshot `sando-update@<sha>.service` (which the sando
823 + /// user is authorized to start by a scoped polkit rule). That unit builds
824 + /// `sando/daemon` as the sando user in a dedicated checkout, installs the new
825 + /// binary, and restarts sandod. Bearer-gated like the other mutators; the new
826 + /// version shows up in `/state`'s `sandod_version` once the restart lands.
827 + async fn self_update(
828 + State(s): State<AppState>,
829 + Json(body): Json<SelfUpdateBody>,
830 + ) -> Result<Json<serde_json::Value>> {
831 + let sha = crate::domain::GitSha::parse(&body.sha)
832 + .map_err(|e| crate::error::Error::BadRequest(format!("invalid sha: {e}")))?;
833 +
834 + // Refuse while a server build is in flight: the restart would SIGKILL it
835 + // mid-deploy. Point-in-time check only — the lock is released before the trigger.
836 + {
837 + let slot = s.active_build.lock().await;
838 + if slot.as_ref().is_some_and(|h| !h.is_finished()) {
839 + return Err(crate::error::Error::GateBlocked(
840 + "a server build is in flight; retry /self-update once it settles".into(),
841 + ));
842 + }
843 + }
844 +
845 + let unit = self_update_unit(&sha);
846 + tracing::warn!(sha = %sha, unit = %unit, "self-update requested; triggering privileged updater");
847 + // `--no-block`: return as soon as the job is enqueued. The build+restart
848 + // outcome lands in `journalctl -u <unit>`; sandod is restarted out from
849 + // under this request, so there is nothing more to await here.
850 + let status = tokio::process::Command::new("systemctl")
851 + .args(["start", "--no-block", &unit])
852 + .status()
853 + .await
854 + .map_err(|e| crate::error::Error::Other(anyhow::anyhow!("spawning systemctl: {e}")))?;
855 + if !status.success() {
856 + return Err(crate::error::Error::Other(anyhow::anyhow!(
857 + "systemctl start {unit} exited {status}; is sando-update@.service installed and the sando-user polkit rule in place?"
858 + )));
859 + }
860 + Ok(Json(serde_json::json!({ "accepted": true, "sha": sha.to_string(), "unit": unit })))
861 + }
862 +
801 863 async fn confirm(
802 864 State(s): State<AppState>,
803 865 Path(tier): Path<String>,
@@ -1323,6 +1385,32 @@ mod tests {
1323 1385 assert_eq!(resp.status(), StatusCode::NOT_FOUND);
1324 1386 }
1325 1387
1388 + #[test]
1389 + fn self_update_unit_maps_sha_to_instance() {
1390 + let sha = crate::domain::GitSha::parse("abc1234def5678").unwrap();
1391 + assert_eq!(self_update_unit(&sha), "sando-update@abc1234def5678.service");
1392 + }
1393 +
1394 + #[tokio::test]
1395 + async fn self_update_rejects_bad_sha_with_400() {
1396 + // A malformed sha is a client error and must be rejected *before* any
1397 + // privileged unit is triggered (so this test never shells out).
1398 + let state = test_state().await;
1399 + let app = router(state);
1400 + let resp = app
1401 + .oneshot(
1402 + Request::builder()
1403 + .method("POST")
1404 + .uri("/self-update")
1405 + .header("Content-Type", "application/json")
1406 + .body(Body::from(r#"{"sha":"not-a-sha!"}"#))
1407 + .unwrap(),
1408 + )
1409 + .await
1410 + .unwrap();
1411 + assert_eq!(resp.status(), StatusCode::BAD_REQUEST);
1412 + }
1413 +
1326 1414 // ---- /promote/{tier} default-version resolution ----
1327 1415
1328 1416 #[tokio::test]
@@ -0,0 +1,20 @@
1 + // polkit rule: let the `sando` user START (only) the self-update template unit.
2 + //
3 + // Install at /etc/polkit-1/rules.d/10-sando-update.rules.
4 + //
5 + // This is the one privilege bridge that makes controller self-deploy work:
6 + // sandod runs unprivileged and triggers `systemctl start sando-update@<sha>`,
7 + // which polkit authorizes here without a password. The grant is deliberately
8 + // narrow — only `start`, only units whose name begins `sando-update@`, only the
9 + // sando user. stop/restart/enable and every other unit fall through to the
10 + // system default policy (i.e. denied for an unprivileged caller).
11 + polkit.addRule(function(action, subject) {
12 + if (action.id === "org.freedesktop.systemd1.manage-units" &&
13 + subject.user === "sando") {
14 + var unit = action.lookup("unit");
15 + var verb = action.lookup("verb");
16 + if (verb === "start" && unit && unit.indexOf("sando-update@") === 0) {
17 + return polkit.Result.YES;
18 + }
19 + }
20 + });
@@ -15,6 +15,47 @@ live in the Syncthing private layer (`_private/infra/`, `_private/deploy`).
15 15 | `post-receive` | git remote | Push-to-deploy hook. |
16 16 | `sandod-backup-fetch.{service,timer}` | Sando host | Daily pull of the prod backup to `/srv/sando/backups/latest.sql.gz` (04:00 UTC). |
17 17 | `mnw-testnot-refresh.{sh,service,timer}` | Sando host | Daily refresh of the testnot.work staging mirror (05:00 UTC). |
18 + | `sando-update@.service` + `sando-self-update.sh` | Sando host | Self-update: rebuild + restart `sandod` to a target sha. |
19 + | `10-sando-update.rules` | Sando host | polkit grant letting the `sando` user start (only) `sando-update@*`. |
20 +
21 + ## Self-update (deploying the controller itself)
22 +
23 + The deploy pipeline ships the *MNW server*, not `sandod`. To deploy a new
24 + `sandod` (the controller's own code), `sandod` exposes `POST /self-update
25 + {"sha":"<hex>"}` (bearer-gated like the other mutators). It cannot do the work
26 + itself — it runs `User=sando`, `NoNewPrivileges`, `ProtectSystem=strict`, so it
27 + can neither write `/usr/local/bin/sandod` nor restart its own service. It only
28 + *triggers* the root oneshot `sando-update@<sha>.service`, which the `sando` user
29 + is allowed to `start` (and nothing else) by `10-sando-update.rules`. That unit
30 + runs `sando-self-update.sh`: it builds `sando/daemon` **as the sando user** in a
31 + dedicated checkout (`/srv/sando/self-update`, never the operator's dev tree),
32 + then — as root — installs the binary and `systemctl restart sandod`.
33 +
34 + Verify a self-update landed by polling `/state`: the new `sandod_version` field
35 + reports the running controller's package version (distinct from the tier
36 + versions, which are the deployed product).
37 +
38 + One-time install (as root), then every future controller deploy is one API call:
39 +
40 + ```sh
41 + sudo install -d /usr/local/lib/sando
42 + sudo install -m 0755 sando-self-update.sh /usr/local/lib/sando/sando-self-update.sh
43 + sudo install -m 0644 sando-update@.service /etc/systemd/system/
44 + sudo install -m 0644 10-sando-update.rules /etc/polkit-1/rules.d/
45 + sudo systemctl daemon-reload
46 + ```
47 +
48 + Bootstrap caveat: the *first* `sandod` carrying `/self-update` still has to be
49 + installed by hand (build + `install` + `systemctl restart sandod`) — there is no
50 + endpoint to call until it is running. Self-update is for every deploy after that.
51 + Trigger + watch:
52 +
53 + ```sh
54 + SHA=$(git -C ~/Code/MNW rev-parse HEAD)
55 + curl -sS -X POST "$BASE/self-update" -H 'Content-Type: application/json' \
56 + -H "Authorization: Bearer $SANDO_API_TOKEN" -d "{\"sha\":\"$SHA\"}"
57 + journalctl -u "sando-update@$SHA" -f
58 + ```
18 59
19 60 ## testnot.work staging mirror
20 61
@@ -0,0 +1,63 @@
1 + #!/usr/bin/env bash
2 + # Rebuild and restart sandod itself to a target commit.
3 + #
4 + # Runs as ROOT, invoked by the oneshot unit `sando-update@<sha>.service` (the
5 + # sha is the instance name, passed here as $1). sandod cannot do this itself:
6 + # it runs User=sando with NoNewPrivileges + ProtectSystem=strict, so it can
7 + # neither write /usr/local/bin/sandod nor restart its own service. sandod only
8 + # *triggers* this unit (authorized for the sando user by a scoped polkit rule);
9 + # the actual privileged work lives here.
10 + #
11 + # Build runs as the unprivileged build user (the sando user already carries a
12 + # rustup toolchain at /srv/sando/.cargo/bin); only the install + restart run as
13 + # root. The build uses a dedicated checkout, never the operator's dev tree.
14 + #
15 + # Config via environment (defaults shown), set in the unit or /etc/sando/sando.env:
16 + # SANDO_SELF_UPDATE_DIR /srv/sando/self-update build checkout parent (build-user-owned)
17 + # SANDO_UPSTREAM_URL git@ssh.makenot.work:max/makenotwork.git
18 + # SANDO_BUILD_USER sando
19 + # SANDO_BIN /usr/local/bin/sandod install destination
20 + set -euo pipefail
21 +
22 + SHA="${1:-}"
23 + if [[ ! "$SHA" =~ ^[0-9a-f]{7,40}$ ]]; then
24 + echo "sando-self-update: refusing non-hex sha: '$SHA'" >&2
25 + exit 2
26 + fi
27 +
28 + SELF_DIR="${SANDO_SELF_UPDATE_DIR:-/srv/sando/self-update}"
29 + UPSTREAM_URL="${SANDO_UPSTREAM_URL:-git@ssh.makenot.work:max/makenotwork.git}"
30 + BUILD_USER="${SANDO_BUILD_USER:-sando}"
31 + BIN="${SANDO_BIN:-/usr/local/bin/sandod}"
32 + REPO_DIR="$SELF_DIR/MNW"
33 + BUILD_HOME="$(getent passwd "$BUILD_USER" | cut -d: -f6)"
34 +
35 + echo "sando-self-update: building sandod @ $SHA as $BUILD_USER"
36 +
37 + # Fetch + checkout + build, all as the unprivileged build user (clone once, then
38 + # fetch). Detached checkout so the tree never carries a branch to drift. Values
39 + # are passed as positional args ($1..$3), not spliced into the script text.
40 + install -d -o "$BUILD_USER" -g "$BUILD_USER" "$SELF_DIR"
41 + runuser -u "$BUILD_USER" -- env \
42 + HOME="$BUILD_HOME" \
43 + PATH="$BUILD_HOME/.cargo/bin:/usr/local/bin:/usr/bin:/bin" \
44 + bash -euo pipefail -c '
45 + if [[ ! -d "$1/.git" ]]; then
46 + git clone "$2" "$1"
47 + fi
48 + cd "$1"
49 + git fetch --prune origin
50 + git checkout --detach "$3"
51 + cd sando/daemon
52 + cargo build --release --locked
53 + ' _ "$REPO_DIR" "$UPSTREAM_URL" "$SHA"
54 +
55 + NEW_BIN="$REPO_DIR/sando/daemon/target/release/sandod"
56 + [[ -x "$NEW_BIN" ]] || { echo "sando-self-update: build produced no binary at $NEW_BIN" >&2; exit 3; }
57 +
58 + # Install + restart as root. Stage beside $BIN, then rename(2) into place: the
59 + # swap is atomic, so a concurrent exec of $BIN never sees a half-written file.
60 + echo "sando-self-update: installing $NEW_BIN -> $BIN and restarting sandod"
61 + install -m 0755 "$NEW_BIN" "$BIN.new"; mv -f "$BIN.new" "$BIN"
62 + systemctl restart sandod
63 + echo "sando-self-update: done ($SHA live)"
@@ -0,0 +1,29 @@
1 + # Privileged self-update unit for the Sando controller.
2 + #
3 + # Instance name = the target git sha, e.g. `sando-update@<40-hex>.service`.
4 + # Triggered by sandod's POST /self-update (the sando user is authorized to
5 + # start this template by the polkit rule 10-sando-update.rules) — sandod itself
6 + # runs unprivileged and cannot install a binary or restart its own service.
7 + #
8 + # Install (one-time, as root):
9 + # sudo install -D -m 0755 sando-self-update.sh /usr/local/lib/sando/sando-self-update.sh
10 + # sudo install -m 0644 sando-update@.service /etc/systemd/system/
11 + # sudo install -m 0644 10-sando-update.rules /etc/polkit-1/rules.d/
12 + # sudo systemctl daemon-reload
13 + #
14 + # Watch a run: journalctl -u sando-update@<sha> -f
15 + [Unit]
16 + Description=Sando controller self-update to %i
17 + # Ordering only: wait for the network (the updater `git fetch`es). No dependency on sandod, so nothing here tears it down early.
18 + After=network-online.target
19 +
20 + [Service]
21 + Type=oneshot
22 + # Root: the build drops to the sando user internally; install + restart need root.
23 + ExecStart=/usr/local/lib/sando/sando-self-update.sh %i
24 + # Inherit the operator-set knobs (upstream URL, build user, paths) if present.
25 + EnvironmentFile=-/etc/sando/sando.env
26 + TimeoutStartSec=1800
27 + StandardOutput=journal
28 + StandardError=journal
29 + SyslogIdentifier=sando-update
@@ -1793,7 +1793,7 @@ checksum = "9774ba4a74de5f7b1c1451ed6cd5285a32eddb5cccb8cc655a4e50009e06477f"
1793 1793
1794 1794 [[package]]
1795 1795 name = "sando-daemon"
1796 - version = "0.2.0"
1796 + version = "0.2.1"
1797 1797 dependencies = [
1798 1798 "anyhow",
1799 1799 "async-trait",
@@ -1815,7 +1815,7 @@ dependencies = [
1815 1815
1816 1816 [[package]]
1817 1817 name = "sando-tui"
1818 - version = "0.2.0"
1818 + version = "0.2.1"
1819 1819 dependencies = [
1820 1820 "anyhow",
1821 1821 "chrono",
@@ -1,6 +1,6 @@
1 1 [package]
2 2 name = "sando-tui"
3 - version = "0.2.0"
3 + version = "0.2.1"
4 4 edition = "2024"
5 5 license = "MIT"
6 6