max / makenotwork
10 files changed,
+250 insertions,
-6 deletions
| @@ -1637,7 +1637,7 @@ checksum = "9774ba4a74de5f7b1c1451ed6cd5285a32eddb5cccb8cc655a4e50009e06477f" | |||
| 1637 | 1637 | ||
| 1638 | 1638 | [[package]] | |
| 1639 | 1639 | name = "sando-daemon" | |
| 1640 | - | version = "0.2.0" | |
| 1640 | + | version = "0.2.1" | |
| 1641 | 1641 | dependencies = [ | |
| 1642 | 1642 | "anyhow", | |
| 1643 | 1643 | "async-trait", |
| @@ -1,6 +1,6 @@ | |||
| 1 | 1 | [package] | |
| 2 | 2 | name = "sando-daemon" | |
| 3 | - | version = "0.2.0" | |
| 3 | + | version = "0.2.1" | |
| 4 | 4 | edition = "2024" | |
| 5 | 5 | license = "MIT" | |
| 6 | 6 |
| @@ -5,6 +5,8 @@ use axum::response::{IntoResponse, Response}; | |||
| 5 | 5 | pub enum Error { | |
| 6 | 6 | #[error("not found")] | |
| 7 | 7 | NotFound, | |
| 8 | + | #[error("bad request: {0}")] | |
| 9 | + | BadRequest(String), | |
| 8 | 10 | #[error("gate not satisfied: {0}")] | |
| 9 | 11 | GateBlocked(String), | |
| 10 | 12 | #[error(transparent)] | |
| @@ -17,6 +19,7 @@ impl IntoResponse for Error { | |||
| 17 | 19 | fn into_response(self) -> Response { | |
| 18 | 20 | let status = match &self { | |
| 19 | 21 | Error::NotFound => StatusCode::NOT_FOUND, | |
| 22 | + | Error::BadRequest(_) => StatusCode::BAD_REQUEST, | |
| 20 | 23 | Error::GateBlocked(_) => StatusCode::CONFLICT, | |
| 21 | 24 | _ => StatusCode::INTERNAL_SERVER_ERROR, | |
| 22 | 25 | }; |
| @@ -18,6 +18,7 @@ pub fn router(state: AppState) -> Router { | |||
| 18 | 18 | .route("/promote/{tier}", post(promote)) | |
| 19 | 19 | .route("/rollback/{tier}", post(rollback)) | |
| 20 | 20 | .route("/rebuild", post(rebuild)) | |
| 21 | + | .route("/self-update", post(self_update)) | |
| 21 | 22 | .route("/confirm/{tier}", post(confirm)) | |
| 22 | 23 | .route("/backup/fetch", post(backup_fetch)) | |
| 23 | 24 | .route_layer(axum::middleware::from_fn(move |req, next| { | |
| @@ -78,6 +79,10 @@ fn ct_eq(a: &str, b: &str) -> bool { | |||
| 78 | 79 | ||
| 79 | 80 | #[derive(Serialize)] | |
| 80 | 81 | struct StateView { | |
| 82 | + | /// The running sandod's own package version. Lets a self-update caller | |
| 83 | + | /// confirm the new binary is live after the restart (the tier versions are | |
| 84 | + | /// the *deployed product*, not the controller). | |
| 85 | + | sandod_version: &'static str, | |
| 81 | 86 | tiers: Vec<TierView>, | |
| 82 | 87 | } | |
| 83 | 88 | ||
| @@ -193,7 +198,7 @@ async fn get_state(State(s): State<AppState>) -> Result<Json<StateView>> { | |||
| 193 | 198 | }); | |
| 194 | 199 | } | |
| 195 | 200 | ||
| 196 | - | Ok(Json(StateView { tiers })) | |
| 201 | + | Ok(Json(StateView { sandod_version: env!("CARGO_PKG_VERSION"), tiers })) | |
| 197 | 202 | } | |
| 198 | 203 | ||
| 199 | 204 | #[derive(Deserialize, Default)] | |
| @@ -798,6 +803,63 @@ async fn rebuild( | |||
| 798 | 803 | Ok(Json(serde_json::json!({ "accepted": true, "sha": sha_response }))) | |
| 799 | 804 | } | |
| 800 | 805 | ||
| 806 | + | #[derive(Deserialize)] | |
| 807 | + | struct SelfUpdateBody { | |
| 808 | + | /// The commit to rebuild sandod from. Must already be on the canonical | |
| 809 | + | /// remote (the updater `git fetch`es it). | |
| 810 | + | sha: String, | |
| 811 | + | } | |
| 812 | + | ||
| 813 | + | /// The privileged updater unit instance for `sha`. A git sha is hex-only, so it | |
| 814 | + | /// is a safe systemd instance name with no escaping needed. | |
| 815 | + | fn self_update_unit(sha: &crate::domain::GitSha) -> String { | |
| 816 | + | format!("sando-update@{sha}.service") | |
| 817 | + | } | |
| 818 | + | ||
| 819 | + | /// Trigger a rebuild + restart of sandod *itself* to `sha`. sandod runs | |
| 820 | + | /// unprivileged (User=sando, NoNewPrivileges, ProtectSystem=strict) and cannot | |
| 821 | + | /// write `/usr/local/bin/sandod` or restart its own service — so it only | |
| 822 | + | /// *triggers* the root oneshot `sando-update@<sha>.service` (which the sando | |
| 823 | + | /// user is authorized to start by a scoped polkit rule). That unit builds | |
| 824 | + | /// `sando/daemon` as the sando user in a dedicated checkout, installs the new | |
| 825 | + | /// binary, and restarts sandod. Bearer-gated like the other mutators; the new | |
| 826 | + | /// version shows up in `/state`'s `sandod_version` once the restart lands. | |
| 827 | + | async fn self_update( | |
| 828 | + | State(s): State<AppState>, | |
| 829 | + | Json(body): Json<SelfUpdateBody>, | |
| 830 | + | ) -> Result<Json<serde_json::Value>> { | |
| 831 | + | let sha = crate::domain::GitSha::parse(&body.sha) | |
| 832 | + | .map_err(|e| crate::error::Error::BadRequest(format!("invalid sha: {e}")))?; | |
| 833 | + | ||
| 834 | + | // Don't restart the controller out from under an in-flight server build — | |
| 835 | + | // the restart would SIGKILL it mid-deploy. Make the operator retry once idle. | |
| 836 | + | { | |
| 837 | + | let slot = s.active_build.lock().await; | |
| 838 | + | if slot.as_ref().is_some_and(|h| !h.is_finished()) { | |
| 839 | + | return Err(crate::error::Error::GateBlocked( | |
| 840 | + | "a server build is in flight; retry /self-update once it settles".into(), | |
| 841 | + | )); | |
| 842 | + | } | |
| 843 | + | } | |
| 844 | + | ||
| 845 | + | let unit = self_update_unit(&sha); | |
| 846 | + | tracing::warn!(sha = %sha, unit = %unit, "self-update requested; triggering privileged updater"); | |
| 847 | + | // `--no-block`: return as soon as the job is enqueued. The build+restart | |
| 848 | + | // outcome lands in `journalctl -u <unit>`; sandod is restarted out from | |
| 849 | + | // under this request, so there is nothing more to await here. | |
| 850 | + | let status = tokio::process::Command::new("systemctl") | |
| 851 | + | .args(["start", "--no-block", &unit]) | |
| 852 | + | .status() | |
| 853 | + | .await | |
| 854 | + | .map_err(|e| crate::error::Error::Other(anyhow::anyhow!("spawning systemctl: {e}")))?; | |
| 855 | + | if !status.success() { | |
| 856 | + | return Err(crate::error::Error::Other(anyhow::anyhow!( | |
| 857 | + | "systemctl start {unit} exited {status}; is sando-update@.service installed and the sando-user polkit rule in place?" | |
| 858 | + | ))); | |
| 859 | + | } | |
| 860 | + | Ok(Json(serde_json::json!({ "accepted": true, "sha": sha.to_string(), "unit": unit }))) | |
| 861 | + | } | |
| 862 | + | ||
| 801 | 863 | async fn confirm( | |
| 802 | 864 | State(s): State<AppState>, | |
| 803 | 865 | Path(tier): Path<String>, | |
| @@ -1323,6 +1385,32 @@ mod tests { | |||
| 1323 | 1385 | assert_eq!(resp.status(), StatusCode::NOT_FOUND); | |
| 1324 | 1386 | } | |
| 1325 | 1387 | ||
| 1388 | + | #[test] | |
| 1389 | + | fn self_update_unit_maps_sha_to_instance() { | |
| 1390 | + | let sha = crate::domain::GitSha::parse("abc1234def5678").unwrap(); | |
| 1391 | + | assert_eq!(self_update_unit(&sha), "sando-update@abc1234def5678.service"); | |
| 1392 | + | } | |
| 1393 | + | ||
| 1394 | + | #[tokio::test] | |
| 1395 | + | async fn self_update_rejects_bad_sha_with_400() { | |
| 1396 | + | // A malformed sha is a client error and must be rejected *before* any | |
| 1397 | + | // privileged unit is triggered (so this test never shells out). | |
| 1398 | + | let state = test_state().await; | |
| 1399 | + | let app = router(state); | |
| 1400 | + | let resp = app | |
| 1401 | + | .oneshot( | |
| 1402 | + | Request::builder() | |
| 1403 | + | .method("POST") | |
| 1404 | + | .uri("/self-update") | |
| 1405 | + | .header("Content-Type", "application/json") | |
| 1406 | + | .body(Body::from(r#"{"sha":"not-a-sha!"}"#)) | |
| 1407 | + | .unwrap(), | |
| 1408 | + | ) | |
| 1409 | + | .await | |
| 1410 | + | .unwrap(); | |
| 1411 | + | assert_eq!(resp.status(), StatusCode::BAD_REQUEST); | |
| 1412 | + | } | |
| 1413 | + | ||
| 1326 | 1414 | // ---- /promote/{tier} default-version resolution ---- | |
| 1327 | 1415 | ||
| 1328 | 1416 | #[tokio::test] |
| @@ -0,0 +1,20 @@ | |||
| 1 | + | // polkit rule: let the `sando` user START (only) the self-update template unit. | |
| 2 | + | // | |
| 3 | + | // Install at /etc/polkit-1/rules.d/10-sando-update.rules. | |
| 4 | + | // | |
| 5 | + | // This is the one privilege bridge that makes controller self-deploy work: | |
| 6 | + | // sandod runs unprivileged and triggers `systemctl start sando-update@<sha>`, | |
| 7 | + | // which polkit authorizes here without a password. The grant is deliberately | |
| 8 | + | // narrow — only `start`, only units whose name begins `sando-update@`, only the | |
| 9 | + | // sando user. stop/restart/enable and every other unit fall through to the | |
| 10 | + | // system default policy (i.e. denied for an unprivileged caller). | |
| 11 | + | polkit.addRule(function(action, subject) { | |
| 12 | + | if (action.id === "org.freedesktop.systemd1.manage-units" && | |
| 13 | + | subject.user === "sando") { | |
| 14 | + | var unit = action.lookup("unit"); | |
| 15 | + | var verb = action.lookup("verb"); | |
| 16 | + | if (verb === "start" && unit && unit.indexOf("sando-update@") === 0) { | |
| 17 | + | return polkit.Result.YES; | |
| 18 | + | } | |
| 19 | + | } | |
| 20 | + | }); |
| @@ -15,6 +15,47 @@ live in the Syncthing private layer (`_private/infra/`, `_private/deploy`). | |||
| 15 | 15 | | `post-receive` | git remote | Push-to-deploy hook. | | |
| 16 | 16 | | `sandod-backup-fetch.{service,timer}` | Sando host | Daily pull of the prod backup to `/srv/sando/backups/latest.sql.gz` (04:00 UTC). | | |
| 17 | 17 | | `mnw-testnot-refresh.{sh,service,timer}` | Sando host | Daily refresh of the testnot.work staging mirror (05:00 UTC). | | |
| 18 | + | | `sando-update@.service` + `sando-self-update.sh` | Sando host | Self-update: rebuild + restart `sandod` to a target sha. | | |
| 19 | + | | `10-sando-update.rules` | Sando host | polkit grant letting the `sando` user start (only) `sando-update@*`. | | |
| 20 | + | ||
| 21 | + | ## Self-update (deploying the controller itself) | |
| 22 | + | ||
| 23 | + | The deploy pipeline ships the *MNW server*, not `sandod`. To deploy a new | |
| 24 | + | `sandod` (the controller's own code), `sandod` exposes `POST /self-update | |
| 25 | + | {"sha":"<hex>"}` (bearer-gated like the other mutators). It cannot do the work | |
| 26 | + | itself — it runs `User=sando`, `NoNewPrivileges`, `ProtectSystem=strict`, so it | |
| 27 | + | can neither write `/usr/local/bin/sandod` nor restart its own service. It only | |
| 28 | + | *triggers* the root oneshot `sando-update@<sha>.service`, which the `sando` user | |
| 29 | + | is allowed to `start` (and nothing else) by `10-sando-update.rules`. That unit | |
| 30 | + | runs `sando-self-update.sh`: it builds `sando/daemon` **as the sando user** in a | |
| 31 | + | dedicated checkout (`/srv/sando/self-update`, never the operator's dev tree), | |
| 32 | + | then — as root — installs the binary and runs `systemctl restart sandod`. | |
| 33 | + | ||
| 34 | + | Verify a self-update landed by polling `/state`: the new `sandod_version` field | |
| 35 | + | reports the running controller's package version (distinct from the tier | |
| 36 | + | versions, which are the deployed product). | |
| 37 | + | ||
| 38 | + | One-time install (as root), then every future controller deploy is one API call: | |
| 39 | + | ||
| 40 | + | ```sh | |
| 41 | + | sudo install -d /usr/local/lib/sando | |
| 42 | + | sudo install -m 0755 sando-self-update.sh /usr/local/lib/sando/sando-self-update.sh | |
| 43 | + | sudo install -m 0644 sando-update@.service /etc/systemd/system/ | |
| 44 | + | sudo install -m 0644 10-sando-update.rules /etc/polkit-1/rules.d/ | |
| 45 | + | sudo systemctl daemon-reload | |
| 46 | + | ``` | |
| 47 | + | ||
| 48 | + | Bootstrap caveat: the *first* `sandod` carrying `/self-update` still has to be | |
| 49 | + | installed by hand (build + `install` + `systemctl restart sandod`) — there is no | |
| 50 | + | endpoint to call until it is running. Self-update is for every deploy after that. | |
| 51 | + | Trigger + watch: | |
| 52 | + | ||
| 53 | + | ```sh | |
| 54 | + | SHA=$(git -C ~/Code/MNW rev-parse HEAD) | |
| 55 | + | curl -sS -X POST "$BASE/self-update" -H 'Content-Type: application/json' \ | |
| 56 | + | -H "Authorization: Bearer $SANDO_API_TOKEN" -d "{\"sha\":\"$SHA\"}" | |
| 57 | + | journalctl -u "sando-update@$SHA" -f | |
| 58 | + | ``` | |
| 18 | 59 | ||
| 19 | 60 | ## testnot.work staging mirror | |
| 20 | 61 |
| @@ -0,0 +1,63 @@ | |||
| 1 | + | #!/usr/bin/env bash | |
| 2 | + | # Rebuild and restart sandod itself to a target commit. | |
| 3 | + | # | |
| 4 | + | # Runs as ROOT, invoked by the oneshot unit `sando-update@<sha>.service` (the | |
| 5 | + | # sha is the instance name, passed here as $1). sandod cannot do this itself: | |
| 6 | + | # it runs User=sando with NoNewPrivileges + ProtectSystem=strict, so it can | |
| 7 | + | # neither write /usr/local/bin/sandod nor restart its own service. sandod only | |
| 8 | + | # *triggers* this unit (authorized for the sando user by a scoped polkit rule); | |
| 9 | + | # the actual privileged work lives here. | |
| 10 | + | # | |
| 11 | + | # Build runs as the unprivileged build user (the sando user already carries a | |
| 12 | + | # rustup toolchain at /srv/sando/.cargo/bin); only the install + restart run as | |
| 13 | + | # root. The build uses a dedicated checkout, never the operator's dev tree. | |
| 14 | + | # | |
| 15 | + | # Config via environment (defaults shown), set in the unit or /etc/sando/sando.env: | |
| 16 | + | # SANDO_SELF_UPDATE_DIR /srv/sando/self-update build checkout parent (build-user-owned) | |
| 17 | + | # SANDO_UPSTREAM_URL git@ssh.makenot.work:max/makenotwork.git | |
| 18 | + | # SANDO_BUILD_USER sando | |
| 19 | + | # SANDO_BIN /usr/local/bin/sandod install destination | |
| 20 | + | set -euo pipefail | |
| 21 | + | ||
| 22 | + | SHA="${1:-}" | |
| 23 | + | if [[ ! "$SHA" =~ ^[0-9a-f]{7,40}$ ]]; then | |
| 24 | + | echo "sando-self-update: refusing non-hex sha: '$SHA'" >&2 | |
| 25 | + | exit 2 | |
| 26 | + | fi | |
| 27 | + | ||
| 28 | + | SELF_DIR="${SANDO_SELF_UPDATE_DIR:-/srv/sando/self-update}" | |
| 29 | + | UPSTREAM_URL="${SANDO_UPSTREAM_URL:-git@ssh.makenot.work:max/makenotwork.git}" | |
| 30 | + | BUILD_USER="${SANDO_BUILD_USER:-sando}" | |
| 31 | + | BIN="${SANDO_BIN:-/usr/local/bin/sandod}" | |
| 32 | + | REPO_DIR="$SELF_DIR/MNW" | |
| 33 | + | BUILD_HOME="$(getent passwd "$BUILD_USER" | cut -d: -f6)" | |
| 34 | + | ||
| 35 | + | echo "sando-self-update: building sandod @ $SHA as $BUILD_USER" | |
| 36 | + | ||
| 37 | + | # Fetch + checkout + build, all as the unprivileged build user. The clone is | |
| 38 | + | # created once; thereafter we just fetch the new sha. Detached checkout so the | |
| 39 | + | # dedicated tree never carries a branch to drift. | |
| 40 | + | install -d -o "$BUILD_USER" -g "$BUILD_USER" "$SELF_DIR" | |
| 41 | + | runuser -u "$BUILD_USER" -- env \ | |
| 42 | + | HOME="$BUILD_HOME" \ | |
| 43 | + | PATH="$BUILD_HOME/.cargo/bin:/usr/local/bin:/usr/bin:/bin" \ | |
| 44 | + | bash -euo pipefail -c " | |
| 45 | + | if [[ ! -d '$REPO_DIR/.git' ]]; then | |
| 46 | + | git clone '$UPSTREAM_URL' '$REPO_DIR' | |
| 47 | + | fi | |
| 48 | + | cd '$REPO_DIR' | |
| 49 | + | git fetch --prune origin | |
| 50 | + | git checkout --detach '$SHA' | |
| 51 | + | cd sando/daemon | |
| 52 | + | cargo build --release --locked | |
| 53 | + | " | |
| 54 | + | ||
| 55 | + | NEW_BIN="$REPO_DIR/sando/daemon/target/release/sandod" | |
| 56 | + | [[ -x "$NEW_BIN" ]] || { echo "sando-self-update: build produced no binary at $NEW_BIN" >&2; exit 3; } | |
| 57 | + | ||
| 58 | + | # Install + restart as root. GNU install removes the old $BIN and writes the new file in | |
| 59 | + | # place (no temp+rename): the running sandod keeps its old inode, but the copy is not atomic. | |
| 60 | + | echo "sando-self-update: installing $NEW_BIN -> $BIN and restarting sandod" | |
| 61 | + | install -m 0755 "$NEW_BIN" "$BIN" | |
| 62 | + | systemctl restart sandod | |
| 63 | + | echo "sando-self-update: done ($SHA live)" |
| @@ -0,0 +1,29 @@ | |||
| 1 | + | # Privileged self-update unit for the Sando controller. | |
| 2 | + | # | |
| 3 | + | # Instance name = the target git sha, e.g. `sando-update@<40-hex>.service`. | |
| 4 | + | # Triggered by sandod's POST /self-update (the sando user is authorized to | |
| 5 | + | # start this template by the polkit rule 10-sando-update.rules) — sandod itself | |
| 6 | + | # runs unprivileged and cannot install a binary or restart its own service. | |
| 7 | + | # | |
| 8 | + | # Install (one-time, as root): | |
| 9 | + | # sudo install -D -m 0755 sando-self-update.sh /usr/local/lib/sando/sando-self-update.sh | |
| 10 | + | # sudo install -m 0644 sando-update@.service /etc/systemd/system/ | |
| 11 | + | # sudo install -m 0644 10-sando-update.rules /etc/polkit-1/rules.d/ | |
| 12 | + | # sudo systemctl daemon-reload | |
| 13 | + | # | |
| 14 | + | # Watch a run: journalctl -u sando-update@<sha> -f | |
| 15 | + | [Unit] | |
| 16 | + | Description=Sando controller self-update to %i | |
| 17 | + | # sando-self-update.sh fetches from the git remote, so start after the network is up; ordering only. | |
| 18 | + | After=network-online.target | |
| 19 | + | ||
| 20 | + | [Service] | |
| 21 | + | Type=oneshot | |
| 22 | + | # Root: the build drops to the sando user internally; install + restart need root. | |
| 23 | + | ExecStart=/usr/local/lib/sando/sando-self-update.sh %i | |
| 24 | + | # Inherit the operator-set knobs (upstream URL, build user, paths) if present. | |
| 25 | + | EnvironmentFile=-/etc/sando/sando.env | |
| 26 | + | TimeoutStartSec=1800 | |
| 27 | + | StandardOutput=journal | |
| 28 | + | StandardError=journal | |
| 29 | + | SyslogIdentifier=sando-update |
| @@ -1793,7 +1793,7 @@ checksum = "9774ba4a74de5f7b1c1451ed6cd5285a32eddb5cccb8cc655a4e50009e06477f" | |||
| 1793 | 1793 | ||
| 1794 | 1794 | [[package]] | |
| 1795 | 1795 | name = "sando-daemon" | |
| 1796 | - | version = "0.2.0" | |
| 1796 | + | version = "0.2.1" | |
| 1797 | 1797 | dependencies = [ | |
| 1798 | 1798 | "anyhow", | |
| 1799 | 1799 | "async-trait", | |
| @@ -1815,7 +1815,7 @@ dependencies = [ | |||
| 1815 | 1815 | ||
| 1816 | 1816 | [[package]] | |
| 1817 | 1817 | name = "sando-tui" | |
| 1818 | - | version = "0.2.0" | |
| 1818 | + | version = "0.2.1" | |
| 1819 | 1819 | dependencies = [ | |
| 1820 | 1820 | "anyhow", | |
| 1821 | 1821 | "chrono", |
| @@ -1,6 +1,6 @@ | |||
| 1 | 1 | [package] | |
| 2 | 2 | name = "sando-tui" | |
| 3 | - | version = "0.2.0" | |
| 3 | + | version = "0.2.1" | |
| 4 | 4 | edition = "2024" | |
| 5 | 5 | license = "MIT" | |
| 6 | 6 |