Skip to main content

max / makenotwork

sando: Session 1 — ship the full versioned bundle; mm → host; hook fix

Phase A landed the observability. Session 1 (per plans/session-1-bundle.md) fills out the bundle so a sando release contains everything that has to change in lockstep with the binary, not just the binary itself.

Changes:
- config.rs adds ReleaseEntry + release_contents: Vec<ReleaseEntry>. Each entry copies worktree/<src> into staged_release/<dst>; required=false warns on missing source (older shas mid-bisect don't break the build).
- build.rs gains stage_entry() helper. Drives all non-binary staging off cfg.release_contents instead of hard-coded MNW knowledge. Sando code is now project-agnostic — error-pages, static, docs are config rows.
- sando-daemon.toml.example wires the MNW release_contents (5 entries: error-pages, static, docs/public, docs/examples, docs/assumptions.toml — the last three are a 3-source merge into one docs/ dir, matching deploy.sh).
- deploy.rs rsync: add --delete (no stale assets across versions) and swap --chmod=F0755 for F+X (preserves source exec bit per-file, so binaries land 0755 and CSS/docs/etc. land 0644).
- Tier "mm" renamed to "host" (§6.5 step 10 folded in). sqlite migration 002 updates tiers + nodes + deploys + gate_runs + tier_state with PRAGMA defer_foreign_keys = ON inside the migration transaction. Code sweep across events.rs, sync.rs, build.rs, topology.rs, routes.rs and test fixtures. build_and_run_mm → build_and_run_host.
- bootstrap-node.sh writes the new FHS-style systemd unit: EnvironmentFile=/etc/mnw/makenotwork.env (not <release>/.env), adds ReadWritePaths=/var/lib/mnw, WorkingDirectory=<release>/current. Pre-creates /etc/mnw (root:service 0750) and /var/lib/mnw (service:service).
- post-receive hook lives in the repo now (sando/deploy/post-receive), installed by bootstrap-sandod-host.sh. Sources /etc/sando/sando.env so $SANDO_DAEMON resolves to the tailnet listener instead of the 127.0.0.1 default that's been failing every push since launch.

44/44 daemon tests still green. Mac compile clean. No prod or testnot changes yet — Sessions 2/3 cover those.
Author: Max Johnson <me@maxj.phd> · 2026-06-03 01:44 UTC
Commit: f0970b802067f914611c4898f5af0a6fd44a0b7c
Parent: 3548282
13 files changed, +258 insertions, -80 deletions
@@ -0,0 +1,18 @@
1 + -- Rename the host-build tier from "mm" to "host". §6.5 step 10 of
2 + -- launchplan_final.md (folded into Session 1 since we're already touching
3 + -- topology code). "mm" was misleading — sandod runs on whatever machine
4 + -- ends up being the Sando host (currently pop-os, not a MakeMachine).
5 + --
6 + -- All five tables that key by tier name get the same UPDATE. SQLite defers
7 + -- FK checks to commit when defer_foreign_keys is on inside a transaction,
8 + -- so order between parent (tiers) + children (nodes, deploys, gate_runs,
9 + -- tier_state) doesn't matter — the transaction is consistent at COMMIT.
10 + -- sqlx wraps each migration in a transaction.
11 +
12 + PRAGMA defer_foreign_keys = ON;
13 +
14 + UPDATE tiers SET name = 'host' WHERE name = 'mm';
15 + UPDATE nodes SET tier = 'host' WHERE tier = 'mm';
16 + UPDATE deploys SET tier = 'host' WHERE tier = 'mm';
17 + UPDATE gate_runs SET tier = 'host' WHERE tier = 'mm';
18 + UPDATE tier_state SET tier = 'host' WHERE tier = 'mm';
@@ -115,10 +115,12 @@ pub async fn run(
115 115 Ok(BuildArtifact { version, git_sha: sha, worktree, binary_paths })
116 116 }
117 117
118 - /// Full MM-tier pipeline: build, deploy the binary into MM's release_root,
119 - /// run MM's configured gates against the worktree, set tier_state.mm if all
120 - /// pass. Errors propagate back to the spawned task and get logged.
121 - pub async fn build_and_run_mm(
118 + /// Full host-tier pipeline: build, stage the bundle into the host's
119 + /// release_root, run the host tier's configured gates, advance tier_state
120 + /// for "host" if all pass. Errors propagate back to the spawned task and
121 + /// get logged. (Tier was called "mm" pre-Session-1; renamed to "host"
122 + /// since sandod runs on whatever machine ends up being the Sando host.)
123 + pub async fn build_and_run_host(
122 124 pool: SqlitePool,
123 125 cfg: Arc<Config>,
124 126 topo: Arc<Topology>,
@@ -127,29 +129,17 @@ pub async fn build_and_run_mm(
127 129 ) -> Result<()> {
128 130 let art = run(pool.clone(), cfg.clone(), topo.clone(), sha, events.clone()).await?;
129 131
130 - // Stage the binary in MM's release_root so future gates and the MM
131 - // self-deploy point at a stable path, not the worktree's target/.
132 - let mm_release_root = &cfg.release_root;
133 - let staged = deploy::deploy_local(mm_release_root, &art.version, &art.binary_paths).await?;
134 -
135 - // Bring error-pages alongside the binaries so the deploy rsync ships the
136 - // static HTML to every node. Caddy on each node references
137 - // <release_root>/current/error-pages/. Skipped silently if the worktree
138 - // doesn't have them (older shas, or non-MNW projects using this daemon).
139 - let error_pages_src = art.worktree.join("server/deploy/error-pages");
140 - if error_pages_src.exists() {
141 - let out = Command::new("cp")
142 - .arg("-a")
143 - .arg(&error_pages_src)
144 - .arg(staged.join("error-pages"))
145 - .output()
146 - .await
147 - .context("spawning cp for error-pages")?;
148 - anyhow::ensure!(
149 - out.status.success(),
150 - "copying error-pages into staged dir: {}",
151 - String::from_utf8_lossy(&out.stderr),
152 - );
132 + // Stage the binary in the host's release_root so future gates and the
133 + // host self-deploy point at a stable path, not the worktree's target/.
134 + let host_release_root = &cfg.release_root;
135 + let staged = deploy::deploy_local(host_release_root, &art.version, &art.binary_paths).await?;
136 +
137 + // Stage every entry from cfg.release_contents into the staged release dir.
138 + // This is how non-binary version-coupled content (static assets, docs,
139 + // error-pages, ...) makes it into the atomic deploy bundle. Projects opt
140 + // in via daemon config — the sando code carries no MNW-specific knowledge.
141 + for entry in &cfg.release_contents {
142 + stage_entry(&art.worktree, &staged, entry).await?;
153 143 }
154 144
155 145 let staged_bin = staged.join(cfg.primary_bin());
@@ -159,37 +149,36 @@ pub async fn build_and_run_mm(
159 149 .execute(&pool)
160 150 .await?;
161 151
162 - // Find the MM tier's gate list. MM is conventionally named "mm".
163 - let mm = topo.tiers.iter().find(|t| t.name == "mm")
164 - .context("topology has no `mm` tier")?;
152 + let host = topo.tiers.iter().find(|t| t.name == "host")
153 + .context("topology has no `host` tier")?;
165 154
166 155 let ctx = GateCtx {
167 156 pool: pool.clone(),
168 157 cfg: cfg.clone(),
169 - tier: "mm".to_string(),
158 + tier: "host".to_string(),
170 159 version: art.version.clone(),
171 160 worktree: art.worktree.clone(),
172 161 events: events.clone(),
173 162 };
174 - let ok = gates::run_all(&ctx, &mm.gates).await?;
163 + let ok = gates::run_all(&ctx, &host.gates).await?;
175 164
176 165 if ok {
177 166 let prev: Option<String> = sqlx::query_scalar(
178 - "SELECT current_version FROM tier_state WHERE tier = 'mm'",
167 + "SELECT current_version FROM tier_state WHERE tier = 'host'",
179 168 )
180 169 .fetch_optional(&pool).await?.flatten();
181 170 sqlx::query(
182 171 "UPDATE tier_state SET previous_version = ?, current_version = ?, burn_in_started_at = ?
183 - WHERE tier = 'mm'",
172 + WHERE tier = 'host'",
184 173 )
185 174 .bind(prev)
186 175 .bind(&art.version)
187 176 .bind(Utc::now().to_rfc3339())
188 177 .execute(&pool)
189 178 .await?;
190 - tracing::info!(version = %art.version, "MM pipeline green; ready to promote to next tier");
179 + tracing::info!(version = %art.version, "host pipeline green; ready to promote to next tier");
191 180 } else {
192 - tracing::warn!(version = %art.version, "MM pipeline red; not advancing tier_state");
181 + tracing::warn!(version = %art.version, "host pipeline red; not advancing tier_state");
193 182 }
194 183 Ok(())
195 184 }
@@ -209,3 +198,60 @@ fn tail(buf: &[u8], max: usize) -> String {
209 198 let s = String::from_utf8_lossy(buf);
210 199 if s.len() <= max { s.into_owned() } else { s[s.len() - max..].to_string() }
211 200 }
201 +
202 + /// Copy `worktree/<entry.src>` into `staged/<entry.dst>`. Handles file or
203 + /// directory sources transparently. Missing source policy depends on
204 + /// `entry.required`:
205 + /// - required=true -> error (build fails)
206 + /// - required=false -> log warn + skip (e.g. older shas missing a dir)
207 + ///
208 + /// Uses `cp -a` to preserve modes/symlinks/etc; parent of dst is created if
209 + /// needed so entries like `dst = "docs/assumptions.toml"` work without
210 + /// extra config.
211 + async fn stage_entry(
212 + worktree: &Path,
213 + staged: &Path,
214 + entry: &crate::config::ReleaseEntry,
215 + ) -> Result<()> {
216 + let src = worktree.join(&entry.src);
217 + let dst = staged.join(&entry.dst);
218 + if !src.exists() {
219 + if entry.required {
220 + anyhow::bail!("required release_contents source missing: {}", src.display());
221 + }
222 + tracing::warn!(src = %src.display(), "release_contents source missing (optional); skipping");
223 + return Ok(());
224 + }
225 + if let Some(parent) = dst.parent() {
226 + tokio::fs::create_dir_all(parent).await
227 + .with_context(|| format!("create staged parent {}", parent.display()))?;
228 + }
229 + // Multiple entries with the same dst (e.g. site-docs/public/ +
230 + // site-docs/examples/ both landing under docs/) need additive merging.
231 + // `cp -a SRC/. DST/` copies SRC's contents into DST without overwriting
232 + // the dst dir itself; that's the merge-friendly form when dst is a dir
233 + // that may already exist from a prior entry. For non-dir sources or a
234 + // missing dst we fall back to the plain `cp -a SRC DST` form.
235 + let merge_into_existing_dir = src.is_dir() && dst.is_dir();
236 + let mut cmd = Command::new("cp");
237 + cmd.arg("-a");
238 + if merge_into_existing_dir {
239 + let mut src_arg = src.clone().into_os_string();
240 + src_arg.push("/.");
241 + cmd.arg(src_arg);
242 + let mut dst_arg = dst.clone().into_os_string();
243 + dst_arg.push("/");
244 + cmd.arg(dst_arg);
245 + } else {
246 + cmd.arg(&src).arg(&dst);
247 + }
248 + let out = cmd.output().await
249 + .with_context(|| format!("spawning cp for {} -> {}", src.display(), dst.display()))?;
250 + anyhow::ensure!(
251 + out.status.success(),
252 + "stage {} -> {}: {}",
253 + src.display(), dst.display(),
254 + String::from_utf8_lossy(&out.stderr),
255 + );
256 + Ok(())
257 + }
@@ -26,6 +26,29 @@ pub struct Config {
26 26 /// Served via `GET /logs/{version}/{gate}`. Defaults to `/srv/sando/logs`.
27 27 #[serde(default = "default_logs_root")]
28 28 pub logs_root: PathBuf,
29 + /// Non-binary contents to stage into each release dir alongside
30 + /// `bin_names`. Each entry copies `worktree/<src>` into
31 + /// `<release>/<dst>`. `required=false` makes a missing source a warn
32 + /// (older shas missing one of these don't break sando mid-bisect);
33 + /// `required=true` errors. Default is empty — projects opt-in via
34 + /// daemon config so the sando code stays project-agnostic.
35 + #[serde(default)]
36 + pub release_contents: Vec<ReleaseEntry>,
37 + }
38 +
39 + /// A directory or file copied from the worktree into the staged release dir.
40 + /// Multiple entries with the same `dst` are allowed and merged (used by MNW
41 + /// to build `docs/` from three different worktree sources).
42 + #[derive(Debug, Clone, Deserialize)]
43 + pub struct ReleaseEntry {
44 + /// Path relative to the worktree root (e.g. `server/static`).
45 + pub src: PathBuf,
46 + /// Path relative to the release dir (e.g. `static`). Parent dirs are
47 + /// created as needed.
48 + pub dst: PathBuf,
49 + /// If true, a missing source aborts the build. If false, log warn + skip.
50 + #[serde(default)]
51 + pub required: bool,
29 52 }
30 53
31 54 fn default_bin_names() -> Vec<String> { vec!["server".into()] }
@@ -124,17 +124,22 @@ async fn deploy_remote(
124 124 .context("creating remote release dir")?;
125 125
126 126 tracing::info!(node = %node.name, version, primary = %primary_bin, "deploy: rsync release dir");
127 - // Rsync the whole staged dir (all binaries + any sibling artifacts like
128 - // error-pages). Trailing slash on source = contents of dir, not the dir
129 - // itself. --chmod ensures binaries land executable; the regular-file
130 - // mask leaves data files at 0644.
127 + // Rsync the whole staged dir (binaries + every release_contents entry).
128 + // Trailing slash on source = contents of dir, not the dir itself.
129 + //
130 + // --delete: removed assets across versions don't accumulate on the
131 + // target. Bundle stays self-contained per version.
132 + // --chmod: `F+X` preserves execute bit per-file (binaries land 0755,
133 + // data files 0644) instead of the old blanket-0755 that was wrong for
134 + // static assets + docs.
131 135 let rsync_src = format!("{}/", staged_release_dir.display());
132 136 let rsync_dest = format!("{ssh_target}:{release_dir}/");
133 137 let mut rsync = Command::new("rsync");
134 138 rsync
135 139 .arg("-az")
136 140 .arg("--partial")
137 - .arg("--chmod=F0755,D0755")
141 + .arg("--delete")
142 + .arg("--chmod=Du=rwx,Dgo=rx,Fu=rw,Fgo=r,F+X")
138 143 .arg("-e")
139 144 .arg(format!(
140 145 "ssh {}",
@@ -93,7 +93,7 @@ mod tests {
93 93 let env = EventEnvelope {
94 94 at: Utc::now(),
95 95 event: Event::GateStart {
96 - tier: "mm".into(),
96 + tier: "host".into(),
97 97 version: "0.8.12".into(),
98 98 gate: "cargo_test".into(),
99 99 },
@@ -101,7 +101,7 @@ mod tests {
101 101 let s = serde_json::to_string(&env).unwrap();
102 102 let v: serde_json::Value = serde_json::from_str(&s).unwrap();
103 103 assert_eq!(v["kind"], "gate_start");
104 - assert_eq!(v["tier"], "mm");
104 + assert_eq!(v["tier"], "host");
105 105 assert_eq!(v["gate"], "cargo_test");
106 106 // No nested `event` object.
107 107 assert!(v.get("event").is_none());
@@ -417,7 +417,7 @@ async fn rebuild(
417 417 let events_for_task = s.events.clone();
418 418 let sha_for_task = sha.clone();
419 419 let handle = tokio::spawn(async move {
420 - if let Err(e) = crate::build::build_and_run_mm(pool, cfg, topo, sha_for_task.clone(), events_for_task).await {
420 + if let Err(e) = crate::build::build_and_run_host(pool, cfg, topo, sha_for_task.clone(), events_for_task).await {
421 421 tracing::error!(sha = %sha_for_task, error = %e, "rebuild pipeline failed");
422 422 }
423 423 });
@@ -573,7 +573,7 @@ mod tests {
573 573 },
574 574 tiers: vec![
575 575 Tier {
576 - name: "mm".into(),
576 + name: "host".into(),
577 577 provisioned: true,
578 578 gates: vec![],
579 579 canary: CanaryPolicy::Sequential,
@@ -605,13 +605,14 @@ mod tests {
605 605 scratch_db_url: None,
606 606 bin_names: vec!["makenotwork".into()],
607 607 logs_root: PathBuf::from("/tmp/sando-logs"),
608 + release_contents: vec![],
608 609 }
609 610 }
610 611
611 612 async fn test_state() -> AppState {
612 613 let pool = fresh_pool().await;
613 614 // Seed tier rows so FKs on tier_state / gate_runs are satisfied.
614 - for (i, name) in ["mm", "a"].iter().enumerate() {
615 + for (i, name) in ["host", "a"].iter().enumerate() {
615 616 sqlx::query(
616 617 "INSERT INTO tiers (name, ord, provisioned, canary) VALUES (?, ?, 1, 'sequential')",
617 618 )
@@ -664,18 +665,18 @@ mod tests {
664 665 // empty as "all green" which is correct iff the predecessor tier
665 666 // has no configured gates. The topology validation is upstream.
666 667 let pool = fresh_pool().await;
667 - seed(&pool, "mm", "0.8.12").await;
668 - let pending = unsatisfied_gates(&pool, "mm", "0.8.12", false).await.unwrap();
668 + seed(&pool, "host", "0.8.12").await;
669 + let pending = unsatisfied_gates(&pool, "host", "0.8.12", false).await.unwrap();
669 670 assert_eq!(pending, Vec::<String>::new());
670 671 }
671 672
672 673 #[tokio::test]
673 674 async fn unsatisfied_gates_flags_failed_kind() {
674 675 let pool = fresh_pool().await;
675 - seed(&pool, "mm", "0.8.12").await;
676 - insert_gate(&pool, "mm", "0.8.12", "cargo_test", 0).await;
677 - insert_gate(&pool, "mm", "0.8.12", "boot_smoke", 1).await;
678 - let pending = unsatisfied_gates(&pool, "mm", "0.8.12", false).await.unwrap();
676 + seed(&pool, "host", "0.8.12").await;
677 + insert_gate(&pool, "host", "0.8.12", "cargo_test", 0).await;
678 + insert_gate(&pool, "host", "0.8.12", "boot_smoke", 1).await;
679 + let pending = unsatisfied_gates(&pool, "host", "0.8.12", false).await.unwrap();
679 680 assert_eq!(pending, vec!["cargo_test".to_string()]);
680 681 }
681 682
@@ -684,10 +685,10 @@ mod tests {
684 685 // Two runs of the same gate; only the latest counts. A flap from
685 686 // red to green should clear the pending entry.
686 687 let pool = fresh_pool().await;
687 - seed(&pool, "mm", "0.8.12").await;
688 - insert_gate(&pool, "mm", "0.8.12", "cargo_test", 0).await;
689 - insert_gate(&pool, "mm", "0.8.12", "cargo_test", 1).await;
690 - let pending = unsatisfied_gates(&pool, "mm", "0.8.12", false).await.unwrap();
688 + seed(&pool, "host", "0.8.12").await;
689 + insert_gate(&pool, "host", "0.8.12", "cargo_test", 0).await;
690 + insert_gate(&pool, "host", "0.8.12", "cargo_test", 1).await;
691 + let pending = unsatisfied_gates(&pool, "host", "0.8.12", false).await.unwrap();
691 692 assert!(pending.is_empty());
692 693 }
693 694
@@ -713,16 +714,16 @@ mod tests {
713 714 #[tokio::test]
714 715 async fn unsatisfied_gates_ignores_other_tiers_and_versions() {
715 716 let pool = fresh_pool().await;
716 - seed(&pool, "mm", "0.8.12").await;
717 - seed(&pool, "mm", "0.8.11").await;
717 + seed(&pool, "host", "0.8.12").await;
718 + seed(&pool, "host", "0.8.11").await;
718 719 seed(&pool, "a", "0.8.12").await;
719 720 // Mark host/0.8.12 cargo_test failing, but unrelated tiers/versions
720 721 // shouldn't pollute the query.
721 - insert_gate(&pool, "mm", "0.8.12", "cargo_test", 0).await;
722 + insert_gate(&pool, "host", "0.8.12", "cargo_test", 0).await;
722 723 insert_gate(&pool, "a", "0.8.12", "cargo_test", 0).await;
723 - insert_gate(&pool, "mm", "0.8.11", "cargo_test", 0).await;
724 + insert_gate(&pool, "host", "0.8.11", "cargo_test", 0).await;
724 725
725 - let pending = unsatisfied_gates(&pool, "mm", "0.8.12", false).await.unwrap();
726 + let pending = unsatisfied_gates(&pool, "host", "0.8.12", false).await.unwrap();
726 727 assert_eq!(pending, vec!["cargo_test".to_string()]);
727 728 }
728 729
@@ -732,14 +733,14 @@ mod tests {
732 733 // should NOT be treated as green. Otherwise a race could promote
733 734 // before the gate concludes.
734 735 let pool = fresh_pool().await;
735 - seed(&pool, "mm", "0.8.12").await;
736 + seed(&pool, "host", "0.8.12").await;
736 737 sqlx::query(
737 738 "INSERT INTO gate_runs (version, tier, gate_kind, started_at) \
738 - VALUES ('0.8.12', 'mm', 'cargo_test', datetime('now'))",
739 + VALUES ('0.8.12', 'host', 'cargo_test', datetime('now'))",
739 740 )
740 741 .execute(&pool).await.unwrap();
741 742
742 - let pending = unsatisfied_gates(&pool, "mm", "0.8.12", false).await.unwrap();
743 + let pending = unsatisfied_gates(&pool, "host", "0.8.12", false).await.unwrap();
743 744 assert_eq!(pending, vec!["cargo_test".to_string()]);
744 745 }
745 746
@@ -822,14 +823,14 @@ mod tests {
822 823
823 824 #[tokio::test]
824 825 async fn promote_to_first_tier_is_rejected() {
825 - // tier 0 is mm — you /rebuild, not /promote.
826 + // tier 0 is host — you /rebuild, not /promote.
826 827 let state = test_state().await;
827 828 let app = router(state);
828 829 let resp = app
829 830 .oneshot(
830 831 Request::builder()
831 832 .method("POST")
832 - .uri("/promote/mm")
833 + .uri("/promote/host")
833 834 .body(Body::empty())
834 835 .unwrap(),
835 836 )
@@ -159,7 +159,7 @@ mod tests {
159 159 async fn syncs_tiers_nodes_and_inits_tier_state() {
160 160 let pool = fresh_pool().await;
161 161 let t = topo(vec![
162 - tier("mm", true, vec![]),
162 + tier("host", true, vec![]),
163 163 tier("a", true, vec![node("testnot-1")]),
164 164 tier("c", false, vec![]),
165 165 ]);
@@ -168,7 +168,7 @@ mod tests {
168 168
169 169 let tier_names: Vec<String> = sqlx::query_scalar("SELECT name FROM tiers ORDER BY ord")
170 170 .fetch_all(&pool).await.unwrap();
171 - assert_eq!(tier_names, vec!["mm", "a", "c"]);
171 + assert_eq!(tier_names, vec!["host", "a", "c"]);
172 172
173 173 let node_names: Vec<String> = sqlx::query_scalar("SELECT name FROM nodes")
174 174 .fetch_all(&pool).await.unwrap();
@@ -182,7 +182,7 @@ mod tests {
182 182 #[tokio::test]
183 183 async fn second_sync_is_idempotent() {
184 184 let pool = fresh_pool().await;
185 - let t = topo(vec![tier("mm", true, vec![]), tier("a", true, vec![node("n1")])]);
185 + let t = topo(vec![tier("host", true, vec![]), tier("a", true, vec![node("n1")])]);
186 186 sync(&pool, &t).await.unwrap();
187 187 sync(&pool, &t).await.unwrap();
188 188
@@ -210,7 +210,7 @@ mod tests {
210 210 #[tokio::test]
211 211 async fn refuses_to_drop_tier_with_pinned_version() {
212 212 let pool = fresh_pool().await;
213 - let t1 = topo(vec![tier("mm", true, vec![]), tier("a", true, vec![])]);
213 + let t1 = topo(vec![tier("host", true, vec![]), tier("a", true, vec![])]);
214 214 sync(&pool, &t1).await.unwrap();
215 215
216 216 // Simulate a version being deployed on tier a.
@@ -219,7 +219,7 @@ mod tests {
219 219 sqlx::query("UPDATE tier_state SET current_version = '0.1.0' WHERE tier = 'a'")
220 220 .execute(&pool).await.unwrap();
221 221
222 - let t2 = topo(vec![tier("mm", true, vec![])]);
222 + let t2 = topo(vec![tier("host", true, vec![])]);
223 223 let err = sync(&pool, &t2).await.unwrap_err();
224 224 assert!(err.to_string().contains("tier_state still pins"), "got: {err}");
225 225 }
@@ -86,7 +86,7 @@ impl Topology {
86 86 fn validate(&self) -> Result<()> {
87 87 anyhow::ensure!(!self.tiers.is_empty(), "topology must declare at least one tier");
88 88 for t in &self.tiers {
89 - if t.provisioned && t.nodes.is_empty() && t.name != "mm" {
89 + if t.provisioned && t.nodes.is_empty() && t.name != "host" {
90 90 anyhow::bail!("tier {} is provisioned but has no nodes", t.name);
91 91 }
92 92 }
@@ -40,6 +40,12 @@ if [[ -z "${SANDO_PUBKEY:-}" ]]; then
40 40 fi
41 41
42 42 DEPLOY_ROOT="${DEPLOY_ROOT:-/opt/mnw}"
43 + # FHS-style sidecar paths the systemd unit references. Bootstrap creates the
44 + # dirs but does not populate `ENV_FILE` — operator drops secrets in after the
45 + # bootstrap finishes, before starting the service.
46 + ETC_DIR="${ETC_DIR:-/etc/mnw}"
47 + ENV_FILE="${ENV_FILE:-$ETC_DIR/makenotwork.env}"
48 + STATE_DIR="${STATE_DIR:-/var/lib/mnw}"
43 49 BIN_NAME="${BIN_NAME:-makenotwork}"
44 50 SERVICE_NAME="${SERVICE_NAME:-makenotwork.service}"
45 51 SERVICE_USER="${SERVICE_USER:-deploy}"
@@ -106,6 +112,11 @@ fi
106 112 chown "$SERVICE_USER:$SERVICE_USER" "/home/$SERVICE_USER/.ssh/authorized_keys"
107 113 chmod 0600 "/home/$SERVICE_USER/.ssh/authorized_keys"
108 114 install -d -o "$SERVICE_USER" -g "$SERVICE_USER" -m 0755 "$DEPLOY_ROOT" "$DEPLOY_ROOT/releases"
115 + # FHS sidecars: /etc/mnw owned root:service (so the service can read the env
116 + # file but not edit it); /var/lib/mnw owned service:service for runtime
117 + # state (backups, scan-spool, anything else the binary writes).
118 + install -d -o root -g "$SERVICE_USER" -m 0750 "$ETC_DIR"
119 + install -d -o "$SERVICE_USER" -g "$SERVICE_USER" -m 0750 "$STATE_DIR"
109 120
110 121 log "6/8 sudoers (systemctl on $SERVICE_NAME for $SERVICE_USER)"
111 122 cat > "/etc/sudoers.d/${SERVICE_USER}-mnw" <<EOF
@@ -124,9 +135,14 @@ After=network.target
124 135 Type=simple
125 136 User=$SERVICE_USER
126 137 Group=$SERVICE_USER
127 - WorkingDirectory=$DEPLOY_ROOT
138 + WorkingDirectory=$DEPLOY_ROOT/current
128 139 ExecStart=$DEPLOY_ROOT/current/$BIN_NAME
129 - EnvironmentFile=-$DEPLOY_ROOT/.env
140 + # Secrets live outside the release dir so they survive deploys + rollbacks.
141 + # Bootstrap creates ETC_DIR but not ENV_FILE — operator populates that.
142 + EnvironmentFile=$ENV_FILE
143 + # Runtime state (backups, spool, etc.) on FHS path; never inside the release
144 + # dir or the deploy will erase it.
145 + ReadWritePaths=$STATE_DIR
130 146 Restart=on-failure
131 147 RestartSec=30
132 148 # Exit 2 = migration failure (MNW server convention). Don't restart;
@@ -160,5 +176,5 @@ log "Done. Next steps for the operator:"
160 176 echo " - tailscale up (auth this node to the tailnet)"
161 177 echo " - DNS A/AAAA records for the domain you'll serve"
162 178 echo " - Install /etc/caddy/Caddyfile + Cloudflare Origin CA cert + key"
163 - echo " - postgres: create role+db, drop secrets into $DEPLOY_ROOT/.env"
179 + echo " - postgres: create role+db, drop secrets into $ENV_FILE (chmod 0640, chown root:$SERVICE_USER)"
164 180 echo " - Run a sando deploy from the Sando host: POST /promote/<tier>"
@@ -209,10 +209,15 @@ else
209 209 log "12/13 skipping sandod build (BUILD_SANDOD=0)"
210 210 fi
211 211
212 - log "13/13 bare mnw.git"
212 + log "13/13 bare mnw.git + post-receive hook"
213 213 if [[ ! -d "$SANDO_HOME/mnw.git" ]]; then
214 214 sudo -u "$SANDO_USER" git init --bare --initial-branch=main "$SANDO_HOME/mnw.git" >/dev/null
215 215 fi
216 + # Install (or refresh) the post-receive hook that POSTs to sandod on push.
217 + # Sourced from the repo so updates here propagate to the next bootstrap run.
218 + install -m 0755 -o "$SANDO_USER" -g "$SANDO_USER" \
219 + "$SCRIPT_DIR/post-receive" \
220 + "$SANDO_HOME/mnw.git/hooks/post-receive"
216 221
217 222 # Enable services last so a partial bootstrap doesn't leave a service trying
218 223 # to start against an incomplete environment.
@@ -0,0 +1,37 @@
1 + #!/usr/bin/env bash
2 + # Sando bare-repo post-receive hook.
3 + #
4 + # Installed at <bare repo>/hooks/post-receive by bootstrap-sandod-host.sh.
5 + # Reads each updated ref from stdin (old new ref) and posts the new sha to
6 + # the daemon's /rebuild endpoint. Only the configured deploy branch is
7 + # acted on; pushes to other refs are silently ignored.
8 +
9 + set -euo pipefail
10 +
11 + # Source the operator's sando.env so $SANDO_DAEMON resolves to the tailnet
12 + # listener, not the 127.0.0.1 default. Hooks run in the ssh push context with
13 + # no environment, so this source step is load-bearing. Tolerate missing file
14 + # so the hook still works in a dev clone.
15 + if [[ -f /etc/sando/sando.env ]]; then
16 + # shellcheck disable=SC1091
17 + source /etc/sando/sando.env
18 + fi
19 +
20 + DAEMON_URL="${SANDO_DAEMON:-http://127.0.0.1:7766}"
21 + DEPLOY_BRANCH="${SANDO_BRANCH:-main}"
22 +
23 + while read -r oldsha newsha ref; do
24 + if [[ "$ref" != "refs/heads/$DEPLOY_BRANCH" ]]; then
25 + continue
26 + fi
27 + if [[ "$newsha" == "0000000000000000000000000000000000000000" ]]; then
28 + # Branch deletion; nothing to build.
29 + continue
30 + fi
31 + echo "sando: posting rebuild for $newsha"
32 + curl --silent --show-error --fail \
33 + -X POST "$DAEMON_URL/rebuild" \
34 + -H 'Content-Type: application/json' \
35 + -d "{\"sha\":\"$newsha\"}" \
36 + || echo "sando: rebuild trigger failed; check daemon"
37 + done
@@ -9,3 +9,30 @@ release_root = "/srv/sando"
9 9 scratch_db_url = "postgres:///sando_scratch?host=/var/run/postgresql"
10 10 bin_names = ["makenotwork", "mnw-admin"]
11 11 logs_root = "/srv/sando/logs"
12 +
13 + # Non-binary content shipped as part of each release. Entries may share a
14 + # `dst` or a parent dir (additive merge — the three `docs/*` entries below
15 + # build one `docs/` from three worktree sources). Sources are relative to
16 + # the worktree root; dsts are relative to <release_root>/releases/<v>/.
17 + [[release_contents]]
18 + src = "server/deploy/error-pages"
19 + dst = "error-pages"
20 +
21 + [[release_contents]]
22 + src = "server/static"
23 + dst = "static"
24 + required = true
25 +
26 + [[release_contents]]
27 + src = "server/site-docs/public"
28 + dst = "docs/public"
29 + required = true
30 +
31 + [[release_contents]]
32 + src = "server/site-docs/examples"
33 + dst = "docs/examples"
34 +
35 + [[release_contents]]
36 + src = "server/docs/business/assumptions.toml"
37 + dst = "docs/assumptions.toml"
38 + required = true
@@ -8,9 +8,9 @@
8 8 # declared but not provisioned; adding the second prod node later is a config
9 9 # edit (set provisioned = true, fill in [[tier.node]]).
10 10 #
11 - # Note: the host tier is named "mm" for legacy reasons (string identifier baked
12 - # into the schema + code). It refers to whatever machine sandod runs on —
13 - # currently pop-os, not a MakeMachine. Rename is a follow-up cleanup.
11 + # The first tier is "host" — it refers to whatever machine sandod runs on
12 + # (currently pop-os). Renamed from the legacy "mm" name in Session 1 of
13 + # the sando bundle redesign.
14 14
15 15 [repo]
16 16 bare_path = "/srv/sando/mnw.git"
@@ -25,7 +25,7 @@ local_path = "/srv/sando/backups/latest.sql.gz"
25 25
26 26 # ---- host: pop-os local pre-staging gate ----
27 27 [[tier]]
28 - name = "mm"
28 + name = "host"
29 29 provisioned = true
30 30 canary = "sequential"
31 31 gates = [