Skip to main content

max / makenotwork

sando: close Phase B — drop gate_runs shadow columns

Migration 005 drops `gate_runs.passed` and `gate_runs.detail`. The shadow window after migration 003's typed-outcomes landing has elapsed and every reader now consults `status` / `outcome_json`.

Runner-side: gates.rs no longer double-writes shadow columns; Event::GateDone drops its `passed` field; manual_confirm's lookup keys on `status='passed'`.

Routes: GateView slimmed to typed fields, unsatisfied_gates rewired to key off `status` (NULL or non-passed counts as unsatisfied), POST /confirm inserts typed `outcome_json`.

TUI's GateView drops the `passed` shim. outcome::legacy_passed is renamed to is_passed and legacy_detail goes away.

77 daemon + 19 TUI tests passing.
Author: Max Johnson <me@maxj.phd> · 2026-06-04 02:26 UTC
Commit: 2ca0e4b8dec763dd201736156e2e5d1ed51e4729
Parent: 4694f44
7 files changed, +70 insertions, -119 deletions
@@ -1614,7 +1614,7 @@ checksum = "9774ba4a74de5f7b1c1451ed6cd5285a32eddb5cccb8cc655a4e50009e06477f"
1614 1614
1615 1615 [[package]]
1616 1616 name = "sando-daemon"
1617 - version = "0.1.0"
1617 + version = "0.2.0"
1618 1618 dependencies = [
1619 1619 "anyhow",
1620 1620 "axum",
@@ -0,0 +1,18 @@
1 + -- Drop the shadow `passed` and `detail` columns on `gate_runs`.
2 + --
3 + -- These were retained for one release after migration 003 (per the
4 + -- observability plan's "shadow for one release" decision) so any consumer
5 + -- still on the pre-typed schema kept working through a rollback window.
6 + -- That window has elapsed: prod has been running typed outcomes since
7 + -- 0.9.x, and every reader (sandod's /state, TUI, gate runners) now
8 + -- consults `status` / `outcome_json`.
9 + --
10 + -- After this migration:
11 + -- - `gate_runs.status` is the high-level word for filtering
12 + -- - `gate_runs.outcome_json` is the source of truth (typed GateOutcome)
13 + -- - `gate_runs.log_ref` points to on-disk stdout/stderr
14 + --
15 + -- Routes, runners, and the TUI no longer reference the dropped columns.
16 +
17 + ALTER TABLE gate_runs DROP COLUMN passed;
18 + ALTER TABLE gate_runs DROP COLUMN detail;
@@ -53,14 +53,13 @@ pub enum Event {
53 53 seq: u32,
54 54 text: String,
55 55 },
56 - /// `passed` is a shadow field. `outcome` is the source of truth and
57 - /// carries classification, blocker variants, and the log_ref.
56 + /// `outcome` carries classification, blocker variants, and the log_ref —
57 + /// consumers should read `outcome.status` to decide pass/fail/blocked.
58 58 GateDone {
59 59 run_id: GateRunId,
60 60 tier: TierId,
61 61 version: Version,
62 62 gate: GateKind,
63 - passed: bool,
64 63 outcome: GateOutcome,
65 64 },
66 65 DeployStart { tier: TierId, node: NodeId, version: Version },
@@ -68,20 +68,14 @@ pub async fn run(ctx: &GateCtx, gate: &Gate) -> Result<GateOutcome> {
68 68 legacy_detail: Some(format!("gate runner errored: {e}")),
69 69 }));
70 70
71 - // Migration 003 added status/outcome_json/log_ref. Until migration 004
72 - // drops them, we double-write the shadow `passed` and `detail` columns
73 - // so any consumer still reading the old schema keeps working.
74 71 let outcome_json = serde_json::to_string(&outcome)
75 72 .unwrap_or_else(|e| format!("{{\"_serialize_error\":{e:?}}}"));
76 73 sqlx::query(
77 74 "UPDATE gate_runs
78 - SET finished_at = ?, passed = ?, detail = ?,
79 - status = ?, outcome_json = ?, log_ref = ?
75 + SET finished_at = ?, status = ?, outcome_json = ?, log_ref = ?
80 76 WHERE id = ?",
81 77 )
82 78 .bind(Utc::now().to_rfc3339())
83 - .bind(outcome.legacy_passed() as i64)
84 - .bind(outcome.legacy_detail())
85 79 .bind(outcome.status_str())
86 80 .bind(&outcome_json)
87 81 .bind(outcome.log_ref.as_ref().map(|l| l.as_str()))
@@ -98,7 +92,6 @@ pub async fn run(ctx: &GateCtx, gate: &Gate) -> Result<GateOutcome> {
98 92 tier: ctx.tier.clone(),
99 93 version: ctx.version.clone(),
100 94 gate: kind,
101 - passed: outcome.legacy_passed(),
102 95 outcome: outcome.clone(),
103 96 });
104 97
@@ -114,7 +107,7 @@ pub async fn run_all(ctx: &GateCtx, gates: &[Gate]) -> Result<bool> {
114 107 let mut all_ok = true;
115 108 for g in gates {
116 109 let o = run(ctx, g).await?;
117 - if !o.legacy_passed() {
110 + if !o.is_passed() {
118 111 all_ok = false;
119 112 }
120 113 }
@@ -477,12 +470,13 @@ async fn burn_in(ctx: &GateCtx, hours: u32) -> Result<GateOutcome> {
477 470 }
478 471
479 472 async fn manual_confirm(ctx: &GateCtx) -> Result<GateOutcome> {
480 - // Pass iff a row in gate_runs exists with passed=1 for this (tier, version, manual_confirm)
481 - // that was inserted out-of-band by an operator action. Since the harness inserts the
482 - // in-flight row itself, look for a prior confirmation row.
473 + // Pass iff a row in gate_runs exists with status='passed' for this
474 + // (tier, version, manual_confirm) that was inserted out-of-band by an
475 + // operator action. Since the harness inserts the in-flight row itself,
476 + // look for a prior confirmation row.
483 477 let prior_at: Option<String> = sqlx::query_scalar(
484 478 "SELECT finished_at FROM gate_runs
485 - WHERE tier = ? AND version = ? AND gate_kind = 'manual_confirm' AND passed = 1
479 + WHERE tier = ? AND version = ? AND gate_kind = 'manual_confirm' AND status = 'passed'
486 480 ORDER BY id DESC LIMIT 1",
487 481 )
488 482 .bind(&ctx.tier)
@@ -537,10 +531,9 @@ mod tests {
537 531
538 532 /// burn_in returns a typed Blocked when the clock isn't started; the
539 533 /// runner persists status='blocked' + outcome_json (the json carries
540 - /// blocker.kind = 'burn_in_clock_not_started'), AND the shadow `passed`
541 - /// + `detail` columns are still populated for pre-step-5 consumers.
534 + /// blocker.kind = 'burn_in_clock_not_started').
542 535 #[tokio::test]
543 - async fn burn_in_blocked_persists_typed_and_shadow_columns() {
536 + async fn burn_in_blocked_persists_typed_outcome() {
544 537 let pool = SqlitePoolOptions::new()
545 538 .max_connections(1)
546 539 .connect("sqlite::memory:")
@@ -567,17 +560,14 @@ mod tests {
567 560 };
568 561 let out = run(&ctx, &Gate::BurnIn { hours: 24 }).await.unwrap();
569 562 assert_eq!(out.status_str(), "blocked");
570 - assert!(!out.legacy_passed());
571 - assert_eq!(out.legacy_detail(), "burn-in clock not started");
563 + assert!(!out.is_passed());
572 564
573 565 // Read the persisted row.
574 - let row: (Option<i64>, Option<String>, Option<String>, Option<String>) = sqlx::query_as(
575 - "SELECT passed, detail, status, outcome_json FROM gate_runs ORDER BY id DESC LIMIT 1",
566 + let row: (Option<String>, Option<String>) = sqlx::query_as(
567 + "SELECT status, outcome_json FROM gate_runs ORDER BY id DESC LIMIT 1",
576 568 ).fetch_one(&pool).await.unwrap();
577 - assert_eq!(row.0, Some(0), "shadow `passed` = 0 for blocked");
578 - assert_eq!(row.1.as_deref(), Some("burn-in clock not started"), "shadow `detail` matches summary");
579 - assert_eq!(row.2.as_deref(), Some("blocked"), "typed status");
580 - let json: serde_json::Value = serde_json::from_str(row.3.as_deref().unwrap()).unwrap();
569 + assert_eq!(row.0.as_deref(), Some("blocked"), "typed status");
570 + let json: serde_json::Value = serde_json::from_str(row.1.as_deref().unwrap()).unwrap();
581 571 assert_eq!(json["status"]["kind"], "blocked");
582 572 assert_eq!(json["status"]["blocker"]["kind"], "burn_in_clock_not_started");
583 573 }
@@ -41,23 +41,13 @@ impl GateOutcome {
41 41 self
42 42 }
43 43
44 - /// Shadow column: until migration 004 drops `passed`, every write
45 - /// also populates the legacy boolean. `Blocked` reads as failing
46 - /// because gates that are blocked have not satisfied the pipeline.
47 - pub fn legacy_passed(&self) -> bool {
44 + /// True iff the gate ran and succeeded. `Blocked` is not passing:
45 + /// the gate has not satisfied the pipeline, the operator just owes
46 + /// it a precondition.
47 + pub fn is_passed(&self) -> bool {
48 48 matches!(self.status, GateStatus::Passed { .. })
49 49 }
50 50
51 - /// Shadow column: human-readable single-line summary for the legacy
52 - /// `gate_runs.detail` column. Goes away when migration 004 drops it.
53 - pub fn legacy_detail(&self) -> String {
54 - match &self.status {
55 - GateStatus::Passed { note } => note.summary(),
56 - GateStatus::Failed { failure } => failure.summary(),
57 - GateStatus::Blocked { blocker } => blocker.summary(),
58 - }
59 - }
60 -
61 51 /// The high-level status word for the `gate_runs.status` column.
62 52 pub fn status_str(&self) -> &'static str {
63 53 match self.status {
@@ -322,51 +312,18 @@ mod tests {
322 312 let o = GateOutcome::passed(PassNote::TestsPassed { duration_s: 42 });
323 313 let s = serde_json::to_string(&o).unwrap();
324 314 let back: GateOutcome = serde_json::from_str(&s).unwrap();
325 - assert!(back.legacy_passed());
315 + assert!(back.is_passed());
326 316 assert_eq!(back.status_str(), "passed");
327 317 }
328 318
329 319 #[test]
330 - fn blocked_legacy_passed_is_false() {
320 + fn blocked_is_not_passed() {
331 321 let o = GateOutcome::blocked(GateBlocker::BurnInClockNotStarted);
332 - assert!(!o.legacy_passed());
322 + assert!(!o.is_passed());
333 323 assert_eq!(o.status_str(), "blocked");
334 324 }
335 325
336 326 #[test]
337 - fn legacy_detail_summaries_match_pre_typed_strings() {
338 - // The pre-typed gate runner wrote specific prose for each
339 - // pass/blocker case. The summary impl is what populates the
340 - // shadow `detail` column during the migration-003 shadow period
341 - // so old consumers (TUI, /state) keep working unchanged.
342 - assert_eq!(
343 - GateOutcome::blocked(GateBlocker::BurnInClockNotStarted).legacy_detail(),
344 - "burn-in clock not started",
345 - );
346 - assert_eq!(
347 - GateOutcome::blocked(GateBlocker::ScratchDbUrlUnset).legacy_detail(),
348 - "scratch_db_url unset in daemon config",
349 - );
350 - assert_eq!(
351 - GateOutcome::blocked(GateBlocker::NoBackupAvailable).legacy_detail(),
352 - "no backup fetched; call /backup/fetch first",
353 - );
354 - assert_eq!(
355 - GateOutcome::blocked(GateBlocker::AwaitingOperatorConfirmation).legacy_detail(),
356 - "waiting on operator confirmation",
357 - );
358 - assert_eq!(
359 - GateOutcome::blocked(GateBlocker::BurnInRemaining { hours_remaining: 47, hours_total: 168 })
360 - .legacy_detail(),
361 - "47 hours remaining of 168",
362 - );
363 - assert_eq!(
364 - GateOutcome::passed(PassNote::StayedUp { duration_s: 3 }).legacy_detail(),
365 - "stayed up for 3s",
366 - );
367 - }
368 -
369 - #[test]
370 327 fn log_ref_construction_matches_disk_layout() {
371 328 let v: Version = "0.9.6".parse().unwrap();
372 329 let lr = LogRef::new(&v, GateKind::CargoTest);
@@ -43,16 +43,9 @@ struct TierView {
43 43 #[derive(Serialize)]
44 44 struct GateView {
45 45 kind: String,
46 - /// Shadow field — kept until step 5 (TUI typed-event handling) so
47 - /// the current TUI keeps rendering pass/fail without consulting
48 - /// `outcome`. Goes away in migration 004.
49 - passed: Option<bool>,
50 46 finished_at: Option<String>,
51 - /// Shadow field — superseded by `outcome.status`.
52 - detail: Option<String>,
53 - /// `'passed' | 'failed' | 'blocked'` or NULL while in-flight. The
54 - /// TUI can rely on this to choose green/red/yellow rendering once
55 - /// step 5 lands; until then it falls back to `passed`.
47 + /// `'passed' | 'failed' | 'blocked'` or NULL while in-flight. The TUI
48 + /// uses this to choose green/red/yellow rendering.
56 49 status: Option<String>,
57 50 /// Full typed `GateOutcome` as a JSON object, when present.
58 51 /// Deserialized lazily by the consumer; sandod doesn't re-parse it.
@@ -102,7 +95,7 @@ async fn get_state(State(s): State<AppState>) -> Result<Json<StateView>> {
102 95 let gates: Vec<GateView> = if let Some(ver) = gate_version.as_ref() {
103 96 // Most recent gate_runs row per gate_kind for (tier, ver).
104 97 sqlx::query(
105 - "SELECT gate_kind, passed, finished_at, detail, status, outcome_json, log_ref
98 + "SELECT gate_kind, finished_at, status, outcome_json, log_ref
106 99 FROM gate_runs g
107 100 WHERE tier = ?1 AND version = ?2
108 101 AND id = (SELECT MAX(id) FROM gate_runs
@@ -116,9 +109,7 @@ async fn get_state(State(s): State<AppState>) -> Result<Json<StateView>> {
116 109 .into_iter()
117 110 .map(|gr| GateView {
118 111 kind: gr.get("gate_kind"),
119 - passed: gr.get::<Option<i64>, _>("passed").map(|v| v != 0),
120 112 finished_at: gr.get("finished_at"),
121 - detail: gr.get("detail"),
122 113 status: gr.get("status"),
123 114 outcome: gr.get::<Option<String>, _>("outcome_json")
124 115 .and_then(|s| serde_json::from_str(&s).ok()),
@@ -322,8 +313,8 @@ async fn unsatisfied_gates(
322 313 // the DB also captures it implicitly via gate_runs rows. Simplest correct
323 314 // answer: re-read from topology via tier name; the caller has it.
324 315 // For now we inspect the latest gate_runs.
325 - let rows: Vec<(String, Option<i64>)> = sqlx::query_as(
326 - "SELECT gate_kind, passed FROM gate_runs g
316 + let rows: Vec<(String, Option<String>)> = sqlx::query_as(
317 + "SELECT gate_kind, status FROM gate_runs g
327 318 WHERE tier = ?1 AND version = ?2
328 319 AND id = (SELECT MAX(id) FROM gate_runs
329 320 WHERE tier = ?1 AND version = ?2 AND gate_kind = g.gate_kind)",
@@ -331,11 +322,13 @@ async fn unsatisfied_gates(
331 322 .bind(tier).bind(version)
332 323 .fetch_all(pool).await.map_err(crate::error::Error::Db)?;
333 324 let mut bad = Vec::new();
334 - for (kind, passed) in rows {
325 + for (kind, status) in rows {
335 326 if hotfix && kind == "burn_in" {
336 327 continue;
337 328 }
338 - if passed.unwrap_or(0) == 0 {
329 + // NULL status (in-flight) and any non-passed status both count as
330 + // unsatisfied; only an explicit 'passed' clears the gate.
331 + if status.as_deref() != Some("passed") {
339 332 bad.push(kind);
340 333 }
341 334 }
@@ -489,11 +482,16 @@ async fn confirm(
489 482 .map_err(|e| crate::error::Error::Other(anyhow::anyhow!(e)))?;
490 483
491 484 let now = chrono::Utc::now().to_rfc3339();
485 + let outcome = crate::outcome::GateOutcome::passed(
486 + crate::outcome::PassNote::OperatorConfirmed { at: chrono::Utc::now() },
487 + );
488 + let outcome_json = serde_json::to_string(&outcome)
489 + .map_err(|e| crate::error::Error::Other(anyhow::anyhow!(e)))?;
492 490 sqlx::query(
493 - "INSERT INTO gate_runs (version, tier, gate_kind, started_at, finished_at, passed, detail)
494 - VALUES (?, ?, 'manual_confirm', ?, ?, 1, 'operator confirmed via POST /confirm')",
491 + "INSERT INTO gate_runs (version, tier, gate_kind, started_at, finished_at, status, outcome_json)
492 + VALUES (?, ?, 'manual_confirm', ?, ?, 'passed', ?)",
495 493 )
496 - .bind(&version).bind(&target.name).bind(&now).bind(&now)
494 + .bind(&version).bind(&target.name).bind(&now).bind(&now).bind(&outcome_json)
497 495 .execute(&s.pool).await.map_err(crate::error::Error::Db)?;
498 496
499 497 tracing::info!(tier = %tier, version = %version, "manual_confirm recorded");
@@ -692,11 +690,12 @@ mod tests {
692 690 }
693 691
694 692 async fn insert_gate(pool: &SqlitePool, tier: &str, version: &str, kind: &str, passed: i64) {
693 + let status = if passed == 1 { "passed" } else { "failed" };
695 694 sqlx::query(
696 - "INSERT INTO gate_runs (version, tier, gate_kind, started_at, finished_at, passed) \
695 + "INSERT INTO gate_runs (version, tier, gate_kind, started_at, finished_at, status) \
697 696 VALUES (?, ?, ?, datetime('now'), datetime('now'), ?)",
698 697 )
699 - .bind(version).bind(tier).bind(kind).bind(passed)
698 + .bind(version).bind(tier).bind(kind).bind(status)
700 699 .execute(pool).await.unwrap();
701 700 }
702 701
@@ -771,8 +770,8 @@ mod tests {
771 770 }
772 771
773 772 #[tokio::test]
774 - async fn unsatisfied_gates_null_passed_is_treated_as_failing() {
775 - // An in-flight gate (started_at set, finished_at + passed NULL)
773 + async fn unsatisfied_gates_null_status_is_treated_as_failing() {
774 + // An in-flight gate (started_at set, finished_at + status NULL)
776 775 // should NOT be treated as green. Otherwise a race could promote
777 776 // before the gate concludes.
778 777 let pool = fresh_pool().await;
@@ -837,7 +836,7 @@ mod tests {
837 836
838 837 // A passing gate_runs row was inserted.
839 838 let count: (i64,) = sqlx::query_as(
840 - "SELECT COUNT(*) FROM gate_runs WHERE tier='a' AND gate_kind='manual_confirm' AND passed=1",
839 + "SELECT COUNT(*) FROM gate_runs WHERE tier='a' AND gate_kind='manual_confirm' AND status='passed'",
841 840 )
842 841 .fetch_one(&state.pool)
843 842 .await
@@ -64,10 +64,6 @@ struct TierView {
64 64 #[derive(Clone, Debug, Deserialize)]
65 65 struct GateView {
66 66 kind: String,
67 - /// Shadow field — still populated by sandod, deserialized for
68 - /// compatibility. Step 5+ TUI prefers `status`.
69 - #[allow(dead_code)]
70 - passed: Option<bool>,
71 67 #[allow(dead_code)]
72 68 finished_at: Option<String>,
73 69 /// `'passed' | 'failed' | 'blocked'` or NULL. NULL = in-flight.
@@ -332,21 +328,13 @@ fn failure_short(f: &GateFailure) -> String {
332 328 }
333 329
334 330 /// Pick the one-word mark + color for a gate row in the tiers table.
335 - /// `status` is the typed column from migration 003. We accept `None` as
336 - /// "in-flight" rather than treating it as failure, which is what the
337 - /// shadow `passed` column would have done.
331 + /// `status` is NULL while the gate is in-flight.
338 332 fn gate_mark_and_style(g: &GateView) -> (&'static str, Style) {
339 333 match g.status.as_deref() {
340 334 Some("passed") => ("ok", Style::default().fg(Color::Green)),
341 335 Some("failed") => ("FAIL", Style::default().fg(Color::Red).add_modifier(Modifier::BOLD)),
342 336 Some("blocked") => ("blocked", Style::default().fg(Color::Yellow)),
343 - Some(_) | None => match g.passed {
344 - // Legacy fallback during the shadow period: rows from before
345 - // 003 won't have status set but will have passed.
346 - Some(true) => ("ok", Style::default().fg(Color::Green)),
347 - Some(false) => ("FAIL", Style::default().fg(Color::Red).add_modifier(Modifier::BOLD)),
348 - None => ("...", Style::default().fg(Color::DarkGray)),
349 - },
337 + Some(_) | None => ("...", Style::default().fg(Color::DarkGray)),
350 338 }
351 339 }
352 340
@@ -698,7 +686,7 @@ mod tests {
698 686 fn format_event_gate_done_passed_uses_pass_note() {
699 687 // Step 5+: GateDone carries a typed `outcome` and `run_id`.
700 688 let pass = format_event(
701 - r#"{"at":"2026-06-01T02:55:39Z","kind":"gate_done","run_id":17,"tier":"mm","version":"0.8.12","gate":"cargo_test","passed":true,"outcome":{"status":{"kind":"passed","note":{"kind":"tests_passed","duration_s":42}}}}"#,
689 + r#"{"at":"2026-06-01T02:55:39Z","kind":"gate_done","run_id":17,"tier":"mm","version":"0.8.12","gate":"cargo_test","outcome":{"status":{"kind":"passed","note":{"kind":"tests_passed","duration_s":42}}}}"#,
702 690 )
703 691 .unwrap();
704 692 assert!(pass.contains("[17]"), "got: {pass}");
@@ -709,7 +697,7 @@ mod tests {
709 697 #[test]
710 698 fn format_event_gate_done_failed_uses_failure_variant() {
711 699 let fail = format_event(
712 - r#"{"at":"2026-06-01T02:55:39Z","kind":"gate_done","run_id":18,"tier":"mm","version":"0.8.12","gate":"cargo_test","passed":false,"outcome":{"status":{"kind":"failed","failure":{"kind":"cargo_test","failed_count":3,"first_failed":"foo::bar"}}}}"#,
700 + r#"{"at":"2026-06-01T02:55:39Z","kind":"gate_done","run_id":18,"tier":"mm","version":"0.8.12","gate":"cargo_test","outcome":{"status":{"kind":"failed","failure":{"kind":"cargo_test","failed_count":3,"first_failed":"foo::bar"}}}}"#,
713 701 )
714 702 .unwrap();
715 703 assert!(fail.contains("FAIL"), "got: {fail}");
@@ -723,7 +711,7 @@ mod tests {
723 711 // clock is not a defect, so the TUI renders it distinctly from a
724 712 // genuine failure. No "FAIL" string.
725 713 let blocked = format_event(
726 - r#"{"at":"2026-06-01T02:55:39Z","kind":"gate_done","run_id":19,"tier":"a","version":"0.9.6","gate":"burn_in","passed":false,"outcome":{"status":{"kind":"blocked","blocker":{"kind":"burn_in_clock_not_started"}}}}"#,
714 + r#"{"at":"2026-06-01T02:55:39Z","kind":"gate_done","run_id":19,"tier":"a","version":"0.9.6","gate":"burn_in","outcome":{"status":{"kind":"blocked","blocker":{"kind":"burn_in_clock_not_started"}}}}"#,
727 715 )
728 716 .unwrap();
729 717 assert!(blocked.contains("burn_in blocked"), "got: {blocked}");