//! Build-run tracking: one `build_runs` row per `/rebuild`, updated as the //! pipeline moves through its phases, terminating in passed/failed/aborted. //! //! This is the resource that makes Sando driveable headlessly. `/state` only //! ever reflects the last *successful* deploy, so on a red pipeline a poller //! of `/state` sees stale-green for the whole build (the 0.10.2 incident). A //! `RunId` returned by `/rebuild` + `GET /runs/{id}` gives a non-TUI caller //! one pollable resource tied to the build it triggered, carrying the phase, //! the per-gate status, and — the highest-value bit — a `failure_summary` //! (first compile error / first failed gate) so the cause is in the API, not //! behind `sudo journalctl`. //! //! Terminal writes (`mark_passed`/`mark_failed`/`mark_aborted`) are guarded on //! `result = 'building'`, so whichever site settles the run first wins: a //! build-step compile error, the first red gate, or the task-level catch for //! pre-build bails. Later writes are silent no-ops. use crate::domain::{RunId, Version}; use anyhow::Result; use chrono::Utc; use serde::Serialize; use sqlx::{Row, SqlitePool}; /// In-flight sub-state. Plain strings in the DB; this enum names the values so /// call sites can't typo them. #[derive(Debug, Clone, Copy)] pub enum Phase { Fetching, Compiling, Staging, Gating, } impl Phase { pub fn as_str(self) -> &'static str { match self { Phase::Fetching => "fetching", Phase::Compiling => "compiling", Phase::Staging => "staging", Phase::Gating => "gating", } } } /// Insert a fresh `building` run for `sha` and return its id. pub async fn create(pool: &SqlitePool, sha: &str) -> Result { let id: i64 = sqlx::query_scalar( "INSERT INTO build_runs (sha, phase, result, started_at) VALUES (?, 'queued', 'building', ?) RETURNING id", ) .bind(sha) .bind(Utc::now().to_rfc3339()) .fetch_one(pool) .await?; Ok(RunId(id)) } /// Advance the in-flight phase. No-op once the run is terminal so a late /// phase write can't resurrect a finished row. pub async fn set_phase(pool: &SqlitePool, run_id: RunId, phase: Phase) -> Result<()> { sqlx::query("UPDATE build_runs SET phase = ? WHERE id = ? AND result = 'building'") .bind(phase.as_str()) .bind(run_id.0) .execute(pool) .await?; Ok(()) } /// Record the version once it's been read from the worktree's Cargo.toml. pub async fn set_version(pool: &SqlitePool, run_id: RunId, version: &Version) -> Result<()> { sqlx::query("UPDATE build_runs SET version = ? WHERE id = ? AND result = 'building'") .bind(version.to_string()) .bind(run_id.0) .execute(pool) .await?; Ok(()) } /// Settle the run green. First terminal write wins (guarded on `building`). pub async fn mark_passed(pool: &SqlitePool, run_id: RunId) -> Result<()> { sqlx::query( "UPDATE build_runs SET result = 'passed', phase = 'done', finished_at = ? WHERE id = ? AND result = 'building'", ) .bind(Utc::now().to_rfc3339()) .bind(run_id.0) .execute(pool) .await?; Ok(()) } /// Settle the run red with a human-readable cause. First terminal write wins, /// so the most specific failure (build compile error, first red gate) recorded /// before the task-level catch is the one that sticks. pub async fn mark_failed(pool: &SqlitePool, run_id: RunId, summary: &str) -> Result<()> { // Bound the stored summary — it's a headline, not the log. The full output // is at the gate's log_ref / journald. let summary: String = summary.chars().take(600).collect(); sqlx::query( "UPDATE build_runs SET result = 'failed', phase = 'done', failure_summary = ?, finished_at = ? WHERE id = ? AND result = 'building'", ) .bind(&summary) .bind(Utc::now().to_rfc3339()) .bind(run_id.0) .execute(pool) .await?; Ok(()) } /// Settle the run as superseded by a newer `/rebuild`. pub async fn mark_aborted(pool: &SqlitePool, run_id: RunId) -> Result<()> { sqlx::query( "UPDATE build_runs SET result = 'aborted', phase = 'done', failure_summary = 'superseded by a newer /rebuild', finished_at = ? WHERE id = ? AND result = 'building'", ) .bind(Utc::now().to_rfc3339()) .bind(run_id.0) .execute(pool) .await?; Ok(()) } /// One gate's status within a run view. #[derive(Debug, Serialize)] pub struct RunGateView { pub kind: String, /// `'passed' | 'failed' | 'blocked'` or NULL while in-flight. pub status: Option, /// Relative path under `cfg.logs_root` for the full byte stream. pub log_ref: Option, } /// The `GET /runs/{id}` payload. #[derive(Debug, Serialize)] pub struct RunView { pub run_id: i64, pub sha: String, pub version: Option, pub phase: String, /// `'building' | 'passed' | 'failed' | 'aborted'`. pub result: String, pub started_at: String, pub finished_at: Option, /// Headline cause when `result = 'failed'`: first compile error or first /// red gate. NULL otherwise. pub failure_summary: Option, /// Gates run on the host tier for this run's version, latest row per kind. /// Empty until the run reaches a version + the gating phase. pub gates: Vec, } /// Load a run plus its host-tier gate statuses. `None` if the id is unknown. pub async fn get(pool: &SqlitePool, run_id: RunId) -> Result> { let Some(row) = sqlx::query( "SELECT id, sha, version, phase, result, started_at, finished_at, failure_summary FROM build_runs WHERE id = ?", ) .bind(run_id.0) .fetch_optional(pool) .await? else { return Ok(None); }; let version: Option = row.get("version"); // Gates are keyed by (tier, version); a build run drives the `host` tier. // Latest row per gate_kind, matching `/state`'s per-tier query shape. let gates: Vec = if let Some(ver) = version.as_deref() { sqlx::query( "SELECT gate_kind, status, log_ref FROM gate_runs g WHERE tier = 'host' AND version = ?1 AND id = (SELECT MAX(id) FROM gate_runs WHERE tier = 'host' AND version = ?1 AND gate_kind = g.gate_kind) ORDER BY gate_kind", ) .bind(ver) .fetch_all(pool) .await? .into_iter() .map(|gr| RunGateView { kind: gr.get("gate_kind"), status: gr.get("status"), log_ref: gr.get("log_ref"), }) .collect() } else { Vec::new() }; Ok(Some(RunView { run_id: row.get("id"), sha: row.get("sha"), version, phase: row.get("phase"), result: row.get("result"), started_at: row.get("started_at"), finished_at: row.get("finished_at"), failure_summary: row.get("failure_summary"), gates, })) } /// Compact view of the latest build run for `/state`'s liveness line. #[derive(Debug, Serialize)] pub struct BuildSummary { pub run_id: i64, pub sha: String, pub version: Option, pub phase: String, pub result: String, pub failure_summary: Option, /// Seconds from start to finish (or to now while building). Lets a /// `/state` poller show "building , phase=, elapsed Ns" instead of /// a version frozen at the last success for the whole ~10-min build. pub elapsed_s: i64, } /// The most recent build run, for `/state`. `None` until the first `/rebuild`. pub async fn latest_summary(pool: &SqlitePool) -> Result> { let Some(row) = sqlx::query( "SELECT id, sha, version, phase, result, failure_summary, started_at, finished_at FROM build_runs ORDER BY id DESC LIMIT 1", ) .fetch_optional(pool) .await? else { return Ok(None); }; let started_at: String = row.get("started_at"); let finished_at: Option = row.get("finished_at"); Ok(Some(BuildSummary { run_id: row.get("id"), sha: row.get("sha"), version: row.get("version"), phase: row.get("phase"), result: row.get("result"), failure_summary: row.get("failure_summary"), elapsed_s: elapsed_seconds(&started_at, finished_at.as_deref()), })) } /// Seconds between an rfc3339 `started_at` and (`finished_at` or now), clamped /// at 0. A parse failure yields 0 rather than erroring the whole `/state` call. fn elapsed_seconds(started_at: &str, finished_at: Option<&str>) -> i64 { let Ok(start) = chrono::DateTime::parse_from_rfc3339(started_at) else { return 0; }; let end = match finished_at { Some(f) => chrono::DateTime::parse_from_rfc3339(f) .map(|d| d.with_timezone(&Utc)) .unwrap_or_else(|_| Utc::now()), None => Utc::now(), }; (end - start.with_timezone(&Utc)).num_seconds().max(0) } /// The summary of the first failed gate for `version` on the host tier, if /// any — used by the build pipeline to populate `failure_summary` when /// `run_all` reports a red pipeline. Reads the typed `outcome_json` so the /// stored headline matches what the TUI renders. pub async fn first_failed_gate_summary(pool: &SqlitePool, version: &Version) -> Option { let row = sqlx::query( "SELECT gate_kind, outcome_json FROM gate_runs WHERE tier = 'host' AND version = ? AND status = 'failed' ORDER BY id ASC LIMIT 1", ) .bind(version.to_string()) .fetch_optional(pool) .await .ok() .flatten()?; let kind: String = row.get("gate_kind"); let outcome_json: Option = row.get("outcome_json"); let summary = outcome_json .and_then(|s| serde_json::from_str::(&s).ok()) .map(|o| match o.status { crate::outcome::GateStatus::Failed { failure } => failure.summary(), other => format!("{:?}", other), }) .unwrap_or_else(|| "gate failed".to_string()); Some(format!("{kind}: {summary}")) } #[cfg(test)] mod tests { use super::*; use sqlx::sqlite::SqlitePoolOptions; async fn pool() -> SqlitePool { let pool = SqlitePoolOptions::new() .max_connections(1) .connect("sqlite::memory:") .await .unwrap(); crate::db::migrate(&pool).await.unwrap(); pool } #[tokio::test] async fn create_then_get_roundtrips_building() { let pool = pool().await; let id = create(&pool, "abc1234").await.unwrap(); let v = get(&pool, id).await.unwrap().expect("run exists"); assert_eq!(v.sha, "abc1234"); assert_eq!(v.result, "building"); assert_eq!(v.phase, "queued"); assert!(v.version.is_none()); assert!(v.gates.is_empty()); assert!(v.failure_summary.is_none()); } #[tokio::test] async fn phase_and_version_advance_then_pass() { let pool = pool().await; let id = create(&pool, "abc1234").await.unwrap(); set_phase(&pool, id, Phase::Compiling).await.unwrap(); let ver: Version = "0.10.2".parse().unwrap(); set_version(&pool, id, &ver).await.unwrap(); mark_passed(&pool, id).await.unwrap(); let v = get(&pool, id).await.unwrap().unwrap(); assert_eq!(v.result, "passed"); assert_eq!(v.phase, "done"); assert_eq!(v.version.as_deref(), Some("0.10.2")); assert!(v.finished_at.is_some()); } #[tokio::test] async fn first_terminal_write_wins() { let pool = pool().await; let id = create(&pool, "abc1234").await.unwrap(); mark_failed(&pool, id, "error[E0063]: missing field user_pages_host").await.unwrap(); // A later pass attempt (e.g. the task catch racing a build-step error) // must not overwrite the recorded failure. mark_passed(&pool, id).await.unwrap(); // And a second failure summary doesn't clobber the first. mark_failed(&pool, id, "something else").await.unwrap(); let v = get(&pool, id).await.unwrap().unwrap(); assert_eq!(v.result, "failed"); assert_eq!(v.failure_summary.as_deref(), Some("error[E0063]: missing field user_pages_host")); } #[tokio::test] async fn phase_write_after_terminal_is_noop() { let pool = pool().await; let id = create(&pool, "abc1234").await.unwrap(); mark_passed(&pool, id).await.unwrap(); set_phase(&pool, id, Phase::Gating).await.unwrap(); let v = get(&pool, id).await.unwrap().unwrap(); assert_eq!(v.phase, "done", "a late phase write must not move a finished run"); } #[test] fn elapsed_seconds_uses_finished_when_present() { // Both timestamps present → exact span, no wall-clock dependency. let s = elapsed_seconds("2026-06-13T00:00:00Z", Some("2026-06-13T00:02:05Z")); assert_eq!(s, 125); // Unparseable start → 0, never a panic / negative. assert_eq!(elapsed_seconds("not-a-date", None), 0); } #[tokio::test] async fn latest_summary_reports_most_recent_run() { let pool = pool().await; assert!(latest_summary(&pool).await.unwrap().is_none()); let _old = create(&pool, "old1234").await.unwrap(); let new = create(&pool, "new5678").await.unwrap(); set_phase(&pool, new, Phase::Compiling).await.unwrap(); let sum = latest_summary(&pool).await.unwrap().expect("a run exists"); assert_eq!(sum.run_id, new.0); assert_eq!(sum.sha, "new5678"); assert_eq!(sum.phase, "compiling"); assert_eq!(sum.result, "building"); } #[tokio::test] async fn get_unknown_id_is_none() { let pool = pool().await; assert!(get(&pool, RunId(999)).await.unwrap().is_none()); } #[tokio::test] async fn failure_summary_is_bounded() { let pool = pool().await; let id = create(&pool, "abc1234").await.unwrap(); mark_failed(&pool, id, &"x".repeat(5_000)).await.unwrap(); let v = get(&pool, id).await.unwrap().unwrap(); assert!(v.failure_summary.unwrap().len() <= 600); } }