//! Typed gate outcomes. //! //! Replaces the `(passed: bool, detail: Option)` pair on //! `GateOutcome`. The point is to push failure classification into the //! type itself: a `GateFailure::MigrationDrift { migration }` is what it //! says, not a string the operator has to parse. See //! `plans/observability.md` for the full argument. //! //! The variants here describe what the gate runner actually observed. //! Mapping raw process output (stderr tails, exit codes) to these //! variants is the classifier's job — `classify.rs`. use crate::domain::{GateKind, Version}; use chrono::{DateTime, Utc}; use serde::{Deserialize, Serialize}; /// A gate's result, persisted to `gate_runs.outcome_json` and emitted /// over WS in `GateDone`. #[derive(Debug, Clone, Serialize, Deserialize)] pub struct GateOutcome { pub status: GateStatus, /// Relative path under `cfg.logs_root` to the persisted stdout/stderr /// for this run. `None` for gates that don't produce process output /// (burn_in, manual_confirm). #[serde(skip_serializing_if = "Option::is_none", default)] pub log_ref: Option, } impl GateOutcome { pub fn passed(note: PassNote) -> Self { Self { status: GateStatus::Passed { note }, log_ref: None } } pub fn failed(failure: GateFailure) -> Self { Self { status: GateStatus::Failed { failure }, log_ref: None } } pub fn blocked(blocker: GateBlocker) -> Self { Self { status: GateStatus::Blocked { blocker }, log_ref: None } } pub fn with_log_ref(mut self, log_ref: LogRef) -> Self { self.log_ref = Some(log_ref); self } /// True iff the gate ran and succeeded. `Blocked` is not passing: /// the gate has not satisfied the pipeline, the operator just owes /// it a precondition. pub fn is_passed(&self) -> bool { matches!(self.status, GateStatus::Passed { .. }) } /// The high-level status word for the `gate_runs.status` column. pub fn status_str(&self) -> &'static str { match self.status { GateStatus::Passed { .. } => "passed", GateStatus::Failed { .. } => "failed", GateStatus::Blocked { .. } => "blocked", } } } #[derive(Debug, Clone, Serialize, Deserialize)] #[serde(tag = "kind", rename_all = "snake_case")] pub enum GateStatus { /// Gate ran and succeeded. The note carries gate-specific evidence /// (e.g. `TestsPassed { duration_s }`). Passed { note: PassNote }, /// Gate ran and failed. Two-layer tag: outer `kind = "failed"`, inner /// `failure.kind` names the classified variant. If no classifier /// matched, that's `unclassified`. Failed { failure: GateFailure }, /// Gate cannot run yet. Burn-in clock not started, scratch DB not /// configured, backup missing — pre-conditions the operator can fix /// out of band. Distinguished from `Failed` so the TUI can render /// these yellow rather than red. Blocked { blocker: GateBlocker }, } #[derive(Debug, Clone, Serialize, Deserialize)] #[serde(tag = "kind", rename_all = "snake_case")] pub enum PassNote { /// `boot_smoke` — the binary stayed up for the smoke window. StayedUp { duration_s: u32 }, /// `burn_in` — the configured number of hours have elapsed since /// the gate's clock started. BurnInElapsed { hours: u32 }, /// `migration_dry_run` — scratch DB restored from `backup_path` and /// every migration ran without error. Migrated { backup_path: String }, /// `cargo_test` — `cargo test --release` exited 0. TestsPassed { duration_s: u32 }, /// `manual_confirm` — an operator inserted a passing row out-of-band. OperatorConfirmed { at: DateTime }, /// Legacy rows backfilled from the pre-typed schema. Carries the /// original `detail` string so nothing is lost. Legacy { text: String }, } impl PassNote { pub fn summary(&self) -> String { match self { PassNote::StayedUp { duration_s } => format!("stayed up for {duration_s}s"), PassNote::BurnInElapsed { hours } => format!("{hours} hours elapsed"), PassNote::Migrated { backup_path } => format!("restored {backup_path} + migrated"), PassNote::TestsPassed { duration_s } => format!("tests passed in {duration_s}s"), PassNote::OperatorConfirmed { at } => format!("operator confirmed at {at}"), PassNote::Legacy { text } => text.clone(), } } } #[derive(Debug, Clone, Serialize, Deserialize)] #[serde(tag = "kind", rename_all = "snake_case")] pub enum GateBlocker { /// `burn_in`: the tier's `tier_state.burn_in_started_at` is NULL. BurnInClockNotStarted, /// `burn_in`: clock running but not enough time elapsed yet. BurnInRemaining { hours_remaining: u32, hours_total: u32 }, /// `manual_confirm`: no out-of-band passing row exists for this /// (tier, version). AwaitingOperatorConfirmation, /// `migration_dry_run`: no row in `backups` to restore from. NoBackupAvailable, /// `migration_dry_run` / `boot_smoke` / `cargo_test`: daemon config /// has no `scratch_db_url`. ScratchDbUrlUnset, /// `boot_smoke`: no `artifact_path` in `versions` for this version. ArtifactMissing { version: Version }, } impl GateBlocker { pub fn summary(&self) -> String { match self { GateBlocker::BurnInClockNotStarted => "burn-in clock not started".into(), GateBlocker::BurnInRemaining { hours_remaining, hours_total } => format!("{hours_remaining} hours remaining of {hours_total}"), GateBlocker::AwaitingOperatorConfirmation => "waiting on operator confirmation".into(), GateBlocker::NoBackupAvailable => "no backup fetched; call /backup/fetch first".into(), GateBlocker::ScratchDbUrlUnset => "scratch_db_url unset in daemon config".into(), GateBlocker::ArtifactMissing { version } => format!("no artifact for version {version}"), } } } #[derive(Debug, Clone, Serialize, Deserialize)] #[serde(tag = "kind", rename_all = "snake_case")] pub enum GateFailure { /// `cargo_test` exited non-zero. `failed_count` may be 0 if the /// classifier couldn't parse the count (e.g. compile error). CargoTest { failed_count: u32, first_failed: Option }, /// `migration_dry_run`: a migration that was previously applied is /// no longer present in the resolved migrations directory. MigrationDrift { migration: String }, /// `migration_dry_run`: a migration that was previously applied has /// been modified (checksum mismatch). MigrationModified { migration: String }, /// `migration_dry_run`: postgres rejected a migration's SQL. MigrationSqlError { migration: String, sqlstate: Option }, /// `migration_dry_run`: scratch DB reset or dump restore failed. RestoreFailed { reason: String }, /// `boot_smoke`: binary exited with a non-zero status during the /// smoke window. Most likely a panic; `exit_code` carries the OS /// status when one is available. BootPanic { exit_code: Option }, /// `boot_smoke`: binary exited 0 before the smoke window elapsed. BootExitedEarly { exit_code: Option }, /// `cargo_test` / `boot_smoke`: tokio could not spawn the child. SpawnFailed { message: String }, /// Gate took longer than the configured ceiling. Timeout { gate: GateKind, after_s: u32 }, /// Classifier could not match the output to any known variant. The /// `log_ref` on the enclosing `GateOutcome` is the diagnostic path. Unclassified { legacy_detail: Option }, } impl GateFailure { pub fn summary(&self) -> String { match self { GateFailure::CargoTest { failed_count, first_failed: Some(name) } => format!("{failed_count} test(s) failed; first: {name}"), GateFailure::CargoTest { failed_count, first_failed: None } => format!("{failed_count} test(s) failed"), GateFailure::MigrationDrift { migration } => format!("migration {migration} previously applied but missing"), GateFailure::MigrationModified { migration } => format!("migration {migration} previously applied but modified"), GateFailure::MigrationSqlError { migration, sqlstate: Some(s) } => format!("migration {migration} sql error ({s})"), GateFailure::MigrationSqlError { migration, sqlstate: None } => format!("migration {migration} sql error"), GateFailure::RestoreFailed { reason } => format!("restore: {reason}"), GateFailure::BootPanic { exit_code: Some(c) } => format!("binary panicked: exit {c}"), GateFailure::BootPanic { exit_code: None } => "binary panicked".into(), GateFailure::BootExitedEarly { exit_code: Some(c) } => format!("binary exited early: exit {c}"), GateFailure::BootExitedEarly { exit_code: None } => "binary exited early".into(), GateFailure::SpawnFailed { message } => format!("spawn: {message}"), GateFailure::Timeout { gate, after_s } => format!("{gate} timed out after {after_s}s"), GateFailure::Unclassified { legacy_detail: Some(d) } => d.clone(), GateFailure::Unclassified { legacy_detail: None } => "unclassified failure".into(), } } } // --------------------------------------------------------------------- // Deploy outcomes (step 7) // --------------------------------------------------------------------- /// Typed outcome of one node-deploy attempt. Stored as `outcome_json` in /// the `deploys` table and emitted in `Event::DeployFailed` so consumers /// can distinguish a node-unreachable error (operator: check the box) /// from rsync mid-transfer corruption (operator: check disk/network). #[derive(Debug, Clone, Serialize, Deserialize)] pub struct DeployOutcome { pub status: DeployStatus, } impl DeployOutcome { pub fn ok() -> Self { Self { status: DeployStatus::Ok } } pub fn failed(failure: DeployFailureKind) -> Self { Self { status: DeployStatus::Failed { failure } } } pub fn in_progress() -> Self { Self { status: DeployStatus::InProgress } } /// `'in_progress' | 'ok' | 'failed'` — the value of the legacy /// `deploys.outcome` column. pub fn status_str(&self) -> &'static str { match self.status { DeployStatus::InProgress => "in_progress", DeployStatus::Ok => "ok", DeployStatus::Failed { .. } => "failed", } } } #[derive(Debug, Clone, Serialize, Deserialize)] #[serde(tag = "kind", rename_all = "snake_case")] pub enum DeployStatus { InProgress, Ok, Failed { failure: DeployFailureKind }, } #[derive(Debug, Clone, Serialize, Deserialize)] #[serde(tag = "kind", rename_all = "snake_case")] pub enum DeployFailureKind { /// SSH to the node failed before any state changed. Typically a dead /// host, network partition, or stale known_hosts. NodeUnreachable { detail: String }, /// rsync exited non-zero mid-transfer. The on-target release dir may /// be partially populated, but the `current` symlink is untouched. RsyncFailed { detail: String }, /// Files copied successfully but the atomic symlink swap step /// failed. The new release is on disk; the service is still running /// the old one. SymlinkSwapFailed { detail: String }, /// Symlink swapped but `systemctl reload-or-restart` returned /// non-zero. The new code is current but the service may have /// crashed on startup. ServiceRestartFailed { detail: String }, /// Classifier couldn't match the error to a known variant. The full /// anyhow chain is in `detail`. Unclassified { detail: String }, } impl DeployFailureKind { pub fn summary(&self) -> String { match self { DeployFailureKind::NodeUnreachable { detail } => format!("node unreachable: {detail}"), DeployFailureKind::RsyncFailed { detail } => format!("rsync: {detail}"), DeployFailureKind::SymlinkSwapFailed { detail } => format!("symlink swap: {detail}"), DeployFailureKind::ServiceRestartFailed { detail } => format!("service restart: {detail}"), DeployFailureKind::Unclassified { detail } => detail.chars().take(200).collect(), } } } /// Pointer to the on-disk gate log: a path relative to `cfg.logs_root` /// of the form `/.log`. Stored in `gate_runs.log_ref` /// and surfaced in `/state` so the TUI/operator can request the full /// tail via `GET /logs//` only when needed. #[derive(Debug, Clone, Serialize, Deserialize)] #[serde(transparent)] pub struct LogRef(pub String); impl LogRef { pub fn new(version: &Version, gate: GateKind) -> Self { Self(format!("{}/{}.log", version, gate.as_str())) } pub fn as_str(&self) -> &str { &self.0 } } impl std::fmt::Display for LogRef { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { self.0.fmt(f) } } #[cfg(test)] mod tests { use super::*; #[test] fn outcome_serialization_is_two_layer_tagged() { let o = GateOutcome::failed(GateFailure::MigrationDrift { migration: "0047_widgets".into(), }); let v: serde_json::Value = serde_json::to_value(&o).unwrap(); assert_eq!(v["status"]["kind"], "failed"); assert_eq!(v["status"]["failure"]["kind"], "migration_drift"); assert_eq!(v["status"]["failure"]["migration"], "0047_widgets"); } #[test] fn outcome_round_trips_through_json() { let o = GateOutcome::passed(PassNote::TestsPassed { duration_s: 42 }); let s = serde_json::to_string(&o).unwrap(); let back: GateOutcome = serde_json::from_str(&s).unwrap(); assert!(back.is_passed()); assert_eq!(back.status_str(), "passed"); } #[test] fn blocked_is_not_passed() { let o = GateOutcome::blocked(GateBlocker::BurnInClockNotStarted); assert!(!o.is_passed()); assert_eq!(o.status_str(), "blocked"); } #[test] fn log_ref_construction_matches_disk_layout() { let v: Version = "0.9.6".parse().unwrap(); let lr = LogRef::new(&v, GateKind::CargoTest); assert_eq!(lr.as_str(), "0.9.6/cargo_test.log"); } #[test] fn unclassified_preserves_legacy_detail() { let o = GateOutcome::failed(GateFailure::Unclassified { legacy_detail: Some("binary exited early: exit status: 101\n==== stdout ====\n...".into()), }); let v: serde_json::Value = serde_json::to_value(&o).unwrap(); assert_eq!(v["status"]["failure"]["kind"], "unclassified"); assert!(v["status"]["failure"]["legacy_detail"] .as_str().unwrap().contains("exit status: 101")); } }