//! Gate-output classifiers. //! //! Each `classify_*` function takes the raw signals produced by a gate //! runner (exit status, stdout/stderr tails, sqlx error strings) and //! maps them to a typed `GateFailure`. Anything that doesn't match a //! known pattern returns `GateFailure::Unclassified` with the original //! detail attached — the on-disk gate log is the ultimate fallback. //! //! Classifiers are pure functions: no IO, no async. That makes them //! fixture-testable, and it keeps the `gates.rs` runner code in charge //! of side effects (process spawning, log persistence). use crate::outcome::GateFailure; /// `cargo_test`: derive a `CargoTest` failure with whatever counts can /// be lifted out of the test runner's output. /// /// libtest emits a `test result: FAILED. P passed; F failed; ...` line /// near the end of stdout. We grab `F` from that. If the output never /// reached that line (compile error, runtime panic in the harness), we /// fall through to `Unclassified`. pub fn classify_cargo_test(stdout: &[u8], stderr: &[u8]) -> GateFailure { let stdout_s = String::from_utf8_lossy(stdout); let mut failed_count: u32 = 0; let mut first_failed: Option = None; // `test result: FAILED. P passed; F failed; ...` lives near the // end. Walk backwards to find it cheaply on very large outputs. for line in stdout_s.lines().rev().take(50) { if let Some(rest) = line.strip_prefix("test result: FAILED.") { // Expect "P passed; F failed; ..." for piece in rest.split(';') { let p = piece.trim(); if let Some(num_str) = p.strip_suffix(" failed") { if let Ok(n) = num_str.parse::() { failed_count = n; } } } break; } } // libtest prints "failures:\n foo::bar" near the end too. Grab // the first one for the summary line. if let Some(idx) = stdout_s.find("\nfailures:\n") { for line in stdout_s[idx + 11..].lines() { let trimmed = line.trim(); if trimmed.is_empty() { break; } // The "failures:" block repeats — once with stdout per // failure, once as a plain name list. Either way the first // non-empty line is a candidate. first_failed = Some(trimmed.to_string()); break; } } if failed_count == 0 && first_failed.is_none() { // Compile error or harness panic — no usable signal in stdout. return GateFailure::Unclassified { legacy_detail: Some(combined_tail_for_classifier(stdout, stderr)), }; } GateFailure::CargoTest { failed_count, first_failed } } /// `migration_dry_run` is staged: scratch reset → restore dump → run /// migrator. Each stage has its own failure mode. The caller (the gate /// runner) knows which stage tripped; classifiers here turn the stage's /// error string into a typed variant. /// /// Inputs are the migration name (when known) and the error string sqlx /// returned. `migration` defaults to "?" when sqlx couldn't tell us /// which file blew up. pub fn classify_migration_error(err: &str, migration_hint: Option<&str>) -> GateFailure { // sqlx::migrate::MigrateError variants are stringified consistently. // Examples from `plans/migration-dryrun-failures.md`: // "migration 47 was previously applied but is missing in the resolved migrations" // "migration 47 was previously applied but has been modified" // sqlx::Error::Database with sqlstate (e.g. "42P01" relation does not exist) if let Some(m) = extract_drift(err) { return GateFailure::MigrationDrift { migration: m }; } if let Some(m) = extract_modified(err) { return GateFailure::MigrationModified { migration: m }; } let sqlstate = extract_sqlstate(err); let migration = migration_hint.map(str::to_owned).unwrap_or_else(|| "?".to_owned()); if sqlstate.is_some() { return GateFailure::MigrationSqlError { migration, sqlstate }; } GateFailure::Unclassified { legacy_detail: Some(err.chars().take(4_000).collect()), } } fn extract_drift(err: &str) -> Option { // "migration N was previously applied but is missing in the resolved migrations" let idx = err.find(" was previously applied but is missing")?; let prefix = &err[..idx]; let mig = prefix.rsplit_once(' ').map(|(_, m)| m).unwrap_or(prefix); Some(mig.to_string()) } fn extract_modified(err: &str) -> Option { let idx = err.find(" was previously applied but has been modified")?; let prefix = &err[..idx]; let mig = prefix.rsplit_once(' ').map(|(_, m)| m).unwrap_or(prefix); Some(mig.to_string()) } fn extract_sqlstate(err: &str) -> Option { // Postgres errors surface as `... code: "42P01" ...` in the Debug // form sqlx produces. Be tolerant of the surrounding quoting. let idx = err.find("code: \"")?; let rest = &err[idx + 7..]; let end = rest.find('"')?; Some(rest[..end].to_string()) } /// `boot_smoke`: process exit info is the dominant signal. If the /// binary exited with a status during the smoke window, we map exit /// code 101 (Rust default for panic) to `BootPanic`, everything else /// to `BootExitedEarly`. If it never exited (stayed up), the caller /// constructs `PassNote::StayedUp` directly without consulting this. pub fn classify_boot_smoke(exit_code: Option) -> GateFailure { match exit_code { Some(101) => GateFailure::BootPanic { exit_code: Some(101) }, Some(c) if c < 0 => GateFailure::BootPanic { exit_code: Some(c) }, // killed by signal Some(c) => GateFailure::BootExitedEarly { exit_code: Some(c) }, None => GateFailure::BootExitedEarly { exit_code: None }, } } /// `Event::DeployFailed`: classify an anyhow chain produced by /// `deploy::deploy_node` into a typed `DeployFailureKind`. /// /// The anyhow chain is the `format!("{e:#}")` string the caller built, /// which joins each `.context(...)` layer with ": ". We probe for the /// contexts attached by `deploy_remote` (and well-known stderr patterns /// from ssh/rsync) in order of specificity. pub fn classify_deploy_error(err: &str) -> crate::outcome::DeployFailureKind { use crate::outcome::DeployFailureKind as K; // SSH-level transport failures bubble up under whatever context // their caller attached. Probe for the canonical OpenSSH stderr // patterns first so a "creating remote release dir: ... Connection // refused" doesn't get filed under NodeUnreachable's prose label. let unreachable_signals = [ "Connection refused", "Connection timed out", "Network is unreachable", "No route to host", "Could not resolve hostname", "Host key verification failed", "Permission denied (publickey", ]; if unreachable_signals.iter().any(|p| err.contains(p)) { return K::NodeUnreachable { detail: err.chars().take(400).collect() }; } // The contexts attached by `deploy_remote` (deploy.rs) are stable // strings; treat them as anchors. Order matters — "symlink swap + // systemctl" appears after a successful rsync, so probe rsync first // to avoid catching it under the swap heading. if err.contains("rsync failed") || err.contains("spawning rsync") { return K::RsyncFailed { detail: err.chars().take(400).collect() }; } if err.contains("creating remote release dir") { return K::NodeUnreachable { detail: err.chars().take(400).collect() }; } if err.contains("symlink swap + systemctl") { // Heuristic split inside the combined step: stderr containing // "systemctl" suggests the swap succeeded and the restart failed. if err.contains("systemctl") && !err.contains("ln:") { return K::ServiceRestartFailed { detail: err.chars().take(400).collect() }; } return K::SymlinkSwapFailed { detail: err.chars().take(400).collect() }; } if err.contains("symlink swap failed") { return K::SymlinkSwapFailed { detail: err.chars().take(400).collect() }; } K::Unclassified { detail: err.chars().take(400).collect() } } /// Concatenate stdout + stderr tails the way the legacy runner did, so /// `Unclassified.legacy_detail` looks like what operators are used to /// seeing in `gate_runs.detail` today. fn combined_tail_for_classifier(stdout: &[u8], stderr: &[u8]) -> String { let mut joined = Vec::with_capacity(stdout.len() + stderr.len() + 32); joined.extend_from_slice(b"==== stdout ====\n"); joined.extend_from_slice(stdout); if !stdout.last().is_some_and(|b| *b == b'\n') { joined.push(b'\n'); } joined.extend_from_slice(b"==== stderr ====\n"); joined.extend_from_slice(stderr); let s = String::from_utf8_lossy(&joined); if s.len() <= 4_000 { s.into_owned() } else { format!("...{}", &s[s.len() - 4_000..]) } } #[cfg(test)] mod tests { use super::*; #[test] fn cargo_test_extracts_failed_count() { let stdout = b"running 12 tests\n\ test foo ... ok\n\ test bar ... FAILED\n\ test baz ... FAILED\n\ \n\ failures:\n\ foo::bar\n\ foo::baz\n\ \n\ test result: FAILED. 10 passed; 2 failed; 0 ignored\n"; let GateFailure::CargoTest { failed_count, first_failed } = classify_cargo_test(stdout, b"") else { panic!("expected CargoTest variant"); }; assert_eq!(failed_count, 2); assert_eq!(first_failed.as_deref(), Some("foo::bar")); } #[test] fn cargo_test_compile_error_is_unclassified() { // No "test result:" line because cargo never got to running. let stderr = b"error[E0382]: borrow of moved value: `x`\n"; let f = classify_cargo_test(b"", stderr); match f { GateFailure::Unclassified { legacy_detail: Some(d) } => { assert!(d.contains("borrow of moved value")); } other => panic!("expected Unclassified, got {other:?}"), } } #[test] fn migration_drift_extracts_name() { let err = "migration 0047_widgets was previously applied but is missing in the resolved migrations"; let f = classify_migration_error(err, None); match f { GateFailure::MigrationDrift { migration } => assert_eq!(migration, "0047_widgets"), other => panic!("expected MigrationDrift, got {other:?}"), } } #[test] fn migration_modified_extracts_name() { let err = "migration 0042_seed was previously applied but has been modified"; let f = classify_migration_error(err, None); match f { GateFailure::MigrationModified { migration } => assert_eq!(migration, "0042_seed"), other => panic!("expected MigrationModified, got {other:?}"), } } #[test] fn migration_sql_error_extracts_sqlstate() { let err = r#"while executing migrations: error returned from database: code: "42P01" message: "relation \"widgets\" does not exist""#; let f = classify_migration_error(err, Some("0050_drop_widgets")); match f { GateFailure::MigrationSqlError { migration, sqlstate } => { assert_eq!(migration, "0050_drop_widgets"); assert_eq!(sqlstate.as_deref(), Some("42P01")); } other => panic!("expected MigrationSqlError, got {other:?}"), } } #[test] fn migration_unknown_error_is_unclassified() { let err = "something went wrong with the universe"; let f = classify_migration_error(err, None); match f { GateFailure::Unclassified { legacy_detail: Some(d) } => { assert!(d.contains("universe")); } other => panic!("expected Unclassified, got {other:?}"), } } #[test] fn boot_smoke_101_is_panic() { match classify_boot_smoke(Some(101)) { GateFailure::BootPanic { exit_code: Some(101) } => {} other => panic!("expected BootPanic(101), got {other:?}"), } } #[test] fn boot_smoke_signal_is_panic() { match classify_boot_smoke(Some(-9)) { GateFailure::BootPanic { exit_code: Some(-9) } => {} other => panic!("expected BootPanic(-9), got {other:?}"), } } #[test] fn boot_smoke_other_exit_is_exited_early() { match classify_boot_smoke(Some(2)) { GateFailure::BootExitedEarly { exit_code: Some(2) } => {} other => panic!("expected BootExitedEarly(2), got {other:?}"), } } #[test] fn deploy_connection_refused_is_node_unreachable() { use crate::outcome::DeployFailureKind as K; let err = "creating remote release dir: ssh testnot-1 failed: ssh: connect to host testnot-1 port 22: Connection refused"; match classify_deploy_error(err) { K::NodeUnreachable { .. } => {} other => panic!("expected NodeUnreachable, got {other:?}"), } } #[test] fn deploy_rsync_failure_is_rsync_failed() { use crate::outcome::DeployFailureKind as K; let err = "rsync failed (current symlink left intact): rsync: write failed on \"/srv/.../makenotwork\": No space left on device (28)"; match classify_deploy_error(err) { K::RsyncFailed { detail } => assert!(detail.contains("No space left")), other => panic!("expected RsyncFailed, got {other:?}"), } } #[test] fn deploy_systemctl_failure_is_service_restart_failed() { use crate::outcome::DeployFailureKind as K; // The combined "swap + restart" step where stderr mentions systemctl. let err = "symlink swap + systemctl reload-or-restart: ssh testnot-1 failed: Failed to restart makenotwork.service: Unit makenotwork.service failed to start"; match classify_deploy_error(err) { K::ServiceRestartFailed { .. } => {} other => panic!("expected ServiceRestartFailed, got {other:?}"), } } #[test] fn deploy_ln_failure_is_symlink_swap_failed() { use crate::outcome::DeployFailureKind as K; let err = "symlink swap + systemctl reload-or-restart: ssh testnot-1 failed: ln: failed to create symbolic link: Permission denied"; match classify_deploy_error(err) { K::SymlinkSwapFailed { .. } => {} other => panic!("expected SymlinkSwapFailed, got {other:?}"), } } #[test] fn deploy_unknown_is_unclassified() { use crate::outcome::DeployFailureKind as K; let err = "something went wrong in a way we did not anticipate"; match classify_deploy_error(err) { K::Unclassified { detail } => assert!(detail.contains("anticipate")), other => panic!("expected Unclassified, got {other:?}"), } } }