v0.4.2: Graceful migration failure handling
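Replace the panic on migration failure with a clean exit (code 2) plus a
best-effort WAM alert. The systemd unit now sets RestartPreventExitStatus=2,
so this exit code does not trigger a restart and crash loops are avoided.
Lesson learned from the v0.4.1 deploy, where a duplicate migration 067
caused ~4 min of restarts.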
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
3 files changed,
+28 insertions,
-6 deletions
| 1 |
1 |
|
[package]
|
| 2 |
2 |
|
name = "makenotwork"
|
| 3 |
|
- |
version = "0.4.1"
|
|
3 |
+ |
version = "0.4.2"
|
| 4 |
4 |
|
edition = "2024"
|
| 5 |
5 |
|
license-file = "LICENSE"
|
| 6 |
6 |
|
|
| 22 |
22 |
|
ExecStart=/opt/makenotwork/makenotwork
|
| 23 |
23 |
|
Restart=always
|
| 24 |
24 |
|
RestartSec=5
|
|
25 |
+ |
# Exit code 2 = migration failure. Don't restart — operator must intervene.
|
|
26 |
+ |
RestartPreventExitStatus=2
|
| 25 |
27 |
|
|
| 26 |
28 |
|
# Environment file with secrets
|
| 27 |
29 |
|
EnvironmentFile=/opt/makenotwork/.env
|
| 69 |
69 |
|
|
| 70 |
70 |
|
tracing::info!("Database connected");
|
| 71 |
71 |
|
|
| 72 |
|
- |
// Run migrations
|
| 73 |
|
- |
sqlx::migrate!("./migrations")
|
| 74 |
|
- |
.run(&db)
|
| 75 |
|
- |
.await
|
| 76 |
|
- |
.expect("Failed to run migrations");
|
|
72 |
+ |
// Run migrations — exit cleanly (code 2) on failure instead of panicking.
|
|
73 |
+ |
// This prevents systemd from crash-looping on migration errors (e.g. version
|
|
74 |
+ |
// conflicts after a bad deploy). WAM ticket alerts the operator.
|
|
75 |
+ |
if let Err(e) = sqlx::migrate!("./migrations").run(&db).await {
|
|
76 |
+ |
tracing::error!(error = %e, "Migration failed — exiting without restart");
|
|
77 |
+ |
|
|
78 |
+ |
// Best-effort WAM alert (DB is up, WAM may be reachable)
|
|
79 |
+ |
if let Ok(wam_url) = std::env::var("WAM_URL") {
|
|
80 |
+ |
let body = format!("Migration error on startup:\n{e}");
|
|
81 |
+ |
let _ = reqwest::Client::new()
|
|
82 |
+ |
.post(format!("{wam_url}/tickets"))
|
|
83 |
+ |
.json(&serde_json::json!({
|
|
84 |
+ |
"title": "Migration failure — server not starting",
|
|
85 |
+ |
"body": body,
|
|
86 |
+ |
"priority": "critical",
|
|
87 |
+ |
"source": "migration-failure",
|
|
88 |
+ |
}))
|
|
89 |
+ |
.timeout(std::time::Duration::from_secs(5))
|
|
90 |
+ |
.send()
|
|
91 |
+ |
.await;
|
|
92 |
+ |
}
|
|
93 |
+ |
|
|
94 |
+ |
// Exit code 2 — systemd configured not to restart on this code
|
|
95 |
+ |
std::process::exit(2);
|
|
96 |
+ |
}
|
| 77 |
97 |
|
|
| 78 |
98 |
|
tracing::info!("Migrations complete");
|
| 79 |
99 |
|
|