Skip to main content

max / makenotwork

3.1 KB · 115 lines History Blame Raw
1 //! Health monitor database operations.
2
3 use chrono::{DateTime, Utc};
4 use sqlx::PgPool;
5
6 /// Insert a health check snapshot into the history table.
7 #[tracing::instrument(skip_all)]
8 pub async fn insert_health_history(
9 pool: &PgPool,
10 status: &str,
11 db_ok: bool,
12 s3_ok: bool,
13 sessions_ok: bool,
14 check_duration_ms: i32,
15 details: Option<&serde_json::Value>,
16 ) -> Result<(), sqlx::Error> {
17 sqlx::query(
18 "INSERT INTO health_history (status, db_ok, s3_ok, sessions_ok, check_duration_ms, details)
19 VALUES ($1, $2, $3, $4, $5, $6)",
20 )
21 .bind(status)
22 .bind(db_ok)
23 .bind(s3_ok)
24 .bind(sessions_ok)
25 .bind(check_duration_ms)
26 .bind(details)
27 .execute(pool)
28 .await?;
29 Ok(())
30 }
31
32 /// A single health check snapshot from the database.
33 #[derive(Debug, sqlx::FromRow)]
34 #[allow(dead_code)] // Fields used by the FromRow derive and available for future display
35 pub struct DbHealthSnapshot {
36 pub checked_at: DateTime<Utc>,
37 pub status: String,
38 pub db_ok: bool,
39 pub s3_ok: bool,
40 pub sessions_ok: bool,
41 pub check_duration_ms: i32,
42 }
43
44 /// Fetch the most recent health check snapshots.
45 #[tracing::instrument(skip_all)]
46 pub async fn get_recent_health_history(
47 pool: &PgPool,
48 limit: i64,
49 ) -> Result<Vec<DbHealthSnapshot>, sqlx::Error> {
50 sqlx::query_as::<_, DbHealthSnapshot>(
51 "SELECT checked_at, status, db_ok, s3_ok, sessions_ok, check_duration_ms
52 FROM health_history
53 ORDER BY checked_at DESC
54 LIMIT $1",
55 )
56 .bind(limit)
57 .fetch_all(pool)
58 .await
59 }
60
61 /// Calculate uptime percentage over the last `hours` hours.
62 ///
63 /// Returns `None` if there are no records in the time window.
64 #[tracing::instrument(skip_all)]
65 pub async fn get_health_uptime_percent(
66 pool: &PgPool,
67 hours: i32,
68 ) -> Result<Option<f64>, sqlx::Error> {
69 let row: Option<(i64, i64)> = sqlx::query_as(
70 "SELECT
71 COUNT(*) AS total,
72 COUNT(*) FILTER (WHERE status = 'operational') AS up
73 FROM health_history
74 WHERE checked_at >= NOW() - make_interval(hours => $1)",
75 )
76 .bind(hours)
77 .fetch_optional(pool)
78 .await?;
79
80 Ok(row.and_then(|(total, up)| {
81 if total == 0 {
82 None
83 } else {
84 Some((up as f64 / total as f64) * 100.0)
85 }
86 }))
87 }
88
89 /// Find the most recent non-operational snapshot (for "last incident" display).
90 #[tracing::instrument(skip_all)]
91 pub async fn get_last_incident(
92 pool: &PgPool,
93 ) -> Result<Option<DateTime<Utc>>, sqlx::Error> {
94 sqlx::query_scalar(
95 "SELECT checked_at FROM health_history
96 WHERE status != 'operational'
97 ORDER BY checked_at DESC
98 LIMIT 1",
99 )
100 .fetch_optional(pool)
101 .await
102 }
103
104 /// Delete health history records older than `retain_days`.
105 #[tracing::instrument(skip_all)]
106 pub async fn prune_health_history(pool: &PgPool, retain_days: i64) -> Result<u64, sqlx::Error> {
107 let result = sqlx::query(
108 "DELETE FROM health_history WHERE checked_at < NOW() - make_interval(days => $1::int)",
109 )
110 .bind(retain_days as i32)
111 .execute(pool)
112 .await?;
113 Ok(result.rows_affected())
114 }
115