| 1 |
|
| 2 |
|
| 3 |
use crate::db; |
| 4 |
use crate::AppState; |
| 5 |
|
| 6 |
|
| 7 |
/// Attempt number at which a webhook event stops being retried: an event whose
/// attempt count has reached this value is reported as dead by `is_webhook_dead`.
const WEBHOOK_MAX_RETRIES: i32 = 5;
| 8 |
|
| 9 |
|
| 10 |
pub(super) fn is_webhook_dead(attempt: i32) -> bool { |
| 11 |
attempt >= WEBHOOK_MAX_RETRIES |
| 12 |
} |
| 13 |
|
| 14 |
|
| 15 |
pub(super) async fn retry_failed_webhooks(state: &AppState) { |
| 16 |
let events = match db::webhook_events::get_retryable_events(&state.db).await { |
| 17 |
Ok(e) if e.is_empty() => return, |
| 18 |
Ok(e) => e, |
| 19 |
Err(e) => { |
| 20 |
tracing::error!(error = ?e, "failed to fetch retryable webhook events"); |
| 21 |
return; |
| 22 |
} |
| 23 |
}; |
| 24 |
|
| 25 |
if state.stripe.is_none() { return; } |
| 26 |
|
| 27 |
for event in events { |
| 28 |
let attempt = event.attempts + 1; |
| 29 |
tracing::info!( |
| 30 |
event_id = %event.id, source = %event.source, event_type = %event.event_type, |
| 31 |
attempt = attempt, "retrying webhook event" |
| 32 |
); |
| 33 |
|
| 34 |
|
| 35 |
|
| 36 |
|
| 37 |
let result = if event.source == "stripe" { |
| 38 |
match crate::payments::UntypedEvent::from_payload(&event.payload) { |
| 39 |
Ok(parsed) => { |
| 40 |
let crate::payments::UntypedEvent { id, type_, data_object } = parsed; |
| 41 |
crate::routes::stripe::process_webhook_event(state, &type_, &id, data_object).await |
| 42 |
} |
| 43 |
Err(e) => Err(e), |
| 44 |
} |
| 45 |
} else { |
| 46 |
Err(crate::error::AppError::BadRequest(format!("Unknown webhook source: {}", event.source))) |
| 47 |
}; |
| 48 |
|
| 49 |
match result { |
| 50 |
Ok(()) => { |
| 51 |
tracing::info!(event_id = %event.id, "webhook retry succeeded"); |
| 52 |
if let Err(e) = db::webhook_events::mark_processed(&state.db, event.id).await { |
| 53 |
tracing::error!(error = ?e, "failed to mark webhook event as processed"); |
| 54 |
} |
| 55 |
} |
| 56 |
Err(e) => { |
| 57 |
let is_dead = is_webhook_dead(attempt); |
| 58 |
tracing::warn!( |
| 59 |
event_id = %event.id, attempt = attempt, error = ?e, |
| 60 |
dead = is_dead, |
| 61 |
"webhook retry failed" |
| 62 |
); |
| 63 |
if let Err(e) = db::webhook_events::schedule_retry( |
| 64 |
&state.db, event.id, attempt, &format!("{:?}", e), |
| 65 |
).await { |
| 66 |
tracing::error!(error = ?e, "failed to schedule webhook retry"); |
| 67 |
} |
| 68 |
|
| 69 |
if is_dead |
| 70 |
&& let Some(ref wam) = state.wam |
| 71 |
{ |
| 72 |
let title = format!( |
| 73 |
"Dead webhook: {} ({})", |
| 74 |
event.event_type, event.id |
| 75 |
); |
| 76 |
let body = format!( |
| 77 |
"Webhook event exhausted all {} retry attempts.\n\ |
| 78 |
Source: {}\nType: {}\nLast error: {:?}", |
| 79 |
attempt, event.source, event.event_type, e, |
| 80 |
); |
| 81 |
wam.create_ticket( |
| 82 |
&title, |
| 83 |
Some(&body), |
| 84 |
"high", |
| 85 |
"webhook-dead-letter", |
| 86 |
Some(&event.id.to_string()), |
| 87 |
) |
| 88 |
.await; |
| 89 |
} |
| 90 |
} |
| 91 |
} |
| 92 |
} |
| 93 |
} |
| 94 |
|
| 95 |
|
| 96 |
pub(super) async fn escalate_stale_refunds(state: &AppState) { |
| 97 |
let stale = match db::pending_refunds::get_stale_refunds( |
| 98 |
&state.db, |
| 99 |
chrono::Duration::hours(24), |
| 100 |
) |
| 101 |
.await |
| 102 |
{ |
| 103 |
Ok(s) if s.is_empty() => return, |
| 104 |
Ok(s) => s, |
| 105 |
Err(e) => { |
| 106 |
tracing::error!(error = ?e, "failed to query stale pending refunds"); |
| 107 |
return; |
| 108 |
} |
| 109 |
}; |
| 110 |
|
| 111 |
let alert_email = std::env::var("ALERT_EMAIL").ok(); |
| 112 |
|
| 113 |
for refund in &stale { |
| 114 |
|
| 115 |
if let Err(e) = db::pending_refunds::mark_escalated(&state.db, refund.id).await { |
| 116 |
tracing::error!(error = ?e, "failed to mark pending refund as escalated, skipping alerts"); |
| 117 |
continue; |
| 118 |
} |
| 119 |
|
| 120 |
tracing::error!( |
| 121 |
payment_intent_id = %refund.payment_intent_id, |
| 122 |
amount = refund.amount.as_i64(), |
| 123 |
amount_refunded = refund.amount_refunded.as_i64(), |
| 124 |
created_at = %refund.created_at, |
| 125 |
"STALE PENDING REFUND: unmatched for >24h, needs manual investigation" |
| 126 |
); |
| 127 |
|
| 128 |
if let Some(ref to) = alert_email { |
| 129 |
let subject = format!( |
| 130 |
"Unmatched refund: {} ({}c refunded)", |
| 131 |
refund.payment_intent_id, refund.amount_refunded |
| 132 |
); |
| 133 |
let body = format!( |
| 134 |
"A charge.refunded webhook for payment intent {} has been pending for >24 hours \ |
| 135 |
with no matching completed transaction.\n\n\ |
| 136 |
Amount: {}c\nAmount refunded: {}c\nReceived: {}\n\n\ |
| 137 |
This likely means the checkout.session.completed webhook was lost. \ |
| 138 |
Check the Stripe dashboard and reconcile manually.", |
| 139 |
refund.payment_intent_id, |
| 140 |
refund.amount, |
| 141 |
refund.amount_refunded, |
| 142 |
refund.created_at, |
| 143 |
); |
| 144 |
if let Err(e) = state.email.send_alert(to, &subject, &body).await { |
| 145 |
tracing::error!(error = ?e, "failed to send stale refund alert email"); |
| 146 |
} |
| 147 |
} |
| 148 |
|
| 149 |
if let Some(ref wam) = state.wam { |
| 150 |
let title = format!( |
| 151 |
"Unmatched refund: {} ({}c)", |
| 152 |
refund.payment_intent_id, refund.amount_refunded |
| 153 |
); |
| 154 |
let body = format!( |
| 155 |
"charge.refunded webhook pending >24h with no matching completed transaction.\n\ |
| 156 |
Amount: {}c\nRefunded: {}c\nReceived: {}\n\ |
| 157 |
Check Stripe dashboard and reconcile manually.", |
| 158 |
refund.amount, refund.amount_refunded, refund.created_at, |
| 159 |
); |
| 160 |
wam.create_ticket( |
| 161 |
&title, |
| 162 |
Some(&body), |
| 163 |
"critical", |
| 164 |
"refund-escalation", |
| 165 |
Some(&refund.payment_intent_id), |
| 166 |
) |
| 167 |
.await; |
| 168 |
} |
| 169 |
} |
| 170 |
} |
| 171 |
|
| 172 |
#[cfg(test)]
mod tests {
    use super::*;

    /// Attempt numbers below the retry limit leave the event alive.
    #[test]
    fn webhook_not_dead_under_threshold() {
        for attempt in [1, 4] {
            assert!(!is_webhook_dead(attempt));
        }
    }

    /// The attempt that reaches the retry limit is the one that kills the event.
    #[test]
    fn webhook_dead_at_threshold() {
        assert!(is_webhook_dead(5));
    }

    /// Attempt numbers past the retry limit are still reported as dead.
    #[test]
    fn webhook_dead_above_threshold() {
        for attempt in [6, 100] {
            assert!(is_webhook_dead(attempt));
        }
    }
}
| 193 |
|