Skip to main content

max / makenotwork

server: route failed/lost-race upload confirms through the S3 orphan queue confirm_upload / version_confirm_upload deleted the S3 object directly when the DB tx rolled back or lost the CAS race. A concurrent double-confirm of the same key could have committed it onto the live row first, so a blind delete would destroy the object the winning confirm points at (404ing every fan download). Enqueue to the orphan queue instead, whose is_s3_key_live check skips any key a row still references. Outstanding working-tree changes committed as-is (not authored this session). Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
Author: Max Johnson <me@maxj.phd> · 2026-06-12 22:19 UTC
Commit: 8b0569849438108966db85a962dcda3b7f623544
Parent: 9c59e6d
2 files changed, +20 insertions, -4 deletions
@@ -270,8 +270,13 @@ pub(super) async fn confirm_upload(
270 270
271 271 match tx_result {
272 272 Err(e) => {
273 - // tx rolled back — storage counter unchanged. Just clean up S3.
274 - s3.delete_object(&req.s3_key).await.ok();
273 + // tx rolled back — storage counter unchanged. A concurrent
274 + // double-confirm of this same key could have committed it onto the
275 + // item row before our `try_apply_storage_on` errored (storage cap
276 + // filling in between), so a blind delete could destroy the live
277 + // object the winner points at. Route through the orphan queue; its
278 + // `is_s3_key_live` check skips any key a row still references.
279 + super::enqueue_s3_orphan(&state.db, &req.s3_key, "item_confirm_failed").await;
275 280 return Err(e);
276 281 }
277 282 Ok(0) => {
@@ -213,11 +213,22 @@ pub(super) async fn version_confirm_upload(
213 213
214 214 match committed {
215 215 Err(e) => {
216 - s3.delete_object(&req.s3_key).await.ok();
216 + // The tx rolled back, so nothing this request wrote references the
217 + // key — but a concurrent double-confirm of this same key could have
218 + // committed it onto the row before our `try_apply_storage_on`
219 + // errored (e.g. the storage cap filled in between). A blind delete
220 + // would then destroy the live object the winning confirm points at.
221 + // Route through the orphan queue, whose `is_s3_key_live` check skips
222 + // any key a row still references.
223 + super::enqueue_s3_orphan(&state.db, &req.s3_key, "version_confirm_failed").await;
217 224 return Err(e);
218 225 }
219 226 Ok(false) => {
220 - s3.delete_object(&req.s3_key).await.ok();
227 + // Lost the CAS race: a concurrent confirm swapped the version's
228 + // s3_key out from under us. If it committed THIS key, a direct
229 + // delete would 404 every fan download of the version the winner
230 + // just published. The orphan queue's liveness check is the guard.
231 + super::enqueue_s3_orphan(&state.db, &req.s3_key, "version_confirm_lost_race").await;
221 232 return Err(AppError::BadRequest(
222 233 "Version was modified concurrently. Please try uploading again.".to_string(),
223 234 ));