Skip to main content

max / makenotwork

server: make S3 orphan-deletion tests deterministic (drain the queue) Three storage tests asserted an S3 object was gone right after a delete/confirm, but since the Run #18 refactor the handler only ENQUEUES the delete to pending_s3_deletions — the actual delete is the scheduler's retry worker. The test harness runs no scheduler, so the object was never deleted in-test and the asserts failed. They last passed on a Sando build predating Run #18; v0.10.2 is the first rebuild to surface it (the custom-pages feature is unrelated). Adds a test-only synchronous queue drain (drain_pending_s3_deletions_for_test, re-exported from scheduler) exposed as TestHarness::drain_s3_deletions, mirroring drain_scan_jobs, and calls it before the deletion asserts. Pre-existing bug; not a feature change. Keeps v0.10.2. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
Author: Max Johnson <me@maxj.phd> · 2026-06-13 19:06 UTC
Commit: e6972c1f05359508c312baa340e9144f1c3827c8
Parent: 28a7bbb
5 files changed, +62 insertions, -0 deletions
@@ -620,6 +620,47 @@ pub(super) async fn retry_pending_s3_deletions(state: &AppState) {
620 620 }
621 621 }
622 622
623 + /// Test-only synchronous drain of the pending-S3-deletion queue (`main` bucket).
624 + ///
625 + /// The S3 delete a confirm/delete handler triggers is asynchronous: handlers
626 + /// only [`enqueue_s3_orphan`](crate::routes::storage::enqueue_s3_orphan), and
627 + /// the scheduler's [`retry_pending_s3_deletions`] performs the actual delete
628 + /// later. Integration tests can't mint [`S3DeleteAuthority`] and don't run the
629 + /// scheduler, so a test that asserts an object is gone must force the queued
630 + /// deletes first. This runs them immediately against the given pool + storage,
631 + /// mirroring the scheduler's per-row guarded-delete path. Returns the number of
632 + /// objects actually deleted. Exposed via `TestHarness::drain_s3_deletions`.
633 + #[doc(hidden)]
634 + pub async fn drain_pending_s3_deletions_for_test(
635 + pool: &sqlx::PgPool,
636 + s3: &dyn crate::storage::StorageBackend,
637 + ) -> usize {
638 + let stale = match db::pending_s3_deletions::get_stale_pending(pool, chrono::Duration::zero(), 1000).await {
639 + Ok(rows) => rows,
640 + Err(_) => return 0,
641 + };
642 + let mut completed = Vec::new();
643 + let mut deleted = 0usize;
644 + for row in &stale {
645 + // Storage tests only exercise the main bucket; synckit needs its own s3.
646 + if row.bucket != "main" {
647 + continue;
648 + }
649 + match delete_orphan_key_guarded(pool, s3, &row.bucket, &row.s3_key).await {
650 + GuardedDelete::Deleted => {
651 + deleted += 1;
652 + completed.push(row.id);
653 + }
654 + GuardedDelete::SkippedLive => completed.push(row.id),
655 + GuardedDelete::Failed => {}
656 + }
657 + }
658 + if !completed.is_empty() {
659 + let _ = db::pending_s3_deletions::remove_completed(pool, &completed).await;
660 + }
661 + deleted
662 + }
663 +
623 664 #[cfg(test)]
624 665 mod tests {
625 666 use super::*;
@@ -7,6 +7,8 @@
7 7
8 8 mod announcements;
9 9 mod cleanup;
10 + #[doc(hidden)]
11 + pub use cleanup::drain_pending_s3_deletions_for_test;
10 12 mod integrity;
11 13 mod mt_threads;
12 14 mod synckit_warnings;
@@ -535,6 +535,16 @@ impl TestHarness {
535 535 panic!("drain_scan_jobs did not terminate within 256 iterations");
536 536 }
537 537
538 + /// Synchronously perform any queued S3 object deletions (`main` bucket).
539 + /// Handler-side deletes only enqueue to `pending_s3_deletions`; the actual
540 + /// delete is the scheduler's job in production. Tests that assert an object
541 + /// was removed from storage call this first (the deterministic mirror of
542 + /// the production retry worker). Returns the number of objects deleted.
543 + pub async fn drain_s3_deletions(&self) -> usize {
544 + let Some(storage) = self.storage.as_ref() else { return 0 };
545 + makenotwork::scheduler::drain_pending_s3_deletions_for_test(&self.db, storage.as_ref()).await
546 + }
547 +
538 548 /// Log in as an existing user via POST /login. The client's session
539 549 /// cookies are updated automatically.
540 550 pub async fn login(&mut self, login: &str, password: &str) {
@@ -244,6 +244,10 @@ async fn delete_media_file_decrements_storage() {
244 244 .unwrap();
245 245 assert_eq!(count, 0, "Media file should be deleted from DB");
246 246
247 + // The handler enqueues the S3 delete to the orphan queue; run it (the
248 + // scheduler's job in production) before asserting the object is gone.
249 + h.drain_s3_deletions().await;
250 +
247 251 // Verify file is gone from S3 (via trait method)
248 252 let s3_exists = makenotwork::storage::StorageBackend::object_exists(
249 253 h.storage.as_ref().unwrap().as_ref(),
@@ -625,6 +625,9 @@ async fn confirm_over_storage_cap_does_not_charge_or_leak() {
625 625
626 626 // Counter unchanged — a failed confirm never inflates storage.
627 627 assert_eq!(storage_used(&h, &user_id).await, parked, "failed confirm must not charge storage");
628 + // The rejected confirm enqueues the orphan for deletion; run the queue (the
629 + // scheduler's job in production) before asserting the object is gone.
630 + h.drain_s3_deletions().await;
628 631 // And the object was cleaned up, not leaked.
629 632 assert!(
630 633 !h.storage.as_ref().unwrap().object_exists(&s3_key).await.unwrap(),
@@ -655,6 +658,8 @@ async fn confirm_wrong_route_file_type_deletes_object_and_does_not_charge() {
655 658 assert_eq!(resp.status.as_u16(), 400, "misrouted file type must 400, got: {} {}", resp.status, resp.text);
656 659
657 660 assert_eq!(storage_used(&h, &user_id).await, 0, "misrouted confirm must charge nothing");
661 + // Run the orphan-deletion queue (scheduler's job in production) before the assert.
662 + h.drain_s3_deletions().await;
658 663 assert!(
659 664 !h.storage.as_ref().unwrap().object_exists(&s3_key).await.unwrap(),
660 665 "misrouted confirm must delete the object (it would otherwise leak — the scan_jobs/scan_status footgun the guard prevents)"