Skip to main content

max / audiofiles

data integrity: atomic export + remove ordering + temp leak Three independent failure-mode fixes in the write paths: - export/runner.rs: added write_atomic(dest, |tmp| ...) helper. Each format branch (Original copy, Wav, Aiff) now writes to dest.audiofiles_tmp then fs::renames into place. On any error the tmp is removed, so a killed export leaves no partial files in the user's export dir. - store.rs: SampleStore::remove now deletes the file before the DB row (was: DB first, then file). ENOENT on the file is tolerated. Failure mode flipped from silent orphan-blob accumulation to a visible dangling row the user can retry. Comment + assertion test renamed; added remove_tolerates_missing_file test. - state/import_workflow.rs: finalize_edit only ran remove_file on the success branch, leaking the edit-result temp on every import_file failure. Restructured to capture the import result, unconditionally remove the temp, then match. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
Author: Max Johnson <me@maxj.phd> · 2026-06-03 01:59 UTC
Commit: 158e3097cb897f56a287595c322b15b2f8a92d5a
Parent: cf13b8e
3 files changed, +74 insertions, -25 deletions
@@ -1284,8 +1284,14 @@ impl BrowserState {
1284 1284 operation: audiofiles_core::edit::EditOperation,
1285 1285 mode: super::EditResultMode,
1286 1286 ) {
1287 - // 1. Import the result file into the content-addressed store
1288 - let new_hash = match self.backend.import_file(&result_path) {
1287 + // 1. Import the result file into the content-addressed store. Always
1288 + // clean up the temp regardless of outcome — the prior code only removed
1289 + // on the success branch, leaking the temp on every import failure.
1290 + // (The temp is the user's edit output; after import_file copies it
1291 + // into the store, we own the canonical copy by hash.)
1292 + let import_result = self.backend.import_file(&result_path);
1293 + let _ = std::fs::remove_file(&result_path);
1294 + let new_hash = match import_result {
1289 1295 Ok(h) => h,
1290 1296 Err(e) => {
1291 1297 self.status = format!("Failed to import edit result: {e}");
@@ -1293,9 +1299,6 @@ impl BrowserState {
1293 1299 }
1294 1300 };
1295 1301
1296 - // Clean up temp file
1297 - let _ = std::fs::remove_file(&result_path);
1298 -
1299 1302 // 2. Record in edit_history
1300 1303 let _ = self.backend.record_edit_history(&source_hash, &new_hash, &operation);
1301 1304
@@ -173,6 +173,31 @@ pub fn run_export(
173 173 Ok(ExportSummary { total, errors })
174 174 }
175 175
176 + /// Returns `dest` with `.audiofiles_tmp` appended. Two exports racing on the
177 + /// same dest would both pick the same tmp path, but `resolve_collision` already
178 + /// rules that out by the time we get here — each call has a unique dest.
179 + fn tmp_path_for(dest: &Path) -> PathBuf {
180 + let mut s = dest.as_os_str().to_owned();
181 + s.push(".audiofiles_tmp");
182 + PathBuf::from(s)
183 + }
184 +
185 + /// Run `write` against a temporary path, then `fs::rename` into `dest`. On any
186 + /// failure, remove the tmp file so a killed export leaves no partial files in
187 + /// the user's chosen export directory.
188 + fn write_atomic(dest: &Path, write: impl FnOnce(&Path) -> Result<()>) -> Result<()> {
189 + let tmp = tmp_path_for(dest);
190 + if let Err(e) = write(&tmp) {
191 + let _ = fs::remove_file(&tmp);
192 + return Err(e);
193 + }
194 + if let Err(e) = fs::rename(&tmp, dest) {
195 + let _ = fs::remove_file(&tmp);
196 + return Err(io_err(dest, e));
197 + }
198 + Ok(())
199 + }
200 +
176 201 /// Export a single item: either copy original or decode+convert+encode.
177 202 fn export_single_item(
178 203 source: &Path,
@@ -184,8 +209,10 @@ fn export_single_item(
184 209 ExportFormat::Original => {
185 210 // Always copy — hardlinks would let external edits mutate the
186 211 // content-addressed store, corrupting the archive.
187 - fs::copy(source, dest).map_err(|e| io_err(dest, e))?;
188 - Ok(())
212 + write_atomic(dest, |tmp| {
213 + fs::copy(source, tmp).map_err(|e| io_err(tmp, e))?;
214 + Ok(())
215 + })
189 216 }
190 217 ExportFormat::Wav => {
191 218 let decoded = decode::decode_multichannel(source)?;
@@ -197,8 +224,7 @@ fn export_single_item(
197 224 config.sample_rate,
198 225 )?;
199 226 let bit_depth = config.bit_depth.unwrap_or(24);
200 - encode::encode_wav(&converted, bit_depth, dest)?;
201 - Ok(())
227 + write_atomic(dest, |tmp| encode::encode_wav(&converted, bit_depth, tmp))
202 228 }
203 229 ExportFormat::Aiff => {
204 230 let decoded = decode::decode_multichannel(source)?;
@@ -210,8 +236,7 @@ fn export_single_item(
210 236 config.sample_rate,
211 237 )?;
212 238 let bit_depth = config.bit_depth.unwrap_or(24);
213 - encode_aiff::encode_aiff(&converted, bit_depth, dest)?;
214 - Ok(())
239 + write_atomic(dest, |tmp| encode_aiff::encode_aiff(&converted, bit_depth, tmp))
215 240 }
216 241 }
217 242 }
@@ -184,25 +184,27 @@ impl SampleStore {
184 184
185 185 /// Remove a sample from store and database. CASCADE handles VFS/tag refs.
186 186 ///
187 - /// Deletes the DB row first, then removes the file from disk. This ordering
188 - /// ensures that if the file deletion fails, the only residue is an orphaned
189 - /// blob on disk (harmless, can be cleaned up later). The reverse order would
190 - /// risk deleting the file while the DB row still references it.
187 + /// Deletes the file from disk first, then the DB row. If the file delete
188 + /// fails (in-use on Windows, permission denied, etc.), the DB row is left
189 + /// intact so the user can retry; nothing leaks. The reverse order would
190 + /// silently leak orphan blobs on every file-delete failure, accumulating
191 + /// disk waste invisible to the user.
191 192 #[instrument(skip_all)]
192 193 pub fn remove(&self, hash: &str, db: &Database) -> Result<()> {
193 - // Look up extension before deleting the row
194 194 let ext = sample_extension(db, hash)?;
195 195 let path = self.sample_path(hash, &ext)?;
196 196
197 - // Delete DB row first (CASCADE handles tags, vfs_nodes, etc.)
197 + // File first. ENOENT is fine — the row was already pointing at nothing.
198 + match fs::remove_file(&path) {
199 + Ok(()) => {}
200 + Err(e) if e.kind() == std::io::ErrorKind::NotFound => {}
201 + Err(e) => return Err(io_err(&path, e)),
202 + }
203 +
204 + // Then the DB row (CASCADE handles tags, vfs_nodes, etc.).
198 205 db.conn()
199 206 .execute("DELETE FROM samples WHERE hash = ?1", [hash])?;
200 207
201 - // Then delete the file — an orphaned blob is harmless if this fails
202 - if path.exists() {
203 - fs::remove_file(&path).map_err(|e| io_err(&path, e))?;
204 - }
205 -
206 208 Ok(())
207 209 }
208 210
@@ -846,10 +848,29 @@ mod tests {
846 848 }
847 849
848 850 #[test]
849 - fn remove_deletes_db_row_before_file() {
851 + fn remove_tolerates_missing_file() {
852 + // If the blob has already been deleted out from under us (manual rm,
853 + // crash mid-remove, etc.), the DB row should still be cleaned up.
854 + let (dir, db, store) = setup();
855 + let src = create_test_file(&dir, "ghost.wav", b"ghost data");
856 + let hash = store.import(&src, &db).unwrap();
857 + let stored = store.sample_path(&hash, "wav").unwrap();
858 + fs::remove_file(&stored).unwrap();
859 +
860 + store.remove(&hash, &db).unwrap();
861 +
862 + let count: i64 = db
863 + .conn()
864 + .query_row("SELECT COUNT(*) FROM samples", [], |row| row.get(0))
865 + .unwrap();
866 + assert_eq!(count, 0);
867 + }
868 +
869 + #[test]
870 + fn remove_deletes_file_before_db_row() {
850 871 // Verify that after remove(), both the DB row and the file are gone.
851 - // The ordering guarantee (DB first, then file) means an orphaned blob
852 - // is the only possible failure mode — never a dangling DB reference.
872 + // The ordering guarantee (file first, then DB row) means a dangling DB
873 + // row is the only possible failure mode — never an orphaned blob.
853 874 let (dir, db, store) = setup();
854 875 let src = create_test_file(&dir, "tom.wav", b"tom data");
855 876