//! Content-addressed sample storage: imports files by SHA-256 hash, deduplicates, and manages on-disk blobs. //! //! ## Why content-addressed storage //! //! - **Dedup by design:** Importing the same file twice is a no-op (same hash = same row). //! Users often have the same sample in multiple folders. //! - **Sync-friendly:** Hash is a stable, globally unique identifier across devices. No UUID //! collisions, no server-assigned IDs, no coordination needed during offline edits. //! - **Cloud eviction:** Setting `cloud_only=true` deletes the local blob while keeping the //! metadata row. The hash lets SyncKit re-download the exact file from blob storage later. use std::fs; use std::io::Read; use std::path::{Path, PathBuf}; use sha2::{Digest, Sha256}; use symphonia::core::formats::FormatOptions; use symphonia::core::io::MediaSourceStream; use symphonia::core::meta::MetadataOptions; use symphonia::core::probe::Hint; use crate::db::Database; use crate::error::{io_err, unix_now, CoreError, Result}; use tracing::instrument; /// Probe an audio file to extract its duration from metadata/headers. /// Returns `None` if the duration cannot be determined without full decode. fn probe_duration(path: &Path) -> Option { let file = fs::File::open(path).ok()?; let mss = MediaSourceStream::new(Box::new(file), Default::default()); let mut hint = Hint::new(); if let Some(ext) = path.extension().and_then(|e| e.to_str()) { hint.with_extension(ext); } if let Ok(probed) = symphonia::default::get_probe() .format(&hint, mss, &FormatOptions::default(), &MetadataOptions::default()) { let track = probed.format.default_track()?; let time_base = track.codec_params.time_base?; let n_frames = track.codec_params.n_frames?; let duration = time_base.calc_time(n_frames); return Some(duration.seconds as f64 + duration.frac); } // Fallback for WAV files Symphonia rejects (non-standard fmt chunk sizes) let is_wav = path.extension().and_then(|e| e.to_str()) .is_some_and(|e| e.eq_ignore_ascii_case("wav")); if is_wav { let reader = hound::WavReader::open(path).ok()?; let spec = reader.spec(); let n_samples = reader.len() as f64; let frames = n_samples / spec.channels as f64; return Some(frames / spec.sample_rate as f64); } None } /// Validate that a file extension contains only safe characters. /// /// Allows alphanumeric, dots, and hyphens (covers wav, mp3, flac, aiff, ogg, /// tar.gz, etc.). Rejects path separators, null bytes, and anything else that /// could be used for directory traversal. pub fn validate_extension(ext: &str) -> Result<()> { if !ext.is_empty() && !ext .bytes() .all(|b| b.is_ascii_alphanumeric() || b == b'.' || b == b'-') { return Err(CoreError::Internal(format!( "invalid file extension: {ext:?}" ))); } Ok(()) } /// Validate that a hash string is exactly 64 lowercase hex characters (SHA-256). pub fn validate_hash(hash: &str) -> Result<()> { if hash.len() != 64 || !hash.bytes().all(|b| b.is_ascii_hexdigit() && !b.is_ascii_uppercase()) { return Err(CoreError::HashInvalid(format!( "expected 64 lowercase hex chars, got {:?} ({} chars)", hash, hash.len() ))); } Ok(()) } /// Manages on-disk sample blobs in a flat directory structure, storing files as /// `{sha256_hex}.{ext}` directly in the root directory. /// /// Deduplication via SHA-256: import streams the file through a hasher and skips /// the copy if a blob with the same hash already exists. /// /// Thread-safe: all operations are stateless reads/writes against the filesystem /// (no interior mutability or locks). pub struct SampleStore { root: PathBuf, } impl SampleStore { /// Create a new sample store, ensuring the root directory exists. #[instrument(skip_all)] pub fn new(root: impl Into) -> Result { let root = root.into(); fs::create_dir_all(&root).map_err(|e| io_err(&root, e))?; Ok(Self { root }) } /// Import a file into the store: hash it, copy to content-addressed path, /// insert into DB. Returns the hex SHA-256 hash. #[instrument(skip_all)] pub fn import(&self, path: &Path, db: &Database) -> Result { if !crate::util::is_audio_file(path) { return Err(CoreError::Internal(format!( "not a supported audio file: {}", path.display() ))); } let mut file = fs::File::open(path).map_err(|e| io_err(path, e))?; let metadata = file.metadata().map_err(|e| io_err(path, e))?; let file_size = metadata.len() as i64; if file_size == 0 { return Err(CoreError::Internal(format!( "cannot import zero-byte file: {}", path.display() ))); } // Stream through SHA-256 let mut hasher = Sha256::new(); let mut buf = [0u8; 8192]; loop { let n = file.read(&mut buf).map_err(|e| io_err(path, e))?; if n == 0 { break; } hasher.update(&buf[..n]); } let hash = format!("{:x}", hasher.finalize()); // Resolve the blob extension from an existing row if this hash is already // known. The store is content-addressed (one blob per hash), so identical // bytes imported under a different extension must reuse the existing blob // rather than write a second `{hash}.{newext}` file: remove() resolves the // blob path from the DB row, so any divergent-extension copy would be // unreachable and leak disk forever. Query without the deleted_at filter so // the blob naming stays consistent even for soft-deleted rows. Fresh import // falls back to the incoming file's extension. let ext = match db.conn().query_row( "SELECT file_extension FROM samples WHERE hash = ?1", [&hash], |row| row.get::<_, String>(0), ) { Ok(existing) => existing, Err(rusqlite::Error::QueryReturnedNoRows) => crate::util::get_extension(path), Err(e) => return Err(CoreError::Db(e)), }; let original_name = crate::util::get_filename(path, "unknown"); // Probe duration from file headers (cheap, no full decode) let duration = probe_duration(path); // Copy file to store if not already present let dest = self.sample_path(&hash, &ext)?; if !dest.exists() { fs::copy(path, &dest).map_err(|e| io_err(&dest, e))?; } // Insert into DB (ignore if hash already exists) let now = unix_now(); db.conn().execute( "INSERT OR IGNORE INTO samples (hash, original_name, file_extension, file_size, import_date, last_modified, duration) VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7)", rusqlite::params![hash, original_name, ext, file_size, now, now, duration], )?; Ok(hash) } /// Check if a sample file exists in the store. pub fn exists(&self, hash: &str, ext: &str) -> Result { Ok(self.sample_path(hash, ext)?.exists()) } /// Get the filesystem path for a sample. /// /// Validates that `hash` is exactly 64 lowercase hex characters (SHA-256) /// to prevent directory traversal or malformed paths. pub fn sample_path(&self, hash: &str, ext: &str) -> Result { validate_hash(hash)?; validate_extension(ext)?; if ext.is_empty() { Ok(self.root.join(hash)) } else { Ok(self.root.join(format!("{hash}.{ext}"))) } } /// Remove a sample from store and database. CASCADE handles VFS/tag refs. /// /// Deletes the file from disk first, then the DB row. If the file delete /// fails (in-use on Windows, permission denied, etc.), the DB row is left /// intact so the user can retry; nothing leaks. The reverse order would /// silently leak orphan blobs on every file-delete failure, accumulating /// disk waste invisible to the user. #[instrument(skip_all)] pub fn remove(&self, hash: &str, db: &Database) -> Result<()> { let ext = sample_extension(db, hash)?; let path = self.sample_path(hash, &ext)?; // File first. ENOENT is fine — the row was already pointing at nothing. match fs::remove_file(&path) { Ok(()) => {} Err(e) if e.kind() == std::io::ErrorKind::NotFound => {} Err(e) => return Err(io_err(&path, e)), } // Then the DB row (CASCADE handles tags, vfs_nodes, etc.). db.conn() .execute("DELETE FROM samples WHERE hash = ?1", [hash])?; Ok(()) } /// Remove samples that are no longer referenced by any VFS node. /// /// Returns the number of orphaned samples removed. Each orphan is deleted /// from the database first (CASCADE handles tags, analysis, etc.), then the /// file blob is removed from disk. #[instrument(skip_all)] pub fn remove_orphaned_samples(&self, db: &Database) -> Result { // Skip tombstoned rows — the eventual hard-delete sweep // (docs/design-sample-deletion.md Phase 4) will get them when the // retention window expires. Double-processing here would race the // sweep and pre-emptively destroy still-recoverable data. let mut stmt = db.conn().prepare( "SELECT s.hash, s.file_extension FROM samples s LEFT JOIN vfs_nodes vn ON s.hash = vn.sample_hash WHERE vn.id IS NULL AND s.deleted_at IS NULL", )?; let orphans: Vec<(String, String)> = stmt .query_map([], |row| Ok((row.get(0)?, row.get(1)?)))? .collect::, _>>()?; let count = orphans.len(); // Delete all orphan DB rows in a single transaction so a concurrent // VFS link can't reference a sample between query and delete. db.transaction(|| { for (hash, _) in &orphans { db.conn() .execute("DELETE FROM samples WHERE hash = ?1", [hash])?; } Ok(()) })?; // Remove files after the transaction (orphaned blobs are harmless if this fails) for (hash, ext) in &orphans { if let Ok(path) = self.sample_path(hash, ext) && path.exists() { let _ = fs::remove_file(&path); } } Ok(count) } /// Get the store root directory. pub fn root(&self) -> &Path { &self.root } /// Re-hash a stored sample and compare against the expected hash. /// /// Returns `Ok(true)` if the file's SHA-256 matches `hash`, `Ok(false)` if /// it differs. Returns an error if the file cannot be read. #[instrument(skip_all)] pub fn verify_sample(&self, hash: &str, ext: &str) -> Result { let path = self.sample_path(hash, ext)?; let mut file = fs::File::open(&path).map_err(|e| io_err(&path, e))?; let mut hasher = Sha256::new(); let mut buf = [0u8; 8192]; loop { let n = file.read(&mut buf).map_err(|e| io_err(&path, e))?; if n == 0 { break; } hasher.update(&buf[..n]); } let computed = format!("{:x}", hasher.finalize()); Ok(computed == hash) } } // --- Sample metadata queries --- /// Helper to query a single text column from the samples table by hash. /// /// Only fields in the allowlist may be queried; any other value returns an error /// to prevent SQL injection through the interpolated column name. fn query_sample_field(db: &Database, hash: &str, field: &str) -> Result { const ALLOWED_FIELDS: &[&str] = &["file_extension", "original_name"]; if !ALLOWED_FIELDS.contains(&field) { return Err(CoreError::Internal(format!( "query_sample_field: disallowed field {field:?}" ))); } let sql = format!("SELECT {field} FROM samples WHERE hash = ?1 AND deleted_at IS NULL"); db.conn() .query_row(&sql, [hash], |row| row.get(0)) .map_err(|e| match e { rusqlite::Error::QueryReturnedNoRows => { CoreError::SampleNotFound(hash.to_string()) } other => CoreError::Db(other), }) } /// Look up the file extension for a sample by its hash. pub fn sample_extension(db: &Database, hash: &str) -> Result { query_sample_field(db, hash, "file_extension") } /// Look up the original filename for a sample by its hash. pub fn sample_original_name(db: &Database, hash: &str) -> Result { query_sample_field(db, hash, "original_name") } // --- Loose-files mode --- /// Look up the source_path for a sample (loose-files mode imports only). /// /// Returns `Ok(None)` for normal-mode samples (source_path is NULL). pub fn sample_source_path(db: &Database, hash: &str) -> Result> { db.conn() .query_row( "SELECT source_path FROM samples WHERE hash = ?1 AND deleted_at IS NULL", [hash], |row| row.get(0), ) .map_err(|e| match e { rusqlite::Error::QueryReturnedNoRows => CoreError::SampleNotFound(hash.to_string()), other => CoreError::Db(other), }) } /// Resolve the actual file path for a sample, checking source_path first. /// /// For loose-files mode samples (source_path is set), returns the source path if /// the file exists, otherwise falls back to the store path. For normal samples, /// returns the store path directly. pub fn resolve_file_path(store: &SampleStore, db: &Database, hash: &str, ext: &str) -> Result { if let Some(sp) = sample_source_path(db, hash)? { let source = PathBuf::from(&sp); if source.exists() { return Ok(source); } // Fallback: maybe user re-imported in normal mode or placed file manually let store_path = store.sample_path(hash, ext)?; if store_path.exists() { return Ok(store_path); } // Return the source path anyway — caller will handle the "not found" return Ok(source); } store.sample_path(hash, ext) } /// Update the source_path for a sample after verifying the new file's hash matches. /// /// Used to relocate an loose-files mode sample whose original file has moved. pub fn relocate_sample( store: &SampleStore, db: &Database, hash: &str, new_path: &Path, ) -> Result<()> { // Verify hash matches let mut file = fs::File::open(new_path).map_err(|e| io_err(new_path, e))?; let mut hasher = Sha256::new(); let mut buf = [0u8; 8192]; loop { let n = file.read(&mut buf).map_err(|e| io_err(new_path, e))?; if n == 0 { break; } hasher.update(&buf[..n]); } let computed = format!("{:x}", hasher.finalize()); if computed != hash { return Err(CoreError::Internal(format!( "hash mismatch: expected {hash}, got {computed} — this is a different file" ))); } let abs_path = new_path .canonicalize() .map_err(|e| io_err(new_path, e))? .to_string_lossy() .to_string(); let changed = db.conn().execute( "UPDATE samples SET source_path = ?1 WHERE hash = ?2", rusqlite::params![abs_path, hash], )?; if changed == 0 { return Err(CoreError::SampleNotFound(hash.to_string())); } let _ = store; // unused but passed for API consistency Ok(()) } /// Check integrity of loose-files mode samples. /// /// Returns `(valid, missing)` — counts of source_path entries where the file /// exists vs. does not exist on disk. pub fn check_loose_files_integrity(db: &Database) -> Result<(usize, usize)> { let mut stmt = db.conn().prepare( "SELECT source_path FROM samples WHERE source_path IS NOT NULL AND deleted_at IS NULL", )?; let paths: Vec = stmt .query_map([], |row| row.get(0))? .collect::, _>>()?; let mut valid = 0; let mut missing = 0; for p in &paths { if Path::new(p).exists() { valid += 1; } else { missing += 1; } } Ok((valid, missing)) } /// Try to re-locate loose-files mode samples whose source files have moved. /// /// Walks `search_root` recursively, building a basename map of candidate /// files. For each missing sample, looks up its basename, then hash-verifies /// candidates (cheapest: size-check before re-hashing the full file). On hash /// match, the sample's `source_path` is updated to the new location. /// /// Returns `(relocated, still_missing)`. `relocated` counts samples whose /// `source_path` was successfully repointed; `still_missing` is the residual /// count for the dialog to surface so the user can run Locate again against a /// different directory. pub fn relocate_missing_loose_files( db: &Database, search_root: &Path, ) -> Result<(usize, usize)> { // 1. Gather missing samples — hash, basename of stored source_path, and // the recorded file_size (used for cheap pre-filter before re-hashing). let mut stmt = db.conn().prepare( "SELECT hash, source_path, file_size FROM samples \ WHERE source_path IS NOT NULL AND deleted_at IS NULL", )?; let rows: Vec<(String, String, i64)> = stmt .query_map([], |row| Ok((row.get(0)?, row.get(1)?, row.get(2)?)))? .collect::, _>>()?; let missing: Vec<(String, String, i64)> = rows .into_iter() .filter(|(_, source_path, _)| !Path::new(source_path).exists()) .collect(); if missing.is_empty() { return Ok((0, 0)); } // 2. Walk search_root, build basename -> Vec. Lowercased so a // case-changed filesystem (e.g. moved between macOS/Linux) still // matches. Bounded by what the filesystem returns; large trees walk // once and stay in memory for the duration of this call. let mut candidates: std::collections::HashMap> = std::collections::HashMap::new(); let mut dirs = vec![search_root.to_path_buf()]; while let Some(d) = dirs.pop() { let Ok(entries) = std::fs::read_dir(&d) else { continue }; for entry in entries.flatten() { let path = entry.path(); if path.is_dir() { if !crate::util::is_macos_metadata_dir(&path) { dirs.push(path); } } else if let Some(name) = path.file_name().and_then(|n| n.to_str()) { candidates .entry(name.to_lowercase()) .or_default() .push(path); } } } // 3. For each missing sample, check candidates with matching basename. // Size check filters out same-name-different-file collisions before we // spend cycles hashing. Hash verify is the authoritative match. let mut relocated_pairs: Vec<(String, String)> = Vec::new(); for (hash, source_path, file_size) in &missing { let Some(basename) = Path::new(source_path) .file_name() .and_then(|n| n.to_str()) else { continue }; let key = basename.to_lowercase(); let Some(paths) = candidates.get(&key) else { continue }; for cand in paths { let Ok(md) = std::fs::metadata(cand) else { continue }; if md.len() as i64 != *file_size { continue; } // Hash verify. Bail on first match; sample hashes are unique so a // second match for the same hash would be redundant. let Ok(mut file) = fs::File::open(cand) else { continue }; let mut hasher = Sha256::new(); let mut buf = [0u8; 8192]; let ok = loop { let Ok(n) = file.read(&mut buf) else { break false }; if n == 0 { break true; } hasher.update(&buf[..n]); }; if !ok { continue; } let computed = format!("{:x}", hasher.finalize()); if computed == *hash { let abs = cand .canonicalize() .map(|p| p.to_string_lossy().to_string()) .unwrap_or_else(|_| cand.to_string_lossy().to_string()); relocated_pairs.push((hash.clone(), abs)); break; } } } // 4. Atomic update of all relocated source_paths. let relocated = relocated_pairs.len(); if relocated > 0 { db.transaction(|| { for (hash, new_path) in &relocated_pairs { db.conn().execute( "UPDATE samples SET source_path = ?1 WHERE hash = ?2", rusqlite::params![new_path, hash], )?; } Ok(()) })?; } let still_missing = missing.len() - relocated; Ok((relocated, still_missing)) } /// Delete all loose-files mode samples whose source files no longer exist on disk. /// /// Returns the number of samples purged. CASCADE handles VFS nodes, tags, etc. pub fn purge_missing_loose_files(db: &Database) -> Result { let mut stmt = db.conn().prepare( "SELECT hash, source_path FROM samples WHERE source_path IS NOT NULL AND deleted_at IS NULL", )?; let rows: Vec<(String, String)> = stmt .query_map([], |row| Ok((row.get(0)?, row.get(1)?)))? .collect::, _>>()?; // Collect hashes to purge, then delete atomically in one transaction. let to_purge: Vec<&str> = rows .iter() .filter(|(_, source_path)| !Path::new(source_path).exists()) .map(|(hash, _)| hash.as_str()) .collect(); let purged = to_purge.len(); if purged > 0 { db.transaction(|| { for hash in &to_purge { db.conn() .execute("DELETE FROM samples WHERE hash = ?1", [hash])?; } Ok(()) })?; } Ok(purged) } impl SampleStore { /// Import a file in loose-files mode: hash it but do NOT copy to the store. /// /// Records the original absolute path as `source_path` in the database. /// The file stays where it is on disk. #[instrument(skip_all)] pub fn import_loose_files(&self, path: &Path, db: &Database) -> Result { if !crate::util::is_audio_file(path) { return Err(CoreError::Internal(format!( "not a supported audio file: {}", path.display() ))); } let mut file = fs::File::open(path).map_err(|e| io_err(path, e))?; let metadata = file.metadata().map_err(|e| io_err(path, e))?; let file_size = metadata.len() as i64; if file_size == 0 { return Err(CoreError::Internal(format!( "cannot import zero-byte file: {}", path.display() ))); } // Stream through SHA-256 let mut hasher = Sha256::new(); let mut buf = [0u8; 8192]; loop { let n = file.read(&mut buf).map_err(|e| io_err(path, e))?; if n == 0 { break; } hasher.update(&buf[..n]); } let hash = format!("{:x}", hasher.finalize()); let ext = crate::util::get_extension(path); let original_name = crate::util::get_filename(path, "unknown"); // Probe duration from file headers (cheap, no full decode) let duration = probe_duration(path); // Resolve absolute path for storage let abs_path = path .canonicalize() .map_err(|e| io_err(path, e))? .to_string_lossy() .to_string(); // Insert into DB with source_path (ignore if hash already exists) let now = unix_now(); db.conn().execute( "INSERT OR IGNORE INTO samples (hash, original_name, file_extension, file_size, import_date, last_modified, duration, source_path) VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8)", rusqlite::params![hash, original_name, ext, file_size, now, now, duration, abs_path], )?; Ok(hash) } } #[cfg(test)] mod tests { use super::*; use std::io::Write; use tempfile::TempDir; fn setup() -> (TempDir, Database, SampleStore) { let dir = TempDir::new().unwrap(); let db = Database::open_in_memory().unwrap(); let store_dir = dir.path().join("store"); let store = SampleStore::new(&store_dir).unwrap(); (dir, db, store) } fn create_test_file(dir: &TempDir, name: &str, content: &[u8]) -> PathBuf { let path = dir.path().join(name); let mut f = fs::File::create(&path).unwrap(); f.write_all(content).unwrap(); path } #[test] fn import_creates_file_and_row() { let (dir, db, store) = setup(); let src = create_test_file(&dir, "kick.wav", b"fake audio data"); let hash = store.import(&src, &db).unwrap(); // File exists in store assert!(store.exists(&hash, "wav").unwrap()); // Row exists in DB let count: i64 = db .conn() .query_row( "SELECT COUNT(*) FROM samples WHERE hash = ?1", [&hash], |row| row.get(0), ) .unwrap(); assert_eq!(count, 1); } #[test] fn import_deduplicates() { let (dir, db, store) = setup(); let src = create_test_file(&dir, "kick.wav", b"same content"); let hash1 = store.import(&src, &db).unwrap(); let hash2 = store.import(&src, &db).unwrap(); assert_eq!(hash1, hash2); // Only one row let count: i64 = db .conn() .query_row("SELECT COUNT(*) FROM samples", [], |row| row.get(0)) .unwrap(); assert_eq!(count, 1); } #[test] fn import_same_bytes_different_extension_reuses_blob() { let (dir, db, store) = setup(); // Identical bytes, two different audio extensions (is_audio_file keys on // extension, so the content can be arbitrary). let wav = create_test_file(&dir, "loop.wav", b"identical bytes"); let aiff = create_test_file(&dir, "loop.aiff", b"identical bytes"); let h1 = store.import(&wav, &db).unwrap(); let h2 = store.import(&aiff, &db).unwrap(); assert_eq!(h1, h2, "identical bytes hash to the same sample"); // Exactly one blob on disk: the second import must reuse `{hash}.wav`, not // write an unreachable `{hash}.aiff` orphan. let blob_count = || { std::fs::read_dir(store.root()) .unwrap() .filter_map(|e| e.ok()) .filter(|e| e.path().is_file()) .count() }; assert_eq!(blob_count(), 1, "second extension must reuse the first blob"); // And remove() leaves nothing behind — the orphan would otherwise survive, // since remove() resolves the blob path from the DB row's extension. store.remove(&h1, &db).unwrap(); assert_eq!(blob_count(), 0, "no orphan blob remains after remove"); } #[test] fn remove_deletes_file_and_row() { let (dir, db, store) = setup(); let src = create_test_file(&dir, "snare.wav", b"snare data"); let hash = store.import(&src, &db).unwrap(); assert!(store.exists(&hash, "wav").unwrap()); store.remove(&hash, &db).unwrap(); assert!(!store.exists(&hash, "wav").unwrap()); let count: i64 = db .conn() .query_row("SELECT COUNT(*) FROM samples", [], |row| row.get(0)) .unwrap(); assert_eq!(count, 0); } #[test] fn remove_nonexistent_returns_error() { let (_dir, db, store) = setup(); // Use a valid 64-char hex hash that doesn't exist in the DB let fake_hash = "a".repeat(64); let result = store.remove(&fake_hash, &db); assert!(matches!(result, Err(CoreError::SampleNotFound(_)))); } #[test] fn sample_path_rejects_traversal() { let (_dir, _db, store) = setup(); let result = store.sample_path("../../../etc/passwd", "wav"); assert!(matches!(result, Err(CoreError::HashInvalid(_)))); } #[test] fn sample_path_rejects_short_hash() { let (_dir, _db, store) = setup(); let result = store.sample_path("abcdef", "wav"); assert!(matches!(result, Err(CoreError::HashInvalid(_)))); } #[test] fn sample_path_rejects_uppercase() { let (_dir, _db, store) = setup(); let hash = "A".repeat(64); let result = store.sample_path(&hash, "wav"); assert!(matches!(result, Err(CoreError::HashInvalid(_)))); } #[test] fn sample_path_accepts_valid_hash() { let (_dir, _db, store) = setup(); let hash = "a1b2c3d4e5f6".to_string() + &"0".repeat(52); let path = store.sample_path(&hash, "wav").unwrap(); assert!(path.to_string_lossy().ends_with(".wav")); } #[test] fn sample_path_rejects_traversal_in_extension() { let (_dir, _db, store) = setup(); let hash = "a".repeat(64); let result = store.sample_path(&hash, "../etc/passwd"); assert!(matches!(result, Err(CoreError::Internal(_)))); } #[test] fn sample_path_rejects_path_separator_in_extension() { let (_dir, _db, store) = setup(); let hash = "a".repeat(64); let result = store.sample_path(&hash, "wav/../../etc"); assert!(matches!(result, Err(CoreError::Internal(_)))); } #[test] fn sample_path_accepts_common_extensions() { let (_dir, _db, store) = setup(); let hash = "a".repeat(64); for ext in &["wav", "mp3", "flac", "aiff", "ogg", "tar.gz"] { assert!(store.sample_path(&hash, ext).is_ok(), "rejected valid ext: {ext}"); } } #[test] fn query_sample_field_rejects_disallowed_field() { let (_dir, db, _store) = setup(); let hash = "a".repeat(64); let result = query_sample_field(&db, &hash, "hash; DROP TABLE samples --"); assert!(matches!(result, Err(CoreError::Internal(_)))); } #[test] fn verify_sample_matches_after_import() { let (dir, db, store) = setup(); let src = create_test_file(&dir, "hihat.wav", b"hihat audio data"); let hash = store.import(&src, &db).unwrap(); assert!(store.verify_sample(&hash, "wav").unwrap()); } #[test] fn verify_sample_detects_corruption() { let (dir, db, store) = setup(); let src = create_test_file(&dir, "snare.wav", b"original data"); let hash = store.import(&src, &db).unwrap(); // Corrupt the stored file let stored_path = store.sample_path(&hash, "wav").unwrap(); fs::write(&stored_path, b"corrupted data").unwrap(); assert!(!store.verify_sample(&hash, "wav").unwrap()); } #[test] fn verify_sample_errors_on_missing_file() { let (_dir, _db, store) = setup(); let fake_hash = "b".repeat(64); let result = store.verify_sample(&fake_hash, "wav"); assert!(matches!(result, Err(CoreError::Io { .. }))); } #[test] fn import_rejects_zero_byte_file() { let (dir, db, store) = setup(); let src = create_test_file(&dir, "empty.wav", b""); let result = store.import(&src, &db); assert!(result.is_err()); let err_msg = format!("{}", result.unwrap_err()); assert!( err_msg.contains("zero-byte"), "expected zero-byte error, got: {err_msg}" ); // No row should have been inserted let count: i64 = db .conn() .query_row("SELECT COUNT(*) FROM samples", [], |row| row.get(0)) .unwrap(); assert_eq!(count, 0); } #[test] fn import_accepts_non_empty_file() { let (dir, db, store) = setup(); let src = create_test_file(&dir, "valid.wav", b"audio content"); let hash = store.import(&src, &db).unwrap(); assert!(!hash.is_empty()); assert!(store.exists(&hash, "wav").unwrap()); } #[test] fn remove_tolerates_missing_file() { // If the blob has already been deleted out from under us (manual rm, // crash mid-remove, etc.), the DB row should still be cleaned up. let (dir, db, store) = setup(); let src = create_test_file(&dir, "ghost.wav", b"ghost data"); let hash = store.import(&src, &db).unwrap(); let stored = store.sample_path(&hash, "wav").unwrap(); fs::remove_file(&stored).unwrap(); store.remove(&hash, &db).unwrap(); let count: i64 = db .conn() .query_row("SELECT COUNT(*) FROM samples", [], |row| row.get(0)) .unwrap(); assert_eq!(count, 0); } #[test] fn remove_deletes_file_before_db_row() { // Verify that after remove(), both the DB row and the file are gone. // The ordering guarantee (file first, then DB row) means a dangling DB // row is the only possible failure mode — never an orphaned blob. let (dir, db, store) = setup(); let src = create_test_file(&dir, "tom.wav", b"tom data"); let hash = store.import(&src, &db).unwrap(); let stored_path = store.sample_path(&hash, "wav").unwrap(); assert!(stored_path.exists()); store.remove(&hash, &db).unwrap(); // DB row gone let count: i64 = db .conn() .query_row("SELECT COUNT(*) FROM samples", [], |row| row.get(0)) .unwrap(); assert_eq!(count, 0); // File gone assert!(!stored_path.exists()); } #[test] fn remove_orphaned_samples_cleans_unreferenced() { let (dir, db, store) = setup(); let src1 = create_test_file(&dir, "kick.wav", b"kick data"); let src2 = create_test_file(&dir, "snare.wav", b"snare data"); let hash1 = store.import(&src1, &db).unwrap(); let hash2 = store.import(&src2, &db).unwrap(); // Create a VFS and link only hash1 let vfs_id = crate::vfs::create_vfs(&db, "Lib").unwrap(); crate::vfs::create_sample_link(&db, vfs_id, None, "kick.wav", &hash1).unwrap(); // hash2 is orphaned (no VFS node), hash1 is referenced let removed = store.remove_orphaned_samples(&db).unwrap(); assert_eq!(removed, 1); // hash1 still exists, hash2 is gone assert!(store.exists(&hash1, "wav").unwrap()); assert!(!store.exists(&hash2, "wav").unwrap()); let count: i64 = db .conn() .query_row("SELECT COUNT(*) FROM samples", [], |row| row.get(0)) .unwrap(); assert_eq!(count, 1); } #[test] fn remove_orphaned_samples_keeps_referenced() { let (dir, db, store) = setup(); let src = create_test_file(&dir, "hat.wav", b"hat data"); let hash = store.import(&src, &db).unwrap(); let vfs_id = crate::vfs::create_vfs(&db, "Lib").unwrap(); crate::vfs::create_sample_link(&db, vfs_id, None, "hat.wav", &hash).unwrap(); let removed = store.remove_orphaned_samples(&db).unwrap(); assert_eq!(removed, 0); assert!(store.exists(&hash, "wav").unwrap()); } #[test] fn remove_orphaned_after_vfs_delete() { let (dir, db, store) = setup(); let src = create_test_file(&dir, "clap.wav", b"clap data"); let hash = store.import(&src, &db).unwrap(); let vfs_id = crate::vfs::create_vfs(&db, "Lib").unwrap(); crate::vfs::create_sample_link(&db, vfs_id, None, "clap.wav", &hash).unwrap(); // Delete the VFS (cascades to vfs_nodes) crate::vfs::delete_vfs(&db, vfs_id).unwrap(); // Sample is now orphaned let removed = store.remove_orphaned_samples(&db).unwrap(); assert_eq!(removed, 1); assert!(!store.exists(&hash, "wav").unwrap()); } // --- Loose-files mode tests --- #[test] fn import_loose_files_does_not_copy_file() { let (dir, db, store) = setup(); let src = create_test_file(&dir, "kick.wav", b"unsafe kick data"); let hash = store.import_loose_files(&src, &db).unwrap(); // No file in the store assert!(!store.exists(&hash, "wav").unwrap()); // Row exists in DB with source_path set let sp: Option = db .conn() .query_row( "SELECT source_path FROM samples WHERE hash = ?1", [&hash], |row| row.get(0), ) .unwrap(); assert!(sp.is_some()); assert!(sp.unwrap().ends_with("kick.wav")); } #[test] fn import_loose_files_deduplicates() { let (dir, db, store) = setup(); let src = create_test_file(&dir, "kick.wav", b"same unsafe content"); let hash1 = store.import_loose_files(&src, &db).unwrap(); let hash2 = store.import_loose_files(&src, &db).unwrap(); assert_eq!(hash1, hash2); let count: i64 = db .conn() .query_row("SELECT COUNT(*) FROM samples", [], |row| row.get(0)) .unwrap(); assert_eq!(count, 1); } #[test] fn sample_source_path_returns_none_for_normal() { let (dir, db, store) = setup(); let src = create_test_file(&dir, "kick.wav", b"normal import"); let hash = store.import(&src, &db).unwrap(); assert!(sample_source_path(&db, &hash).unwrap().is_none()); } #[test] fn sample_source_path_returns_path_for_loose_files() { let (dir, db, store) = setup(); let src = create_test_file(&dir, "kick.wav", b"unsafe import"); let hash = store.import_loose_files(&src, &db).unwrap(); let sp = sample_source_path(&db, &hash).unwrap(); assert!(sp.is_some()); } #[test] fn resolve_file_path_prefers_source_path() { let (dir, db, store) = setup(); let src = create_test_file(&dir, "kick.wav", b"unsafe resolve test"); let hash = store.import_loose_files(&src, &db).unwrap(); let resolved = resolve_file_path(&store, &db, &hash, "wav").unwrap(); // Should resolve to the original file, not the store assert!(!resolved.starts_with(store.root())); } #[test] fn resolve_file_path_falls_back_to_store() { let (dir, db, store) = setup(); let src = create_test_file(&dir, "kick.wav", b"fallback test"); // Import normally (file exists in store) let hash = store.import(&src, &db).unwrap(); let resolved = resolve_file_path(&store, &db, &hash, "wav").unwrap(); assert!(resolved.starts_with(store.root())); } #[test] fn relocate_sample_rejects_hash_mismatch() { let (dir, db, store) = setup(); let src = create_test_file(&dir, "kick.wav", b"original content"); let hash = store.import_loose_files(&src, &db).unwrap(); let wrong_file = create_test_file(&dir, "snare.wav", b"different content"); let result = relocate_sample(&store, &db, &hash, &wrong_file); assert!(result.is_err()); assert!(result.unwrap_err().to_string().contains("hash mismatch")); } #[test] fn relocate_sample_updates_source_path() { let (dir, db, store) = setup(); let src = create_test_file(&dir, "kick.wav", b"relocate content"); let hash = store.import_loose_files(&src, &db).unwrap(); // Move the file let new_loc = dir.path().join("moved_kick.wav"); fs::copy(&src, &new_loc).unwrap(); relocate_sample(&store, &db, &hash, &new_loc).unwrap(); let sp = sample_source_path(&db, &hash).unwrap().unwrap(); assert!(sp.contains("moved_kick.wav")); } #[test] fn check_loose_files_integrity_counts_correctly() { let (dir, db, store) = setup(); let src1 = create_test_file(&dir, "kick.wav", b"integrity kick"); let src2 = create_test_file(&dir, "snare.wav", b"integrity snare"); store.import_loose_files(&src1, &db).unwrap(); let hash2 = store.import_loose_files(&src2, &db).unwrap(); // Delete snare from disk to simulate missing file let sp = sample_source_path(&db, &hash2).unwrap().unwrap(); fs::remove_file(&sp).unwrap(); let (valid, missing) = check_loose_files_integrity(&db).unwrap(); assert_eq!(valid, 1); assert_eq!(missing, 1); } #[test] fn purge_missing_loose_files_removes_only_missing() { let (dir, db, store) = setup(); let src1 = create_test_file(&dir, "kick.wav", b"purge kick"); let src2 = create_test_file(&dir, "snare.wav", b"purge snare"); let hash1 = store.import_loose_files(&src1, &db).unwrap(); let hash2 = store.import_loose_files(&src2, &db).unwrap(); // Delete snare from disk let sp = sample_source_path(&db, &hash2).unwrap().unwrap(); fs::remove_file(&sp).unwrap(); let purged = purge_missing_loose_files(&db).unwrap(); assert_eq!(purged, 1); // kick still exists, snare is gone assert!(sample_source_path(&db, &hash1).is_ok()); assert!(matches!( sample_source_path(&db, &hash2), Err(CoreError::SampleNotFound(_)) )); } #[test] fn purge_missing_loose_files_noop_when_all_valid() { let (dir, db, store) = setup(); let src = create_test_file(&dir, "kick.wav", b"all valid"); store.import_loose_files(&src, &db).unwrap(); let purged = purge_missing_loose_files(&db).unwrap(); assert_eq!(purged, 0); } }