Skip to main content

max / audiofiles

41.5 KB · 1165 lines History Blame Raw
1 //! Content-addressed sample storage: imports files by SHA-256 hash, deduplicates, and manages on-disk blobs.
2 //!
3 //! ## Why content-addressed storage
4 //!
5 //! - **Dedup by design:** Importing the same file twice is a no-op (same hash = same row).
6 //! Users often have the same sample in multiple folders.
7 //! - **Sync-friendly:** Hash is a stable, globally unique identifier across devices. No UUID
8 //! collisions, no server-assigned IDs, no coordination needed during offline edits.
9 //! - **Cloud eviction:** Setting `cloud_only=true` deletes the local blob while keeping the
10 //! metadata row. The hash lets SyncKit re-download the exact file from blob storage later.
11
12 use std::fs;
13 use std::io::Read;
14 use std::path::{Path, PathBuf};
15
16 use sha2::{Digest, Sha256};
17 use symphonia::core::formats::FormatOptions;
18 use symphonia::core::io::MediaSourceStream;
19 use symphonia::core::meta::MetadataOptions;
20 use symphonia::core::probe::Hint;
21
22 use crate::db::Database;
23 use crate::error::{io_err, unix_now, CoreError, Result};
24 use tracing::instrument;
25
26 /// Probe an audio file to extract its duration from metadata/headers.
27 /// Returns `None` if the duration cannot be determined without full decode.
28 fn probe_duration(path: &Path) -> Option<f64> {
29 let file = fs::File::open(path).ok()?;
30 let mss = MediaSourceStream::new(Box::new(file), Default::default());
31 let mut hint = Hint::new();
32 if let Some(ext) = path.extension().and_then(|e| e.to_str()) {
33 hint.with_extension(ext);
34 }
35 if let Ok(probed) = symphonia::default::get_probe()
36 .format(&hint, mss, &FormatOptions::default(), &MetadataOptions::default())
37 {
38 let track = probed.format.default_track()?;
39 let time_base = track.codec_params.time_base?;
40 let n_frames = track.codec_params.n_frames?;
41 let duration = time_base.calc_time(n_frames);
42 return Some(duration.seconds as f64 + duration.frac);
43 }
44 // Fallback for WAV files Symphonia rejects (non-standard fmt chunk sizes)
45 let is_wav = path.extension().and_then(|e| e.to_str())
46 .is_some_and(|e| e.eq_ignore_ascii_case("wav"));
47 if is_wav {
48 let reader = hound::WavReader::open(path).ok()?;
49 let spec = reader.spec();
50 let n_samples = reader.len() as f64;
51 let frames = n_samples / spec.channels as f64;
52 return Some(frames / spec.sample_rate as f64);
53 }
54 None
55 }
56
57 /// Validate that a file extension contains only safe characters.
58 ///
59 /// Allows alphanumeric, dots, and hyphens (covers wav, mp3, flac, aiff, ogg,
60 /// tar.gz, etc.). Rejects path separators, null bytes, and anything else that
61 /// could be used for directory traversal.
62 pub fn validate_extension(ext: &str) -> Result<()> {
63 if !ext.is_empty()
64 && !ext
65 .bytes()
66 .all(|b| b.is_ascii_alphanumeric() || b == b'.' || b == b'-')
67 {
68 return Err(CoreError::Internal(format!(
69 "invalid file extension: {ext:?}"
70 )));
71 }
72 Ok(())
73 }
74
75 /// Validate that a hash string is exactly 64 lowercase hex characters (SHA-256).
76 pub fn validate_hash(hash: &str) -> Result<()> {
77 if hash.len() != 64 || !hash.bytes().all(|b| b.is_ascii_hexdigit() && !b.is_ascii_uppercase())
78 {
79 return Err(CoreError::HashInvalid(format!(
80 "expected 64 lowercase hex chars, got {:?} ({} chars)",
81 hash,
82 hash.len()
83 )));
84 }
85 Ok(())
86 }
87
/// Manages on-disk sample blobs in a flat directory, storing each file as
/// `{sha256_hex}.{ext}` directly under the root.
///
/// Deduplication is by SHA-256: import streams the file through a hasher and
/// skips the copy when a blob with the same hash already exists.
///
/// The struct holds only the root path — no locks and no interior mutability —
/// so every operation is a plain read or write against the filesystem.
///
/// `Debug` is derived so the store can appear in logs and in `Debug` output of
/// types that embed it.
#[derive(Debug)]
pub struct SampleStore {
    /// Directory that contains every blob managed by this store.
    root: PathBuf,
}
99
100 impl SampleStore {
101 /// Create a new sample store, ensuring the root directory exists.
102 #[instrument(skip_all)]
103 pub fn new(root: impl Into<PathBuf>) -> Result<Self> {
104 let root = root.into();
105 fs::create_dir_all(&root).map_err(|e| io_err(&root, e))?;
106 Ok(Self { root })
107 }
108
109 /// Import a file into the store: hash it, copy to content-addressed path,
110 /// insert into DB. Returns the hex SHA-256 hash.
111 #[instrument(skip_all)]
112 pub fn import(&self, path: &Path, db: &Database) -> Result<String> {
113 if !crate::util::is_audio_file(path) {
114 return Err(CoreError::Internal(format!(
115 "not a supported audio file: {}",
116 path.display()
117 )));
118 }
119
120 let mut file = fs::File::open(path).map_err(|e| io_err(path, e))?;
121 let metadata = file.metadata().map_err(|e| io_err(path, e))?;
122 let file_size = metadata.len() as i64;
123
124 if file_size == 0 {
125 return Err(CoreError::Internal(format!(
126 "cannot import zero-byte file: {}",
127 path.display()
128 )));
129 }
130
131 // Stream through SHA-256
132 let mut hasher = Sha256::new();
133 let mut buf = [0u8; 8192];
134 loop {
135 let n = file.read(&mut buf).map_err(|e| io_err(path, e))?;
136 if n == 0 {
137 break;
138 }
139 hasher.update(&buf[..n]);
140 }
141 let hash = format!("{:x}", hasher.finalize());
142
143 // Resolve the blob extension from an existing row if this hash is already
144 // known. The store is content-addressed (one blob per hash), so identical
145 // bytes imported under a different extension must reuse the existing blob
146 // rather than write a second `{hash}.{newext}` file: remove() resolves the
147 // blob path from the DB row, so any divergent-extension copy would be
148 // unreachable and leak disk forever. Query without the deleted_at filter so
149 // the blob naming stays consistent even for soft-deleted rows. Fresh import
150 // falls back to the incoming file's extension.
151 let ext = match db.conn().query_row(
152 "SELECT file_extension FROM samples WHERE hash = ?1",
153 [&hash],
154 |row| row.get::<_, String>(0),
155 ) {
156 Ok(existing) => existing,
157 Err(rusqlite::Error::QueryReturnedNoRows) => crate::util::get_extension(path),
158 Err(e) => return Err(CoreError::Db(e)),
159 };
160 let original_name = crate::util::get_filename(path, "unknown");
161
162 // Probe duration from file headers (cheap, no full decode)
163 let duration = probe_duration(path);
164
165 // Copy file to store if not already present
166 let dest = self.sample_path(&hash, &ext)?;
167 if !dest.exists() {
168 fs::copy(path, &dest).map_err(|e| io_err(&dest, e))?;
169 }
170
171 // Insert into DB (ignore if hash already exists)
172 let now = unix_now();
173 db.conn().execute(
174 "INSERT OR IGNORE INTO samples (hash, original_name, file_extension, file_size, import_date, last_modified, duration)
175 VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7)",
176 rusqlite::params![hash, original_name, ext, file_size, now, now, duration],
177 )?;
178
179 Ok(hash)
180 }
181
182 /// Check if a sample file exists in the store.
183 pub fn exists(&self, hash: &str, ext: &str) -> Result<bool> {
184 Ok(self.sample_path(hash, ext)?.exists())
185 }
186
187 /// Get the filesystem path for a sample.
188 ///
189 /// Validates that `hash` is exactly 64 lowercase hex characters (SHA-256)
190 /// to prevent directory traversal or malformed paths.
191 pub fn sample_path(&self, hash: &str, ext: &str) -> Result<PathBuf> {
192 validate_hash(hash)?;
193 validate_extension(ext)?;
194 if ext.is_empty() {
195 Ok(self.root.join(hash))
196 } else {
197 Ok(self.root.join(format!("{hash}.{ext}")))
198 }
199 }
200
201 /// Remove a sample from store and database. CASCADE handles VFS/tag refs.
202 ///
203 /// Deletes the file from disk first, then the DB row. If the file delete
204 /// fails (in-use on Windows, permission denied, etc.), the DB row is left
205 /// intact so the user can retry; nothing leaks. The reverse order would
206 /// silently leak orphan blobs on every file-delete failure, accumulating
207 /// disk waste invisible to the user.
208 #[instrument(skip_all)]
209 pub fn remove(&self, hash: &str, db: &Database) -> Result<()> {
210 let ext = sample_extension(db, hash)?;
211 let path = self.sample_path(hash, &ext)?;
212
213 // File first. ENOENT is fine — the row was already pointing at nothing.
214 match fs::remove_file(&path) {
215 Ok(()) => {}
216 Err(e) if e.kind() == std::io::ErrorKind::NotFound => {}
217 Err(e) => return Err(io_err(&path, e)),
218 }
219
220 // Then the DB row (CASCADE handles tags, vfs_nodes, etc.).
221 db.conn()
222 .execute("DELETE FROM samples WHERE hash = ?1", [hash])?;
223
224 Ok(())
225 }
226
227 /// Remove samples that are no longer referenced by any VFS node.
228 ///
229 /// Returns the number of orphaned samples removed. Each orphan is deleted
230 /// from the database first (CASCADE handles tags, analysis, etc.), then the
231 /// file blob is removed from disk.
232 #[instrument(skip_all)]
233 pub fn remove_orphaned_samples(&self, db: &Database) -> Result<usize> {
234 // Skip tombstoned rows — the eventual hard-delete sweep
235 // (docs/design-sample-deletion.md Phase 4) will get them when the
236 // retention window expires. Double-processing here would race the
237 // sweep and pre-emptively destroy still-recoverable data.
238 let mut stmt = db.conn().prepare(
239 "SELECT s.hash, s.file_extension
240 FROM samples s
241 LEFT JOIN vfs_nodes vn ON s.hash = vn.sample_hash
242 WHERE vn.id IS NULL AND s.deleted_at IS NULL",
243 )?;
244 let orphans: Vec<(String, String)> = stmt
245 .query_map([], |row| Ok((row.get(0)?, row.get(1)?)))?
246 .collect::<std::result::Result<Vec<_>, _>>()?;
247
248 let count = orphans.len();
249
250 // Delete all orphan DB rows in a single transaction so a concurrent
251 // VFS link can't reference a sample between query and delete.
252 db.transaction(|| {
253 for (hash, _) in &orphans {
254 db.conn()
255 .execute("DELETE FROM samples WHERE hash = ?1", [hash])?;
256 }
257 Ok(())
258 })?;
259
260 // Remove files after the transaction (orphaned blobs are harmless if this fails)
261 for (hash, ext) in &orphans {
262 if let Ok(path) = self.sample_path(hash, ext)
263 && path.exists() {
264 let _ = fs::remove_file(&path);
265 }
266 }
267 Ok(count)
268 }
269
270 /// Get the store root directory.
271 pub fn root(&self) -> &Path {
272 &self.root
273 }
274
275 /// Re-hash a stored sample and compare against the expected hash.
276 ///
277 /// Returns `Ok(true)` if the file's SHA-256 matches `hash`, `Ok(false)` if
278 /// it differs. Returns an error if the file cannot be read.
279 #[instrument(skip_all)]
280 pub fn verify_sample(&self, hash: &str, ext: &str) -> Result<bool> {
281 let path = self.sample_path(hash, ext)?;
282 let mut file = fs::File::open(&path).map_err(|e| io_err(&path, e))?;
283
284 let mut hasher = Sha256::new();
285 let mut buf = [0u8; 8192];
286 loop {
287 let n = file.read(&mut buf).map_err(|e| io_err(&path, e))?;
288 if n == 0 {
289 break;
290 }
291 hasher.update(&buf[..n]);
292 }
293 let computed = format!("{:x}", hasher.finalize());
294
295 Ok(computed == hash)
296 }
297 }
298
299 // --- Sample metadata queries ---
300
301 /// Helper to query a single text column from the samples table by hash.
302 ///
303 /// Only fields in the allowlist may be queried; any other value returns an error
304 /// to prevent SQL injection through the interpolated column name.
305 fn query_sample_field(db: &Database, hash: &str, field: &str) -> Result<String> {
306 const ALLOWED_FIELDS: &[&str] = &["file_extension", "original_name"];
307
308 if !ALLOWED_FIELDS.contains(&field) {
309 return Err(CoreError::Internal(format!(
310 "query_sample_field: disallowed field {field:?}"
311 )));
312 }
313
314 let sql = format!("SELECT {field} FROM samples WHERE hash = ?1 AND deleted_at IS NULL");
315 db.conn()
316 .query_row(&sql, [hash], |row| row.get(0))
317 .map_err(|e| match e {
318 rusqlite::Error::QueryReturnedNoRows => {
319 CoreError::SampleNotFound(hash.to_string())
320 }
321 other => CoreError::Db(other),
322 })
323 }
324
325 /// Look up the file extension for a sample by its hash.
326 pub fn sample_extension(db: &Database, hash: &str) -> Result<String> {
327 query_sample_field(db, hash, "file_extension")
328 }
329
330 /// Look up the original filename for a sample by its hash.
331 pub fn sample_original_name(db: &Database, hash: &str) -> Result<String> {
332 query_sample_field(db, hash, "original_name")
333 }
334
335 // --- Loose-files mode ---
336
337 /// Look up the source_path for a sample (loose-files mode imports only).
338 ///
339 /// Returns `Ok(None)` for normal-mode samples (source_path is NULL).
340 pub fn sample_source_path(db: &Database, hash: &str) -> Result<Option<String>> {
341 db.conn()
342 .query_row(
343 "SELECT source_path FROM samples WHERE hash = ?1 AND deleted_at IS NULL",
344 [hash],
345 |row| row.get(0),
346 )
347 .map_err(|e| match e {
348 rusqlite::Error::QueryReturnedNoRows => CoreError::SampleNotFound(hash.to_string()),
349 other => CoreError::Db(other),
350 })
351 }
352
353 /// Resolve the actual file path for a sample, checking source_path first.
354 ///
355 /// For loose-files mode samples (source_path is set), returns the source path if
356 /// the file exists, otherwise falls back to the store path. For normal samples,
357 /// returns the store path directly.
358 pub fn resolve_file_path(store: &SampleStore, db: &Database, hash: &str, ext: &str) -> Result<PathBuf> {
359 if let Some(sp) = sample_source_path(db, hash)? {
360 let source = PathBuf::from(&sp);
361 if source.exists() {
362 return Ok(source);
363 }
364 // Fallback: maybe user re-imported in normal mode or placed file manually
365 let store_path = store.sample_path(hash, ext)?;
366 if store_path.exists() {
367 return Ok(store_path);
368 }
369 // Return the source path anyway — caller will handle the "not found"
370 return Ok(source);
371 }
372 store.sample_path(hash, ext)
373 }
374
375 /// Update the source_path for a sample after verifying the new file's hash matches.
376 ///
377 /// Used to relocate an loose-files mode sample whose original file has moved.
378 pub fn relocate_sample(
379 store: &SampleStore,
380 db: &Database,
381 hash: &str,
382 new_path: &Path,
383 ) -> Result<()> {
384 // Verify hash matches
385 let mut file = fs::File::open(new_path).map_err(|e| io_err(new_path, e))?;
386 let mut hasher = Sha256::new();
387 let mut buf = [0u8; 8192];
388 loop {
389 let n = file.read(&mut buf).map_err(|e| io_err(new_path, e))?;
390 if n == 0 {
391 break;
392 }
393 hasher.update(&buf[..n]);
394 }
395 let computed = format!("{:x}", hasher.finalize());
396
397 if computed != hash {
398 return Err(CoreError::Internal(format!(
399 "hash mismatch: expected {hash}, got {computed} — this is a different file"
400 )));
401 }
402
403 let abs_path = new_path
404 .canonicalize()
405 .map_err(|e| io_err(new_path, e))?
406 .to_string_lossy()
407 .to_string();
408
409 let changed = db.conn().execute(
410 "UPDATE samples SET source_path = ?1 WHERE hash = ?2",
411 rusqlite::params![abs_path, hash],
412 )?;
413 if changed == 0 {
414 return Err(CoreError::SampleNotFound(hash.to_string()));
415 }
416 let _ = store; // unused but passed for API consistency
417 Ok(())
418 }
419
420 /// Check integrity of loose-files mode samples.
421 ///
422 /// Returns `(valid, missing)` — counts of source_path entries where the file
423 /// exists vs. does not exist on disk.
424 pub fn check_loose_files_integrity(db: &Database) -> Result<(usize, usize)> {
425 let mut stmt = db.conn().prepare(
426 "SELECT source_path FROM samples WHERE source_path IS NOT NULL AND deleted_at IS NULL",
427 )?;
428 let paths: Vec<String> = stmt
429 .query_map([], |row| row.get(0))?
430 .collect::<std::result::Result<Vec<_>, _>>()?;
431
432 let mut valid = 0;
433 let mut missing = 0;
434 for p in &paths {
435 if Path::new(p).exists() {
436 valid += 1;
437 } else {
438 missing += 1;
439 }
440 }
441 Ok((valid, missing))
442 }
443
444 /// Try to re-locate loose-files mode samples whose source files have moved.
445 ///
446 /// Walks `search_root` recursively, building a basename map of candidate
447 /// files. For each missing sample, looks up its basename, then hash-verifies
448 /// candidates (cheapest: size-check before re-hashing the full file). On hash
449 /// match, the sample's `source_path` is updated to the new location.
450 ///
451 /// Returns `(relocated, still_missing)`. `relocated` counts samples whose
452 /// `source_path` was successfully repointed; `still_missing` is the residual
453 /// count for the dialog to surface so the user can run Locate again against a
454 /// different directory.
455 pub fn relocate_missing_loose_files(
456 db: &Database,
457 search_root: &Path,
458 ) -> Result<(usize, usize)> {
459 // 1. Gather missing samples — hash, basename of stored source_path, and
460 // the recorded file_size (used for cheap pre-filter before re-hashing).
461 let mut stmt = db.conn().prepare(
462 "SELECT hash, source_path, file_size FROM samples \
463 WHERE source_path IS NOT NULL AND deleted_at IS NULL",
464 )?;
465 let rows: Vec<(String, String, i64)> = stmt
466 .query_map([], |row| Ok((row.get(0)?, row.get(1)?, row.get(2)?)))?
467 .collect::<std::result::Result<Vec<_>, _>>()?;
468 let missing: Vec<(String, String, i64)> = rows
469 .into_iter()
470 .filter(|(_, source_path, _)| !Path::new(source_path).exists())
471 .collect();
472
473 if missing.is_empty() {
474 return Ok((0, 0));
475 }
476
477 // 2. Walk search_root, build basename -> Vec<PathBuf>. Lowercased so a
478 // case-changed filesystem (e.g. moved between macOS/Linux) still
479 // matches. Bounded by what the filesystem returns; large trees walk
480 // once and stay in memory for the duration of this call.
481 let mut candidates: std::collections::HashMap<String, Vec<PathBuf>> =
482 std::collections::HashMap::new();
483 let mut dirs = vec![search_root.to_path_buf()];
484 while let Some(d) = dirs.pop() {
485 let Ok(entries) = std::fs::read_dir(&d) else { continue };
486 for entry in entries.flatten() {
487 let path = entry.path();
488 if path.is_dir() {
489 if !crate::util::is_macos_metadata_dir(&path) {
490 dirs.push(path);
491 }
492 } else if let Some(name) = path.file_name().and_then(|n| n.to_str()) {
493 candidates
494 .entry(name.to_lowercase())
495 .or_default()
496 .push(path);
497 }
498 }
499 }
500
501 // 3. For each missing sample, check candidates with matching basename.
502 // Size check filters out same-name-different-file collisions before we
503 // spend cycles hashing. Hash verify is the authoritative match.
504 let mut relocated_pairs: Vec<(String, String)> = Vec::new();
505 for (hash, source_path, file_size) in &missing {
506 let Some(basename) = Path::new(source_path)
507 .file_name()
508 .and_then(|n| n.to_str())
509 else { continue };
510 let key = basename.to_lowercase();
511 let Some(paths) = candidates.get(&key) else { continue };
512 for cand in paths {
513 let Ok(md) = std::fs::metadata(cand) else { continue };
514 if md.len() as i64 != *file_size {
515 continue;
516 }
517 // Hash verify. Bail on first match; sample hashes are unique so a
518 // second match for the same hash would be redundant.
519 let Ok(mut file) = fs::File::open(cand) else { continue };
520 let mut hasher = Sha256::new();
521 let mut buf = [0u8; 8192];
522 let ok = loop {
523 let Ok(n) = file.read(&mut buf) else { break false };
524 if n == 0 {
525 break true;
526 }
527 hasher.update(&buf[..n]);
528 };
529 if !ok {
530 continue;
531 }
532 let computed = format!("{:x}", hasher.finalize());
533 if computed == *hash {
534 let abs = cand
535 .canonicalize()
536 .map(|p| p.to_string_lossy().to_string())
537 .unwrap_or_else(|_| cand.to_string_lossy().to_string());
538 relocated_pairs.push((hash.clone(), abs));
539 break;
540 }
541 }
542 }
543
544 // 4. Atomic update of all relocated source_paths.
545 let relocated = relocated_pairs.len();
546 if relocated > 0 {
547 db.transaction(|| {
548 for (hash, new_path) in &relocated_pairs {
549 db.conn().execute(
550 "UPDATE samples SET source_path = ?1 WHERE hash = ?2",
551 rusqlite::params![new_path, hash],
552 )?;
553 }
554 Ok(())
555 })?;
556 }
557
558 let still_missing = missing.len() - relocated;
559 Ok((relocated, still_missing))
560 }
561
562 /// Delete all loose-files mode samples whose source files no longer exist on disk.
563 ///
564 /// Returns the number of samples purged. CASCADE handles VFS nodes, tags, etc.
565 pub fn purge_missing_loose_files(db: &Database) -> Result<usize> {
566 let mut stmt = db.conn().prepare(
567 "SELECT hash, source_path FROM samples WHERE source_path IS NOT NULL AND deleted_at IS NULL",
568 )?;
569 let rows: Vec<(String, String)> = stmt
570 .query_map([], |row| Ok((row.get(0)?, row.get(1)?)))?
571 .collect::<std::result::Result<Vec<_>, _>>()?;
572
573 // Collect hashes to purge, then delete atomically in one transaction.
574 let to_purge: Vec<&str> = rows
575 .iter()
576 .filter(|(_, source_path)| !Path::new(source_path).exists())
577 .map(|(hash, _)| hash.as_str())
578 .collect();
579 let purged = to_purge.len();
580
581 if purged > 0 {
582 db.transaction(|| {
583 for hash in &to_purge {
584 db.conn()
585 .execute("DELETE FROM samples WHERE hash = ?1", [hash])?;
586 }
587 Ok(())
588 })?;
589 }
590
591 Ok(purged)
592 }
593
594 impl SampleStore {
595 /// Import a file in loose-files mode: hash it but do NOT copy to the store.
596 ///
597 /// Records the original absolute path as `source_path` in the database.
598 /// The file stays where it is on disk.
599 #[instrument(skip_all)]
600 pub fn import_loose_files(&self, path: &Path, db: &Database) -> Result<String> {
601 if !crate::util::is_audio_file(path) {
602 return Err(CoreError::Internal(format!(
603 "not a supported audio file: {}",
604 path.display()
605 )));
606 }
607
608 let mut file = fs::File::open(path).map_err(|e| io_err(path, e))?;
609 let metadata = file.metadata().map_err(|e| io_err(path, e))?;
610 let file_size = metadata.len() as i64;
611
612 if file_size == 0 {
613 return Err(CoreError::Internal(format!(
614 "cannot import zero-byte file: {}",
615 path.display()
616 )));
617 }
618
619 // Stream through SHA-256
620 let mut hasher = Sha256::new();
621 let mut buf = [0u8; 8192];
622 loop {
623 let n = file.read(&mut buf).map_err(|e| io_err(path, e))?;
624 if n == 0 {
625 break;
626 }
627 hasher.update(&buf[..n]);
628 }
629 let hash = format!("{:x}", hasher.finalize());
630
631 let ext = crate::util::get_extension(path);
632 let original_name = crate::util::get_filename(path, "unknown");
633
634 // Probe duration from file headers (cheap, no full decode)
635 let duration = probe_duration(path);
636
637 // Resolve absolute path for storage
638 let abs_path = path
639 .canonicalize()
640 .map_err(|e| io_err(path, e))?
641 .to_string_lossy()
642 .to_string();
643
644 // Insert into DB with source_path (ignore if hash already exists)
645 let now = unix_now();
646 db.conn().execute(
647 "INSERT OR IGNORE INTO samples (hash, original_name, file_extension, file_size, import_date, last_modified, duration, source_path)
648 VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8)",
649 rusqlite::params![hash, original_name, ext, file_size, now, now, duration, abs_path],
650 )?;
651
652 Ok(hash)
653 }
654 }
655
656 #[cfg(test)]
657 mod tests {
658 use super::*;
659 use std::io::Write;
660 use tempfile::TempDir;
661
662 fn setup() -> (TempDir, Database, SampleStore) {
663 let dir = TempDir::new().unwrap();
664 let db = Database::open_in_memory().unwrap();
665 let store_dir = dir.path().join("store");
666 let store = SampleStore::new(&store_dir).unwrap();
667 (dir, db, store)
668 }
669
670 fn create_test_file(dir: &TempDir, name: &str, content: &[u8]) -> PathBuf {
671 let path = dir.path().join(name);
672 let mut f = fs::File::create(&path).unwrap();
673 f.write_all(content).unwrap();
674 path
675 }
676
#[test]
fn import_creates_file_and_row() {
    let (dir, db, store) = setup();
    let source = create_test_file(&dir, "kick.wav", b"fake audio data");

    let hash = store.import(&source, &db).unwrap();

    // The blob landed in the store under its content hash.
    assert!(store.exists(&hash, "wav").unwrap());

    // Exactly one metadata row was written for that hash.
    let rows: i64 = db
        .conn()
        .query_row(
            "SELECT COUNT(*) FROM samples WHERE hash = ?1",
            [&hash],
            |r| r.get(0),
        )
        .unwrap();
    assert_eq!(rows, 1);
}
698
#[test]
fn import_deduplicates() {
    let (dir, db, store) = setup();
    let source = create_test_file(&dir, "kick.wav", b"same content");

    // Importing the very same file twice must yield the same hash...
    let first = store.import(&source, &db).unwrap();
    let second = store.import(&source, &db).unwrap();
    assert_eq!(first, second);

    // ...and leave a single row behind.
    let rows: i64 = db
        .conn()
        .query_row("SELECT COUNT(*) FROM samples", [], |r| r.get(0))
        .unwrap();
    assert_eq!(rows, 1);
}
716
#[test]
fn import_same_bytes_different_extension_reuses_blob() {
    let (dir, db, store) = setup();
    // Same bytes under two audio extensions. The content can be arbitrary
    // because is_audio_file keys on the extension.
    let wav = create_test_file(&dir, "loop.wav", b"identical bytes");
    let aiff = create_test_file(&dir, "loop.aiff", b"identical bytes");

    let h1 = store.import(&wav, &db).unwrap();
    let h2 = store.import(&aiff, &db).unwrap();
    assert_eq!(h1, h2, "identical bytes hash to the same sample");

    // Counts the regular files currently sitting in the store root.
    let count_blobs = || {
        std::fs::read_dir(store.root())
            .unwrap()
            .flatten()
            .filter(|entry| entry.path().is_file())
            .count()
    };

    // The second import must reuse `{hash}.wav` rather than write an
    // unreachable `{hash}.aiff` next to it.
    assert_eq!(count_blobs(), 1, "second extension must reuse the first blob");

    // remove() resolves the blob path from the DB row's extension, so a stray
    // second blob would survive it. Nothing may be left afterwards.
    store.remove(&h1, &db).unwrap();
    assert_eq!(count_blobs(), 0, "no orphan blob remains after remove");
}
745
#[test]
fn remove_deletes_file_and_row() {
    let (dir, db, store) = setup();
    let source = create_test_file(&dir, "snare.wav", b"snare data");

    let hash = store.import(&source, &db).unwrap();
    assert!(store.exists(&hash, "wav").unwrap());

    store.remove(&hash, &db).unwrap();

    // Both the blob and the row are gone.
    assert!(!store.exists(&hash, "wav").unwrap());
    let rows: i64 = db
        .conn()
        .query_row("SELECT COUNT(*) FROM samples", [], |r| r.get(0))
        .unwrap();
    assert_eq!(rows, 0);
}
763
#[test]
fn remove_nonexistent_returns_error() {
    let (_dir, db, store) = setup();
    // Well-formed (64 lowercase hex chars) but absent from the database.
    let unknown = "a".repeat(64);
    let outcome = store.remove(&unknown, &db);
    assert!(matches!(outcome, Err(CoreError::SampleNotFound(_))));
}
772
#[test]
fn sample_path_rejects_traversal() {
    let (_dir, _db, store) = setup();
    // A traversal attempt in the hash position fails hash validation.
    let outcome = store.sample_path("../../../etc/passwd", "wav");
    assert!(matches!(outcome, Err(CoreError::HashInvalid(_))));
}
779
#[test]
fn sample_path_rejects_short_hash() {
    let (_dir, _db, store) = setup();
    // Valid hex, but far short of 64 characters.
    let outcome = store.sample_path("abcdef", "wav");
    assert!(matches!(outcome, Err(CoreError::HashInvalid(_))));
}
786
#[test]
fn sample_path_rejects_uppercase() {
    let (_dir, _db, store) = setup();
    // Right length, hex digits, but uppercase.
    let shouting = "A".repeat(64);
    let outcome = store.sample_path(&shouting, "wav");
    assert!(matches!(outcome, Err(CoreError::HashInvalid(_))));
}
794
#[test]
fn sample_path_accepts_valid_hash() {
    let (_dir, _db, store) = setup();
    // 12 mixed hex characters padded with zeros to the full 64.
    let hash = format!("a1b2c3d4e5f6{}", "0".repeat(52));
    let blob = store.sample_path(&hash, "wav").unwrap();
    assert!(blob.to_string_lossy().ends_with(".wav"));
}
802
#[test]
fn sample_path_rejects_traversal_in_extension() {
    let (_dir, _db, store) = setup();
    let hash = "a".repeat(64);
    // The hash is fine; the extension carries the traversal attempt.
    let outcome = store.sample_path(&hash, "../etc/passwd");
    assert!(matches!(outcome, Err(CoreError::Internal(_))));
}
810
#[test]
fn sample_path_rejects_path_separator_in_extension() {
    let (_dir, _db, store) = setup();
    let hash = "a".repeat(64);
    // Starts like a real extension, then tries to climb out of the store.
    let outcome = store.sample_path(&hash, "wav/../../etc");
    assert!(matches!(outcome, Err(CoreError::Internal(_))));
}
818
#[test]
fn sample_path_accepts_common_extensions() {
    let (_dir, _db, store) = setup();
    let hash = "a".repeat(64);
    // Everyday audio extensions plus one containing a dot.
    let accepted = ["wav", "mp3", "flac", "aiff", "ogg", "tar.gz"];
    for ext in accepted.iter() {
        let outcome = store.sample_path(&hash, ext);
        assert!(outcome.is_ok(), "rejected valid ext: {ext}");
    }
}
827
#[test]
fn query_sample_field_rejects_disallowed_field() {
    let (_dir, db, _store) = setup();
    let hash = "a".repeat(64);
    // An injection attempt through the column name must be refused by the
    // allowlist.
    let hostile_field = "hash; DROP TABLE samples --";
    let outcome = query_sample_field(&db, &hash, hostile_field);
    assert!(matches!(outcome, Err(CoreError::Internal(_))));
}
835
#[test]
fn verify_sample_matches_after_import() {
    let (dir, db, store) = setup();
    let source = create_test_file(&dir, "hihat.wav", b"hihat audio data");

    // A freshly imported blob must hash back to its own name.
    let hash = store.import(&source, &db).unwrap();
    let intact = store.verify_sample(&hash, "wav").unwrap();
    assert!(intact);
}
844
#[test]
fn verify_sample_detects_corruption() {
    let (dir, db, store) = setup();
    let source = create_test_file(&dir, "snare.wav", b"original data");
    let hash = store.import(&source, &db).unwrap();

    // Overwrite the stored blob so its bytes no longer match the hash.
    let blob = store.sample_path(&hash, "wav").unwrap();
    fs::write(&blob, b"corrupted data").unwrap();

    let intact = store.verify_sample(&hash, "wav").unwrap();
    assert!(!intact);
}
858
#[test]
fn verify_sample_errors_on_missing_file() {
    let (_dir, _db, store) = setup();
    // Well-formed hash with no blob behind it: opening the file must fail.
    let absent = "b".repeat(64);
    let outcome = store.verify_sample(&absent, "wav");
    assert!(matches!(outcome, Err(CoreError::Io { .. })));
}
866
#[test]
fn import_rejects_zero_byte_file() {
    let (dir, db, store) = setup();
    let source = create_test_file(&dir, "empty.wav", b"");

    let outcome = store.import(&source, &db);
    assert!(outcome.is_err());
    let err_msg = outcome.unwrap_err().to_string();
    assert!(
        err_msg.contains("zero-byte"),
        "expected zero-byte error, got: {err_msg}"
    );

    // The rejected import must not have written a row.
    let rows: i64 = db
        .conn()
        .query_row("SELECT COUNT(*) FROM samples", [], |r| r.get(0))
        .unwrap();
    assert_eq!(rows, 0);
}
887
#[test]
fn import_accepts_non_empty_file() {
    let (dir, db, store) = setup();
    let source = create_test_file(&dir, "valid.wav", b"audio content");

    // A non-empty file imports and its blob is present afterwards.
    let hash = store.import(&source, &db).unwrap();
    assert!(!hash.is_empty());
    let stored = store.exists(&hash, "wav").unwrap();
    assert!(stored);
}
897
#[test]
fn remove_tolerates_missing_file() {
    // The blob may already have vanished (manual rm, crash mid-remove, etc.);
    // remove() must still clean up the DB row.
    let (dir, db, store) = setup();
    let source = create_test_file(&dir, "ghost.wav", b"ghost data");
    let hash = store.import(&source, &db).unwrap();

    // Delete the blob behind the store's back.
    let blob = store.sample_path(&hash, "wav").unwrap();
    fs::remove_file(&blob).unwrap();

    store.remove(&hash, &db).unwrap();

    let rows: i64 = db
        .conn()
        .query_row("SELECT COUNT(*) FROM samples", [], |r| r.get(0))
        .unwrap();
    assert_eq!(rows, 0);
}
916
#[test]
fn remove_deletes_file_before_db_row() {
    // Checks the end state of remove(): the DB row and the file are both gone.
    // Because remove() deletes the file first and the row second, a dangling
    // DB row is the only possible failure mode — never an orphaned blob.
    let (dir, db, store) = setup();
    let source = create_test_file(&dir, "tom.wav", b"tom data");

    let hash = store.import(&source, &db).unwrap();
    let blob = store.sample_path(&hash, "wav").unwrap();
    assert!(blob.exists());

    store.remove(&hash, &db).unwrap();

    // Row is gone.
    let rows: i64 = db
        .conn()
        .query_row("SELECT COUNT(*) FROM samples", [], |r| r.get(0))
        .unwrap();
    assert_eq!(rows, 0);

    // Blob is gone.
    assert!(!blob.exists());
}
941
#[test]
fn remove_orphaned_samples_cleans_unreferenced() {
    // Two samples are imported but only the kick gets a VFS link, so the
    // orphan sweep must remove exactly the snare.
    let (tmp, db, store) = setup();
    let kick_src = create_test_file(&tmp, "kick.wav", b"kick data");
    let snare_src = create_test_file(&tmp, "snare.wav", b"snare data");

    let kick_hash = store.import(&kick_src, &db).unwrap();
    let snare_hash = store.import(&snare_src, &db).unwrap();

    // Link the kick into a fresh VFS; the snare stays unlinked.
    let library = crate::vfs::create_vfs(&db, "Lib").unwrap();
    crate::vfs::create_sample_link(&db, library, None, "kick.wav", &kick_hash).unwrap();

    let swept = store.remove_orphaned_samples(&db).unwrap();
    assert_eq!(swept, 1);

    // The linked sample survives, the unlinked one is gone from the store.
    assert!(store.exists(&kick_hash, "wav").unwrap());
    assert!(!store.exists(&snare_hash, "wav").unwrap());

    // Exactly one row is left in the samples table.
    let remaining: i64 = db
        .conn()
        .query_row("SELECT COUNT(*) FROM samples", [], |r| r.get(0))
        .unwrap();
    assert_eq!(remaining, 1);
}
969
#[test]
fn remove_orphaned_samples_keeps_referenced() {
    // A sample that is linked from a VFS must be left alone by the sweep.
    let (tmp, db, store) = setup();
    let source = create_test_file(&tmp, "hat.wav", b"hat data");
    let digest = store.import(&source, &db).unwrap();

    let library = crate::vfs::create_vfs(&db, "Lib").unwrap();
    crate::vfs::create_sample_link(&db, library, None, "hat.wav", &digest).unwrap();

    let swept = store.remove_orphaned_samples(&db).unwrap();
    assert_eq!(swept, 0);

    let still_present = store.exists(&digest, "wav").unwrap();
    assert!(still_present);
}
983
#[test]
fn remove_orphaned_after_vfs_delete() {
    // A sample whose only link lived in a VFS that has since been deleted
    // counts as orphaned and is removed by the sweep.
    let (tmp, db, store) = setup();
    let source = create_test_file(&tmp, "clap.wav", b"clap data");
    let digest = store.import(&source, &db).unwrap();

    let library = crate::vfs::create_vfs(&db, "Lib").unwrap();
    crate::vfs::create_sample_link(&db, library, None, "clap.wav", &digest).unwrap();

    // Dropping the VFS is expected to cascade to its vfs_nodes, leaving the
    // sample without any reference.
    crate::vfs::delete_vfs(&db, library).unwrap();

    let swept = store.remove_orphaned_samples(&db).unwrap();
    assert_eq!(swept, 1);

    let still_present = store.exists(&digest, "wav").unwrap();
    assert!(!still_present);
}
1001
1002 // --- Loose-files mode tests ---
1003
#[test]
fn import_loose_files_does_not_copy_file() {
    // A loose-files import registers the sample in the database without
    // writing a blob into the store; the row's `source_path` column holds a
    // path ending in the imported file's name instead.
    let (dir, db, store) = setup();
    let src = create_test_file(&dir, "kick.wav", b"unsafe kick data");

    let hash = store.import_loose_files(&src, &db).unwrap();

    // No blob was created in the store for this hash.
    assert!(
        !store.exists(&hash, "wav").unwrap(),
        "loose-files import must not copy the file into the store"
    );

    // The row exists in the DB and has source_path set.
    let source_path: Option<String> = db
        .conn()
        .query_row(
            "SELECT source_path FROM samples WHERE hash = ?1",
            [&hash],
            |row| row.get(0),
        )
        .unwrap();
    // One `expect` replaces the `is_some()` + `unwrap()` pair and names the
    // violated expectation if the column is NULL.
    let source_path = source_path.expect("loose-files import should set source_path");
    assert!(
        source_path.ends_with("kick.wav"),
        "unexpected source_path: {source_path}"
    );
}
1026
#[test]
fn import_loose_files_deduplicates() {
    // Importing the same file twice in loose-files mode yields the same hash
    // both times and leaves a single row in the samples table.
    let (tmp, db, store) = setup();
    let source = create_test_file(&tmp, "kick.wav", b"same unsafe content");

    let first = store.import_loose_files(&source, &db).unwrap();
    let second = store.import_loose_files(&source, &db).unwrap();
    assert_eq!(first, second);

    let rows: i64 = db
        .conn()
        .query_row("SELECT COUNT(*) FROM samples", [], |r| r.get(0))
        .unwrap();
    assert_eq!(rows, 1);
}
1042
#[test]
fn sample_source_path_returns_none_for_normal() {
    // A regular (copying) import records no source path for the sample.
    let (tmp, db, store) = setup();
    let source = create_test_file(&tmp, "kick.wav", b"normal import");
    let digest = store.import(&source, &db).unwrap();

    let recorded = sample_source_path(&db, &digest).unwrap();
    assert!(recorded.is_none());
}
1051
#[test]
fn sample_source_path_returns_path_for_loose_files() {
    // A loose-files import does record a source path for the sample.
    let (tmp, db, store) = setup();
    let source = create_test_file(&tmp, "kick.wav", b"unsafe import");
    let digest = store.import_loose_files(&source, &db).unwrap();

    let recorded = sample_source_path(&db, &digest).unwrap();
    assert!(recorded.is_some());
}
1061
#[test]
fn resolve_file_path_prefers_source_path() {
    // For a loose-files sample the resolved path must lie outside the store
    // root, i.e. resolution goes to the original file rather than a blob.
    let (tmp, db, store) = setup();
    let source = create_test_file(&tmp, "kick.wav", b"unsafe resolve test");
    let digest = store.import_loose_files(&source, &db).unwrap();

    let located = resolve_file_path(&store, &db, &digest, "wav").unwrap();

    let inside_store = located.starts_with(store.root());
    assert!(!inside_store);
}
1072
#[test]
fn resolve_file_path_falls_back_to_store() {
    // A normally imported sample has its blob in the store, so the resolved
    // path must lie under the store root.
    let (tmp, db, store) = setup();
    let source = create_test_file(&tmp, "kick.wav", b"fallback test");
    let digest = store.import(&source, &db).unwrap();

    let located = resolve_file_path(&store, &db, &digest, "wav").unwrap();

    let inside_store = located.starts_with(store.root());
    assert!(inside_store);
}
1084
#[test]
fn relocate_sample_rejects_hash_mismatch() {
    // Pointing a sample at a file with different content must fail with an
    // error that mentions the hash mismatch.
    let (tmp, db, store) = setup();
    let original = create_test_file(&tmp, "kick.wav", b"original content");
    let digest = store.import_loose_files(&original, &db).unwrap();

    let impostor = create_test_file(&tmp, "snare.wav", b"different content");
    let outcome = relocate_sample(&store, &db, &digest, &impostor);

    assert!(outcome.is_err());
    let message = outcome.unwrap_err().to_string();
    assert!(message.contains("hash mismatch"));
}
1096
#[test]
fn relocate_sample_updates_source_path() {
    // Relocating to a file with identical content succeeds and rewrites the
    // recorded source path to the new location.
    let (tmp, db, store) = setup();
    let original = create_test_file(&tmp, "kick.wav", b"relocate content");
    let digest = store.import_loose_files(&original, &db).unwrap();

    // Put an identical copy at a new location (the original stays in place).
    let destination = tmp.path().join("moved_kick.wav");
    fs::copy(&original, &destination).unwrap();

    relocate_sample(&store, &db, &digest, &destination).unwrap();

    let recorded = sample_source_path(&db, &digest).unwrap().unwrap();
    assert!(recorded.contains("moved_kick.wav"));
}
1112
#[test]
fn check_loose_files_integrity_counts_correctly() {
    // With two loose-files samples, one of which has had its file deleted,
    // the integrity check must report one valid and one missing.
    let (tmp, db, store) = setup();
    let kick_src = create_test_file(&tmp, "kick.wav", b"integrity kick");
    let snare_src = create_test_file(&tmp, "snare.wav", b"integrity snare");

    store.import_loose_files(&kick_src, &db).unwrap();
    let snare_hash = store.import_loose_files(&snare_src, &db).unwrap();

    // Remove the snare's file via its recorded source path.
    let snare_path = sample_source_path(&db, &snare_hash).unwrap().unwrap();
    fs::remove_file(&snare_path).unwrap();

    let (present, absent) = check_loose_files_integrity(&db).unwrap();
    assert_eq!(present, 1);
    assert_eq!(absent, 1);
}
1130
#[test]
fn purge_missing_loose_files_removes_only_missing() {
    // Only the sample whose file is gone from disk may be purged; the one
    // whose file is still there must remain queryable.
    let (tmp, db, store) = setup();
    let kick_src = create_test_file(&tmp, "kick.wav", b"purge kick");
    let snare_src = create_test_file(&tmp, "snare.wav", b"purge snare");

    let kick_hash = store.import_loose_files(&kick_src, &db).unwrap();
    let snare_hash = store.import_loose_files(&snare_src, &db).unwrap();

    // Remove the snare's file via its recorded source path.
    let snare_path = sample_source_path(&db, &snare_hash).unwrap().unwrap();
    fs::remove_file(&snare_path).unwrap();

    let removed = purge_missing_loose_files(&db).unwrap();
    assert_eq!(removed, 1);

    // Looking up the kick still succeeds; the snare is reported as not found.
    let kick_lookup = sample_source_path(&db, &kick_hash);
    assert!(kick_lookup.is_ok());
    let snare_lookup = sample_source_path(&db, &snare_hash);
    assert!(matches!(snare_lookup, Err(CoreError::SampleNotFound(_))));
}
1154
#[test]
fn purge_missing_loose_files_noop_when_all_valid() {
    // When every loose-files sample still has its file on disk, the purge
    // reports zero removals.
    let (tmp, db, store) = setup();
    let source = create_test_file(&tmp, "kick.wav", b"all valid");
    store.import_loose_files(&source, &db).unwrap();

    let removed = purge_missing_loose_files(&db).unwrap();
    assert_eq!(removed, 0);
}
1164 }
1165