Skip to main content

max / audiofiles

Add macOS metadata filtering and orphaned sample cleanup infrastructure. Filter .DS_Store, ._* resource forks, __MACOSX, .Spotlight-V100, etc. during import. Add SampleStore::remove_orphaned_samples() for cleaning up unreferenced blobs. Export SearchFilter::active_count() for UI. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
Author: Max J. <87768334+MaxJMath@users.noreply.github.com> · 2026-04-13 21:57 UTC
Commit: ffa6b98ceca30fb57bef97a664b83a65d426c9a5
Parent: d9130c8
4 files changed, +202 insertions, -3 deletions
@@ -22,6 +22,11 @@ fn is_audio_file(path: &Path) -> bool {
22 22 audiofiles_core::util::is_audio_file(path)
23 23 }
24 24
25 + /// Check whether a directory should be skipped during traversal.
26 + fn is_skipped_dir(path: &Path) -> bool {
27 + audiofiles_core::util::is_macos_metadata_dir(path)
28 + }
29 +
25 30 /// How imported files should be organized in the VFS.
26 31 pub enum ImportStrategy {
27 32 /// All links in one directory, no subdirs created.
@@ -158,7 +163,9 @@ fn count_audio_files(dir: &Path, cmd_rx: &mpsc::Receiver<ImportCommand>) -> Opti
158 163 for entry in entries.flatten() {
159 164 let path = entry.path();
160 165 if path.is_dir() {
161 - stack.push(path);
166 + if !is_skipped_dir(&path) {
167 + stack.push(path);
168 + }
162 169 } else if is_audio_file(&path) {
163 170 count += 1;
164 171 }
@@ -279,6 +286,9 @@ fn import_directory_recursive(
279 286 }
280 287
281 288 if path.is_dir() {
289 + if is_skipped_dir(&path) {
290 + continue;
291 + }
282 292 let dir_name = audiofiles_core::util::get_filename(&path, "folder");
283 293
284 294 let dir_node_id =
@@ -341,6 +351,9 @@ fn import_directory_flat(
341 351 }
342 352
343 353 if path.is_dir() {
354 + if is_skipped_dir(&path) {
355 + continue;
356 + }
344 357 // Recurse into subdirs but still put all links at the same flat level
345 358 let cancelled = import_directory_flat(&path, vfs_id, parent_id, ctx);
346 359 if cancelled {
@@ -383,6 +396,9 @@ fn import_structured(
383 396 }
384 397
385 398 if path.is_dir() {
399 + if is_skipped_dir(&path) {
400 + continue;
401 + }
386 402 let dir_name = audiofiles_core::util::get_filename(&path, "folder");
387 403
388 404 let dir_node_id =
@@ -69,6 +69,17 @@ impl SearchFilter {
69 69 || !self.required_tags.is_empty()
70 70 }
71 71
72 + /// Count distinct active filter categories (excluding text query).
73 + pub fn active_count(&self) -> usize {
74 + let mut n = 0;
75 + if self.bpm_min.is_some() || self.bpm_max.is_some() { n += 1; }
76 + if self.duration_min.is_some() || self.duration_max.is_some() { n += 1; }
77 + if !self.classifications.is_empty() { n += 1; }
78 + if !self.keys.is_empty() { n += 1; }
79 + if !self.required_tags.is_empty() { n += 1; }
80 + n
81 + }
82 +
72 83 /// Clear all filter criteria.
73 84 pub fn clear(&mut self) {
74 85 *self = Self::default();
@@ -50,7 +50,7 @@ fn probe_duration(path: &Path) -> Option<f64> {
50 50 /// Allows alphanumeric, dots, and hyphens (covers wav, mp3, flac, aiff, ogg,
51 51 /// tar.gz, etc.). Rejects path separators, null bytes, and anything else that
52 52 /// could be used for directory traversal.
53 - fn validate_extension(ext: &str) -> Result<()> {
53 + pub fn validate_extension(ext: &str) -> Result<()> {
54 54 if !ext.is_empty()
55 55 && !ext
56 56 .bytes()
@@ -64,7 +64,7 @@ fn validate_extension(ext: &str) -> Result<()> {
64 64 }
65 65
66 66 /// Validate that a hash string is exactly 64 lowercase hex characters (SHA-256).
67 - fn validate_hash(hash: &str) -> Result<()> {
67 + pub fn validate_hash(hash: &str) -> Result<()> {
68 68 if hash.len() != 64 || !hash.bytes().all(|b| b.is_ascii_hexdigit() && !b.is_ascii_uppercase())
69 69 {
70 70 return Err(CoreError::HashInvalid(format!(
@@ -197,6 +197,36 @@ impl SampleStore {
197 197 Ok(())
198 198 }
199 199
200 + /// Remove samples that are no longer referenced by any VFS node.
201 + ///
202 + /// Returns the number of orphaned samples removed. Each orphan is deleted
203 + /// from the database first (CASCADE handles tags, analysis, etc.), then the
204 + /// file blob is removed from disk.
205 + #[instrument(skip_all)]
206 + pub fn remove_orphaned_samples(&self, db: &Database) -> Result<usize> {
207 + let mut stmt = db.conn().prepare(
208 + "SELECT s.hash, s.file_extension
209 + FROM samples s
210 + LEFT JOIN vfs_nodes vn ON s.hash = vn.sample_hash
211 + WHERE vn.id IS NULL",
212 + )?;
213 + let orphans: Vec<(String, String)> = stmt
214 + .query_map([], |row| Ok((row.get(0)?, row.get(1)?)))?
215 + .collect::<std::result::Result<Vec<_>, _>>()?;
216 +
217 + let count = orphans.len();
218 + for (hash, ext) in &orphans {
219 + db.conn()
220 + .execute("DELETE FROM samples WHERE hash = ?1", [hash])?;
221 + if let Ok(path) = self.sample_path(hash, ext) {
222 + if path.exists() {
223 + let _ = fs::remove_file(&path);
224 + }
225 + }
226 + }
227 + Ok(count)
228 + }
229 +
200 230 /// Get the store root directory.
201 231 pub fn root(&self) -> &Path {
202 232 &self.root
@@ -499,4 +529,64 @@ mod tests {
499 529 // File gone
500 530 assert!(!stored_path.exists());
501 531 }
532 +
/// With two imported samples and only one linked into a VFS, cleanup must
/// remove exactly the unlinked one (row and blob) and leave the other intact.
#[test]
fn remove_orphaned_samples_cleans_unreferenced() {
    let (dir, db, store) = setup();
    let kick_src = create_test_file(&dir, "kick.wav", b"kick data");
    let snare_src = create_test_file(&dir, "snare.wav", b"snare data");

    let kick_hash = store.import(&kick_src, &db).unwrap();
    let snare_hash = store.import(&snare_src, &db).unwrap();

    // Only the kick gets a VFS link; the snare stays unreferenced.
    let library = crate::vfs::create_vfs(&db, "Lib").unwrap();
    crate::vfs::create_sample_link(&db, library, None, "kick.wav", &kick_hash).unwrap();

    let removed = store.remove_orphaned_samples(&db).unwrap();
    assert_eq!(removed, 1);

    // Blob check: the linked sample survives, the orphan is gone.
    assert!(store.exists(&kick_hash, "wav").unwrap());
    assert!(!store.exists(&snare_hash, "wav").unwrap());

    // Row check: exactly one sample row remains.
    let remaining_rows: i64 = db
        .conn()
        .query_row("SELECT COUNT(*) FROM samples", [], |row| row.get(0))
        .unwrap();
    assert_eq!(remaining_rows, 1);
}
560 +
/// A sample that is linked from a VFS node must survive cleanup untouched.
#[test]
fn remove_orphaned_samples_keeps_referenced() {
    let (dir, db, store) = setup();
    let hat_src = create_test_file(&dir, "hat.wav", b"hat data");
    let hat_hash = store.import(&hat_src, &db).unwrap();

    let library = crate::vfs::create_vfs(&db, "Lib").unwrap();
    crate::vfs::create_sample_link(&db, library, None, "hat.wav", &hat_hash).unwrap();

    // Nothing is orphaned, so nothing may be removed.
    let removed = store.remove_orphaned_samples(&db).unwrap();
    assert_eq!(removed, 0);
    assert!(store.exists(&hat_hash, "wav").unwrap());
}
574 +
/// Deleting the only VFS that references a sample turns it into an orphan,
/// which the next cleanup pass must remove.
#[test]
fn remove_orphaned_after_vfs_delete() {
    let (dir, db, store) = setup();
    let clap_src = create_test_file(&dir, "clap.wav", b"clap data");
    let clap_hash = store.import(&clap_src, &db).unwrap();

    let library = crate::vfs::create_vfs(&db, "Lib").unwrap();
    crate::vfs::create_sample_link(&db, library, None, "clap.wav", &clap_hash).unwrap();

    // Dropping the VFS also drops its nodes, leaving the sample unreferenced.
    crate::vfs::delete_vfs(&db, library).unwrap();

    let removed = store.remove_orphaned_samples(&db).unwrap();
    assert_eq!(removed, 1);
    assert!(!store.exists(&clap_hash, "wav").unwrap());
}
502 592 }
@@ -35,11 +35,41 @@ pub fn split_name_ext(filename: &str) -> (String, String) {
35 35 }
36 36
37 37 /// Check whether a path has an audio file extension.
38 + ///
39 + /// Rejects macOS resource fork sidecar files (`._*.wav` etc.) which carry an
40 + /// audio extension but contain binary metadata, not audio data. These files
41 + /// are invisible on macOS but appear as regular files on Linux.
38 42 pub fn is_audio_file(path: &Path) -> bool {
43 + if is_macos_resource_fork(path) {
44 + return false;
45 + }
39 46 let ext = get_extension(path);
40 47 AUDIO_EXTENSIONS.contains(&ext.as_str())
41 48 }
42 49
/// Returns `true` when the final path component is a macOS metadata file:
/// a resource fork sidecar (any name starting with `._`) or `.DS_Store`.
///
/// macOS writes these next to real files. They frequently survive inside zip
/// archives and copied folders, and on Linux they appear as ordinary files.
///
/// Paths without a final component, or whose name is not valid UTF-8, yield
/// `false`.
pub fn is_macos_resource_fork(path: &Path) -> bool {
    match path.file_name().and_then(|component| component.to_str()) {
        Some(".DS_Store") => true,
        Some(file_name) => file_name.starts_with("._"),
        None => false,
    }
}
62 +
/// Returns `true` when the final path component names a macOS system
/// directory that directory traversal should not descend into, such as
/// `__MACOSX` (left behind by zip extraction) or a Spotlight index.
///
/// The match is exact and case-sensitive. Paths without a final component,
/// or whose name is not valid UTF-8, yield `false`.
pub fn is_macos_metadata_dir(path: &Path) -> bool {
    const METADATA_DIRS: [&str; 4] = ["__MACOSX", ".Spotlight-V100", ".fseventsd", ".Trashes"];

    path.file_name()
        .and_then(|component| component.to_str())
        .map_or(false, |dir_name| {
            METADATA_DIRS.iter().any(|&known| known == dir_name)
        })
}
72 +
43 73 #[cfg(test)]
44 74 mod tests {
45 75 use super::*;
@@ -151,4 +181,56 @@ mod tests {
151 181 assert_eq!(ext, "wav");
152 182 }
153 183
184 + // --- macOS resource fork / metadata filtering ---
185 +
/// A `._`-prefixed `.wav` sidecar must not count as audio.
#[test]
fn resource_fork_wav_rejected() {
    let sidecar = Path::new("._kick.wav");
    assert!(!is_audio_file(sidecar));
}
190 +
/// A `._`-prefixed `.aiff` sidecar must not count as audio.
#[test]
fn resource_fork_aiff_rejected() {
    let sidecar = Path::new("._strings.aiff");
    assert!(!is_audio_file(sidecar));
}
195 +
/// A `._`-prefixed `.flac` sidecar must not count as audio.
#[test]
fn resource_fork_flac_rejected() {
    let sidecar = Path::new("._pad.flac");
    assert!(!is_audio_file(sidecar));
}
200 +
/// Only the final path component matters: a sidecar nested in ordinary
/// directories is still rejected.
#[test]
fn resource_fork_in_subdir_rejected() {
    let nested_sidecar = Path::new("/samples/drums/._snare.wav");
    assert!(!is_audio_file(nested_sidecar));
}
205 +
/// Sidecar names (bare or nested) and `.DS_Store` are all recognized.
#[test]
fn resource_fork_detected() {
    let metadata_files = ["._kick.wav", "/path/to/._file.aiff", ".DS_Store"];
    for candidate in metadata_files.iter() {
        assert!(
            is_macos_resource_fork(Path::new(candidate)),
            "expected macOS metadata file: {}",
            candidate
        );
    }
}
212 +
/// Ordinary files — including dot-files that lack the `._` prefix — are not
/// mistaken for resource forks.
#[test]
fn normal_files_not_resource_fork() {
    let regular_files = ["kick.wav", ".hidden.wav", "my_file.flac"];
    for candidate in regular_files.iter() {
        assert!(
            !is_macos_resource_fork(Path::new(candidate)),
            "unexpectedly flagged as macOS metadata: {}",
            candidate
        );
    }
}
219 +
/// Every known macOS system directory name is recognized, bare or nested.
#[test]
fn macos_metadata_dirs_detected() {
    let system_dirs = [
        "__MACOSX",
        "/path/to/__MACOSX",
        ".Spotlight-V100",
        ".fseventsd",
        ".Trashes",
    ];
    for candidate in system_dirs.iter() {
        assert!(
            is_macos_metadata_dir(Path::new(candidate)),
            "expected macOS metadata dir: {}",
            candidate
        );
    }
}
228 +
/// Ordinary directories — hidden ones included — are never skipped.
#[test]
fn normal_dirs_not_metadata() {
    let regular_dirs = ["samples", "drums", ".hidden_dir"];
    for candidate in regular_dirs.iter() {
        assert!(
            !is_macos_metadata_dir(Path::new(candidate)),
            "unexpectedly flagged as macOS metadata dir: {}",
            candidate
        );
    }
}
235 +
154 236 }