//! Pipeline benchmark for audiofiles analysis. //! //! Measures per-stage timing, throughput, resource usage, and classification accuracy //! against labeled training data. //! //! Usage: `cargo run --release -p audiofiles-bench` use std::collections::HashMap; use std::path::{Path, PathBuf}; use std::time::Instant; use audiofiles_core::analysis::classify::{self, ClassifyInput, SampleClass}; use audiofiles_core::analysis::config::AnalysisConfig; use audiofiles_core::analysis::loudness; use audiofiles_core::analysis::{self, basic, bpm, decode, loop_detect, mfcc, spectral}; use audiofiles_core::fingerprint; use rayon::prelude::*; // ── Timing Helpers ── struct StageTiming { decode_ms: f64, loudness_ms: f64, spectral_ms: f64, mfcc_ms: f64, classify_ms: f64, bpm_key_ms: f64, loop_ms: f64, fingerprint_ms: f64, total_ms: f64, } fn time_stages(path: &Path) -> Option<(StageTiming, f64, u32)> { let total_start = Instant::now(); // Decode let t = Instant::now(); let decoded = decode::decode_to_mono(path).ok()?; let decode_ms = t.elapsed().as_secs_f64() * 1000.0; let duration = decoded.duration; let sr = decoded.sample_rate; let max_secs = 30.0; let max_samples = (max_secs * sr as f64) as usize; let capped = &decoded.samples[..decoded.samples.len().min(max_samples)]; // Loudness let t = Instant::now(); let _ = basic::peak_db(&decoded.samples); let _ = basic::rms_db(&decoded.samples); let crest = basic::crest_factor(&decoded.samples); let attack = basic::attack_time(&decoded.samples, sr); let _ = loudness::measure_lufs(&decoded.samples, sr); let loudness_ms = t.elapsed().as_secs_f64() * 1000.0; // Spectral let t = Instant::now(); let (features, magnitude_frames) = spectral::compute_spectral_features_with_frames(capped, sr); let spectral_ms = t.elapsed().as_secs_f64() * 1000.0; // MFCC let t = Instant::now(); let mfcc_features = mfcc::compute_mfccs(&magnitude_frames, sr, 1024); let mfcc_ms = t.elapsed().as_secs_f64() * 1000.0; // Classify let t = Instant::now(); let input = ClassifyInput::with_mfccs(&features, duration, crest, attack, &mfcc_features); let _ = classify::classify_ml(&input); let classify_ms = t.elapsed().as_secs_f64() * 1000.0; // BPM + Key let t = Instant::now(); let bpm_result = bpm::detect_bpm_key(capped, sr, 2.0); let bpm_key_ms = t.elapsed().as_secs_f64() * 1000.0; // Loop detect let t = Instant::now(); let _ = loop_detect::is_loop(&decoded.samples, sr, bpm_result.bpm); let loop_ms = t.elapsed().as_secs_f64() * 1000.0; // Fingerprint let t = Instant::now(); let _ = fingerprint::compute_envelope(&decoded.samples, sr); let fingerprint_ms = t.elapsed().as_secs_f64() * 1000.0; let total_ms = total_start.elapsed().as_secs_f64() * 1000.0; Some(( StageTiming { decode_ms, loudness_ms, spectral_ms, mfcc_ms, classify_ms, bpm_key_ms, loop_ms, fingerprint_ms, total_ms, }, duration, sr, )) } // ── Classification Accuracy ── fn expected_class_from_dir(dir_name: &str) -> Option { match dir_name { "kick" => Some(SampleClass::Kick), "snare" => Some(SampleClass::Snare), "hihat" => Some(SampleClass::HiHat), "cymbal" => Some(SampleClass::Cymbal), "clap" => Some(SampleClass::Clap), "tom" => Some(SampleClass::Tom), "percussion" => Some(SampleClass::Percussion), _ => None, } } /// True if predicted is "correct enough": /// - Exact match, OR /// - Both are drum types (permissive: kick/snare/hihat/cymbal/percussion) fn is_drum_class(c: SampleClass) -> bool { matches!( c, SampleClass::Kick | SampleClass::Snare | SampleClass::HiHat | SampleClass::Cymbal | SampleClass::Clap | SampleClass::Tom | SampleClass::Percussion ) } struct ClassifyResult { expected: SampleClass, predicted: SampleClass, confidence: f64, } fn classify_file(path: &Path) -> Option<(SampleClass, f64)> { let decoded = decode::decode_to_mono(path).ok()?; let sr = decoded.sample_rate; let max_samples = (30.0 * sr as f64) as usize; let capped = &decoded.samples[..decoded.samples.len().min(max_samples)]; let crest = basic::crest_factor(&decoded.samples); let attack = basic::attack_time(&decoded.samples, sr); let (features, magnitude_frames) = spectral::compute_spectral_features_with_frames(capped, sr); let mfcc_features = mfcc::compute_mfccs(&magnitude_frames, sr, 1024); let input = ClassifyInput::with_mfccs(&features, decoded.duration, crest, attack, &mfcc_features); let result = classify::classify_ml(&input); Some((result.class, result.confidence)) } // ── File Discovery ── fn audio_extensions() -> &'static [&'static str] { &[".wav", ".aif", ".aiff", ".mp3", ".ogg", ".flac"] } fn is_audio(name: &str) -> bool { let lower = name.to_lowercase(); audio_extensions().iter().any(|ext| lower.ends_with(ext)) } fn collect_audio_files(dir: &Path, limit: Option) -> Vec { let mut files: Vec = Vec::new(); if !dir.exists() { return files; } for entry in walkdir(dir) { if let Some(lim) = limit { if files.len() >= lim { break; } } files.push(entry); } files } fn walkdir(dir: &Path) -> Vec { let mut out = Vec::new(); if let Ok(entries) = std::fs::read_dir(dir) { for entry in entries.flatten() { let path = entry.path(); if path.is_dir() { out.extend(walkdir(&path)); } else { let name = entry.file_name().to_string_lossy().to_string(); if is_audio(&name) { // Resolve symlinks let resolved = std::fs::read_link(&path).unwrap_or(path); if resolved.exists() { out.push(resolved); } } } } } out } // ── Report Formatting ── fn percentile(values: &mut [f64], p: f64) -> f64 { if values.is_empty() { return 0.0; } values.sort_by(|a, b| a.total_cmp(b)); let idx = (p / 100.0 * (values.len() - 1) as f64).round() as usize; values[idx.min(values.len() - 1)] } fn main() { let project_root = PathBuf::from(env!("CARGO_MANIFEST_DIR")) .parent() .unwrap() .parent() .unwrap() .to_path_buf(); let samples_dir = project_root.join("samples"); let training_dir = samples_dir.join("training"); let test_suite_dir = samples_dir.join("test-suite"); println!("╔══════════════════════════════════════════════════════════════╗"); println!("║ AudioFiles Analysis Pipeline — Benchmark Report ║"); println!("╚══════════════════════════════════════════════════════════════╝"); println!(); // ───────────────────────────────────────────────────────────── // Section 1: Per-Stage Timing (representative sample set) // ───────────────────────────────────────────────────────────── println!("━━━ 1. PER-STAGE TIMING ━━━"); println!(); // Collect samples from different sources for timing let timing_sources: Vec<(&str, PathBuf, Option)> = vec![ ("drum one-shots (WAV)", training_dir.join("kick"), Some(200)), ("drum one-shots (WAV)", training_dir.join("snare"), Some(200)), ("drum one-shots (WAV)", training_dir.join("hihat"), Some(200)), ("philharmonia (MP3)", test_suite_dir.join("formats/mp3"), Some(50)), ("AIFF samples", test_suite_dir.join("formats/aiff"), Some(10)), ("FLAC samples", test_suite_dir.join("formats/flac"), Some(10)), ("ambient loops (WAV)", test_suite_dir.join("genres/ambient"), Some(50)), ("synth loops (WAV)", test_suite_dir.join("genres/synth"), Some(50)), ("guitar loops (WAV)", test_suite_dir.join("genres/guitar"), Some(50)), ]; let mut all_timings: Vec<(StageTiming, f64, u32, String)> = Vec::new(); for (label, dir, limit) in &timing_sources { let files = collect_audio_files(dir, *limit); if files.is_empty() { continue; } let results: Vec<_> = files .par_iter() .filter_map(|f| { time_stages(f).map(|(t, dur, sr)| (t, dur, sr, label.to_string())) }) .collect(); all_timings.extend(results); } if all_timings.is_empty() { eprintln!("No files found for timing benchmarks!"); std::process::exit(1); } let n = all_timings.len(); println!("Benchmarked {} files", n); println!(); // Aggregate per-stage let mut decode = Vec::new(); let mut loud = Vec::new(); let mut spec = Vec::new(); let mut mfcc_t = Vec::new(); let mut class = Vec::new(); let mut bpm_t = Vec::new(); let mut loop_t = Vec::new(); let mut fp_t = Vec::new(); let mut total = Vec::new(); let mut durations = Vec::new(); for (t, dur, _sr, _) in &all_timings { decode.push(t.decode_ms); loud.push(t.loudness_ms); spec.push(t.spectral_ms); mfcc_t.push(t.mfcc_ms); class.push(t.classify_ms); bpm_t.push(t.bpm_key_ms); loop_t.push(t.loop_ms); fp_t.push(t.fingerprint_ms); total.push(t.total_ms); durations.push(*dur); } fn stats_line(name: &str, vals: &mut Vec) { let mean = vals.iter().sum::() / vals.len() as f64; let p50 = percentile(vals, 50.0); let p95 = percentile(vals, 95.0); let p99 = percentile(vals, 99.0); let max = percentile(vals, 100.0); println!( " {:<16} {:>8.2} {:>8.2} {:>8.2} {:>8.2} {:>8.2}", name, mean, p50, p95, p99, max ); } println!(" {:<16} {:>8} {:>8} {:>8} {:>8} {:>8}", "Stage", "Mean", "P50", "P95", "P99", "Max"); println!(" {}", "─".repeat(58)); stats_line("Decode", &mut decode); stats_line("Loudness+LUFS", &mut loud); stats_line("Spectral/STFT", &mut spec); stats_line("MFCC", &mut mfcc_t); stats_line("Classify", &mut class); stats_line("BPM+Key", &mut bpm_t); stats_line("Loop Detect", &mut loop_t); stats_line("Fingerprint", &mut fp_t); println!(" {}", "─".repeat(58)); stats_line("TOTAL", &mut total); println!(); // Duration stats let avg_dur = durations.iter().sum::() / durations.len() as f64; let avg_total = total.iter().sum::() / total.len() as f64; let realtime_ratio = avg_dur * 1000.0 / avg_total; println!(" Avg sample duration: {:.2}s", avg_dur); println!(" Avg analysis time: {:.1}ms", avg_total); println!(" Real-time ratio: {:.0}× (analysis is {:.0}× faster than real-time)", realtime_ratio, realtime_ratio); println!(); // ───────────────────────────────────────────────────────────── // Section 2: Format-Specific Performance // ───────────────────────────────────────────────────────────── println!("━━━ 2. FORMAT-SPECIFIC DECODE PERFORMANCE ━━━"); println!(); let format_dirs: Vec<(&str, PathBuf)> = vec![ ("WAV", test_suite_dir.join("formats/wav")), ("AIFF", test_suite_dir.join("formats/aiff")), ("MP3", test_suite_dir.join("formats/mp3")), ("FLAC", test_suite_dir.join("formats/flac")), ]; println!(" {:<8} {:>6} {:>10} {:>10} {:>10}", "Format", "Files", "Mean(ms)", "P95(ms)", "Max(ms)"); println!(" {}", "─".repeat(50)); for (fmt, dir) in &format_dirs { let files = collect_audio_files(dir, Some(100)); if files.is_empty() { println!(" {:<8} {:>6} {:>10} {:>10} {:>10}", fmt, 0, "-", "-", "-"); continue; } let mut decode_times: Vec = files .par_iter() .filter_map(|f| { let t = Instant::now(); decode::decode_to_mono(f).ok()?; Some(t.elapsed().as_secs_f64() * 1000.0) }) .collect(); let count = decode_times.len(); let mean = decode_times.iter().sum::() / count as f64; let p95 = percentile(&mut decode_times, 95.0); let max = percentile(&mut decode_times, 100.0); println!(" {:<8} {:>6} {:>10.2} {:>10.2} {:>10.2}", fmt, count, mean, p95, max); } println!(); // ───────────────────────────────────────────────────────────── // Section 3: Throughput // ───────────────────────────────────────────────────────────── println!("━━━ 3. THROUGHPUT ━━━"); println!(); // Parallel full-pipeline throughput on 500 drum one-shots let throughput_files = collect_audio_files(&training_dir.join("kick"), Some(250)); let mut throughput_files_ext = throughput_files; throughput_files_ext.extend(collect_audio_files(&training_dir.join("snare"), Some(250))); let config = AnalysisConfig { loudness: true, spectral: true, bpm: true, key: true, loop_detect: true, classify: true, fingerprint: true, auto_suggest_tags: false, max_analysis_seconds: Some(30.0), smart_skip: false, }; let tp_count = throughput_files_ext.len(); let tp_start = Instant::now(); let tp_ok: usize = throughput_files_ext .par_iter() .filter(|f| analysis::analyze_sample("bench", f, &config).is_ok()) .count(); let tp_elapsed = tp_start.elapsed().as_secs_f64(); let tp_rate = tp_ok as f64 / tp_elapsed; println!(" Full pipeline (all stages, parallel):"); println!(" Files: {} ({} succeeded)", tp_count, tp_ok); println!(" Wall time: {:.1}s", tp_elapsed); println!(" Throughput: {:.1} files/sec", tp_rate); println!(" Avg/file: {:.1}ms", tp_elapsed * 1000.0 / tp_ok as f64); println!(); // Single-threaded throughput for comparison let st_files = collect_audio_files(&training_dir.join("kick"), Some(100)); let st_start = Instant::now(); let st_ok: usize = st_files .iter() .filter(|f| analysis::analyze_sample("bench", f, &config).is_ok()) .count(); let st_elapsed = st_start.elapsed().as_secs_f64(); let st_rate = st_ok as f64 / st_elapsed; println!(" Single-threaded comparison (100 files):"); println!(" Throughput: {:.1} files/sec", st_rate); println!(" Speedup from parallelism: {:.1}×", tp_rate / st_rate); println!(); // ───────────────────────────────────────────────────────────── // Section 4: Resource Usage // ───────────────────────────────────────────────────────────── println!("━━━ 4. RESOURCE USAGE ━━━"); println!(); // Memory estimation: decode a few files of varying duration and measure buffer sizes let mem_files: Vec<(&str, PathBuf)> = vec![ ("Short drum hit", training_dir.join("kick")), ("Medium loop", test_suite_dir.join("genres/ambient")), ]; println!(" Per-sample memory (mono f32 decode buffer):"); for (label, dir) in &mem_files { let files = collect_audio_files(dir, Some(10)); if files.is_empty() { continue; } let mut sizes: Vec<(f64, usize)> = Vec::new(); for f in &files { if let Ok(decoded) = decode::decode_to_mono(f) { let bytes = decoded.samples.len() * 4; // f32 = 4 bytes sizes.push((decoded.duration, bytes)); } } if !sizes.is_empty() { let avg_dur = sizes.iter().map(|(d, _)| d).sum::() / sizes.len() as f64; let avg_bytes = sizes.iter().map(|(_, b)| *b).sum::() / sizes.len(); let max_bytes = sizes.iter().map(|(_, b)| *b).max().unwrap_or(0); println!( " {}: avg {:.2}s = {:.1} KB, max = {:.1} KB", label, avg_dur, avg_bytes as f64 / 1024.0, max_bytes as f64 / 1024.0 ); } } println!(); // STFT frame memory let frame_samples = 1024usize; let hop = 1024usize; let thirty_sec_frames = (30.0 * 44100.0 / hop as f64) as usize; let frame_mem = thirty_sec_frames * (frame_samples / 2 + 1) * 8; // f64 magnitude bins println!(" STFT magnitude frames (30s @ 44.1kHz, 1024-sample window):"); println!(" Frames: {}", thirty_sec_frames); println!(" Memory: {:.1} MB", frame_mem as f64 / 1_048_576.0); println!(); // Model size let model_path = project_root.join("crates/audiofiles-core/models/layer2_drum.json"); if let Ok(meta) = std::fs::metadata(&model_path) { println!(" RF model (layer2_drum.json): {:.1} MB on disk, embedded at compile time", meta.len() as f64 / 1_048_576.0); } println!(); // ───────────────────────────────────────────────────────────── // Section 5: Classification Accuracy // ───────────────────────────────────────────────────────────── println!("━━━ 5. CLASSIFICATION ACCURACY ━━━"); println!(); let class_dirs = ["kick", "snare", "hihat", "cymbal", "clap", "tom", "percussion"]; let max_per_class = 300; // enough for statistical significance, not too slow let mut all_results: Vec = Vec::new(); for dir_name in &class_dirs { let dir = training_dir.join(dir_name); let files = collect_audio_files(&dir, Some(max_per_class)); if files.is_empty() { continue; } let expected = match expected_class_from_dir(dir_name) { Some(c) => c, None => continue, }; let results: Vec = files .par_iter() .filter_map(|f| { let (predicted, confidence) = classify_file(f)?; Some(ClassifyResult { expected, predicted, confidence, }) }) .collect(); all_results.extend(results); } let total_classified = all_results.len(); println!(" Evaluated {} samples (up to {} per class)", total_classified, max_per_class); println!(); // Strict accuracy: predicted class == expected class exactly let strict_correct = all_results .iter() .filter(|r| r.predicted == r.expected) .count(); let strict_acc = strict_correct as f64 / total_classified as f64 * 100.0; // Layer 1 accuracy: predicted is any drum class when expected is drum let drum_correct = all_results .iter() .filter(|r| is_drum_class(r.predicted)) .count(); let drum_acc = drum_correct as f64 / total_classified as f64 * 100.0; println!(" Overall:"); println!(" Strict accuracy (exact class match): {:.1}% ({}/{})", strict_acc, strict_correct, total_classified); println!(" Layer 1 accuracy (drum detection): {:.1}% ({}/{})", drum_acc, drum_correct, total_classified); println!(); // Per-class breakdown println!(" Per-class (strict):"); println!(" {:<12} {:>6} {:>8} {:>10} {:>10}", "Expected", "N", "Correct", "Accuracy", "Avg Conf"); println!(" {}", "─".repeat(52)); for dir_name in &class_dirs { let expected = match expected_class_from_dir(dir_name) { Some(c) => c, None => continue, }; let class_results: Vec<&ClassifyResult> = all_results .iter() .filter(|r| r.expected == expected) .collect(); if class_results.is_empty() { continue; } let n = class_results.len(); let correct = class_results.iter().filter(|r| r.predicted == r.expected).count(); let acc = correct as f64 / n as f64 * 100.0; let avg_conf = class_results.iter().map(|r| r.confidence).sum::() / n as f64; println!( " {:<12} {:>6} {:>8} {:>9.1}% {:>9.2}", dir_name, n, correct, acc, avg_conf ); } println!(); // Confusion matrix (7 classes) let class_names = ["kick", "snare", "hihat", "cymbal", "clap", "tom", "perc"]; let class_ids: Vec = vec![ SampleClass::Kick, SampleClass::Snare, SampleClass::HiHat, SampleClass::Cymbal, SampleClass::Clap, SampleClass::Tom, SampleClass::Percussion, ]; println!(" Confusion matrix (rows=expected, cols=predicted):"); print!(" {:>12}", ""); for name in &class_names { print!(" {:>7}", name); } println!(" {:>7}", "other"); println!(" {}", "─".repeat(60)); for (i, expected) in class_ids.iter().enumerate() { let class_results: Vec<&ClassifyResult> = all_results .iter() .filter(|r| r.expected == *expected) .collect(); if class_results.is_empty() { continue; } print!(" {:>12}", class_names[i]); for pred in &class_ids { let count = class_results.iter().filter(|r| r.predicted == *pred).count(); print!(" {:>7}", count); } let other = class_results .iter() .filter(|r| !is_drum_class(r.predicted)) .count(); println!(" {:>7}", other); } println!(); // Non-drum misclassifications let non_drum: Vec<&ClassifyResult> = all_results .iter() .filter(|r| !is_drum_class(r.predicted)) .collect(); if !non_drum.is_empty() { println!(" Samples classified as non-drum: {} ({:.1}%)", non_drum.len(), non_drum.len() as f64 / total_classified as f64 * 100.0); let mut non_drum_classes: HashMap<&str, usize> = HashMap::new(); for r in &non_drum { *non_drum_classes.entry(r.predicted.as_str()).or_default() += 1; } let mut sorted: Vec<_> = non_drum_classes.into_iter().collect(); sorted.sort_by(|a, b| b.1.cmp(&a.1)); for (class, count) in &sorted { println!(" {} → {}", count, class); } } println!(); // ───────────────────────────────────────────────────────────── // Section 6: Edge Cases // ───────────────────────────────────────────────────────────── println!("━━━ 6. EDGE CASE HANDLING ━━━"); println!(); let edge_cases = [ ("silent.wav", test_suite_dir.join("edge-cases/silent.wav")), ("truncated.wav", test_suite_dir.join("edge-cases/truncated.wav")), ("café-crème.wav", test_suite_dir.join("edge-cases/unicode-name/café-crème.wav")), ("日本語テスト.wav", test_suite_dir.join("edge-cases/unicode-name/日本語テスト.wav")), ]; for (name, path) in &edge_cases { let config = AnalysisConfig { loudness: true, spectral: true, bpm: true, key: true, loop_detect: true, classify: true, fingerprint: true, auto_suggest_tags: false, max_analysis_seconds: Some(30.0), smart_skip: false, }; match analysis::analyze_sample("edge", path, &config) { Ok(r) => { println!( " {} → OK (dur={:.2}s, class={}, conf={:.2})", name, r.duration, r.classification.map(|c| c.as_str()).unwrap_or("none"), r.classification_confidence.unwrap_or(0.0) ); } Err(e) => { println!(" {} → ERROR: {}", name, e); } } } println!(); println!("═══════════════════════════════════════════════════════════════"); println!(" Benchmark complete. {} files analyzed.", n + total_classified); println!("═══════════════════════════════════════════════════════════════"); }