//! Audio analysis pipeline: orchestrates decoding, feature extraction, classification, and DB persistence. //! //! The pipeline decodes audio to mono f32 via Symphonia, then runs configurable //! stages (loudness, spectral features, BPM/key detection, loop detection, //! classification) and persists results to the `audio_analysis` table. pub mod basic; pub mod bpm; pub mod classify; pub mod config; pub mod decode; pub mod loop_detect; #[cfg(feature = "analysis")] pub mod loudness; pub mod mfcc; pub mod spectral; pub mod suggest; pub mod waveform; pub mod worker; use std::path::Path; use classify::SampleClass; use config::AnalysisConfig; use crate::db::Database; use crate::error::{unix_now, CoreError}; use crate::fingerprint; use tracing::instrument; /// Complete analysis result for a single sample. #[derive(Debug, Clone, serde::Serialize, serde::Deserialize)] pub struct AnalysisResult { /// Content-addressed hash identifying this sample in the store. pub hash: String, /// Total duration in seconds. pub duration: f64, /// Sample rate in Hz. pub sample_rate: u32, /// Number of channels in the source file. pub channels: u16, /// Peak amplitude in dBFS. pub peak_db: Option, /// RMS loudness in dBFS. pub rms_db: Option, /// Integrated loudness in LUFS. pub lufs: Option, /// Estimated tempo in beats per minute. pub bpm: Option, /// Estimated musical key (e.g. "A minor"). pub musical_key: Option, /// Whether the sample is detected as a seamless loop. pub is_loop: Option, /// Spectral centroid in Hz (brightness measure). pub spectral_centroid: Option, /// Spectral flatness (0 = tonal, 1 = noise-like). pub spectral_flatness: Option, /// Spectral rolloff frequency in Hz. pub spectral_rolloff: Option, /// Zero-crossing rate (proportion of sign changes per sample). pub zero_crossing_rate: Option, /// Onset detection strength. pub onset_strength: Option, /// Heuristic sample classification (kick, snare, pad, etc.). 
pub classification: Option, /// Peak envelope fingerprint for near-duplicate detection. pub fingerprint: Option>, /// Spectral bandwidth in Hz (spread of energy around centroid). pub spectral_bandwidth: Option, /// Variance of per-frame spectral centroids (spectral evolution). pub centroid_variance: Option, /// Peak-to-RMS ratio in linear domain (transient sharpness). pub crest_factor: Option, /// Time to 90% of peak amplitude in seconds (onset speed). pub attack_time: Option, /// ML classifier confidence (0.0-1.0). 0.0 when using rule-based fallback. pub classification_confidence: Option, } /// Run all configured analyses on a single sample file. #[instrument(skip_all)] pub fn analyze_sample( hash: &str, path: &Path, config: &AnalysisConfig, ) -> Result { // Guard against memory exhaustion: reject files over 2 GB before decoding. // A 2 GB compressed file would expand to several GB of f32 samples. const MAX_FILE_SIZE: u64 = 2 * 1024 * 1024 * 1024; if let Ok(metadata) = std::fs::metadata(path) && metadata.len() > MAX_FILE_SIZE { return Err(CoreError::Analysis(crate::error::AnalysisError::ProbeFailed( format!("file too large for analysis ({} MB, max {} MB)", metadata.len() / (1024 * 1024), MAX_FILE_SIZE / (1024 * 1024)), ))); } let decoded = decode::decode_to_mono(path)?; // Hard cap: reject files over 30 minutes to prevent memory exhaustion. // A 30-minute 96kHz mono signal is ~660 MB of f32 — beyond that is almost // certainly not a sample. const MAX_DECODE_DURATION: f64 = 1800.0; if decoded.duration > MAX_DECODE_DURATION { return Err(CoreError::Analysis(crate::error::AnalysisError::ProbeFailed( format!("file too long for analysis ({:.0}s, max {MAX_DECODE_DURATION}s)", decoded.duration), ))); } // Cap samples for expensive analyses (STFT, BPM/key). Cheap analyses and // fingerprint use the full signal. 
let capped_samples: &[f32] = if let Some(max_secs) = config.max_analysis_seconds { let max_samples = (max_secs * decoded.sample_rate as f64) as usize; &decoded.samples[..decoded.samples.len().min(max_samples)] } else { &decoded.samples }; let mut result = AnalysisResult { hash: hash.to_string(), duration: decoded.duration, sample_rate: decoded.sample_rate, channels: decoded.channels, peak_db: None, rms_db: None, lufs: None, bpm: None, musical_key: None, is_loop: None, spectral_centroid: None, spectral_flatness: None, spectral_rolloff: None, zero_crossing_rate: None, onset_strength: None, classification: None, fingerprint: None, spectral_bandwidth: None, centroid_variance: None, crest_factor: None, attack_time: None, classification_confidence: None, }; // Basic loudness (always fast — uses full signal) if config.loudness { result.peak_db = Some(basic::peak_db(&decoded.samples)); result.rms_db = Some(basic::rms_db(&decoded.samples)); result.crest_factor = Some(basic::crest_factor(&decoded.samples)); result.attack_time = Some(basic::attack_time(&decoded.samples, decoded.sample_rate)); #[cfg(feature = "analysis")] { result.lufs = Some(loudness::measure_lufs(&decoded.samples, decoded.sample_rate)); } } // Spectral features (uses capped samples) #[cfg(feature = "analysis")] if config.spectral { let (features, magnitude_frames) = spectral::compute_spectral_features_with_frames(capped_samples, decoded.sample_rate); result.spectral_centroid = Some(features.centroid); result.spectral_flatness = Some(features.flatness); result.spectral_rolloff = Some(features.rolloff); result.zero_crossing_rate = Some(features.zero_crossing_rate); result.onset_strength = Some(features.onset_strength); result.spectral_bandwidth = Some(features.bandwidth); result.centroid_variance = Some(features.centroid_variance); // Classification requires spectral + waveform features if config.classify { // Compute MFCCs from STFT magnitude frames let mfcc_features = mfcc::compute_mfccs(&magnitude_frames, 
decoded.sample_rate, 1024); let input = classify::ClassifyInput::with_mfccs( &features, decoded.duration, result.crest_factor.unwrap_or(0.0), result.attack_time.unwrap_or(0.0), &mfcc_features, ); let ml_result = classify::classify_ml(&input); result.classification = Some(ml_result.class); result.classification_confidence = Some(ml_result.confidence); } } // Smart skip: use classification to decide if BPM/key/loop make sense. // Drums, impacts, noise, foley, ambience, and textures skip these expensive stages. let (want_bpm, want_key, want_loop) = if config.smart_skip { if let Some(ref class) = result.classification { ( config.bpm && class.has_rhythm(), config.key && class.has_pitch(), config.loop_detect && class.has_rhythm(), ) } else { // No classification result — run everything requested (config.bpm, config.key, config.loop_detect) } } else { (config.bpm, config.key, config.loop_detect) }; // BPM + key detection (uses capped samples) if want_bpm || want_key { let bpm_key = bpm::detect_bpm_key(capped_samples, decoded.sample_rate, 2.0); if want_bpm { result.bpm = bpm_key.bpm; } if want_key { result.musical_key = bpm_key.key; } } // Loop detection if want_loop { result.is_loop = Some(loop_detect::is_loop( &decoded.samples, decoded.sample_rate, result.bpm, )); } // Fingerprint for near-duplicate detection (uses full signal) if config.fingerprint { result.fingerprint = Some(fingerprint::compute_envelope( &decoded.samples, decoded.sample_rate, )); } Ok(result) } /// Save analysis results to the database, overwriting any previous results for this hash. 
///
/// The row is written with `INSERT OR REPLACE`, stamped with the current
/// `unix_now()` value in `analyzed_at`. When `result.fingerprint` is present it
/// is then stored through `fingerprint::save_fingerprint` in a separate call.
///
/// # Errors
///
/// Propagates any error from the insert or from saving the fingerprint.
// NOTE(review): the row insert and the fingerprint save are two independent
// calls; whether they end up in one transaction depends on the caller — confirm.
#[instrument(skip_all)]
pub fn save_analysis(db: &Database, result: &AnalysisResult) -> Result<(), CoreError> {
    let analyzed_at = unix_now();
    // The class is stored in its string form; an absent class is bound as NULL.
    let class_label = result.classification.as_ref().map(|class| class.as_str());

    // Positional parameters ?1..?22 follow the column list order exactly.
    db.conn().execute(
        "INSERT OR REPLACE INTO audio_analysis ( \
         hash, duration, sample_rate, channels, \
         peak_db, rms_db, lufs, bpm, musical_key, is_loop, \
         spectral_centroid, spectral_flatness, spectral_rolloff, \
         zero_crossing_rate, onset_strength, classification, \
         spectral_bandwidth, centroid_variance, crest_factor, attack_time, \
         classification_confidence, analyzed_at \
         ) VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9, ?10, ?11, \
         ?12, ?13, ?14, ?15, ?16, ?17, ?18, ?19, ?20, ?21, ?22)",
        rusqlite::params![
            result.hash,
            result.duration,
            result.sample_rate,
            result.channels,
            result.peak_db,
            result.rms_db,
            result.lufs,
            result.bpm,
            result.musical_key,
            result.is_loop,
            result.spectral_centroid,
            result.spectral_flatness,
            result.spectral_rolloff,
            result.zero_crossing_rate,
            result.onset_strength,
            class_label,
            result.spectral_bandwidth,
            result.centroid_variance,
            result.crest_factor,
            result.attack_time,
            result.classification_confidence,
            analyzed_at,
        ],
    )?;

    // Nothing more to persist when no fingerprint was computed.
    let Some(envelope) = result.fingerprint.as_ref() else {
        return Ok(());
    };

    let stored = fingerprint::Fingerprint {
        hash: result.hash.clone(),
        envelope: envelope.clone(),
        sample_rate: result.sample_rate,
    };
    fingerprint::save_fingerprint(db, &stored)?;

    Ok(())
}

/// Load analysis results for a sample by hash. Returns `None` if no analysis exists.
#[instrument(skip_all)] pub fn load_analysis(db: &Database, hash: &str) -> Option { db.conn() .query_row( "SELECT hash, duration, sample_rate, channels, peak_db, rms_db, lufs, bpm, musical_key, is_loop, spectral_centroid, spectral_flatness, spectral_rolloff, zero_crossing_rate, onset_strength, classification, spectral_bandwidth, centroid_variance, crest_factor, attack_time, classification_confidence FROM audio_analysis WHERE hash = ?1", [hash], |row| { let class_str: Option = row.get(15)?; Ok(AnalysisResult { hash: row.get(0)?, duration: row.get(1)?, sample_rate: row.get(2)?, channels: row.get(3)?, peak_db: row.get(4)?, rms_db: row.get(5)?, lufs: row.get(6)?, bpm: row.get(7)?, musical_key: row.get(8)?, is_loop: row.get(9)?, spectral_centroid: row.get(10)?, spectral_flatness: row.get(11)?, spectral_rolloff: row.get(12)?, zero_crossing_rate: row.get(13)?, onset_strength: row.get(14)?, classification: class_str .and_then(|s| s.parse::().ok()), spectral_bandwidth: row.get(16)?, centroid_variance: row.get(17)?, crest_factor: row.get(18)?, attack_time: row.get(19)?, classification_confidence: row.get(20)?, fingerprint: None, }) }, ) .ok() } #[cfg(test)] mod tests { use super::*; #[test] fn analysis_result_construction_with_defaults() { let result = AnalysisResult { hash: "abc123".to_string(), duration: 1.5, sample_rate: 48000, channels: 1, peak_db: None, rms_db: None, lufs: None, bpm: None, musical_key: None, is_loop: None, spectral_centroid: None, spectral_flatness: None, spectral_rolloff: None, zero_crossing_rate: None, onset_strength: None, classification: None, fingerprint: None, spectral_bandwidth: None, centroid_variance: None, crest_factor: None, attack_time: None, classification_confidence: None, }; assert_eq!(result.hash, "abc123"); assert_eq!(result.sample_rate, 48000); assert_eq!(result.channels, 1); assert!((result.duration - 1.5).abs() < f64::EPSILON); assert!(result.peak_db.is_none()); assert!(result.bpm.is_none()); assert!(result.classification.is_none()); 
assert!(result.spectral_bandwidth.is_none()); assert!(result.crest_factor.is_none()); } #[test] fn analysis_result_fully_populated() { let result = AnalysisResult { hash: "def456".to_string(), duration: 3.2, sample_rate: 44100, channels: 2, peak_db: Some(-0.5), rms_db: Some(-12.0), lufs: Some(-14.0), bpm: Some(128.0), musical_key: Some("A minor".to_string()), is_loop: Some(true), spectral_centroid: Some(1500.0), spectral_flatness: Some(0.15), spectral_rolloff: Some(8000.0), zero_crossing_rate: Some(0.05), onset_strength: Some(30.0), classification: Some(SampleClass::Kick), fingerprint: None, spectral_bandwidth: Some(2500.0), centroid_variance: Some(50000.0), crest_factor: Some(4.5), attack_time: Some(0.005), classification_confidence: Some(0.87), }; assert_eq!(result.peak_db, Some(-0.5)); assert_eq!(result.rms_db, Some(-12.0)); assert_eq!(result.lufs, Some(-14.0)); assert_eq!(result.bpm, Some(128.0)); assert_eq!(result.musical_key.as_deref(), Some("A minor")); assert_eq!(result.is_loop, Some(true)); assert_eq!(result.spectral_centroid, Some(1500.0)); assert_eq!(result.spectral_flatness, Some(0.15)); assert_eq!(result.spectral_rolloff, Some(8000.0)); assert_eq!(result.zero_crossing_rate, Some(0.05)); assert_eq!(result.onset_strength, Some(30.0)); assert_eq!(result.classification, Some(SampleClass::Kick)); assert_eq!(result.spectral_bandwidth, Some(2500.0)); assert_eq!(result.centroid_variance, Some(50000.0)); assert_eq!(result.crest_factor, Some(4.5)); assert_eq!(result.attack_time, Some(0.005)); } }