//! Spectral analysis via STFT: centroid, flatness, rolloff, zero-crossing rate, and onset strength. //! //! The heavy `compute_spectral_features` function (which depends on `realfft`) is gated behind //! the `analysis` feature. The [`SpectralFeatures`] struct is always available so that downstream //! code (classify, AnalysisResult) compiles unconditionally. #[cfg(feature = "analysis")] use realfft::RealFftPlanner; use tracing::instrument; /// Results of spectral analysis across the entire signal. #[derive(Debug, Clone, Default)] pub struct SpectralFeatures { /// Spectral center of mass in Hz (low = bassy, high = bright). pub centroid: f64, /// Ratio from 0.0 (pure tone) to 1.0 (white noise), computed as geometric/arithmetic /// mean of magnitudes. pub flatness: f64, /// Frequency in Hz below which 85% of spectral energy resides. pub rolloff: f64, /// Proportion of sign changes per sample (high = noisy/bright transients). pub zero_crossing_rate: f64, /// Sum of positive spectral flux across frames (higher = more percussive onsets). pub onset_strength: f64, /// Spectral standard deviation around the centroid in Hz (spread of energy). pub bandwidth: f64, /// Variance of per-frame centroids (high = evolving spectrum, low = static). pub centroid_variance: f64, } /// Compute spectral features from mono audio samples. /// /// Uses STFT with Hann window, hop size = window/4. /// All features are averaged across frames. /// /// Requires the `analysis` feature (pulls in `realfft`). #[cfg(feature = "analysis")] #[instrument(skip_all)] pub fn compute_spectral_features(samples: &[f32], sample_rate: u32) -> SpectralFeatures { compute_spectral_inner(samples, sample_rate).0 } /// Compute spectral features and return per-frame magnitude spectra. /// /// Same as [`compute_spectral_features`] but also returns the magnitude vectors /// from each STFT frame, for downstream use by MFCC computation. #[cfg(feature = "analysis")] #[instrument(skip_all)] pub fn compute_spectral_features_with_frames( samples: &[f32], sample_rate: u32, ) -> (SpectralFeatures, Vec>) { compute_spectral_inner(samples, sample_rate) } /// Shared implementation for spectral feature extraction. #[cfg(feature = "analysis")] fn compute_spectral_inner( samples: &[f32], sample_rate: u32, ) -> (SpectralFeatures, Vec>) { if samples.is_empty() { return (SpectralFeatures::default(), Vec::new()); } let window_size = 1024; let hop_size = window_size; // no overlap — classification needs stable averages, not frame-level detail // Compute ZCR on the raw signal let zcr = zero_crossing_rate(samples); // If signal is too short for a single window, return just ZCR if samples.len() < window_size { return ( SpectralFeatures { zero_crossing_rate: zcr, ..Default::default() }, Vec::new(), ); } let mut planner = RealFftPlanner::::new(); let fft = planner.plan_fft_forward(window_size); // Hann window: w(n) = 0.5 * (1 - cos(2*pi*n / (N-1))) // Smoothly tapers signal to zero at frame edges, reducing spectral leakage in the FFT. let hann: Vec = (0..window_size) .map(|i| { 0.5 * (1.0 - (2.0 * std::f64::consts::PI * i as f64 / (window_size - 1) as f64).cos()) }) .collect(); let freq_bin_hz = sample_rate as f64 / window_size as f64; let spectrum_len = window_size / 2 + 1; let mut centroids = Vec::new(); let mut flatnesses = Vec::new(); let mut rolloffs = Vec::new(); let mut bandwidths = Vec::new(); let mut onset_diffs = Vec::new(); let mut magnitude_frames = Vec::new(); let mut pos = 0; while pos + window_size <= samples.len() { // Apply window let mut windowed: Vec = samples[pos..pos + window_size] .iter() .enumerate() .map(|(i, &s)| s as f64 * hann[i]) .collect(); // FFT let mut spectrum = fft.make_output_vec(); if fft.process(&mut windowed, &mut spectrum).is_err() { pos += hop_size; continue; } // Magnitude spectrum let magnitudes: Vec = spectrum.iter().map(|c| c.norm()).collect(); // Spectral centroid: frequency-weighted average of the magnitude spectrum. // Sum(bin_freq * magnitude) / Sum(magnitude) — gives the "center of mass" in Hz. // Low centroid = bass-heavy, high centroid = bright/treble-heavy. let mag_sum: f64 = magnitudes.iter().sum(); if mag_sum > 0.0 { let weighted_sum: f64 = magnitudes .iter() .enumerate() .map(|(i, &m)| i as f64 * freq_bin_hz * m) .sum(); let frame_centroid = weighted_sum / mag_sum; centroids.push(frame_centroid); // Spectral bandwidth: standard deviation of the spectrum around the centroid. // sqrt(Sum((freq - centroid)^2 * mag) / Sum(mag)) per frame. let bw_sum: f64 = magnitudes .iter() .enumerate() .map(|(i, &m)| { let freq = i as f64 * freq_bin_hz; let diff = freq - frame_centroid; diff * diff * m }) .sum(); bandwidths.push((bw_sum / mag_sum).sqrt()); // Spectral flatness = geometric_mean / arithmetic_mean of the magnitude spectrum. // Measures how "tone-like" vs "noise-like" the spectrum is: // 0.0 = pure tone (energy in one bin), 1.0 = white noise (energy uniform). // Computed in log-space to avoid floating-point underflow: exp(mean(ln(x))) // instead of the direct product. Bins below 1e-10 are floored to avoid ln(0). let n = spectrum_len as f64; let arithmetic_mean = mag_sum / n; let log_sum: f64 = magnitudes .iter() .map(|&m| if m > 1e-10 { m.ln() } else { (1e-10_f64).ln() }) .sum(); let geometric_mean = (log_sum / n).exp(); let flat = if arithmetic_mean > 0.0 { geometric_mean / arithmetic_mean } else { 0.0 }; flatnesses.push(flat.clamp(0.0, 1.0)); // Spectral rolloff: frequency below which 85% of spectral energy resides. // 85% is the standard threshold (vs 90% or 95%) — it captures the "useful" // bandwidth while ignoring the long tail of high-frequency noise. let threshold = mag_sum * 0.85; let mut cumsum = 0.0; let mut rolloff_freq = 0.0; for (i, &m) in magnitudes.iter().enumerate() { cumsum += m; if cumsum >= threshold { rolloff_freq = i as f64 * freq_bin_hz; break; } } rolloffs.push(rolloff_freq); // Store this frame, then compute onset strength against the previous // stored frame. `magnitude_frames` already retains every (non-silent) // frame for the later MFCC pass, so the previous spectrum is just the // second-to-last element — no separate `prev_spectrum` copy needed. magnitude_frames.push(magnitudes); // Onset strength via spectral flux: sum of positive magnitude increases // between consecutive frames. Only positive differences are counted // (half-wave rectification) because onsets are characterised by energy // appearing, not disappearing. if magnitude_frames.len() >= 2 { let curr = &magnitude_frames[magnitude_frames.len() - 1]; let prev = &magnitude_frames[magnitude_frames.len() - 2]; let flux: f64 = curr .iter() .zip(prev.iter()) .map(|(&curr, &prev)| (curr - prev).max(0.0)) .sum(); onset_diffs.push(flux); } } pos += hop_size; } let avg = |v: &[f64]| -> f64 { if v.is_empty() { 0.0 } else { v.iter().sum::() / v.len() as f64 } }; // Centroid variance: variance of per-frame centroids (how much the spectrum evolves). let centroid_mean = avg(¢roids); let centroid_var = if centroids.len() < 2 { 0.0 } else { let sum_sq: f64 = centroids.iter().map(|&c| (c - centroid_mean).powi(2)).sum(); sum_sq / centroids.len() as f64 }; let features = SpectralFeatures { centroid: centroid_mean, flatness: avg(&flatnesses), rolloff: avg(&rolloffs), zero_crossing_rate: zcr, onset_strength: avg(&onset_diffs), bandwidth: avg(&bandwidths), centroid_variance: centroid_var, }; (features, magnitude_frames) } /// Fraction of consecutive sample pairs that cross zero. #[cfg(feature = "analysis")] fn zero_crossing_rate(samples: &[f32]) -> f64 { if samples.len() < 2 { return 0.0; } let crossings = samples .windows(2) .filter(|w| (w[0] >= 0.0) != (w[1] >= 0.0)) .count(); crossings as f64 / (samples.len() - 1) as f64 } #[cfg(test)] #[cfg(feature = "analysis")] mod tests { use super::*; #[test] fn silence_features() { let silence = vec![0.0f32; 4096]; let f = compute_spectral_features(&silence, 44100); assert_eq!(f.centroid, 0.0); assert_eq!(f.onset_strength, 0.0); } #[test] fn sine_has_low_flatness() { // Pure sine at 440 Hz — should have low spectral flatness (tonal) let samples: Vec = (0..44100) .map(|i| (2.0 * std::f32::consts::PI * 440.0 * i as f32 / 44100.0).sin()) .collect(); let f = compute_spectral_features(&samples, 44100); assert!( f.flatness < 0.1, "pure sine flatness should be low, got {}", f.flatness ); // Centroid should be near 440 Hz assert!( (f.centroid - 440.0).abs() < 100.0, "centroid should be near 440 Hz, got {}", f.centroid ); } #[test] fn zcr_of_high_freq_is_higher() { let low: Vec = (0..44100) .map(|i| (2.0 * std::f32::consts::PI * 100.0 * i as f32 / 44100.0).sin()) .collect(); let high: Vec = (0..44100) .map(|i| (2.0 * std::f32::consts::PI * 10000.0 * i as f32 / 44100.0).sin()) .collect(); let zcr_low = zero_crossing_rate(&low); let zcr_high = zero_crossing_rate(&high); assert!(zcr_high > zcr_low); } #[test] fn short_signal() { // Shorter than one window let short = vec![0.5f32; 100]; let f = compute_spectral_features(&short, 44100); // Should still compute ZCR assert_eq!(f.zero_crossing_rate, 0.0); // constant signal, no crossings } }