| 1 |
|
| 2 |
|
| 3 |
|
| 4 |
|
| 5 |
|
| 6 |
|
| 7 |
#[cfg(feature = "analysis")] |
| 8 |
use realfft::RealFftPlanner; |
| 9 |
use tracing::instrument; |
| 10 |
|
| 11 |
|
| 12 |
#[derive(Debug, Clone, Default)] |
| 13 |
pub struct SpectralFeatures { |
| 14 |
|
| 15 |
pub centroid: f64, |
| 16 |
|
| 17 |
|
| 18 |
pub flatness: f64, |
| 19 |
|
| 20 |
pub rolloff: f64, |
| 21 |
|
| 22 |
pub zero_crossing_rate: f64, |
| 23 |
|
| 24 |
pub onset_strength: f64, |
| 25 |
|
| 26 |
pub bandwidth: f64, |
| 27 |
|
| 28 |
pub centroid_variance: f64, |
| 29 |
} |
| 30 |
|
| 31 |
|
| 32 |
|
| 33 |
|
| 34 |
|
| 35 |
|
| 36 |
|
| 37 |
#[cfg(feature = "analysis")] |
| 38 |
#[instrument(skip_all)] |
| 39 |
pub fn compute_spectral_features(samples: &[f32], sample_rate: u32) -> SpectralFeatures { |
| 40 |
compute_spectral_inner(samples, sample_rate).0 |
| 41 |
} |
| 42 |
|
| 43 |
|
| 44 |
|
| 45 |
|
| 46 |
|
| 47 |
#[cfg(feature = "analysis")] |
| 48 |
#[instrument(skip_all)] |
| 49 |
pub fn compute_spectral_features_with_frames( |
| 50 |
samples: &[f32], |
| 51 |
sample_rate: u32, |
| 52 |
) -> (SpectralFeatures, Vec<Vec<f64>>) { |
| 53 |
compute_spectral_inner(samples, sample_rate) |
| 54 |
} |
| 55 |
|
| 56 |
|
| 57 |
#[cfg(feature = "analysis")] |
| 58 |
fn compute_spectral_inner( |
| 59 |
samples: &[f32], |
| 60 |
sample_rate: u32, |
| 61 |
) -> (SpectralFeatures, Vec<Vec<f64>>) { |
| 62 |
if samples.is_empty() { |
| 63 |
return (SpectralFeatures::default(), Vec::new()); |
| 64 |
} |
| 65 |
|
| 66 |
let window_size = 1024; |
| 67 |
let hop_size = window_size; |
| 68 |
|
| 69 |
|
| 70 |
let zcr = zero_crossing_rate(samples); |
| 71 |
|
| 72 |
|
| 73 |
if samples.len() < window_size { |
| 74 |
return ( |
| 75 |
SpectralFeatures { |
| 76 |
zero_crossing_rate: zcr, |
| 77 |
..Default::default() |
| 78 |
}, |
| 79 |
Vec::new(), |
| 80 |
); |
| 81 |
} |
| 82 |
|
| 83 |
let mut planner = RealFftPlanner::<f64>::new(); |
| 84 |
let fft = planner.plan_fft_forward(window_size); |
| 85 |
|
| 86 |
|
| 87 |
|
| 88 |
let hann: Vec<f64> = (0..window_size) |
| 89 |
.map(|i| { |
| 90 |
0.5 * (1.0 - (2.0 * std::f64::consts::PI * i as f64 / (window_size - 1) as f64).cos()) |
| 91 |
}) |
| 92 |
.collect(); |
| 93 |
|
| 94 |
let freq_bin_hz = sample_rate as f64 / window_size as f64; |
| 95 |
let spectrum_len = window_size / 2 + 1; |
| 96 |
|
| 97 |
let mut centroids = Vec::new(); |
| 98 |
let mut flatnesses = Vec::new(); |
| 99 |
let mut rolloffs = Vec::new(); |
| 100 |
let mut bandwidths = Vec::new(); |
| 101 |
let mut onset_diffs = Vec::new(); |
| 102 |
let mut magnitude_frames = Vec::new(); |
| 103 |
|
| 104 |
let mut pos = 0; |
| 105 |
while pos + window_size <= samples.len() { |
| 106 |
|
| 107 |
let mut windowed: Vec<f64> = samples[pos..pos + window_size] |
| 108 |
.iter() |
| 109 |
.enumerate() |
| 110 |
.map(|(i, &s)| s as f64 * hann[i]) |
| 111 |
.collect(); |
| 112 |
|
| 113 |
|
| 114 |
let mut spectrum = fft.make_output_vec(); |
| 115 |
if fft.process(&mut windowed, &mut spectrum).is_err() { |
| 116 |
pos += hop_size; |
| 117 |
continue; |
| 118 |
} |
| 119 |
|
| 120 |
|
| 121 |
let magnitudes: Vec<f64> = spectrum.iter().map(|c| c.norm()).collect(); |
| 122 |
|
| 123 |
|
| 124 |
|
| 125 |
|
| 126 |
let mag_sum: f64 = magnitudes.iter().sum(); |
| 127 |
if mag_sum > 0.0 { |
| 128 |
let weighted_sum: f64 = magnitudes |
| 129 |
.iter() |
| 130 |
.enumerate() |
| 131 |
.map(|(i, &m)| i as f64 * freq_bin_hz * m) |
| 132 |
.sum(); |
| 133 |
let frame_centroid = weighted_sum / mag_sum; |
| 134 |
centroids.push(frame_centroid); |
| 135 |
|
| 136 |
|
| 137 |
|
| 138 |
let bw_sum: f64 = magnitudes |
| 139 |
.iter() |
| 140 |
.enumerate() |
| 141 |
.map(|(i, &m)| { |
| 142 |
let freq = i as f64 * freq_bin_hz; |
| 143 |
let diff = freq - frame_centroid; |
| 144 |
diff * diff * m |
| 145 |
}) |
| 146 |
.sum(); |
| 147 |
bandwidths.push((bw_sum / mag_sum).sqrt()); |
| 148 |
|
| 149 |
|
| 150 |
|
| 151 |
|
| 152 |
|
| 153 |
|
| 154 |
let n = spectrum_len as f64; |
| 155 |
let arithmetic_mean = mag_sum / n; |
| 156 |
let log_sum: f64 = magnitudes |
| 157 |
.iter() |
| 158 |
.map(|&m| if m > 1e-10 { m.ln() } else { (1e-10_f64).ln() }) |
| 159 |
.sum(); |
| 160 |
let geometric_mean = (log_sum / n).exp(); |
| 161 |
let flat = if arithmetic_mean > 0.0 { |
| 162 |
geometric_mean / arithmetic_mean |
| 163 |
} else { |
| 164 |
0.0 |
| 165 |
}; |
| 166 |
flatnesses.push(flat.clamp(0.0, 1.0)); |
| 167 |
|
| 168 |
|
| 169 |
|
| 170 |
|
| 171 |
let threshold = mag_sum * 0.85; |
| 172 |
let mut cumsum = 0.0; |
| 173 |
let mut rolloff_freq = 0.0; |
| 174 |
for (i, &m) in magnitudes.iter().enumerate() { |
| 175 |
cumsum += m; |
| 176 |
if cumsum >= threshold { |
| 177 |
rolloff_freq = i as f64 * freq_bin_hz; |
| 178 |
break; |
| 179 |
} |
| 180 |
} |
| 181 |
rolloffs.push(rolloff_freq); |
| 182 |
|
| 183 |
|
| 184 |
|
| 185 |
|
| 186 |
|
| 187 |
magnitude_frames.push(magnitudes); |
| 188 |
|
| 189 |
|
| 190 |
|
| 191 |
|
| 192 |
|
| 193 |
if magnitude_frames.len() >= 2 { |
| 194 |
let curr = &magnitude_frames[magnitude_frames.len() - 1]; |
| 195 |
let prev = &magnitude_frames[magnitude_frames.len() - 2]; |
| 196 |
let flux: f64 = curr |
| 197 |
.iter() |
| 198 |
.zip(prev.iter()) |
| 199 |
.map(|(&curr, &prev)| (curr - prev).max(0.0)) |
| 200 |
.sum(); |
| 201 |
onset_diffs.push(flux); |
| 202 |
} |
| 203 |
} |
| 204 |
|
| 205 |
pos += hop_size; |
| 206 |
} |
| 207 |
|
| 208 |
let avg = |v: &[f64]| -> f64 { |
| 209 |
if v.is_empty() { |
| 210 |
0.0 |
| 211 |
} else { |
| 212 |
v.iter().sum::<f64>() / v.len() as f64 |
| 213 |
} |
| 214 |
}; |
| 215 |
|
| 216 |
|
| 217 |
let centroid_mean = avg(¢roids); |
| 218 |
let centroid_var = if centroids.len() < 2 { |
| 219 |
0.0 |
| 220 |
} else { |
| 221 |
let sum_sq: f64 = centroids.iter().map(|&c| (c - centroid_mean).powi(2)).sum(); |
| 222 |
sum_sq / centroids.len() as f64 |
| 223 |
}; |
| 224 |
|
| 225 |
let features = SpectralFeatures { |
| 226 |
centroid: centroid_mean, |
| 227 |
flatness: avg(&flatnesses), |
| 228 |
rolloff: avg(&rolloffs), |
| 229 |
zero_crossing_rate: zcr, |
| 230 |
onset_strength: avg(&onset_diffs), |
| 231 |
bandwidth: avg(&bandwidths), |
| 232 |
centroid_variance: centroid_var, |
| 233 |
}; |
| 234 |
|
| 235 |
(features, magnitude_frames) |
| 236 |
} |
| 237 |
|
| 238 |
|
| 239 |
#[cfg(feature = "analysis")] |
| 240 |
fn zero_crossing_rate(samples: &[f32]) -> f64 { |
| 241 |
if samples.len() < 2 { |
| 242 |
return 0.0; |
| 243 |
} |
| 244 |
let crossings = samples |
| 245 |
.windows(2) |
| 246 |
.filter(|w| (w[0] >= 0.0) != (w[1] >= 0.0)) |
| 247 |
.count(); |
| 248 |
crossings as f64 / (samples.len() - 1) as f64 |
| 249 |
} |
| 250 |
|
| 251 |
#[cfg(test)] |
| 252 |
#[cfg(feature = "analysis")] |
| 253 |
mod tests { |
| 254 |
use super::*; |
| 255 |
|
| 256 |
#[test] |
| 257 |
fn silence_features() { |
| 258 |
let silence = vec![0.0f32; 4096]; |
| 259 |
let f = compute_spectral_features(&silence, 44100); |
| 260 |
assert_eq!(f.centroid, 0.0); |
| 261 |
assert_eq!(f.onset_strength, 0.0); |
| 262 |
} |
| 263 |
|
| 264 |
#[test] |
| 265 |
fn sine_has_low_flatness() { |
| 266 |
|
| 267 |
let samples: Vec<f32> = (0..44100) |
| 268 |
.map(|i| (2.0 * std::f32::consts::PI * 440.0 * i as f32 / 44100.0).sin()) |
| 269 |
.collect(); |
| 270 |
let f = compute_spectral_features(&samples, 44100); |
| 271 |
assert!( |
| 272 |
f.flatness < 0.1, |
| 273 |
"pure sine flatness should be low, got {}", |
| 274 |
f.flatness |
| 275 |
); |
| 276 |
|
| 277 |
assert!( |
| 278 |
(f.centroid - 440.0).abs() < 100.0, |
| 279 |
"centroid should be near 440 Hz, got {}", |
| 280 |
f.centroid |
| 281 |
); |
| 282 |
} |
| 283 |
|
| 284 |
#[test] |
| 285 |
fn zcr_of_high_freq_is_higher() { |
| 286 |
let low: Vec<f32> = (0..44100) |
| 287 |
.map(|i| (2.0 * std::f32::consts::PI * 100.0 * i as f32 / 44100.0).sin()) |
| 288 |
.collect(); |
| 289 |
let high: Vec<f32> = (0..44100) |
| 290 |
.map(|i| (2.0 * std::f32::consts::PI * 10000.0 * i as f32 / 44100.0).sin()) |
| 291 |
.collect(); |
| 292 |
let zcr_low = zero_crossing_rate(&low); |
| 293 |
let zcr_high = zero_crossing_rate(&high); |
| 294 |
assert!(zcr_high > zcr_low); |
| 295 |
} |
| 296 |
|
| 297 |
#[test] |
| 298 |
fn short_signal() { |
| 299 |
|
| 300 |
let short = vec![0.5f32; 100]; |
| 301 |
let f = compute_spectral_features(&short, 44100); |
| 302 |
|
| 303 |
assert_eq!(f.zero_crossing_rate, 0.0); |
| 304 |
} |
| 305 |
} |
| 306 |
|