Skip to main content

max / audiofiles

15.0 KB · 407 lines History Blame Raw
1 //! Chop engine: slice a sample into individual one-shots by transient, by equal
2 //! divisions, or by a BPM grid.
3 //!
4 //! Transient detection reuses the same spectral-flux onset measure the analysis
5 //! pipeline uses for `onset_strength` (see [`crate::analysis::spectral`]) — here
6 //! it is peak-picked to recover onset *positions* rather than a single aggregate.
7 //! BPM-grid slicing reuses stratum-dsp tempo detection via [`detect_bpm`].
8
9 use realfft::RealFftPlanner;
10
11 use crate::error::CoreError;
12
13 /// A half-open slice of a sample, in frame units: `[start_frame, end_frame)`.
14 #[derive(Debug, Clone, Copy, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
15 pub struct Slice {
16 pub start_frame: usize,
17 pub end_frame: usize,
18 }
19
20 impl Slice {
21 /// Number of frames in this slice.
22 pub fn len_frames(&self) -> usize {
23 self.end_frame.saturating_sub(self.start_frame)
24 }
25 }
26
27 /// How to chop a sample into slices.
28 #[derive(Debug, Clone, PartialEq, serde::Serialize, serde::Deserialize)]
29 pub enum ChopMethod {
30 /// Slice at detected transients. `sensitivity` in `0.0..=1.0` — higher finds
31 /// more (quieter) onsets, lower finds only the strongest hits.
32 Transient { sensitivity: f32 },
33 /// Slice into `n` equal divisions (e.g. 2, 4, 8, 16, 32).
34 EqualDivisions(usize),
35 /// Slice on a tempo grid. `subdivisions_per_beat` of 1 = one slice per beat,
36 /// 2 = eighth notes, 4 = sixteenths.
37 BpmGrid { bpm: f64, subdivisions_per_beat: u32 },
38 }
39
40 /// Compute slice boundaries for `samples` (interleaved, `channels`-wide) using
41 /// the given method. The returned slices always tile the whole sample with no
42 /// gaps or overlaps, the first starting at frame 0 and the last ending at the
43 /// final frame. Returns an empty vec for empty / zero-channel input.
44 pub fn compute_slices(
45 samples: &[f32],
46 channels: u16,
47 sample_rate: u32,
48 method: &ChopMethod,
49 ) -> Result<Vec<Slice>, CoreError> {
50 let ch = channels as usize;
51 if ch == 0 {
52 return Err(CoreError::Internal("chop: channels must be > 0".to_string()));
53 }
54 let total_frames = samples.len() / ch;
55 if total_frames == 0 {
56 return Ok(Vec::new());
57 }
58
59 let boundaries = match method {
60 ChopMethod::Transient { sensitivity } => {
61 let mono = mixdown_mono(samples, ch);
62 let mut starts = detect_onsets(&mono, sample_rate, *sensitivity);
63 // Force the first boundary to 0 so any pre-attack lead-in is folded
64 // into the first slice instead of being dropped.
65 if starts.first() != Some(&0) {
66 starts.insert(0, 0);
67 }
68 starts
69 }
70 ChopMethod::EqualDivisions(n) => {
71 let n = (*n).max(1);
72 (0..n).map(|i| i * total_frames / n).collect()
73 }
74 ChopMethod::BpmGrid {
75 bpm,
76 subdivisions_per_beat,
77 } => {
78 if *bpm <= 0.0 {
79 return Err(CoreError::Internal(
80 "chop: bpm must be positive for BPM-grid chop".to_string(),
81 ));
82 }
83 let subdiv = (*subdivisions_per_beat).max(1) as f64;
84 let frames_per_slice = (sample_rate as f64 * 60.0 / (bpm * subdiv)).round() as usize;
85 if frames_per_slice == 0 {
86 return Err(CoreError::Internal("chop: BPM grid step rounds to 0 frames".to_string()));
87 }
88 (0..total_frames).step_by(frames_per_slice).collect()
89 }
90 };
91
92 Ok(boundaries_to_slices(boundaries, total_frames))
93 }
94
95 /// Convert sorted, deduplicated start boundaries into half-open slices covering
96 /// `[0, total_frames)`. Boundaries at or past the end are dropped.
97 fn boundaries_to_slices(mut starts: Vec<usize>, total_frames: usize) -> Vec<Slice> {
98 starts.retain(|&s| s < total_frames);
99 starts.sort_unstable();
100 starts.dedup();
101 if starts.is_empty() {
102 starts.push(0);
103 }
104 let mut slices = Vec::with_capacity(starts.len());
105 for i in 0..starts.len() {
106 let start = starts[i];
107 let end = starts.get(i + 1).copied().unwrap_or(total_frames);
108 if end > start {
109 slices.push(Slice { start_frame: start, end_frame: end });
110 }
111 }
112 slices
113 }
114
115 /// Extract one slice as its own interleaved buffer.
116 pub fn render_slice(samples: &[f32], channels: u16, slice: &Slice) -> Vec<f32> {
117 let ch = channels as usize;
118 if ch == 0 {
119 return Vec::new();
120 }
121 let total_frames = samples.len() / ch;
122 let start = slice.start_frame.min(total_frames);
123 let end = slice.end_frame.min(total_frames);
124 if end <= start {
125 return Vec::new();
126 }
127 samples[start * ch..end * ch].to_vec()
128 }
129
130 /// Detect BPM for a sample by mixing to mono and reusing stratum-dsp tempo
131 /// detection. Returns `None` when the tempo can't be reliably estimated.
132 pub fn detect_bpm(samples: &[f32], channels: u16, sample_rate: u32) -> Option<f64> {
133 let ch = channels.max(1) as usize;
134 let mono = mixdown_mono(samples, ch);
135 crate::analysis::bpm::detect_bpm_key(&mono, sample_rate, 2.0).bpm
136 }
137
/// Collapse an interleaved buffer to mono by averaging the `ch` samples of
/// every complete frame.
///
/// A `ch` of 0 or 1 returns a copy of the input unchanged. Trailing samples
/// that do not fill a whole frame are ignored.
fn mixdown_mono(samples: &[f32], ch: usize) -> Vec<f32> {
    if ch <= 1 {
        return samples.to_vec();
    }

    let divisor = ch as f32;
    samples
        .chunks_exact(ch)
        // Accumulate left to right from 0.0 so the rounding of the sum is the
        // same as a plain per-channel loop.
        .map(|frame| frame.iter().fold(0.0f32, |acc, &s| acc + s) / divisor)
        .collect()
}
154
155 const WINDOW_SIZE: usize = 1024;
156 const HOP_SIZE: usize = 512;
157
158 /// Detect transient onset positions (in frames) via spectral-flux peak picking.
159 ///
160 /// Computes a per-frame spectral flux (sum of positive magnitude increases
161 /// between consecutive STFT frames), then selects frames that both exceed an
162 /// adaptive threshold (mean + k·std, where k falls as sensitivity rises) and are
163 /// local maxima, enforcing a minimum gap so a single hit isn't split.
164 fn detect_onsets(mono: &[f32], sample_rate: u32, sensitivity: f32) -> Vec<usize> {
165 if mono.len() < WINDOW_SIZE * 2 {
166 // Too short for a meaningful STFT — treat as a single slice.
167 return vec![0];
168 }
169
170 let mut planner = RealFftPlanner::<f32>::new();
171 let fft = planner.plan_fft_forward(WINDOW_SIZE);
172
173 let hann: Vec<f32> = (0..WINDOW_SIZE)
174 .map(|i| {
175 0.5 * (1.0 - (2.0 * std::f32::consts::PI * i as f32 / (WINDOW_SIZE - 1) as f32).cos())
176 })
177 .collect();
178
179 // Spectral flux per hop, paired with the frame index of the *current* frame.
180 let mut flux: Vec<f32> = Vec::new();
181 let mut frame_positions: Vec<usize> = Vec::new();
182 let mut prev_mag: Option<Vec<f32>> = None;
183
184 let mut pos = 0;
185 while pos + WINDOW_SIZE <= mono.len() {
186 let mut windowed: Vec<f32> = mono[pos..pos + WINDOW_SIZE]
187 .iter()
188 .enumerate()
189 .map(|(i, &s)| s * hann[i])
190 .collect();
191 let mut spectrum = fft.make_output_vec();
192 if fft.process(&mut windowed, &mut spectrum).is_ok() {
193 let mag: Vec<f32> = spectrum.iter().map(|c| c.norm()).collect();
194 if let Some(ref prev) = prev_mag {
195 let f: f32 = mag
196 .iter()
197 .zip(prev.iter())
198 .map(|(&c, &p)| (c - p).max(0.0))
199 .sum();
200 flux.push(f);
201 frame_positions.push(pos);
202 }
203 prev_mag = Some(mag);
204 }
205 pos += HOP_SIZE;
206 }
207
208 if flux.is_empty() {
209 return vec![0];
210 }
211
212 let mean = flux.iter().sum::<f32>() / flux.len() as f32;
213 let var = flux.iter().map(|&f| (f - mean) * (f - mean)).sum::<f32>() / flux.len() as f32;
214 let std = var.sqrt();
215 // sensitivity 0 -> mean + 1.5 std (few onsets); 1 -> mean + 0.3 std (many).
216 let k = 1.5 - 1.2 * sensitivity.clamp(0.0, 1.0);
217 let threshold = mean + k * std;
218
219 // 50 ms: above a typical one-shot's attack span, so the several flux frames
220 // a single hit spans collapse to one onset rather than splitting the hit.
221 let min_gap_frames = (sample_rate as f32 * 0.05) as usize;
222 // Absolute floor so a silent (all-zero-flux) buffer yields no onsets even
223 // though its adaptive threshold is also zero.
224 const FLUX_FLOOR: f32 = 1e-4;
225
226 // Collect candidate peaks: above threshold and a strict local maximum.
227 // Each candidate's boundary sits one hop *before* the frame whose flux rose,
228 // so the cut lands ahead of the attack rather than a hop inside it (the flux
229 // for the window at `pos` measures the energy jump from the window at
230 // `pos - HOP_SIZE`, so the attack precedes `pos`).
231 let mut candidates: Vec<(f32, usize)> = Vec::new();
232 for i in 0..flux.len() {
233 let f = flux[i];
234 if f <= threshold || f < FLUX_FLOOR {
235 continue;
236 }
237 let is_peak = (i == 0 || flux[i - 1] < f) && (i + 1 >= flux.len() || flux[i + 1] < f);
238 if is_peak {
239 candidates.push((f, frame_positions[i].saturating_sub(HOP_SIZE)));
240 }
241 }
242
243 // Non-maximum suppression: take the strongest peaks first and drop any
244 // weaker peak within `min_gap_frames` of one already kept. This keeps the
245 // real (loud) transient when a soft pre-onset or flam sits beside it,
246 // instead of greedily keeping whichever came first in time.
247 candidates.sort_by(|a, b| b.0.partial_cmp(&a.0).unwrap_or(std::cmp::Ordering::Equal));
248 let mut onsets: Vec<usize> = Vec::new();
249 for (_flux, frame) in candidates {
250 if onsets.iter().any(|&o| frame.abs_diff(o) < min_gap_frames) {
251 continue;
252 }
253 onsets.push(frame);
254 }
255 onsets.sort_unstable();
256
257 if onsets.is_empty() {
258 onsets.push(0);
259 }
260 onsets
261 }
262
#[cfg(test)]
mod tests {
    use super::*;

    /// Shorthand for the expected slice in assertions.
    fn span(start_frame: usize, end_frame: usize) -> Slice {
        Slice { start_frame, end_frame }
    }

    /// Assert that `slices` covers `[0, total)` contiguously: starts at 0,
    /// ends at `total`, and every slice begins where the previous one ended.
    fn assert_tiles(slices: &[Slice], total: usize) {
        assert_eq!(slices[0].start_frame, 0);
        assert_eq!(slices.last().unwrap().end_frame, total);
        for pair in slices.windows(2) {
            assert_eq!(pair[0].end_frame, pair[1].start_frame);
        }
    }

    #[test]
    fn equal_divisions_tiles_exactly() {
        // 1000 mono frames into 4 -> [0,250),[250,500),[500,750),[750,1000)
        let silence = vec![0.0f32; 1000];
        let slices = compute_slices(&silence, 1, 44100, &ChopMethod::EqualDivisions(4)).unwrap();
        assert_eq!(slices.len(), 4);
        assert_eq!(slices[0], span(0, 250));
        assert_eq!(slices[3], span(750, 1000));
        // Tiling: contiguous, no gaps, covers the whole buffer.
        assert_tiles(&slices, 1000);
    }

    #[test]
    fn equal_divisions_stereo_frames() {
        // 8 stereo frames (16 samples) into 2 -> two 4-frame slices.
        let silence = vec![0.0f32; 16];
        let slices = compute_slices(&silence, 2, 44100, &ChopMethod::EqualDivisions(2)).unwrap();
        assert_eq!(slices.len(), 2);
        assert_eq!(slices[0], span(0, 4));
        assert_eq!(slices[1], span(4, 8));
    }

    #[test]
    fn bpm_grid_step_matches_tempo() {
        // 120 BPM at 48k = 0.5s/beat = 24000 frames/beat.
        let sample_rate = 48000;
        let one_second = vec![0.0f32; 48000];
        let method = ChopMethod::BpmGrid { bpm: 120.0, subdivisions_per_beat: 1 };
        let slices = compute_slices(&one_second, 1, sample_rate, &method).unwrap();
        // boundaries at 0 and 24000 -> two slices
        assert_eq!(slices.len(), 2);
        assert_eq!(slices[0], span(0, 24000));
        assert_eq!(slices[1], span(24000, 48000));
    }

    #[test]
    fn bpm_grid_subdivisions() {
        // Sixteenths at 120 BPM/48k: 24000/4 = 6000 frames per slice.
        let one_beat = vec![0.0f32; 24000];
        let method = ChopMethod::BpmGrid { bpm: 120.0, subdivisions_per_beat: 4 };
        let slices = compute_slices(&one_beat, 1, 48000, &method).unwrap();
        assert_eq!(slices.len(), 4);
        assert!(slices.iter().all(|s| s.len_frames() == 6000));
    }

    #[test]
    fn bpm_grid_rejects_nonpositive() {
        let silence = vec![0.0f32; 100];
        let method = ChopMethod::BpmGrid { bpm: 0.0, subdivisions_per_beat: 1 };
        let outcome = compute_slices(&silence, 1, 48000, &method);
        assert!(outcome.is_err());
    }

    #[test]
    fn render_slice_extracts_stereo_range() {
        // 4 stereo frames: L/R interleaved.
        let samples = vec![1.0, -1.0, 2.0, -2.0, 3.0, -3.0, 4.0, -4.0];
        let rendered = render_slice(&samples, 2, &span(1, 3));
        assert_eq!(rendered, vec![2.0, -2.0, 3.0, -3.0]);
    }

    #[test]
    fn render_slice_clamps_out_of_range() {
        let samples = vec![1.0, 2.0, 3.0];
        // End past the buffer is clamped to the last frame.
        let clamped = render_slice(&samples, 1, &span(2, 99));
        assert_eq!(clamped, vec![3.0]);
        // A slice entirely past the buffer renders nothing.
        let nothing = render_slice(&samples, 1, &span(5, 6));
        assert!(nothing.is_empty());
    }

    #[test]
    fn empty_input_yields_no_slices() {
        let slices = compute_slices(&[], 1, 44100, &ChopMethod::EqualDivisions(4)).unwrap();
        assert!(slices.is_empty());
    }

    #[test]
    fn zero_channels_errors() {
        let outcome = compute_slices(&[0.0, 1.0], 0, 44100, &ChopMethod::EqualDivisions(2));
        assert!(outcome.is_err());
    }

    #[test]
    fn transient_chop_finds_impulses() {
        // Build 4 impulses spaced 0.25s apart in a 1s 44.1k mono signal. Each
        // impulse is a short burst of full-scale alternating-sign samples, so
        // the FFT sees a broadband energy jump (a clear spectral-flux peak).
        let sr = 44100usize;
        let mut signal = vec![0.0f32; sr];
        for onset in [0usize, sr / 4, sr / 2, 3 * sr / 4] {
            for (k, sample) in signal[onset..onset + 2000].iter_mut().enumerate() {
                // Alternating sign = high-frequency burst, strong flux.
                *sample = if k % 2 == 0 { 0.9 } else { -0.9 };
            }
        }
        let method = ChopMethod::Transient { sensitivity: 0.5 };
        let slices = compute_slices(&signal, 1, sr as u32, &method).unwrap();
        // Should recover roughly one slice per impulse (allow the detector some
        // slack but require it found the structure, not one big slice or dozens).
        assert!(
            (3..=6).contains(&slices.len()),
            "expected ~4 slices, got {}",
            slices.len()
        );
        // Slices tile the whole buffer.
        assert_tiles(&slices, sr);
        // A detected boundary should land near the second impulse (sr/4).
        let near = slices.iter().any(|s| s.start_frame.abs_diff(sr / 4) < 3000);
        assert!(near, "no slice boundary near the 0.25s impulse: {slices:?}");
    }

    #[test]
    fn transient_chop_silence_is_single_slice() {
        let silence = vec![0.0f32; 44100];
        let method = ChopMethod::Transient { sensitivity: 0.5 };
        let slices = compute_slices(&silence, 1, 44100, &method).unwrap();
        assert_eq!(slices.len(), 1);
        assert_eq!(slices[0], span(0, 44100));
    }
}
407