Skip to main content

max / audiofiles

4.9 KB · 154 lines History Blame Raw
1 //! Loop detection via start/end cross-correlation and beat-alignment heuristics.
2
3 use tracing::instrument;
4
5 /// Detect whether an audio sample is likely a loop.
6 ///
7 /// Uses two heuristics:
8 /// 1. Cross-correlation between the start and end of the sample
9 /// 2. Beat alignment: if BPM is known, check if duration is a clean
10 /// multiple of the beat length
11 #[instrument(skip_all)]
12 pub fn is_loop(samples: &[f32], sample_rate: u32, bpm: Option<f64>) -> bool {
13 if samples.len() < 1024 {
14 return false;
15 }
16
17 let cross_corr = start_end_correlation(samples);
18 let beat_aligned = bpm.is_some_and(|b| is_beat_aligned(samples, sample_rate, b));
19
20 // High correlation at boundaries + beat alignment = strong loop signal
21 if cross_corr > 0.8 && beat_aligned {
22 return true;
23 }
24
25 // Very high correlation alone is suggestive
26 if cross_corr > 0.9 {
27 return true;
28 }
29
30 // Beat-aligned with moderate correlation
31 if beat_aligned && cross_corr > 0.5 {
32 return true;
33 }
34
35 false
36 }
37
/// Normalized cross-correlation (Pearson) between the first and last
/// `window_size` samples of `samples`.
///
/// The window is 512 samples (~12ms at 44.1kHz) or a quarter of the signal,
/// whichever is smaller. 512 is enough to capture a few cycles of bass
/// frequencies while staying fast.
///
/// Returns a value in `[-1.0, 1.0]`. Returns `0.0` when no meaningful
/// correlation can be computed:
/// - the window would be shorter than 64 samples,
/// - either window is constant (e.g. silent), or
/// - the windows contain NaN or infinite samples.
fn start_end_correlation(samples: &[f32]) -> f64 {
    const MAX_WINDOW: usize = 512;
    const MIN_WINDOW: usize = 64;

    let window_size = MAX_WINDOW.min(samples.len() / 4);
    if window_size < MIN_WINDOW {
        return 0.0;
    }

    let head = &samples[..window_size];
    let tail = &samples[samples.len() - window_size..];

    // Pearson correlation: subtract means, then covariance / (std_head * std_tail).
    let mean = |window: &[f32]| {
        window.iter().map(|&s| f64::from(s)).sum::<f64>() / window_size as f64
    };
    let mean_head = mean(head);
    let mean_tail = mean(tail);

    // Accumulate the covariance and both variance numerators in one pass.
    let (cov, var_head, var_tail) = head.iter().zip(tail).fold(
        (0.0f64, 0.0f64, 0.0f64),
        |(cov, var_h, var_t), (&h, &t)| {
            let dh = f64::from(h) - mean_head;
            let dt = f64::from(t) - mean_tail;
            (cov + dh * dt, var_h + dh * dh, var_t + dt * dt)
        },
    );

    // A zero denominator means at least one window is constant. A non-finite
    // denominator means the input held NaN/infinite samples; without this guard
    // the NaN would slip past the `== 0.0` test and be returned to the caller.
    let denom = (var_head * var_tail).sqrt();
    if denom == 0.0 || !denom.is_finite() {
        return 0.0;
    }

    (cov / denom).clamp(-1.0, 1.0)
}
78
/// Report whether the buffer's duration is close to a whole number of beats
/// at `bpm`.
///
/// Returns `false` when `bpm` is not positive, when `sample_rate` is zero, or
/// when the duration rounds to fewer than one beat. Otherwise the deviation
/// from the nearest whole beat count, divided by that beat count, must be
/// below 3%.
///
/// NOTE(review): the deviation from the nearest whole beat can never exceed
/// half a beat, so once the rounded beat count reaches 17 the 3% relative
/// test is always satisfied (0.5 / 17 < 0.03). Long buffers are therefore
/// always reported as aligned — confirm whether that is intended.
fn is_beat_aligned(samples: &[f32], sample_rate: u32, bpm: f64) -> bool {
    // Tolerance for sample-count rounding and slight BPM estimation drift.
    const MAX_RELATIVE_ERROR: f64 = 0.03;

    if sample_rate == 0 || bpm <= 0.0 {
        return false;
    }

    let seconds = samples.len() as f64 / f64::from(sample_rate);
    let seconds_per_beat = 60.0 / bpm;
    let beat_count = seconds / seconds_per_beat;
    let nearest_whole = beat_count.round();

    // Require at least one full beat, then compare the relative deviation
    // from the nearest whole beat count against the tolerance.
    nearest_whole >= 1.0
        && (beat_count - nearest_whole).abs() / nearest_whole < MAX_RELATIVE_ERROR
}
102
#[cfg(test)]
mod tests {
    use super::*;
    use std::f32::consts::PI;

    /// A signal built from whole repetitions of one period starts and ends on
    /// the same waveform, so the boundary correlation should be very high.
    #[test]
    fn identical_start_end_is_loop() {
        let period: Vec<f32> = (0..1024)
            .map(|i| (2.0 * PI * 2.0 * i as f32 / 1024.0).sin())
            .collect();
        let samples = period.repeat(8);
        assert!(start_end_correlation(&samples) > 0.9);
    }

    /// A 100 Hz sine in the first half and a quieter 8 kHz sine in the second
    /// half have very different waveform shapes at the two boundaries.
    #[test]
    fn different_start_end_not_loop() {
        let samples: Vec<f32> = (0..8192usize)
            .map(|i| {
                let t = i as f32;
                if i < 4096 {
                    // Low-frequency sine at the start.
                    (2.0 * PI * 100.0 * t / 44100.0).sin()
                } else {
                    // High-frequency sine at the end, at reduced amplitude.
                    (2.0 * PI * 8000.0 * t / 44100.0).sin() * 0.3
                }
            })
            .collect();
        let corr = start_end_correlation(&samples);
        assert!(corr < 0.5, "different start/end correlation should be low, got {corr}");
    }

    /// Durations on and off the beat grid at 120 BPM (0.5 s per beat).
    #[test]
    fn beat_alignment() {
        // 4 beats = 2.0 s = 88200 samples at 44100 Hz.
        let four_beats = vec![0.0f32; 88200];
        assert!(is_beat_aligned(&four_beats, 44100, 120.0));

        // 2.13 s is not a whole number of beats.
        let off_grid = vec![0.0f32; 93933];
        assert!(!is_beat_aligned(&off_grid, 44100, 120.0));
    }

    /// Buffers below the minimum length are rejected outright.
    #[test]
    fn short_sample_not_loop() {
        let short = [0.5f32; 100];
        assert!(!is_loop(&short, 44100, None));
    }
}
154