Skip to main content

max / audiofiles

9.4 KB · 278 lines History Blame Raw
1 //! Audio conversion pipeline: channel count and sample rate conversion for export.
2
3 use rubato::{Resampler, SincFixedIn, SincInterpolationParameters, SincInterpolationType, WindowFunction};
4
5 use super::ExportChannels;
6 use crate::error::CoreError;
7 use tracing::instrument;
8
/// Audio data after conversion, ready for encoding.
///
/// Returned by [`apply_conversion`] once the channel layout and sample rate
/// have been brought to their export targets.
#[derive(Debug, Clone, PartialEq)]
pub struct ConvertedAudio {
    /// Interleaved samples: every channel of frame 0, then every channel of
    /// frame 1, and so on.
    pub samples: Vec<f32>,
    /// Sample rate that `samples` is expressed in.
    pub sample_rate: u32,
    /// Number of interleaved channels in `samples`.
    pub channels: u16,
}
15
16 /// Convert channel count: mono mixdown, stereo upmix, or passthrough.
17 #[instrument(skip_all)]
18 pub fn convert_channels(
19 samples: &[f32],
20 src_channels: u16,
21 target: &ExportChannels,
22 ) -> (Vec<f32>, u16) {
23 if src_channels == 0 || samples.is_empty() {
24 return (Vec::new(), src_channels.max(1));
25 }
26 match target {
27 ExportChannels::Original => (samples.to_vec(), src_channels),
28 ExportChannels::Mono => {
29 if src_channels == 1 {
30 return (samples.to_vec(), 1);
31 }
32 let ch = src_channels as usize;
33 let num_frames = samples.len() / ch;
34 let mut mono = Vec::with_capacity(num_frames);
35 for frame in 0..num_frames {
36 let mut sum = 0.0f32;
37 for c in 0..ch {
38 sum += samples[frame * ch + c];
39 }
40 mono.push(sum / ch as f32);
41 }
42 (mono, 1)
43 }
44 ExportChannels::Stereo => {
45 if src_channels == 2 {
46 return (samples.to_vec(), 2);
47 }
48 if src_channels == 1 {
49 // Mono -> stereo: duplicate each sample
50 let mut stereo = Vec::with_capacity(samples.len() * 2);
51 for &s in samples {
52 stereo.push(s);
53 stereo.push(s);
54 }
55 return (stereo, 2);
56 }
57 // Multi-channel -> stereo: take first two channels
58 let ch = src_channels as usize;
59 let num_frames = samples.len() / ch;
60 let mut stereo = Vec::with_capacity(num_frames * 2);
61 for frame in 0..num_frames {
62 let base = frame * ch;
63 stereo.push(samples[base]);
64 stereo.push(samples.get(base + 1).copied().unwrap_or(0.0));
65 }
66 (stereo, 2)
67 }
68 }
69 }
70
71 /// Resample interleaved audio from src_rate to dst_rate using rubato.
72 /// Returns samples unchanged if rates match.
73 #[instrument(skip_all)]
74 pub fn resample(
75 samples: &[f32],
76 channels: u16,
77 src_rate: u32,
78 dst_rate: u32,
79 ) -> Result<Vec<f32>, CoreError> {
80 if src_rate == dst_rate {
81 return Ok(samples.to_vec());
82 }
83 if channels == 0 || src_rate == 0 || dst_rate == 0 {
84 return Err(CoreError::Export(format!(
85 "invalid resample params: channels={channels}, src_rate={src_rate}, dst_rate={dst_rate}"
86 )));
87 }
88
89 let ch = channels as usize;
90 let num_frames = samples.len() / ch;
91
92 // De-interleave into per-channel vectors
93 let mut channel_bufs: Vec<Vec<f32>> = vec![Vec::with_capacity(num_frames); ch];
94 for frame in 0..num_frames {
95 for c in 0..ch {
96 channel_bufs[c].push(samples[frame * ch + c]);
97 }
98 }
99
100 let params = SincInterpolationParameters {
101 sinc_len: 256,
102 f_cutoff: 0.95,
103 interpolation: SincInterpolationType::Linear,
104 oversampling_factor: 256,
105 window: WindowFunction::BlackmanHarris2,
106 };
107
108 let ratio = dst_rate as f64 / src_rate as f64;
109 let chunk_size = 1024;
110
111 let mut resampler = SincFixedIn::<f32>::new(ratio, 2.0, params, chunk_size, ch)
112 .map_err(|e| CoreError::Export(format!("resampler init: {e}")))?;
113
114 let target_total = (num_frames as f64 * ratio).round() as usize;
115
116 let mut output_channels: Vec<Vec<f32>> = vec![Vec::new(); ch];
117 let mut pos = 0;
118
119 while pos < num_frames {
120 let end = (pos + chunk_size).min(num_frames);
121 let actual_len = end - pos;
122
123 let input_chunk: Vec<Vec<f32>> = channel_bufs
124 .iter()
125 .map(|buf| buf[pos..end].to_vec())
126 .collect();
127
128 // Full chunks go through `process`; the short final chunk through
129 // `process_partial`, which zero-pads internally to a full chunk.
130 let output_chunk = if actual_len == chunk_size {
131 resampler.process(&input_chunk, None)
132 } else {
133 resampler.process_partial(Some(input_chunk.as_slice()), None)
134 }
135 .map_err(|e| CoreError::Export(format!("resample: {e}")))?;
136
137 for (c, chunk) in output_chunk.into_iter().enumerate() {
138 output_channels[c].extend_from_slice(&chunk);
139 }
140
141 pos += chunk_size;
142 }
143
144 // Drain the resampler's internal buffer. SincFixedIn buffers across chunks,
145 // so the final `target_total` output frames are not all emitted until it is
146 // flushed with empty input; without this, every rate-converted export lost
147 // its tail. (The resampler time-aligns output to input internally, so no
148 // leading-delay trim is needed.)
149 while output_channels[0].len() < target_total {
150 let flushed = resampler
151 .process_partial::<Vec<f32>>(None, None)
152 .map_err(|e| CoreError::Export(format!("resample flush: {e}")))?;
153 let produced = flushed.first().map_or(0, |c| c.len());
154 for (c, chunk) in flushed.into_iter().enumerate() {
155 output_channels[c].extend_from_slice(&chunk);
156 }
157 if produced == 0 {
158 break; // fully drained
159 }
160 }
161
162 // Clamp to the expected frame count (drop any extra produced while flushing).
163 let take = target_total.min(output_channels[0].len());
164 let mut interleaved = Vec::with_capacity(take * ch);
165 for frame in 0..take {
166 for channel in &output_channels {
167 interleaved.push(channel[frame]);
168 }
169 }
170
171 Ok(interleaved)
172 }
173
174 /// Apply channel conversion then resampling.
175 pub fn apply_conversion(
176 samples: &[f32],
177 src_channels: u16,
178 src_rate: u32,
179 target_channels: &ExportChannels,
180 target_rate: Option<u32>,
181 ) -> Result<ConvertedAudio, CoreError> {
182 let (converted, out_channels) = convert_channels(samples, src_channels, target_channels);
183 let dst_rate = target_rate.unwrap_or(src_rate);
184 let resampled = resample(&converted, out_channels, src_rate, dst_rate)?;
185
186 Ok(ConvertedAudio {
187 samples: resampled,
188 sample_rate: dst_rate,
189 channels: out_channels,
190 })
191 }
192
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn mono_mixdown() {
        // Stereo: L=0.4, R=0.6 -> mono: 0.5
        let samples = vec![0.4, 0.6, 0.2, 0.8];
        let (out, ch) = convert_channels(&samples, 2, &ExportChannels::Mono);
        assert_eq!(ch, 1);
        assert_eq!(out.len(), 2);
        assert!((out[0] - 0.5).abs() < 1e-6);
        assert!((out[1] - 0.5).abs() < 1e-6);
    }

    #[test]
    fn mono_mixdown_multichannel() {
        // Three channels per frame are averaged into one sample.
        let samples = vec![0.3, 0.6, 0.9, -0.3, -0.6, -0.9];
        let (out, ch) = convert_channels(&samples, 3, &ExportChannels::Mono);
        assert_eq!(ch, 1);
        assert_eq!(out.len(), 2);
        assert!((out[0] - 0.6).abs() < 1e-6);
        assert!((out[1] + 0.6).abs() < 1e-6);
    }

    #[test]
    fn stereo_upmix() {
        let samples = vec![0.3, -0.3, 0.7];
        let (out, ch) = convert_channels(&samples, 1, &ExportChannels::Stereo);
        assert_eq!(ch, 2);
        assert_eq!(out, vec![0.3, 0.3, -0.3, -0.3, 0.7, 0.7]);
    }

    #[test]
    fn multichannel_to_stereo_keeps_first_two() {
        // Channels beyond the first two are discarded.
        let samples = vec![0.1, 0.2, 0.3, 0.4, 0.5, 0.6];
        let (out, ch) = convert_channels(&samples, 3, &ExportChannels::Stereo);
        assert_eq!(ch, 2);
        assert_eq!(out, vec![0.1, 0.2, 0.4, 0.5]);
    }

    #[test]
    fn passthrough() {
        let samples = vec![0.1, 0.2, 0.3, 0.4];
        let (out, ch) = convert_channels(&samples, 2, &ExportChannels::Original);
        assert_eq!(ch, 2);
        assert_eq!(out, samples);
    }

    #[test]
    fn resample_noop() {
        let samples = vec![0.1, 0.2, 0.3, 0.4];
        let out = resample(&samples, 1, 44100, 44100).unwrap();
        assert_eq!(out, samples);
    }

    #[test]
    fn resample_rejects_invalid_params() {
        let samples = vec![0.0f32; 8];
        assert!(resample(&samples, 0, 44100, 48000).is_err());
        assert!(resample(&samples, 1, 0, 48000).is_err());
        assert!(resample(&samples, 1, 44100, 0).is_err());
    }

    #[test]
    fn resample_changes_length() {
        // Use multiple full chunks to avoid edge effects. 4096 mono samples at 44100 -> 48000.
        let num_samples = 4096;
        let samples: Vec<f32> = (0..num_samples)
            .map(|i| i as f32 / num_samples as f32)
            .collect();
        let out = resample(&samples, 1, 44100, 48000).unwrap();
        // Output should be longer than input since 48000 > 44100
        assert!(
            out.len() > num_samples,
            "expected output longer than input ({num_samples}), got {}",
            out.len()
        );
        // And in the right ratio, within 15% to account for resampler
        // latency/padding. (A bare lower bound of `expected / 2` would already
        // be implied by the assertion above and could never fail.)
        let expected = (num_samples as f64 * 48000.0 / 44100.0).round() as i64;
        let tolerance = expected * 15 / 100;
        assert!(
            (out.len() as i64 - expected).abs() <= tolerance,
            "output length off: expected ~{expected}, got {}",
            out.len()
        );
    }

    #[test]
    fn resample_compensates_delay_and_length() {
        // An impulse at input frame 1000, downsampled 48k -> 24k (ratio 0.5),
        // must land near output frame 500 and the output length must track the
        // ratio exactly. Regression for the dropped group delay / missing tail
        // flush: previously the output was shifted and lost its final frames.
        let num_frames = 2048usize;
        let mut samples = vec![0.0f32; num_frames];
        samples[1000] = 1.0;
        let out = resample(&samples, 1, 48_000, 24_000).unwrap();

        let expected_len = (num_frames as f64 * 0.5).round() as usize;
        assert_eq!(out.len(), expected_len, "length should track the ratio exactly");

        let peak = out
            .iter()
            .enumerate()
            .max_by(|a, b| a.1.abs().partial_cmp(&b.1.abs()).unwrap())
            .map(|(i, _)| i)
            .unwrap();
        assert!(
            (peak as i64 - 500).abs() <= 4,
            "impulse should land near frame 500 after delay compensation, got {peak}"
        );
    }
}
278