max / audiofiles

9.4 KB · 278 lines History Blame Raw

1	//! Audio conversion pipeline: channel count and sample rate conversion for export.
2
3	use rubato::{Resampler, SincFixedIn, SincInterpolationParameters, SincInterpolationType, WindowFunction};
4
5	use super::ExportChannels;
6	use crate::error::CoreError;
7	use tracing::instrument;
8
/// Audio data after conversion, ready for encoding.
///
/// Returned by [`apply_conversion`]. Multi-channel audio is stored interleaved:
/// each frame contributes `channels` consecutive values to `samples`.
#[derive(Debug, Clone, PartialEq)]
pub struct ConvertedAudio {
    /// Interleaved sample data (`channels` values per frame).
    pub samples: Vec<f32>,
    /// Sample rate of `samples`, in Hz.
    pub sample_rate: u32,
    /// Number of interleaved channels in `samples`.
    pub channels: u16,
}
15
16	/// Convert channel count: mono mixdown, stereo upmix, or passthrough.
17	#[instrument(skip_all)]
18	pub fn convert_channels(
19	samples: &[f32],
20	src_channels: u16,
21	target: &ExportChannels,
22	) -> (Vec<f32>, u16) {
23	if src_channels == 0 \|\| samples.is_empty() {
24	return (Vec::new(), src_channels.max(1));
25	}
26	match target {
27	ExportChannels::Original => (samples.to_vec(), src_channels),
28	ExportChannels::Mono => {
29	if src_channels == 1 {
30	return (samples.to_vec(), 1);
31	}
32	let ch = src_channels as usize;
33	let num_frames = samples.len() / ch;
34	let mut mono = Vec::with_capacity(num_frames);
35	for frame in 0..num_frames {
36	let mut sum = 0.0f32;
37	for c in 0..ch {
38	sum += samples[frame * ch + c];
39	}
40	mono.push(sum / ch as f32);
41	}
42	(mono, 1)
43	}
44	ExportChannels::Stereo => {
45	if src_channels == 2 {
46	return (samples.to_vec(), 2);
47	}
48	if src_channels == 1 {
49	// Mono -> stereo: duplicate each sample
50	let mut stereo = Vec::with_capacity(samples.len() * 2);
51	for &s in samples {
52	stereo.push(s);
53	stereo.push(s);
54	}
55	return (stereo, 2);
56	}
57	// Multi-channel -> stereo: take first two channels
58	let ch = src_channels as usize;
59	let num_frames = samples.len() / ch;
60	let mut stereo = Vec::with_capacity(num_frames * 2);
61	for frame in 0..num_frames {
62	let base = frame * ch;
63	stereo.push(samples[base]);
64	stereo.push(samples.get(base + 1).copied().unwrap_or(0.0));
65	}
66	(stereo, 2)
67	}
68	}
69	}
70
71	/// Resample interleaved audio from src_rate to dst_rate using rubato.
72	/// Returns samples unchanged if rates match.
73	#[instrument(skip_all)]
74	pub fn resample(
75	samples: &[f32],
76	channels: u16,
77	src_rate: u32,
78	dst_rate: u32,
79	) -> Result<Vec<f32>, CoreError> {
80	if src_rate == dst_rate {
81	return Ok(samples.to_vec());
82	}
83	if channels == 0 \|\| src_rate == 0 \|\| dst_rate == 0 {
84	return Err(CoreError::Export(format!(
85	"invalid resample params: channels={channels}, src_rate={src_rate}, dst_rate={dst_rate}"
86	)));
87	}
88
89	let ch = channels as usize;
90	let num_frames = samples.len() / ch;
91
92	// De-interleave into per-channel vectors
93	let mut channel_bufs: Vec<Vec<f32>> = vec![Vec::with_capacity(num_frames); ch];
94	for frame in 0..num_frames {
95	for c in 0..ch {
96	channel_bufs[c].push(samples[frame * ch + c]);
97	}
98	}
99
100	let params = SincInterpolationParameters {
101	sinc_len: 256,
102	f_cutoff: 0.95,
103	interpolation: SincInterpolationType::Linear,
104	oversampling_factor: 256,
105	window: WindowFunction::BlackmanHarris2,
106	};
107
108	let ratio = dst_rate as f64 / src_rate as f64;
109	let chunk_size = 1024;
110
111	let mut resampler = SincFixedIn::<f32>::new(ratio, 2.0, params, chunk_size, ch)
112	.map_err(\|e\| CoreError::Export(format!("resampler init: {e}")))?;
113
114	let target_total = (num_frames as f64 * ratio).round() as usize;
115
116	let mut output_channels: Vec<Vec<f32>> = vec![Vec::new(); ch];
117	let mut pos = 0;
118
119	while pos < num_frames {
120	let end = (pos + chunk_size).min(num_frames);
121	let actual_len = end - pos;
122
123	let input_chunk: Vec<Vec<f32>> = channel_bufs
124	.iter()
125	.map(\|buf\| buf[pos..end].to_vec())
126	.collect();
127
128	// Full chunks go through `process`; the short final chunk through
129	// `process_partial`, which zero-pads internally to a full chunk.
130	let output_chunk = if actual_len == chunk_size {
131	resampler.process(&input_chunk, None)
132	} else {
133	resampler.process_partial(Some(input_chunk.as_slice()), None)
134	}
135	.map_err(\|e\| CoreError::Export(format!("resample: {e}")))?;
136
137	for (c, chunk) in output_chunk.into_iter().enumerate() {
138	output_channels[c].extend_from_slice(&chunk);
139	}
140
141	pos += chunk_size;
142	}
143
144	// Drain the resampler's internal buffer. SincFixedIn buffers across chunks,
145	// so the final `target_total` output frames are not all emitted until it is
146	// flushed with empty input; without this, every rate-converted export lost
147	// its tail. (The resampler time-aligns output to input internally, so no
148	// leading-delay trim is needed.)
149	while output_channels[0].len() < target_total {
150	let flushed = resampler
151	.process_partial::<Vec<f32>>(None, None)
152	.map_err(\|e\| CoreError::Export(format!("resample flush: {e}")))?;
153	let produced = flushed.first().map_or(0, \|c\| c.len());
154	for (c, chunk) in flushed.into_iter().enumerate() {
155	output_channels[c].extend_from_slice(&chunk);
156	}
157	if produced == 0 {
158	break; // fully drained
159	}
160	}
161
162	// Clamp to the expected frame count (drop any extra produced while flushing).
163	let take = target_total.min(output_channels[0].len());
164	let mut interleaved = Vec::with_capacity(take * ch);
165	for frame in 0..take {
166	for channel in &output_channels {
167	interleaved.push(channel[frame]);
168	}
169	}
170
171	Ok(interleaved)
172	}
173
174	/// Apply channel conversion then resampling.
175	pub fn apply_conversion(
176	samples: &[f32],
177	src_channels: u16,
178	src_rate: u32,
179	target_channels: &ExportChannels,
180	target_rate: Option<u32>,
181	) -> Result<ConvertedAudio, CoreError> {
182	let (converted, out_channels) = convert_channels(samples, src_channels, target_channels);
183	let dst_rate = target_rate.unwrap_or(src_rate);
184	let resampled = resample(&converted, out_channels, src_rate, dst_rate)?;
185
186	Ok(ConvertedAudio {
187	samples: resampled,
188	sample_rate: dst_rate,
189	channels: out_channels,
190	})
191	}
192
/// Unit tests for channel conversion and resampling.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn mono_mixdown() {
        // Stereo frames (0.4, 0.6) and (0.2, 0.8) both average to 0.5.
        let samples = vec![0.4, 0.6, 0.2, 0.8];
        let (out, ch) = convert_channels(&samples, 2, &ExportChannels::Mono);
        assert_eq!(ch, 1);
        assert_eq!(out.len(), 2);
        assert!((out[0] - 0.5).abs() < 1e-6);
        assert!((out[1] - 0.5).abs() < 1e-6);
    }

    #[test]
    fn stereo_upmix() {
        // Mono -> stereo duplicates every sample.
        let samples = vec![0.3, -0.3, 0.7];
        let (out, ch) = convert_channels(&samples, 1, &ExportChannels::Stereo);
        assert_eq!(ch, 2);
        assert_eq!(out, vec![0.3, 0.3, -0.3, -0.3, 0.7, 0.7]);
    }

    #[test]
    fn multichannel_to_stereo_keeps_first_two_channels() {
        // Two 3-channel frames: only the first two channels of each survive.
        let samples = vec![0.1, 0.2, 0.9, 0.3, 0.4, 0.9];
        let (out, ch) = convert_channels(&samples, 3, &ExportChannels::Stereo);
        assert_eq!(ch, 2);
        assert_eq!(out, vec![0.1, 0.2, 0.3, 0.4]);
    }

    #[test]
    fn passthrough() {
        let samples = vec![0.1, 0.2, 0.3, 0.4];
        let (out, ch) = convert_channels(&samples, 2, &ExportChannels::Original);
        assert_eq!(ch, 2);
        assert_eq!(out, samples);
    }

    #[test]
    fn resample_noop() {
        // Matching rates must return the input untouched.
        let samples = vec![0.1, 0.2, 0.3, 0.4];
        let out = resample(&samples, 1, 44100, 44100).unwrap();
        assert_eq!(out, samples);
    }

    #[test]
    fn resample_changes_length() {
        // Several full chunks: 4096 mono samples at 44100 -> 48000.
        let num_samples = 4096usize;
        let samples: Vec<f32> = (0..num_samples)
            .map(|i| i as f32 / num_samples as f32)
            .collect();
        let out = resample(&samples, 1, 44100, 48000).unwrap();
        // Output should be longer than input since 48000 > 44100.
        assert!(
            out.len() > num_samples,
            "expected output longer than input ({num_samples}), got {}",
            out.len()
        );
        // `resample` clamps to the rounded frame count, so the length is exact
        // rather than merely "roughly in ratio".
        let expected = (num_samples as f64 * 48000.0 / 44100.0).round() as usize;
        assert_eq!(
            out.len(),
            expected,
            "output length should be round(frames * ratio)"
        );
    }

    #[test]
    fn resample_compensates_delay_and_length() {
        // An impulse at input frame 1000, downsampled 48k -> 24k (ratio 0.5),
        // must land near output frame 500 and the output length must track the
        // ratio exactly. Regression for the dropped group delay / missing tail
        // flush: previously the output was shifted and lost its final frames.
        let num_frames = 2048usize;
        let mut samples = vec![0.0f32; num_frames];
        samples[1000] = 1.0;
        let out = resample(&samples, 1, 48_000, 24_000).unwrap();

        let expected_len = (num_frames as f64 * 0.5).round() as usize;
        assert_eq!(out.len(), expected_len, "length should track the ratio exactly");

        // Locate the sample with the largest magnitude.
        let peak = out
            .iter()
            .enumerate()
            .max_by(|a, b| a.1.abs().total_cmp(&b.1.abs()))
            .map(|(i, _)| i)
            .unwrap();
        assert!(
            (peak as i64 - 500).abs() <= 4,
            "impulse should land near frame 500 after delay compensation, got {peak}"
        );
    }
}
278