//! Audio conversion pipeline: channel count and sample rate conversion for export. use rubato::{Resampler, SincFixedIn, SincInterpolationParameters, SincInterpolationType, WindowFunction}; use super::ExportChannels; use crate::error::CoreError; use tracing::instrument; /// Audio data after conversion, ready for encoding. pub struct ConvertedAudio { pub samples: Vec, pub sample_rate: u32, pub channels: u16, } /// Convert channel count: mono mixdown, stereo upmix, or passthrough. #[instrument(skip_all)] pub fn convert_channels( samples: &[f32], src_channels: u16, target: &ExportChannels, ) -> (Vec, u16) { if src_channels == 0 || samples.is_empty() { return (Vec::new(), src_channels.max(1)); } match target { ExportChannels::Original => (samples.to_vec(), src_channels), ExportChannels::Mono => { if src_channels == 1 { return (samples.to_vec(), 1); } let ch = src_channels as usize; let num_frames = samples.len() / ch; let mut mono = Vec::with_capacity(num_frames); for frame in 0..num_frames { let mut sum = 0.0f32; for c in 0..ch { sum += samples[frame * ch + c]; } mono.push(sum / ch as f32); } (mono, 1) } ExportChannels::Stereo => { if src_channels == 2 { return (samples.to_vec(), 2); } if src_channels == 1 { // Mono -> stereo: duplicate each sample let mut stereo = Vec::with_capacity(samples.len() * 2); for &s in samples { stereo.push(s); stereo.push(s); } return (stereo, 2); } // Multi-channel -> stereo: take first two channels let ch = src_channels as usize; let num_frames = samples.len() / ch; let mut stereo = Vec::with_capacity(num_frames * 2); for frame in 0..num_frames { let base = frame * ch; stereo.push(samples[base]); stereo.push(samples.get(base + 1).copied().unwrap_or(0.0)); } (stereo, 2) } } } /// Resample interleaved audio from src_rate to dst_rate using rubato. /// Returns samples unchanged if rates match. #[instrument(skip_all)] pub fn resample( samples: &[f32], channels: u16, src_rate: u32, dst_rate: u32, ) -> Result, CoreError> { if src_rate == dst_rate { return Ok(samples.to_vec()); } if channels == 0 || src_rate == 0 || dst_rate == 0 { return Err(CoreError::Export(format!( "invalid resample params: channels={channels}, src_rate={src_rate}, dst_rate={dst_rate}" ))); } let ch = channels as usize; let num_frames = samples.len() / ch; // De-interleave into per-channel vectors let mut channel_bufs: Vec> = vec![Vec::with_capacity(num_frames); ch]; for frame in 0..num_frames { for c in 0..ch { channel_bufs[c].push(samples[frame * ch + c]); } } let params = SincInterpolationParameters { sinc_len: 256, f_cutoff: 0.95, interpolation: SincInterpolationType::Linear, oversampling_factor: 256, window: WindowFunction::BlackmanHarris2, }; let ratio = dst_rate as f64 / src_rate as f64; let chunk_size = 1024; let mut resampler = SincFixedIn::::new(ratio, 2.0, params, chunk_size, ch) .map_err(|e| CoreError::Export(format!("resampler init: {e}")))?; let target_total = (num_frames as f64 * ratio).round() as usize; let mut output_channels: Vec> = vec![Vec::new(); ch]; let mut pos = 0; while pos < num_frames { let end = (pos + chunk_size).min(num_frames); let actual_len = end - pos; let input_chunk: Vec> = channel_bufs .iter() .map(|buf| buf[pos..end].to_vec()) .collect(); // Full chunks go through `process`; the short final chunk through // `process_partial`, which zero-pads internally to a full chunk. let output_chunk = if actual_len == chunk_size { resampler.process(&input_chunk, None) } else { resampler.process_partial(Some(input_chunk.as_slice()), None) } .map_err(|e| CoreError::Export(format!("resample: {e}")))?; for (c, chunk) in output_chunk.into_iter().enumerate() { output_channels[c].extend_from_slice(&chunk); } pos += chunk_size; } // Drain the resampler's internal buffer. SincFixedIn buffers across chunks, // so the final `target_total` output frames are not all emitted until it is // flushed with empty input; without this, every rate-converted export lost // its tail. (The resampler time-aligns output to input internally, so no // leading-delay trim is needed.) while output_channels[0].len() < target_total { let flushed = resampler .process_partial::>(None, None) .map_err(|e| CoreError::Export(format!("resample flush: {e}")))?; let produced = flushed.first().map_or(0, |c| c.len()); for (c, chunk) in flushed.into_iter().enumerate() { output_channels[c].extend_from_slice(&chunk); } if produced == 0 { break; // fully drained } } // Clamp to the expected frame count (drop any extra produced while flushing). let take = target_total.min(output_channels[0].len()); let mut interleaved = Vec::with_capacity(take * ch); for frame in 0..take { for channel in &output_channels { interleaved.push(channel[frame]); } } Ok(interleaved) } /// Apply channel conversion then resampling. pub fn apply_conversion( samples: &[f32], src_channels: u16, src_rate: u32, target_channels: &ExportChannels, target_rate: Option, ) -> Result { let (converted, out_channels) = convert_channels(samples, src_channels, target_channels); let dst_rate = target_rate.unwrap_or(src_rate); let resampled = resample(&converted, out_channels, src_rate, dst_rate)?; Ok(ConvertedAudio { samples: resampled, sample_rate: dst_rate, channels: out_channels, }) } #[cfg(test)] mod tests { use super::*; #[test] fn mono_mixdown() { // Stereo: L=0.4, R=0.6 -> mono: 0.5 let samples = vec![0.4, 0.6, 0.2, 0.8]; let (out, ch) = convert_channels(&samples, 2, &ExportChannels::Mono); assert_eq!(ch, 1); assert_eq!(out.len(), 2); assert!((out[0] - 0.5).abs() < 1e-6); assert!((out[1] - 0.5).abs() < 1e-6); } #[test] fn stereo_upmix() { let samples = vec![0.3, -0.3, 0.7]; let (out, ch) = convert_channels(&samples, 1, &ExportChannels::Stereo); assert_eq!(ch, 2); assert_eq!(out, vec![0.3, 0.3, -0.3, -0.3, 0.7, 0.7]); } #[test] fn passthrough() { let samples = vec![0.1, 0.2, 0.3, 0.4]; let (out, ch) = convert_channels(&samples, 2, &ExportChannels::Original); assert_eq!(ch, 2); assert_eq!(out, samples); } #[test] fn resample_noop() { let samples = vec![0.1, 0.2, 0.3, 0.4]; let out = resample(&samples, 1, 44100, 44100).unwrap(); assert_eq!(out, samples); } #[test] fn resample_changes_length() { // Use multiple full chunks to avoid edge effects. 4096 mono samples at 44100 -> 48000. let num_samples = 4096; let samples: Vec = (0..num_samples).map(|i| i as f32 / num_samples as f32).collect(); let out = resample(&samples, 1, 44100, 48000).unwrap(); // Output should be longer than input since 48000 > 44100 assert!( out.len() > num_samples, "expected output longer than input ({num_samples}), got {}", out.len() ); // And roughly in the right ratio (within 15% to account for resampler latency/padding) let expected = (num_samples as f64 * 48000.0 / 44100.0) as usize; assert!( out.len() > expected / 2, "output too short: expected ~{expected}, got {}", out.len() ); } #[test] fn resample_compensates_delay_and_length() { // An impulse at input frame 1000, downsampled 48k -> 24k (ratio 0.5), // must land near output frame 500 and the output length must track the // ratio exactly. Regression for the dropped group delay / missing tail // flush: previously the output was shifted and lost its final frames. let num_frames = 2048usize; let mut samples = vec![0.0f32; num_frames]; samples[1000] = 1.0; let out = resample(&samples, 1, 48_000, 24_000).unwrap(); let expected_len = (num_frames as f64 * 0.5).round() as usize; assert_eq!(out.len(), expected_len, "length should track the ratio exactly"); let peak = out .iter() .enumerate() .max_by(|a, b| a.1.abs().partial_cmp(&b.1.abs()).unwrap()) .map(|(i, _)| i) .unwrap(); assert!( (peak as i64 - 500).abs() <= 4, "impulse should land near frame 500 after delay compensation, got {peak}" ); } }