//! Audio file decoding to mono f32 samples via Symphonia, for use in analysis pipelines. //! //! [`decode_to_mono`] opens any Symphonia-supported format, mixes all channels //! down to a single mono signal, and returns the result as [`DecodedAudio`]. //! This is distinct from the plugin's stereo preview decode path. use std::path::Path; use symphonia::core::audio::SampleBuffer; use symphonia::core::codecs::DecoderOptions; use symphonia::core::formats::FormatOptions; use symphonia::core::io::MediaSourceStream; use symphonia::core::meta::MetadataOptions; use symphonia::core::probe::Hint; use tracing::{instrument, warn}; use crate::error::{io_err, AnalysisError, CoreError}; /// Decoded audio as mono f32 samples for analysis. #[derive(Debug)] pub struct DecodedAudio { /// Mono sample data (channel-mixed to single channel). pub samples: Vec, /// Sample rate of the source file in Hz. pub sample_rate: u32, /// Number of channels in the original source file. pub channels: u16, /// Total duration in seconds. pub duration: f64, } /// Decode an audio file to mono f32 for analysis. /// Distinct from the plugin's stereo preview decode — this always /// produces a single-channel signal suitable for spectral/BPM analysis. #[instrument(skip_all)] pub fn decode_to_mono(path: &Path) -> Result { let file = std::fs::File::open(path).map_err(|e| io_err(path, e))?; let mss = MediaSourceStream::new(Box::new(file), Default::default()); let mut hint = Hint::new(); if let Some(ext) = path.extension().and_then(|e| e.to_str()) { hint.with_extension(ext); } let probed = match symphonia::default::get_probe().format( &hint, mss, &FormatOptions::default(), &MetadataOptions::default(), ) { Ok(p) => p, Err(e) => { let is_wav = path .extension() .and_then(|e| e.to_str()) .is_some_and(|e| e.eq_ignore_ascii_case("wav")); if is_wav { warn!( path = %path.display(), "symphonia probe failed, trying hound fallback: {e}" ); return decode_wav_hound(path); } return Err(AnalysisError::ProbeFailed(e.to_string()).into()); } }; let mut format = probed.format; let track = format .default_track() .ok_or(AnalysisError::NoAudioTrack)?; let track_id = track.id; let source_sample_rate = track .codec_params .sample_rate .ok_or(AnalysisError::ProbeFailed("missing sample rate".to_string()))?; let source_channels = track .codec_params .channels .map(|c| c.count() as u16) .ok_or(AnalysisError::ProbeFailed("missing channel count".to_string()))?; let mut decoder = symphonia::default::get_codecs() .make(&track.codec_params, &DecoderOptions::default()) .map_err(|e| AnalysisError::DecoderFailed(e.to_string()))?; let mut mono_samples: Vec = Vec::new(); loop { let packet = match format.next_packet() { Ok(p) => p, Err(symphonia::core::errors::Error::IoError(ref e)) if e.kind() == std::io::ErrorKind::UnexpectedEof => { break; } Err(e) => return Err(AnalysisError::PacketError(e.to_string()).into()), }; if packet.track_id() != track_id { continue; } let decoded = match decoder.decode(&packet) { Ok(d) => d, Err(symphonia::core::errors::Error::DecodeError(_)) => continue, Err(e) => return Err(AnalysisError::DecodeError(e.to_string()).into()), }; let spec = *decoded.spec(); let num_frames = decoded.frames(); let num_channels = spec.channels.count(); if num_channels == 0 { continue; } let mut sample_buf = SampleBuffer::::new(num_frames as u64, *decoded.spec()); sample_buf.copy_interleaved_ref(decoded); let samples = sample_buf.samples(); // Mix down to mono by averaging all channels for frame in 0..num_frames { let mut sum = 0.0f32; for ch in 0..num_channels { sum += samples[frame * num_channels + ch]; } mono_samples.push(sum / num_channels as f32); } } if mono_samples.is_empty() { return Err(AnalysisError::NoAudioData.into()); } let duration = mono_samples.len() as f64 / source_sample_rate as f64; Ok(DecodedAudio { samples: mono_samples, sample_rate: source_sample_rate, channels: source_channels, duration, }) } /// Fallback WAV decoder using hound for files Symphonia rejects /// (e.g. non-standard fmt chunk sizes: 18 or 20 bytes instead of 16 for PCM). #[instrument(skip_all)] fn decode_wav_hound(path: &Path) -> Result { let reader = hound::WavReader::open(path) .map_err(|e| AnalysisError::ProbeFailed(format!("hound: {e}")))?; let spec = reader.spec(); let source_sample_rate = spec.sample_rate; let source_channels = spec.channels; let mono_samples: Vec = match spec.sample_format { hound::SampleFormat::Int => { let max_val = (1i64 << (spec.bits_per_sample - 1)) as f32; let all: Vec = reader .into_samples::() .map(|s| s.map(|v| v as f32 / max_val)) .collect::>() .map_err(|e| AnalysisError::DecodeError(format!("hound: {e}")))?; mix_to_mono(&all, source_channels as usize) } hound::SampleFormat::Float => { let all: Vec = reader .into_samples::() .collect::>() .map_err(|e| AnalysisError::DecodeError(format!("hound: {e}")))?; mix_to_mono(&all, source_channels as usize) } }; if mono_samples.is_empty() { return Err(AnalysisError::NoAudioData.into()); } let duration = mono_samples.len() as f64 / source_sample_rate as f64; Ok(DecodedAudio { samples: mono_samples, sample_rate: source_sample_rate, channels: source_channels, duration, }) } fn mix_to_mono(interleaved: &[f32], channels: usize) -> Vec { if channels == 1 { return interleaved.to_vec(); } interleaved .chunks_exact(channels) .map(|frame| frame.iter().sum::() / channels as f32) .collect() } #[cfg(test)] mod tests { use super::*; #[test] fn nonexistent_path_returns_err() { let result = decode_to_mono(Path::new("/nonexistent/path/audio.wav")); assert!(result.is_err()); let err = result.unwrap_err(); assert!( matches!(err, CoreError::Io { .. }), "expected CoreError::Io, got: {err}" ); } #[test] fn decoded_audio_field_access() { let audio = DecodedAudio { samples: vec![0.0, 0.5, -0.5, 1.0], sample_rate: 44100, channels: 2, duration: 0.5, }; assert_eq!(audio.samples.len(), 4); assert_eq!(audio.sample_rate, 44100); assert_eq!(audio.channels, 2); assert!((audio.duration - 0.5).abs() < f64::EPSILON); } }