Skip to main content

max / audiofiles

7.3 KB · 231 lines History Blame Raw
1 //! Audio file decoding to mono f32 samples via Symphonia, for use in analysis pipelines.
2 //!
3 //! [`decode_to_mono`] opens any Symphonia-supported format, mixes all channels
4 //! down to a single mono signal, and returns the result as [`DecodedAudio`].
5 //! This is distinct from the plugin's stereo preview decode path.
6
7 use std::path::Path;
8
9 use symphonia::core::audio::SampleBuffer;
10 use symphonia::core::codecs::DecoderOptions;
11 use symphonia::core::formats::FormatOptions;
12 use symphonia::core::io::MediaSourceStream;
13 use symphonia::core::meta::MetadataOptions;
14 use symphonia::core::probe::Hint;
15 use tracing::{instrument, warn};
16
17 use crate::error::{io_err, AnalysisError, CoreError};
18
/// Decoded audio as mono f32 samples for analysis.
///
/// Plain data carrier: it can be cloned and compared field-by-field, which
/// keeps it convenient to cache and to assert on in tests.
#[derive(Debug, Clone, PartialEq)]
pub struct DecodedAudio {
    /// Mono sample data (channel-mixed to single channel).
    pub samples: Vec<f32>,
    /// Sample rate of the source file in Hz.
    pub sample_rate: u32,
    /// Number of channels in the original source file.
    pub channels: u16,
    /// Total duration in seconds.
    pub duration: f64,
}
31
32 /// Decode an audio file to mono f32 for analysis.
33 /// Distinct from the plugin's stereo preview decode — this always
34 /// produces a single-channel signal suitable for spectral/BPM analysis.
35 #[instrument(skip_all)]
36 pub fn decode_to_mono(path: &Path) -> Result<DecodedAudio, CoreError> {
37 let file = std::fs::File::open(path).map_err(|e| io_err(path, e))?;
38 let mss = MediaSourceStream::new(Box::new(file), Default::default());
39
40 let mut hint = Hint::new();
41 if let Some(ext) = path.extension().and_then(|e| e.to_str()) {
42 hint.with_extension(ext);
43 }
44
45 let probed = match symphonia::default::get_probe().format(
46 &hint,
47 mss,
48 &FormatOptions::default(),
49 &MetadataOptions::default(),
50 ) {
51 Ok(p) => p,
52 Err(e) => {
53 let is_wav = path
54 .extension()
55 .and_then(|e| e.to_str())
56 .is_some_and(|e| e.eq_ignore_ascii_case("wav"));
57 if is_wav {
58 warn!(
59 path = %path.display(),
60 "symphonia probe failed, trying hound fallback: {e}"
61 );
62 return decode_wav_hound(path);
63 }
64 return Err(AnalysisError::ProbeFailed(e.to_string()).into());
65 }
66 };
67
68 let mut format = probed.format;
69
70 let track = format
71 .default_track()
72 .ok_or(AnalysisError::NoAudioTrack)?;
73
74 let track_id = track.id;
75 let source_sample_rate = track
76 .codec_params
77 .sample_rate
78 .ok_or(AnalysisError::ProbeFailed("missing sample rate".to_string()))?;
79 let source_channels = track
80 .codec_params
81 .channels
82 .map(|c| c.count() as u16)
83 .ok_or(AnalysisError::ProbeFailed("missing channel count".to_string()))?;
84
85 let mut decoder = symphonia::default::get_codecs()
86 .make(&track.codec_params, &DecoderOptions::default())
87 .map_err(|e| AnalysisError::DecoderFailed(e.to_string()))?;
88
89 let mut mono_samples: Vec<f32> = Vec::new();
90
91 loop {
92 let packet = match format.next_packet() {
93 Ok(p) => p,
94 Err(symphonia::core::errors::Error::IoError(ref e))
95 if e.kind() == std::io::ErrorKind::UnexpectedEof =>
96 {
97 break;
98 }
99 Err(e) => return Err(AnalysisError::PacketError(e.to_string()).into()),
100 };
101
102 if packet.track_id() != track_id {
103 continue;
104 }
105
106 let decoded = match decoder.decode(&packet) {
107 Ok(d) => d,
108 Err(symphonia::core::errors::Error::DecodeError(_)) => continue,
109 Err(e) => return Err(AnalysisError::DecodeError(e.to_string()).into()),
110 };
111
112 let spec = *decoded.spec();
113 let num_frames = decoded.frames();
114 let num_channels = spec.channels.count();
115
116 if num_channels == 0 {
117 continue;
118 }
119
120 let mut sample_buf = SampleBuffer::<f32>::new(num_frames as u64, *decoded.spec());
121 sample_buf.copy_interleaved_ref(decoded);
122 let samples = sample_buf.samples();
123
124 // Mix down to mono by averaging all channels
125 for frame in 0..num_frames {
126 let mut sum = 0.0f32;
127 for ch in 0..num_channels {
128 sum += samples[frame * num_channels + ch];
129 }
130 mono_samples.push(sum / num_channels as f32);
131 }
132 }
133
134 if mono_samples.is_empty() {
135 return Err(AnalysisError::NoAudioData.into());
136 }
137
138 let duration = mono_samples.len() as f64 / source_sample_rate as f64;
139
140 Ok(DecodedAudio {
141 samples: mono_samples,
142 sample_rate: source_sample_rate,
143 channels: source_channels,
144 duration,
145 })
146 }
147
148 /// Fallback WAV decoder using hound for files Symphonia rejects
149 /// (e.g. non-standard fmt chunk sizes: 18 or 20 bytes instead of 16 for PCM).
150 #[instrument(skip_all)]
151 fn decode_wav_hound(path: &Path) -> Result<DecodedAudio, CoreError> {
152 let reader = hound::WavReader::open(path)
153 .map_err(|e| AnalysisError::ProbeFailed(format!("hound: {e}")))?;
154
155 let spec = reader.spec();
156 let source_sample_rate = spec.sample_rate;
157 let source_channels = spec.channels;
158
159 let mono_samples: Vec<f32> = match spec.sample_format {
160 hound::SampleFormat::Int => {
161 let max_val = (1i64 << (spec.bits_per_sample - 1)) as f32;
162 let all: Vec<f32> = reader
163 .into_samples::<i32>()
164 .map(|s| s.map(|v| v as f32 / max_val))
165 .collect::<Result<_, _>>()
166 .map_err(|e| AnalysisError::DecodeError(format!("hound: {e}")))?;
167 mix_to_mono(&all, source_channels as usize)
168 }
169 hound::SampleFormat::Float => {
170 let all: Vec<f32> = reader
171 .into_samples::<f32>()
172 .collect::<Result<_, _>>()
173 .map_err(|e| AnalysisError::DecodeError(format!("hound: {e}")))?;
174 mix_to_mono(&all, source_channels as usize)
175 }
176 };
177
178 if mono_samples.is_empty() {
179 return Err(AnalysisError::NoAudioData.into());
180 }
181
182 let duration = mono_samples.len() as f64 / source_sample_rate as f64;
183
184 Ok(DecodedAudio {
185 samples: mono_samples,
186 sample_rate: source_sample_rate,
187 channels: source_channels,
188 duration,
189 })
190 }
191
/// Average interleaved multi-channel samples down to a single mono channel.
///
/// `interleaved` is read as consecutive frames of `channels` samples each.
/// Every complete frame becomes one output sample (the mean of its channels);
/// a trailing partial frame is dropped. Single-channel input is copied as-is.
///
/// Returns an empty vector when `channels` is zero: such input contains no
/// frames, and `chunks_exact(0)` would otherwise panic.
fn mix_to_mono(interleaved: &[f32], channels: usize) -> Vec<f32> {
    match channels {
        0 => Vec::new(),
        1 => interleaved.to_vec(),
        _ => {
            let divisor = channels as f32;
            interleaved
                .chunks_exact(channels)
                .map(|frame| frame.iter().sum::<f32>() / divisor)
                .collect()
        }
    }
}
201
#[cfg(test)]
mod tests {
    use super::*;

    /// A path that does not exist must fail at file-open time with `CoreError::Io`.
    #[test]
    fn nonexistent_path_returns_err() {
        let missing = Path::new("/nonexistent/path/audio.wav");
        let result = decode_to_mono(missing);
        assert!(result.is_err());

        let err = result.unwrap_err();
        let is_io_error = matches!(err, CoreError::Io { .. });
        assert!(is_io_error, "expected CoreError::Io, got: {err}");
    }

    /// `DecodedAudio` fields are public and hold exactly what was stored.
    #[test]
    fn decoded_audio_field_access() {
        let DecodedAudio {
            samples,
            sample_rate,
            channels,
            duration,
        } = DecodedAudio {
            samples: vec![0.0, 0.5, -0.5, 1.0],
            sample_rate: 44100,
            channels: 2,
            duration: 0.5,
        };

        assert_eq!(samples.len(), 4);
        assert_eq!(sample_rate, 44100);
        assert_eq!(channels, 2);
        assert!((duration - 0.5).abs() < f64::EPSILON);
    }
}
231