max / audiofiles

7.3 KB · 231 lines History Blame Raw

1	//! Audio file decoding to mono f32 samples via Symphonia, for use in analysis pipelines.
2	//!
3	//! [`decode_to_mono`] opens any Symphonia-supported format, mixes all channels
4	//! down to a single mono signal, and returns the result as [`DecodedAudio`].
5	//! This is distinct from the plugin's stereo preview decode path.
6
7	use std::path::Path;
8
9	use symphonia::core::audio::SampleBuffer;
10	use symphonia::core::codecs::DecoderOptions;
11	use symphonia::core::formats::FormatOptions;
12	use symphonia::core::io::MediaSourceStream;
13	use symphonia::core::meta::MetadataOptions;
14	use symphonia::core::probe::Hint;
15	use tracing::{instrument, warn};
16
17	use crate::error::{io_err, AnalysisError, CoreError};
18
/// Decoded audio as mono f32 samples for analysis.
///
/// Returned by [`decode_to_mono`]. The sample data is always single-channel;
/// `channels` only records what the source file contained before mixdown.
#[derive(Debug)]
pub struct DecodedAudio {
    /// Mono sample data (channel-mixed to single channel), one value per frame.
    pub samples: Vec<f32>,
    /// Sample rate of the source file in Hz (the decoders in this file do not resample).
    pub sample_rate: u32,
    /// Number of channels in the original source file.
    pub channels: u16,
    /// Total duration in seconds; the decoders in this file compute it as
    /// `samples.len() / sample_rate`.
    pub duration: f64,
}
31
32	/// Decode an audio file to mono f32 for analysis.
33	/// Distinct from the plugin's stereo preview decode — this always
34	/// produces a single-channel signal suitable for spectral/BPM analysis.
35	#[instrument(skip_all)]
36	pub fn decode_to_mono(path: &Path) -> Result<DecodedAudio, CoreError> {
37	let file = std::fs::File::open(path).map_err(\|e\| io_err(path, e))?;
38	let mss = MediaSourceStream::new(Box::new(file), Default::default());
39
40	let mut hint = Hint::new();
41	if let Some(ext) = path.extension().and_then(\|e\| e.to_str()) {
42	hint.with_extension(ext);
43	}
44
45	let probed = match symphonia::default::get_probe().format(
46	&hint,
47	mss,
48	&FormatOptions::default(),
49	&MetadataOptions::default(),
50	) {
51	Ok(p) => p,
52	Err(e) => {
53	let is_wav = path
54	.extension()
55	.and_then(\|e\| e.to_str())
56	.is_some_and(\|e\| e.eq_ignore_ascii_case("wav"));
57	if is_wav {
58	warn!(
59	path = %path.display(),
60	"symphonia probe failed, trying hound fallback: {e}"
61	);
62	return decode_wav_hound(path);
63	}
64	return Err(AnalysisError::ProbeFailed(e.to_string()).into());
65	}
66	};
67
68	let mut format = probed.format;
69
70	let track = format
71	.default_track()
72	.ok_or(AnalysisError::NoAudioTrack)?;
73
74	let track_id = track.id;
75	let source_sample_rate = track
76	.codec_params
77	.sample_rate
78	.ok_or(AnalysisError::ProbeFailed("missing sample rate".to_string()))?;
79	let source_channels = track
80	.codec_params
81	.channels
82	.map(\|c\| c.count() as u16)
83	.ok_or(AnalysisError::ProbeFailed("missing channel count".to_string()))?;
84
85	let mut decoder = symphonia::default::get_codecs()
86	.make(&track.codec_params, &DecoderOptions::default())
87	.map_err(\|e\| AnalysisError::DecoderFailed(e.to_string()))?;
88
89	let mut mono_samples: Vec<f32> = Vec::new();
90
91	loop {
92	let packet = match format.next_packet() {
93	Ok(p) => p,
94	Err(symphonia::core::errors::Error::IoError(ref e))
95	if e.kind() == std::io::ErrorKind::UnexpectedEof =>
96	{
97	break;
98	}
99	Err(e) => return Err(AnalysisError::PacketError(e.to_string()).into()),
100	};
101
102	if packet.track_id() != track_id {
103	continue;
104	}
105
106	let decoded = match decoder.decode(&packet) {
107	Ok(d) => d,
108	Err(symphonia::core::errors::Error::DecodeError(_)) => continue,
109	Err(e) => return Err(AnalysisError::DecodeError(e.to_string()).into()),
110	};
111
112	let spec = *decoded.spec();
113	let num_frames = decoded.frames();
114	let num_channels = spec.channels.count();
115
116	if num_channels == 0 {
117	continue;
118	}
119
120	let mut sample_buf = SampleBuffer::<f32>::new(num_frames as u64, *decoded.spec());
121	sample_buf.copy_interleaved_ref(decoded);
122	let samples = sample_buf.samples();
123
124	// Mix down to mono by averaging all channels
125	for frame in 0..num_frames {
126	let mut sum = 0.0f32;
127	for ch in 0..num_channels {
128	sum += samples[frame * num_channels + ch];
129	}
130	mono_samples.push(sum / num_channels as f32);
131	}
132	}
133
134	if mono_samples.is_empty() {
135	return Err(AnalysisError::NoAudioData.into());
136	}
137
138	let duration = mono_samples.len() as f64 / source_sample_rate as f64;
139
140	Ok(DecodedAudio {
141	samples: mono_samples,
142	sample_rate: source_sample_rate,
143	channels: source_channels,
144	duration,
145	})
146	}
147
148	/// Fallback WAV decoder using hound for files Symphonia rejects
149	/// (e.g. non-standard fmt chunk sizes: 18 or 20 bytes instead of 16 for PCM).
150	#[instrument(skip_all)]
151	fn decode_wav_hound(path: &Path) -> Result<DecodedAudio, CoreError> {
152	let reader = hound::WavReader::open(path)
153	.map_err(\|e\| AnalysisError::ProbeFailed(format!("hound: {e}")))?;
154
155	let spec = reader.spec();
156	let source_sample_rate = spec.sample_rate;
157	let source_channels = spec.channels;
158
159	let mono_samples: Vec<f32> = match spec.sample_format {
160	hound::SampleFormat::Int => {
161	let max_val = (1i64 << (spec.bits_per_sample - 1)) as f32;
162	let all: Vec<f32> = reader
163	.into_samples::<i32>()
164	.map(\|s\| s.map(\|v\| v as f32 / max_val))
165	.collect::<Result<_, _>>()
166	.map_err(\|e\| AnalysisError::DecodeError(format!("hound: {e}")))?;
167	mix_to_mono(&all, source_channels as usize)
168	}
169	hound::SampleFormat::Float => {
170	let all: Vec<f32> = reader
171	.into_samples::<f32>()
172	.collect::<Result<_, _>>()
173	.map_err(\|e\| AnalysisError::DecodeError(format!("hound: {e}")))?;
174	mix_to_mono(&all, source_channels as usize)
175	}
176	};
177
178	if mono_samples.is_empty() {
179	return Err(AnalysisError::NoAudioData.into());
180	}
181
182	let duration = mono_samples.len() as f64 / source_sample_rate as f64;
183
184	Ok(DecodedAudio {
185	samples: mono_samples,
186	sample_rate: source_sample_rate,
187	channels: source_channels,
188	duration,
189	})
190	}
191
/// Average interleaved multi-channel samples down to one mono sample per frame.
///
/// `interleaved` is read as consecutive frames of `channels` samples each.
/// A trailing partial frame (fewer than `channels` samples) is dropped.
/// Mono input is returned as a copy. A channel count of zero yields an empty
/// vector rather than panicking in `chunks_exact`, which rejects a chunk size
/// of zero.
fn mix_to_mono(interleaved: &[f32], channels: usize) -> Vec<f32> {
    match channels {
        // No channels means no frames; callers treat the empty result as "no audio data".
        0 => Vec::new(),
        1 => interleaved.to_vec(),
        _ => interleaved
            .chunks_exact(channels)
            .map(|frame| frame.iter().sum::<f32>() / channels as f32)
            .collect(),
    }
}
201
#[cfg(test)]
mod tests {
    use super::*;

    /// A path that does not exist must be reported as `CoreError::Io`.
    #[test]
    fn nonexistent_path_returns_err() {
        let missing = Path::new("/nonexistent/path/audio.wav");
        let outcome = decode_to_mono(missing);
        assert!(outcome.is_err());

        let err = outcome.unwrap_err();
        let is_io_error = matches!(err, CoreError::Io { .. });
        assert!(is_io_error, "expected CoreError::Io, got: {err}");
    }

    /// Every public field of `DecodedAudio` can be set and read back.
    #[test]
    fn decoded_audio_field_access() {
        let DecodedAudio {
            samples,
            sample_rate,
            channels,
            duration,
        } = DecodedAudio {
            samples: vec![0.0, 0.5, -0.5, 1.0],
            sample_rate: 44100,
            channels: 2,
            duration: 0.5,
        };

        assert_eq!(samples.len(), 4);
        assert_eq!(sample_rate, 44100);
        assert_eq!(channels, 2);
        assert!((duration - 0.5).abs() < f64::EPSILON);
    }
}
231