Skip to main content

max / audiofiles

20.8 KB · 611 lines History Blame Raw
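//! Audio preview: decodes samples to stereo f32 for playback.
//!
//! Decoding is done with Symphonia, with a hound fallback for WAV files Symphonia cannot probe.
//! [`decode_to_f32`] decodes a whole file on the calling thread, while
//! [`start_streaming_decode`] decodes on a background thread. The resulting interleaved buffer
//! is handed to the cpal audio output thread through [`PreviewPlayback`] behind a
//! `parking_lot::Mutex`.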
5
6 use std::path::Path;
7 use std::sync::atomic::Ordering;
8
9 use tracing::{instrument, warn};
10
11 use symphonia::core::audio::SampleBuffer;
12 use symphonia::core::codecs::DecoderOptions;
13 use symphonia::core::formats::FormatOptions;
14 use symphonia::core::io::MediaSourceStream;
15 use symphonia::core::meta::MetadataOptions;
16 use symphonia::core::probe::Hint;
17
18 use crate::error::PreviewError;
19
/// Decoded audio data ready for playback, always stored as interleaved stereo f32.
///
/// The decode functions in this module always produce `channels == 2`, regardless of the
/// channel layout of the source file.
#[derive(Debug, Clone, PartialEq)]
pub struct PreviewBuffer {
    /// Interleaved stereo sample data (L, R, L, R, ...).
    pub data: Vec<f32>,
    /// Number of channels (always 2 after mono/multi-channel conversion).
    pub channels: usize,
    /// Original sample rate of the decoded file.
    pub sample_rate: u32,
}
29
30 /// Mutable playback state shared between the GUI and audio threads.
31 pub struct PreviewPlayback {
32 /// Currently loaded preview buffer, or `None` if nothing is loaded.
33 pub buffer: Option<PreviewBuffer>,
34 /// Current playback position in file-rate frames (fractional for resampling).
35 pub position_frac: f64,
36 /// Whether playback is active.
37 pub playing: bool,
38 /// Whether the sample should loop when it reaches the end.
39 pub loop_enabled: bool,
40 /// `true` while a background thread is still decoding and appending to the buffer.
41 pub streaming: bool,
42 /// Number of stereo frames decoded so far (grows during streaming).
43 pub decoded_frames: usize,
44 /// Total frame count from file metadata, for stable cursor display during streaming.
45 pub total_frames_estimate: Option<usize>,
46 }
47
48 impl Default for PreviewPlayback {
49 fn default() -> Self {
50 Self {
51 buffer: None,
52 position_frac: 0.0,
53 playing: false,
54 loop_enabled: false,
55 streaming: false,
56 decoded_frames: 0,
57 total_frames_estimate: None,
58 }
59 }
60 }
61
62 impl PreviewPlayback {
63 /// Create a new idle playback state with no buffer loaded.
64 pub fn new() -> Self {
65 Self::default()
66 }
67 }
68
69 /// Decode an audio file to interleaved stereo f32.
70 /// Mono files are doubled to stereo. Multi-channel files are mixed down to stereo.
71 #[instrument(skip_all)]
72 pub fn decode_to_f32(path: &Path) -> Result<PreviewBuffer, PreviewError> {
73 let file = std::fs::File::open(path).map_err(|e| PreviewError::Open {
74 path: path.to_path_buf(),
75 source: e,
76 })?;
77 let mss = MediaSourceStream::new(Box::new(file), Default::default());
78
79 let mut hint = Hint::new();
80 if let Some(ext) = path.extension().and_then(|e| e.to_str()) {
81 hint.with_extension(ext);
82 }
83
84 let probed = match symphonia::default::get_probe().format(
85 &hint,
86 mss,
87 &FormatOptions::default(),
88 &MetadataOptions::default(),
89 ) {
90 Ok(p) => p,
91 Err(e) => {
92 let is_wav = path
93 .extension()
94 .and_then(|e| e.to_str())
95 .is_some_and(|e| e.eq_ignore_ascii_case("wav"));
96 if is_wav {
97 warn!(
98 path = %path.display(),
99 "symphonia probe failed for preview, trying hound fallback: {e}"
100 );
101 return decode_wav_hound_stereo(path);
102 }
103 return Err(PreviewError::Probe(e.to_string()));
104 }
105 };
106
107 let mut format = probed.format;
108
109 let track = format
110 .default_track()
111 .ok_or(PreviewError::NoTrack)?;
112
113 let track_id = track.id;
114 let source_sample_rate = track.codec_params.sample_rate.unwrap_or(44100);
115
116 let mut decoder = symphonia::default::get_codecs()
117 .make(&track.codec_params, &DecoderOptions::default())
118 .map_err(|e| PreviewError::Decoder(e.to_string()))?;
119
120 let mut all_samples: Vec<f32> = Vec::new();
121
122 // Cap at 10 minutes of stereo 48kHz to prevent OOM on files with bad metadata
123 // that bypass the streaming threshold.
124 const MAX_SAMPLES: usize = 10 * 60 * 48_000 * 2;
125
126 // Reuse SampleBuffer across packets to avoid per-packet allocation.
127 let mut sample_buf: Option<SampleBuffer<f32>> = None;
128
129 loop {
130 let packet = match format.next_packet() {
131 Ok(p) => p,
132 Err(symphonia::core::errors::Error::IoError(ref e))
133 if e.kind() == std::io::ErrorKind::UnexpectedEof =>
134 {
135 break;
136 }
137 Err(e) => return Err(PreviewError::Packet(e.to_string())),
138 };
139
140 if packet.track_id() != track_id {
141 continue;
142 }
143
144 let decoded = match decoder.decode(&packet) {
145 Ok(d) => d,
146 Err(symphonia::core::errors::Error::DecodeError(_)) => continue,
147 Err(e) => return Err(PreviewError::Decode(e.to_string())),
148 };
149
150 let spec = *decoded.spec();
151 let num_frames = decoded.frames();
152 let num_channels = spec.channels.count();
153
154 // Reallocate only when the buffer can't fit this packet.
155 let buf = match &mut sample_buf {
156 Some(buf) if buf.capacity() >= num_frames => buf,
157 _ => {
158 sample_buf = Some(SampleBuffer::<f32>::new(num_frames as u64, spec));
159 sample_buf.as_mut().unwrap()
160 }
161 };
162 buf.copy_interleaved_ref(decoded);
163 let samples = buf.samples();
164
165 // Convert to interleaved stereo
166 interleaved_to_stereo(samples, num_channels, num_frames, &mut all_samples);
167
168 if all_samples.len() >= MAX_SAMPLES {
169 tracing::warn!("decode_to_f32: hit {MAX_SAMPLES} sample cap, truncating");
170 break;
171 }
172 }
173
174 if all_samples.is_empty() {
175 return Err(PreviewError::NoData);
176 }
177
178 Ok(PreviewBuffer {
179 data: all_samples,
180 channels: 2,
181 sample_rate: source_sample_rate,
182 })
183 }
184
185 /// Estimate the duration of an audio file (in seconds) by reading codec metadata
186 /// without decoding. Returns `None` if metadata is unavailable.
187 #[instrument(skip_all)]
188 pub fn estimate_duration(path: &Path) -> Option<f64> {
189 let file = std::fs::File::open(path).ok()?;
190 let mss = MediaSourceStream::new(Box::new(file), Default::default());
191
192 let mut hint = Hint::new();
193 if let Some(ext) = path.extension().and_then(|e| e.to_str()) {
194 hint.with_extension(ext);
195 }
196
197 if let Ok(probed) = symphonia::default::get_probe().format(
198 &hint,
199 mss,
200 &FormatOptions::default(),
201 &MetadataOptions::default(),
202 ) {
203 let track = probed.format.default_track()?;
204 let params = &track.codec_params;
205 let n_frames = params.n_frames?;
206 let sample_rate = params.sample_rate?;
207 if sample_rate == 0 {
208 return None;
209 }
210 return Some(n_frames as f64 / sample_rate as f64);
211 }
212 // Fallback for WAV files Symphonia rejects
213 let is_wav = path.extension().and_then(|e| e.to_str())
214 .is_some_and(|e| e.eq_ignore_ascii_case("wav"));
215 if is_wav {
216 let reader = hound::WavReader::open(path).ok()?;
217 let spec = reader.spec();
218 if spec.sample_rate == 0 {
219 return None;
220 }
221 let frames = reader.len() as f64 / spec.channels as f64;
222 return Some(frames / spec.sample_rate as f64);
223 }
224 None
225 }
226
/// Duration threshold in seconds: files longer than this use streaming decode.
///
/// NOTE(review): this module only defines the threshold; the comparison itself (presumably
/// against the result of [`estimate_duration`]) lives in the caller — confirm there.
pub const STREAMING_THRESHOLD_SECS: f64 = 30.0;

/// Number of seconds to pre-fill before enabling playback during streaming.
///
/// [`start_streaming_decode`] multiplies this by the file's sample rate to get the number
/// of frames that must be decoded before it sets `playing`.
const PREFILL_SECS: f64 = 0.5;
232
233 /// Spawn a background thread that decodes the file and streams data into the
234 /// shared `PreviewPlayback`. The buffer is pre-filled with ~0.5s of audio before
235 /// `playing` is set to `true`, so playback starts without waiting for the full decode.
236 ///
237 /// Accepts `Arc<SharedState>` so the background thread can access the preview Mutex.
238 #[instrument(skip_all)]
239 pub fn start_streaming_decode(
240 path: &Path,
241 shared: &std::sync::Arc<crate::state::SharedState>,
242 ) -> Result<(), PreviewError> {
243 let file = std::fs::File::open(path).map_err(|e| PreviewError::Open {
244 path: path.to_path_buf(),
245 source: e,
246 })?;
247 let mss = MediaSourceStream::new(Box::new(file), Default::default());
248
249 let mut hint = Hint::new();
250 if let Some(ext) = path.extension().and_then(|e| e.to_str()) {
251 hint.with_extension(ext);
252 }
253
254 let probed = match symphonia::default::get_probe().format(
255 &hint,
256 mss,
257 &FormatOptions::default(),
258 &MetadataOptions::default(),
259 ) {
260 Ok(p) => p,
261 Err(e) => {
262 let is_wav = path
263 .extension()
264 .and_then(|e| e.to_str())
265 .is_some_and(|e| e.eq_ignore_ascii_case("wav"));
266 if is_wav {
267 warn!(
268 path = %path.display(),
269 "symphonia probe failed for streaming, using hound fallback: {e}"
270 );
271 // For WAV fallback, just do a full decode (WAV is PCM, fast to read)
272 let buf = decode_wav_hound_stereo(path)?;
273 let mut guard = shared.preview.lock();
274 let total_frames = buf.data.len() / 2;
275 guard.buffer = Some(buf);
276 guard.position_frac = 0.0;
277 guard.playing = true;
278 guard.streaming = false;
279 guard.decoded_frames = total_frames;
280 guard.total_frames_estimate = Some(total_frames);
281 return Ok(());
282 }
283 return Err(PreviewError::Probe(e.to_string()));
284 }
285 };
286
287 let mut format = probed.format;
288 let track = format.default_track().ok_or(PreviewError::NoTrack)?;
289 let track_id = track.id;
290 let codec_params = track.codec_params.clone();
291 let source_sample_rate = codec_params.sample_rate.unwrap_or(44100);
292 let n_frames_estimate = codec_params.n_frames.map(|n| n as usize);
293 let prefill_frames = (PREFILL_SECS * source_sample_rate as f64) as usize;
294
295 let mut decoder = symphonia::default::get_codecs()
296 .make(&codec_params, &DecoderOptions::default())
297 .map_err(|e| PreviewError::Decoder(e.to_string()))?;
298
299 // Increment generation to cancel any previous streaming decode thread
300 let my_generation = shared.decode_generation.fetch_add(1, Ordering::AcqRel) + 1;
301
302 // Set up the initial buffer and playback state (not yet playing)
303 {
304 // Cap capacity to prevent OOM from malformed codec metadata (max ~30 min stereo)
305 let max_frames = source_sample_rate as usize * 60 * 30;
306 let capacity = n_frames_estimate
307 .map(|n| n.min(max_frames))
308 .unwrap_or(source_sample_rate as usize * 60)
309 * 2;
310 let mut guard = shared.preview.lock();
311 guard.buffer = Some(PreviewBuffer {
312 data: Vec::with_capacity(capacity),
313 channels: 2,
314 sample_rate: source_sample_rate,
315 });
316 guard.position_frac = 0.0;
317 guard.playing = false;
318 guard.streaming = true;
319 guard.decoded_frames = 0;
320 guard.total_frames_estimate = n_frames_estimate;
321 }
322
323 let shared = std::sync::Arc::clone(shared);
324 std::thread::spawn(move || {
325 let mut total_frames = 0usize;
326 let mut started = false;
327 // Reuse SampleBuffer across packets to avoid per-packet allocation.
328 let mut sample_buf: Option<SampleBuffer<f32>> = None;
329
330 loop {
331 // Check if a newer decode has started — if so, this thread exits
332 if shared.decode_generation.load(Ordering::Acquire) != my_generation {
333 let mut guard = shared.preview.lock();
334 guard.streaming = false;
335 return;
336 }
337
338 let packet = match format.next_packet() {
339 Ok(p) => p,
340 Err(symphonia::core::errors::Error::IoError(ref e))
341 if e.kind() == std::io::ErrorKind::UnexpectedEof =>
342 {
343 break;
344 }
345 Err(_) => break,
346 };
347
348 if packet.track_id() != track_id {
349 continue;
350 }
351
352 let decoded = match decoder.decode(&packet) {
353 Ok(d) => d,
354 Err(symphonia::core::errors::Error::DecodeError(_)) => continue,
355 Err(_) => break,
356 };
357
358 let spec = *decoded.spec();
359 let num_frames = decoded.frames();
360 let num_channels = spec.channels.count();
361
362 let buf = match &mut sample_buf {
363 Some(buf) if buf.capacity() >= num_frames => buf,
364 _ => {
365 sample_buf = Some(SampleBuffer::<f32>::new(num_frames as u64, spec));
366 sample_buf.as_mut().unwrap()
367 }
368 };
369 buf.copy_interleaved_ref(decoded);
370 let samples = buf.samples();
371
372 // Convert to interleaved stereo in a local batch
373 let mut batch = Vec::with_capacity(num_frames * 2);
374 interleaved_to_stereo(samples, num_channels, num_frames, &mut batch);
375
376 total_frames += num_frames;
377
378 // Append batch to shared buffer (brief lock)
379 {
380 let mut guard = shared.preview.lock();
381
382 // Check cancellation: if playing was set to false externally (stop_preview),
383 // abort the decode.
384 if started && !guard.playing {
385 guard.streaming = false;
386 return;
387 }
388
389 if let Some(ref mut buf) = guard.buffer {
390 buf.data.extend_from_slice(&batch);
391 }
392 guard.decoded_frames = total_frames;
393
394 // Start playback once pre-fill threshold is reached
395 if !started && total_frames >= prefill_frames {
396 guard.playing = true;
397 started = true;
398 }
399 }
400 }
401
402 // Decode complete
403 let mut guard = shared.preview.lock();
404 guard.streaming = false;
405 guard.decoded_frames = total_frames;
406 // If the file was very short and we never hit the prefill threshold, start now
407 if !started && total_frames > 0 {
408 guard.playing = true;
409 }
410 });
411
412 Ok(())
413 }
414
/// Append `samples` to `out` as interleaved stereo (L, R, L, R, ...).
///
/// - `num_channels == 1`: every input sample is written twice.
/// - `num_channels == 2`: input samples are copied through in order.
/// - any other count: for each of `num_frames` frames, the first two channels of that frame
///   are kept; positions past the end of `samples` are filled with `0.0`.
///
/// `num_frames` is only consulted in the last case. Existing contents of `out` are kept.
fn interleaved_to_stereo(
    samples: &[f32],
    num_channels: usize,
    num_frames: usize,
    out: &mut Vec<f32>,
) {
    // NaN/Inf become silence so they cannot propagate downstream
    // (NaN.clamp() returns NaN, so the audio output clamp won't catch it).
    fn sanitize(sample: f32) -> f32 {
        if sample.is_finite() {
            sample
        } else {
            0.0
        }
    }

    match num_channels {
        1 => out.extend(samples.iter().flat_map(|&mono| {
            let value = sanitize(mono);
            [value, value]
        })),
        2 => out.extend(samples.iter().copied().map(sanitize)),
        stride => {
            let channel_at = |index: usize| sanitize(samples.get(index).copied().unwrap_or(0.0));
            out.extend((0..num_frames).flat_map(|frame| {
                let first = frame * stride;
                [channel_at(first), channel_at(first + 1)]
            }));
        }
    }
}
450
451 /// Fallback WAV decoder using hound for files Symphonia rejects
452 /// (non-standard fmt chunk sizes: 18 or 20 bytes instead of 16 for PCM).
453 /// Returns interleaved stereo, matching the Symphonia decode path output.
454 fn decode_wav_hound_stereo(path: &Path) -> Result<PreviewBuffer, PreviewError> {
455 let reader =
456 hound::WavReader::open(path).map_err(|e| PreviewError::Probe(format!("hound: {e}")))?;
457
458 let spec = reader.spec();
459 let source_sample_rate = spec.sample_rate;
460 let channels = spec.channels as usize;
461
462 let raw: Vec<f32> = match spec.sample_format {
463 hound::SampleFormat::Int => {
464 let max_val = (1i64 << (spec.bits_per_sample - 1)) as f32;
465 reader
466 .into_samples::<i32>()
467 .map(|s| s.map(|v| v as f32 / max_val))
468 .collect::<Result<_, _>>()
469 .map_err(|e| PreviewError::Decode(format!("hound: {e}")))?
470 }
471 hound::SampleFormat::Float => reader
472 .into_samples::<f32>()
473 .collect::<Result<_, _>>()
474 .map_err(|e| PreviewError::Decode(format!("hound: {e}")))?,
475 };
476
477 let num_frames = raw.len() / channels.max(1);
478 let mut stereo = Vec::with_capacity(num_frames * 2);
479 interleaved_to_stereo(&raw, channels, num_frames, &mut stereo);
480
481 if stereo.is_empty() {
482 return Err(PreviewError::NoData);
483 }
484
485 Ok(PreviewBuffer {
486 data: stereo,
487 channels: 2,
488 sample_rate: source_sample_rate,
489 })
490 }
491
#[cfg(test)]
mod tests {
    use super::*;

    /// Write a minimal WAV file with f32 PCM data.
    ///
    /// The layout is a 44-byte canonical header (RIFF + 16-byte `fmt ` chunk tagged as
    /// IEEE float + `data` chunk header) followed by the little-endian samples.
    fn write_wav(path: &Path, channels: u16, sample_rate: u32, samples: &[f32]) {
        use std::io::Write;

        const BYTES_PER_SAMPLE: u16 = 4; // f32
        const FORMAT_IEEE_FLOAT: u16 = 3;
        const FMT_CHUNK_SIZE: u32 = 16;

        let block_align = channels * BYTES_PER_SAMPLE;
        let byte_rate = sample_rate * block_align as u32;
        let bits_per_sample = BYTES_PER_SAMPLE * 8;
        let data_size = (samples.len() as u32) * 4;
        let riff_size = 36 + data_size;

        let mut wav = Vec::with_capacity(44 + data_size as usize);

        // RIFF header
        wav.extend_from_slice(b"RIFF");
        wav.extend_from_slice(&riff_size.to_le_bytes());
        wav.extend_from_slice(b"WAVE");

        // fmt chunk — IEEE float
        wav.extend_from_slice(b"fmt ");
        wav.extend_from_slice(&FMT_CHUNK_SIZE.to_le_bytes());
        wav.extend_from_slice(&FORMAT_IEEE_FLOAT.to_le_bytes());
        wav.extend_from_slice(&channels.to_le_bytes());
        wav.extend_from_slice(&sample_rate.to_le_bytes());
        wav.extend_from_slice(&byte_rate.to_le_bytes());
        wav.extend_from_slice(&block_align.to_le_bytes());
        wav.extend_from_slice(&bits_per_sample.to_le_bytes());

        // data chunk
        wav.extend_from_slice(b"data");
        wav.extend_from_slice(&data_size.to_le_bytes());
        for sample in samples {
            wav.extend_from_slice(&sample.to_le_bytes());
        }

        let mut file = std::fs::File::create(path).unwrap();
        file.write_all(&wav).unwrap();
    }

    /// Write a WAV fixture into a fresh temp dir and run it through `decode_to_f32`.
    fn decode_fixture(
        file_name: &str,
        channels: u16,
        sample_rate: u32,
        samples: &[f32],
    ) -> Result<PreviewBuffer, PreviewError> {
        let dir = tempfile::tempdir().unwrap();
        let path = dir.path().join(file_name);
        write_wav(&path, channels, sample_rate, samples);
        decode_to_f32(&path)
    }

    #[test]
    fn decode_mono_duplicates_to_stereo() {
        let decoded = decode_fixture("mono.wav", 1, 44100, &[0.5, -0.5, 0.25]).unwrap();

        assert_eq!(decoded.channels, 2);
        assert_eq!(decoded.data, vec![0.5, 0.5, -0.5, -0.5, 0.25, 0.25]);
    }

    #[test]
    fn decode_stereo_passthrough() {
        let samples = vec![0.1, 0.2, 0.3, 0.4, 0.5, 0.6];

        let decoded = decode_fixture("stereo.wav", 2, 48000, &samples).unwrap();

        assert_eq!(decoded.channels, 2);
        assert_eq!(decoded.data, samples);
    }

    #[test]
    fn decode_multichannel_takes_first_two() {
        // 4 channels, 2 frames: [L0, R0, C0, S0, L1, R1, C1, S1]
        let samples = vec![0.1, 0.2, 0.9, 0.8, 0.3, 0.4, 0.7, 0.6];

        let decoded = decode_fixture("quad.wav", 4, 44100, &samples).unwrap();

        assert_eq!(decoded.channels, 2);
        assert_eq!(decoded.data, vec![0.1, 0.2, 0.3, 0.4]);
    }

    #[test]
    fn decode_preserves_sample_rate() {
        let decoded = decode_fixture("rate.wav", 1, 96000, &[0.0, 0.0]).unwrap();

        assert_eq!(decoded.sample_rate, 96000);
    }

    #[test]
    fn decode_nonexistent_returns_open_error() {
        let missing = Path::new("/tmp/nonexistent_audiofiles_test.wav");

        assert!(matches!(
            decode_to_f32(missing),
            Err(PreviewError::Open { .. })
        ));
    }

    #[test]
    fn decode_empty_wav_returns_no_data() {
        let outcome = decode_fixture("empty.wav", 1, 44100, &[]);

        assert!(matches!(outcome, Err(PreviewError::NoData)));
    }

    #[test]
    fn output_is_always_stereo() {
        for channels in [1u16, 2, 4] {
            // Three frames of ramp data for each channel layout.
            let samples: Vec<f32> = (0..channels as usize * 3)
                .map(|i| i as f32 * 0.1)
                .collect();

            let decoded =
                decode_fixture(&format!("{channels}ch.wav"), channels, 44100, &samples).unwrap();

            assert_eq!(
                decoded.channels, 2,
                "expected stereo for {channels}-channel input"
            );
        }
    }
}
611