| 1 |
|
| 2 |
|
| 3 |
|
| 4 |
|
| 5 |
|
| 6 |
|
| 7 |
use std::path::Path; |
| 8 |
|
| 9 |
use symphonia::core::audio::SampleBuffer; |
| 10 |
use symphonia::core::codecs::DecoderOptions; |
| 11 |
use symphonia::core::formats::FormatOptions; |
| 12 |
use symphonia::core::io::MediaSourceStream; |
| 13 |
use symphonia::core::meta::MetadataOptions; |
| 14 |
use symphonia::core::probe::Hint; |
| 15 |
use tracing::{instrument, warn}; |
| 16 |
|
| 17 |
use crate::error::{io_err, AnalysisError, CoreError}; |
| 18 |
|
| 19 |
|
| 20 |
/// Result of decoding an audio file and downmixing it to a single channel.
#[derive(Debug, Clone, PartialEq)]
pub struct DecodedAudio {
    /// Mono samples, one value per decoded frame (the mean of that frame's channels).
    pub samples: Vec<f32>,

    /// Sample rate reported by the source file.
    pub sample_rate: u32,

    /// Channel count of the source file, before downmixing.
    pub channels: u16,

    /// Length of `samples` divided by `sample_rate`.
    pub duration: f64,
}
| 31 |
|
| 32 |
|
| 33 |
|
| 34 |
|
| 35 |
#[instrument(skip_all)] |
| 36 |
pub fn decode_to_mono(path: &Path) -> Result<DecodedAudio, CoreError> { |
| 37 |
let file = std::fs::File::open(path).map_err(|e| io_err(path, e))?; |
| 38 |
let mss = MediaSourceStream::new(Box::new(file), Default::default()); |
| 39 |
|
| 40 |
let mut hint = Hint::new(); |
| 41 |
if let Some(ext) = path.extension().and_then(|e| e.to_str()) { |
| 42 |
hint.with_extension(ext); |
| 43 |
} |
| 44 |
|
| 45 |
let probed = match symphonia::default::get_probe().format( |
| 46 |
&hint, |
| 47 |
mss, |
| 48 |
&FormatOptions::default(), |
| 49 |
&MetadataOptions::default(), |
| 50 |
) { |
| 51 |
Ok(p) => p, |
| 52 |
Err(e) => { |
| 53 |
let is_wav = path |
| 54 |
.extension() |
| 55 |
.and_then(|e| e.to_str()) |
| 56 |
.is_some_and(|e| e.eq_ignore_ascii_case("wav")); |
| 57 |
if is_wav { |
| 58 |
warn!( |
| 59 |
path = %path.display(), |
| 60 |
"symphonia probe failed, trying hound fallback: {e}" |
| 61 |
); |
| 62 |
return decode_wav_hound(path); |
| 63 |
} |
| 64 |
return Err(AnalysisError::ProbeFailed(e.to_string()).into()); |
| 65 |
} |
| 66 |
}; |
| 67 |
|
| 68 |
let mut format = probed.format; |
| 69 |
|
| 70 |
let track = format |
| 71 |
.default_track() |
| 72 |
.ok_or(AnalysisError::NoAudioTrack)?; |
| 73 |
|
| 74 |
let track_id = track.id; |
| 75 |
let source_sample_rate = track |
| 76 |
.codec_params |
| 77 |
.sample_rate |
| 78 |
.ok_or(AnalysisError::ProbeFailed("missing sample rate".to_string()))?; |
| 79 |
let source_channels = track |
| 80 |
.codec_params |
| 81 |
.channels |
| 82 |
.map(|c| c.count() as u16) |
| 83 |
.ok_or(AnalysisError::ProbeFailed("missing channel count".to_string()))?; |
| 84 |
|
| 85 |
let mut decoder = symphonia::default::get_codecs() |
| 86 |
.make(&track.codec_params, &DecoderOptions::default()) |
| 87 |
.map_err(|e| AnalysisError::DecoderFailed(e.to_string()))?; |
| 88 |
|
| 89 |
let mut mono_samples: Vec<f32> = Vec::new(); |
| 90 |
|
| 91 |
loop { |
| 92 |
let packet = match format.next_packet() { |
| 93 |
Ok(p) => p, |
| 94 |
Err(symphonia::core::errors::Error::IoError(ref e)) |
| 95 |
if e.kind() == std::io::ErrorKind::UnexpectedEof => |
| 96 |
{ |
| 97 |
break; |
| 98 |
} |
| 99 |
Err(e) => return Err(AnalysisError::PacketError(e.to_string()).into()), |
| 100 |
}; |
| 101 |
|
| 102 |
if packet.track_id() != track_id { |
| 103 |
continue; |
| 104 |
} |
| 105 |
|
| 106 |
let decoded = match decoder.decode(&packet) { |
| 107 |
Ok(d) => d, |
| 108 |
Err(symphonia::core::errors::Error::DecodeError(_)) => continue, |
| 109 |
Err(e) => return Err(AnalysisError::DecodeError(e.to_string()).into()), |
| 110 |
}; |
| 111 |
|
| 112 |
let spec = *decoded.spec(); |
| 113 |
let num_frames = decoded.frames(); |
| 114 |
let num_channels = spec.channels.count(); |
| 115 |
|
| 116 |
if num_channels == 0 { |
| 117 |
continue; |
| 118 |
} |
| 119 |
|
| 120 |
let mut sample_buf = SampleBuffer::<f32>::new(num_frames as u64, *decoded.spec()); |
| 121 |
sample_buf.copy_interleaved_ref(decoded); |
| 122 |
let samples = sample_buf.samples(); |
| 123 |
|
| 124 |
|
| 125 |
for frame in 0..num_frames { |
| 126 |
let mut sum = 0.0f32; |
| 127 |
for ch in 0..num_channels { |
| 128 |
sum += samples[frame * num_channels + ch]; |
| 129 |
} |
| 130 |
mono_samples.push(sum / num_channels as f32); |
| 131 |
} |
| 132 |
} |
| 133 |
|
| 134 |
if mono_samples.is_empty() { |
| 135 |
return Err(AnalysisError::NoAudioData.into()); |
| 136 |
} |
| 137 |
|
| 138 |
let duration = mono_samples.len() as f64 / source_sample_rate as f64; |
| 139 |
|
| 140 |
Ok(DecodedAudio { |
| 141 |
samples: mono_samples, |
| 142 |
sample_rate: source_sample_rate, |
| 143 |
channels: source_channels, |
| 144 |
duration, |
| 145 |
}) |
| 146 |
} |
| 147 |
|
| 148 |
|
| 149 |
|
| 150 |
#[instrument(skip_all)] |
| 151 |
fn decode_wav_hound(path: &Path) -> Result<DecodedAudio, CoreError> { |
| 152 |
let reader = hound::WavReader::open(path) |
| 153 |
.map_err(|e| AnalysisError::ProbeFailed(format!("hound: {e}")))?; |
| 154 |
|
| 155 |
let spec = reader.spec(); |
| 156 |
let source_sample_rate = spec.sample_rate; |
| 157 |
let source_channels = spec.channels; |
| 158 |
|
| 159 |
let mono_samples: Vec<f32> = match spec.sample_format { |
| 160 |
hound::SampleFormat::Int => { |
| 161 |
let max_val = (1i64 << (spec.bits_per_sample - 1)) as f32; |
| 162 |
let all: Vec<f32> = reader |
| 163 |
.into_samples::<i32>() |
| 164 |
.map(|s| s.map(|v| v as f32 / max_val)) |
| 165 |
.collect::<Result<_, _>>() |
| 166 |
.map_err(|e| AnalysisError::DecodeError(format!("hound: {e}")))?; |
| 167 |
mix_to_mono(&all, source_channels as usize) |
| 168 |
} |
| 169 |
hound::SampleFormat::Float => { |
| 170 |
let all: Vec<f32> = reader |
| 171 |
.into_samples::<f32>() |
| 172 |
.collect::<Result<_, _>>() |
| 173 |
.map_err(|e| AnalysisError::DecodeError(format!("hound: {e}")))?; |
| 174 |
mix_to_mono(&all, source_channels as usize) |
| 175 |
} |
| 176 |
}; |
| 177 |
|
| 178 |
if mono_samples.is_empty() { |
| 179 |
return Err(AnalysisError::NoAudioData.into()); |
| 180 |
} |
| 181 |
|
| 182 |
let duration = mono_samples.len() as f64 / source_sample_rate as f64; |
| 183 |
|
| 184 |
Ok(DecodedAudio { |
| 185 |
samples: mono_samples, |
| 186 |
sample_rate: source_sample_rate, |
| 187 |
channels: source_channels, |
| 188 |
duration, |
| 189 |
}) |
| 190 |
} |
| 191 |
|
| 192 |
/// Averages interleaved multi-channel samples down to one value per frame.
///
/// `interleaved` is read as consecutive frames of `channels` samples each.
/// Mono input is returned as a copy. Otherwise each complete frame becomes the
/// arithmetic mean of its samples; a trailing incomplete frame is ignored.
/// A `channels` value of zero yields an empty vector.
fn mix_to_mono(interleaved: &[f32], channels: usize) -> Vec<f32> {
    match channels {
        // `chunks_exact(0)` panics, so a zero channel count is handled up front.
        0 => Vec::new(),
        1 => interleaved.to_vec(),
        _ => interleaved
            .chunks_exact(channels)
            .map(|frame| frame.iter().sum::<f32>() / channels as f32)
            .collect(),
    }
}
| 201 |
|
| 202 |
#[cfg(test)]
mod tests {
    use super::*;

    /// A path that does not exist must fail at file open and surface as `CoreError::Io`.
    #[test]
    fn nonexistent_path_returns_err() {
        let missing = Path::new("/nonexistent/path/audio.wav");
        let result = decode_to_mono(missing);
        assert!(result.is_err());

        let err = result.unwrap_err();
        let is_io_error = matches!(err, CoreError::Io { .. });
        assert!(is_io_error, "expected CoreError::Io, got: {err}");
    }

    /// `DecodedAudio` can be built literally and every field read back.
    #[test]
    fn decoded_audio_field_access() {
        let audio = DecodedAudio {
            samples: vec![0.0, 0.5, -0.5, 1.0],
            sample_rate: 44100,
            channels: 2,
            duration: 0.5,
        };

        let duration_error = (audio.duration - 0.5).abs();

        assert_eq!(audio.samples.len(), 4);
        assert_eq!(audio.sample_rate, 44100);
        assert_eq!(audio.channels, 2);
        assert!(duration_error < f64::EPSILON);
    }
}
| 231 |
|