| 1 |
|
| 2 |
|
| 3 |
use crate::db::Database; |
| 4 |
use crate::error::{CoreError, Result}; |
| 5 |
use crate::vp_tree::VpTree; |
| 6 |
use tracing::instrument; |
| 7 |
|
| 8 |
|
| 9 |
|
| 10 |
|
| 11 |
/// The set of numeric audio features used for similarity comparison.
///
/// Every feature is optional. [`load_features`] fills the fields from the
/// identically named columns of the `audio_analysis` table, and
/// `FeatureVector::default()` produces a vector in which every feature is
/// `None`.
///
/// The same type is used for raw values and for values rescaled by
/// `normalize`; nothing in the type distinguishes the two.
#[derive(Debug, Clone, Default)]
pub struct FeatureVector {
    /// Value of the `bpm` column, if present.
    pub bpm: Option<f64>,
    /// Value of the `duration` column, if present.
    pub duration: Option<f64>,
    /// Value of the `lufs` column, if present.
    pub lufs: Option<f64>,
    /// Value of the `spectral_centroid` column, if present.
    pub spectral_centroid: Option<f64>,
    /// Value of the `spectral_flatness` column, if present.
    pub spectral_flatness: Option<f64>,
    /// Value of the `spectral_rolloff` column, if present.
    pub spectral_rolloff: Option<f64>,
    /// Value of the `zero_crossing_rate` column, if present.
    pub zero_crossing_rate: Option<f64>,
    /// Value of the `onset_strength` column, if present.
    pub onset_strength: Option<f64>,
    /// Value of the `spectral_bandwidth` column, if present.
    pub spectral_bandwidth: Option<f64>,
    /// Value of the `centroid_variance` column, if present.
    pub centroid_variance: Option<f64>,
    /// Value of the `crest_factor` column, if present.
    pub crest_factor: Option<f64>,
    /// Value of the `attack_time` column, if present.
    pub attack_time: Option<f64>,
}
| 26 |
|
| 27 |
|
| 28 |
/// Per-feature multipliers applied to the squared differences summed by
/// [`feature_distance`].
///
/// There is one weight for each field of [`FeatureVector`], under the same
/// name.
#[derive(Debug, Clone)]
pub struct FeatureWeights {
    /// Weight of the `bpm` difference.
    pub bpm: f64,
    /// Weight of the `duration` difference.
    pub duration: f64,
    /// Weight of the `lufs` difference.
    pub lufs: f64,
    /// Weight of the `spectral_centroid` difference.
    pub spectral_centroid: f64,
    /// Weight of the `spectral_flatness` difference.
    pub spectral_flatness: f64,
    /// Weight of the `spectral_rolloff` difference.
    pub spectral_rolloff: f64,
    /// Weight of the `zero_crossing_rate` difference.
    pub zero_crossing_rate: f64,
    /// Weight of the `onset_strength` difference.
    pub onset_strength: f64,
    /// Weight of the `spectral_bandwidth` difference.
    pub spectral_bandwidth: f64,
    /// Weight of the `centroid_variance` difference.
    pub centroid_variance: f64,
    /// Weight of the `crest_factor` difference.
    pub crest_factor: f64,
    /// Weight of the `attack_time` difference.
    pub attack_time: f64,
}

impl Default for FeatureWeights {
    /// Returns weights in which every feature counts equally (`1.0`).
    fn default() -> Self {
        // A single named value keeps the twelve entries visibly identical.
        const UNIT: f64 = 1.0;

        Self {
            bpm: UNIT,
            duration: UNIT,
            lufs: UNIT,
            spectral_centroid: UNIT,
            spectral_flatness: UNIT,
            spectral_rolloff: UNIT,
            zero_crossing_rate: UNIT,
            onset_strength: UNIT,
            spectral_bandwidth: UNIT,
            centroid_variance: UNIT,
            crest_factor: UNIT,
            attack_time: UNIT,
        }
    }
}
| 62 |
|
| 63 |
|
| 64 |
/// One hit of a similarity query: which sample matched and how far away it is.
#[derive(Debug, Clone)]
pub struct SimilarResult {
    /// Hash identifying the matched sample.
    pub hash: String,
    /// Distance to the query as computed by [`feature_distance`]; smaller
    /// means more similar.
    pub distance: f64,
}
| 69 |
|
| 70 |
|
| 71 |
/// Observed `(min, max)` bounds for every feature, used by `normalize` to
/// rescale raw values.
///
/// The derived `Default` sets each range to `(0.0, 0.0)`; `norm` treats such a
/// zero-width range as degenerate and maps every value to `0.0`.
#[derive(Debug, Clone, Default)]
struct NormRanges {
    /// `(min, max)` of `bpm`.
    bpm: (f64, f64),
    /// `(min, max)` of `duration`.
    duration: (f64, f64),
    /// `(min, max)` of `lufs`.
    lufs: (f64, f64),
    /// `(min, max)` of `spectral_centroid`.
    spectral_centroid: (f64, f64),
    /// `(min, max)` of `spectral_flatness`.
    spectral_flatness: (f64, f64),
    /// `(min, max)` of `spectral_rolloff`.
    spectral_rolloff: (f64, f64),
    /// `(min, max)` of `zero_crossing_rate`.
    zero_crossing_rate: (f64, f64),
    /// `(min, max)` of `onset_strength`.
    onset_strength: (f64, f64),
    /// `(min, max)` of `spectral_bandwidth`.
    spectral_bandwidth: (f64, f64),
    /// `(min, max)` of `centroid_variance`.
    centroid_variance: (f64, f64),
    /// `(min, max)` of `crest_factor`.
    crest_factor: (f64, f64),
    /// `(min, max)` of `attack_time`.
    attack_time: (f64, f64),
}
| 86 |
|
| 87 |
|
| 88 |
/// Linearly rescales `val` so that `min` maps to `0.0` and `max` maps to `1.0`.
///
/// Values outside `[min, max]` are not clamped. When the range is narrower
/// than `f64::EPSILON` the result is `0.0`, which avoids dividing by
/// (almost) zero.
fn norm(val: f64, min: f64, max: f64) -> f64 {
    let span = max - min;

    // Written as `<` on purpose: a NaN span fails the test and falls through
    // to the division, exactly like a regular non-degenerate range.
    let degenerate = span.abs() < f64::EPSILON;
    if degenerate {
        return 0.0;
    }

    (val - min) / span
}
| 95 |
|
| 96 |
|
| 97 |
fn normalize(fv: &FeatureVector, ranges: &NormRanges) -> FeatureVector { |
| 98 |
FeatureVector { |
| 99 |
bpm: fv.bpm.map(|v| norm(v, ranges.bpm.0, ranges.bpm.1)), |
| 100 |
duration: fv.duration.map(|v| norm(v, ranges.duration.0, ranges.duration.1)), |
| 101 |
lufs: fv.lufs.map(|v| norm(v, ranges.lufs.0, ranges.lufs.1)), |
| 102 |
spectral_centroid: fv.spectral_centroid.map(|v| norm(v, ranges.spectral_centroid.0, ranges.spectral_centroid.1)), |
| 103 |
spectral_flatness: fv.spectral_flatness.map(|v| norm(v, ranges.spectral_flatness.0, ranges.spectral_flatness.1)), |
| 104 |
spectral_rolloff: fv.spectral_rolloff.map(|v| norm(v, ranges.spectral_rolloff.0, ranges.spectral_rolloff.1)), |
| 105 |
zero_crossing_rate: fv.zero_crossing_rate.map(|v| norm(v, ranges.zero_crossing_rate.0, ranges.zero_crossing_rate.1)), |
| 106 |
onset_strength: fv.onset_strength.map(|v| norm(v, ranges.onset_strength.0, ranges.onset_strength.1)), |
| 107 |
spectral_bandwidth: fv.spectral_bandwidth.map(|v| norm(v, ranges.spectral_bandwidth.0, ranges.spectral_bandwidth.1)), |
| 108 |
centroid_variance: fv.centroid_variance.map(|v| norm(v, ranges.centroid_variance.0, ranges.centroid_variance.1)), |
| 109 |
crest_factor: fv.crest_factor.map(|v| norm(v, ranges.crest_factor.0, ranges.crest_factor.1)), |
| 110 |
attack_time: fv.attack_time.map(|v| norm(v, ranges.attack_time.0, ranges.attack_time.1)), |
| 111 |
} |
| 112 |
} |
| 113 |
|
| 114 |
|
| 115 |
|
| 116 |
pub fn feature_distance(a: &FeatureVector, b: &FeatureVector, weights: &FeatureWeights) -> f64 { |
| 117 |
let mut sum = 0.0; |
| 118 |
let mut dims = 0.0; |
| 119 |
|
| 120 |
let pairs: [(Option<f64>, Option<f64>, f64); 12] = [ |
| 121 |
(a.bpm, b.bpm, weights.bpm), |
| 122 |
(a.duration, b.duration, weights.duration), |
| 123 |
(a.lufs, b.lufs, weights.lufs), |
| 124 |
(a.spectral_centroid, b.spectral_centroid, weights.spectral_centroid), |
| 125 |
(a.spectral_flatness, b.spectral_flatness, weights.spectral_flatness), |
| 126 |
(a.spectral_rolloff, b.spectral_rolloff, weights.spectral_rolloff), |
| 127 |
(a.zero_crossing_rate, b.zero_crossing_rate, weights.zero_crossing_rate), |
| 128 |
(a.onset_strength, b.onset_strength, weights.onset_strength), |
| 129 |
(a.spectral_bandwidth, b.spectral_bandwidth, weights.spectral_bandwidth), |
| 130 |
(a.centroid_variance, b.centroid_variance, weights.centroid_variance), |
| 131 |
(a.crest_factor, b.crest_factor, weights.crest_factor), |
| 132 |
(a.attack_time, b.attack_time, weights.attack_time), |
| 133 |
]; |
| 134 |
|
| 135 |
for (va, vb, w) in &pairs { |
| 136 |
if let (Some(va), Some(vb)) = (va, vb) { |
| 137 |
let diff = va - vb; |
| 138 |
sum += w * diff * diff; |
| 139 |
dims += 1.0; |
| 140 |
} |
| 141 |
} |
| 142 |
|
| 143 |
|
| 144 |
|
| 145 |
if dims == 0.0 { 1e10 } else { (sum / dims).sqrt() } |
| 146 |
} |
| 147 |
|
| 148 |
|
| 149 |
#[instrument(skip_all)] |
| 150 |
pub fn load_features(db: &Database, hash: &str) -> Result<FeatureVector> { |
| 151 |
db.conn() |
| 152 |
.query_row( |
| 153 |
"SELECT bpm, duration, lufs, spectral_centroid, spectral_flatness, |
| 154 |
spectral_rolloff, zero_crossing_rate, onset_strength, |
| 155 |
spectral_bandwidth, centroid_variance, crest_factor, attack_time |
| 156 |
FROM audio_analysis WHERE hash = ?1", |
| 157 |
[hash], |
| 158 |
|row| { |
| 159 |
Ok(FeatureVector { |
| 160 |
bpm: row.get(0)?, |
| 161 |
duration: row.get(1)?, |
| 162 |
lufs: row.get(2)?, |
| 163 |
spectral_centroid: row.get(3)?, |
| 164 |
spectral_flatness: row.get(4)?, |
| 165 |
spectral_rolloff: row.get(5)?, |
| 166 |
zero_crossing_rate: row.get(6)?, |
| 167 |
onset_strength: row.get(7)?, |
| 168 |
spectral_bandwidth: row.get(8)?, |
| 169 |
centroid_variance: row.get(9)?, |
| 170 |
crest_factor: row.get(10)?, |
| 171 |
attack_time: row.get(11)?, |
| 172 |
}) |
| 173 |
}, |
| 174 |
) |
| 175 |
.map_err(|_| CoreError::SampleNotFound(hash.to_string())) |
| 176 |
} |
| 177 |
|
| 178 |
|
| 179 |
|
| 180 |
|
| 181 |
|
| 182 |
|
| 183 |
|
| 184 |
|
| 185 |
#[instrument(skip_all, fields(hash = %hash))] |
| 186 |
pub fn find_similar(db: &Database, hash: &str, limit: usize) -> Result<Vec<SimilarResult>> { |
| 187 |
let ref_features = load_features(db, hash)?; |
| 188 |
|
| 189 |
|
| 190 |
let mut stmt = db.conn().prepare( |
| 191 |
"SELECT hash, bpm, duration, lufs, spectral_centroid, spectral_flatness, |
| 192 |
spectral_rolloff, zero_crossing_rate, onset_strength, |
| 193 |
spectral_bandwidth, centroid_variance, crest_factor, attack_time |
| 194 |
FROM audio_analysis WHERE hash != ?1", |
| 195 |
)?; |
| 196 |
let all: Vec<(String, FeatureVector)> = stmt |
| 197 |
.query_map([hash], |row| { |
| 198 |
Ok(( |
| 199 |
row.get::<_, String>(0)?, |
| 200 |
FeatureVector { |
| 201 |
bpm: row.get(1)?, |
| 202 |
duration: row.get(2)?, |
| 203 |
lufs: row.get(3)?, |
| 204 |
spectral_centroid: row.get(4)?, |
| 205 |
spectral_flatness: row.get(5)?, |
| 206 |
spectral_rolloff: row.get(6)?, |
| 207 |
zero_crossing_rate: row.get(7)?, |
| 208 |
onset_strength: row.get(8)?, |
| 209 |
spectral_bandwidth: row.get(9)?, |
| 210 |
centroid_variance: row.get(10)?, |
| 211 |
crest_factor: row.get(11)?, |
| 212 |
attack_time: row.get(12)?, |
| 213 |
}, |
| 214 |
)) |
| 215 |
})? |
| 216 |
.collect::<std::result::Result<Vec<_>, _>>()?; |
| 217 |
|
| 218 |
if all.is_empty() { |
| 219 |
return Ok(Vec::new()); |
| 220 |
} |
| 221 |
|
| 222 |
|
| 223 |
let ranges = compute_ranges(&ref_features, &all); |
| 224 |
let ref_norm = normalize(&ref_features, &ranges); |
| 225 |
let weights = FeatureWeights::default(); |
| 226 |
|
| 227 |
let mut results: Vec<SimilarResult> = all |
| 228 |
.iter() |
| 229 |
.map(|(h, fv)| { |
| 230 |
let fv_norm = normalize(fv, &ranges); |
| 231 |
SimilarResult { |
| 232 |
hash: h.clone(), |
| 233 |
distance: feature_distance(&ref_norm, &fv_norm, &weights), |
| 234 |
} |
| 235 |
}) |
| 236 |
.collect(); |
| 237 |
|
| 238 |
results.sort_by(|a, b| a.distance.total_cmp(&b.distance)); |
| 239 |
results.truncate(limit); |
| 240 |
Ok(results) |
| 241 |
} |
| 242 |
|
| 243 |
|
| 244 |
fn compute_ranges(reference: &FeatureVector, others: &[(String, FeatureVector)]) -> NormRanges { |
| 245 |
let mut ranges = NormRanges::default(); |
| 246 |
|
| 247 |
|
| 248 |
macro_rules! update_range { |
| 249 |
($field:ident, $range_field:ident) => { |
| 250 |
let mut min = f64::MAX; |
| 251 |
let mut max = f64::MIN; |
| 252 |
if let Some(v) = reference.$field { |
| 253 |
min = min.min(v); |
| 254 |
max = max.max(v); |
| 255 |
} |
| 256 |
for (_, fv) in others { |
| 257 |
if let Some(v) = fv.$field { |
| 258 |
min = min.min(v); |
| 259 |
max = max.max(v); |
| 260 |
} |
| 261 |
} |
| 262 |
if min < f64::MAX { |
| 263 |
ranges.$range_field = (min, max); |
| 264 |
} |
| 265 |
}; |
| 266 |
} |
| 267 |
|
| 268 |
update_range!(bpm, bpm); |
| 269 |
update_range!(duration, duration); |
| 270 |
update_range!(lufs, lufs); |
| 271 |
update_range!(spectral_centroid, spectral_centroid); |
| 272 |
update_range!(spectral_flatness, spectral_flatness); |
| 273 |
update_range!(spectral_rolloff, spectral_rolloff); |
| 274 |
update_range!(zero_crossing_rate, zero_crossing_rate); |
| 275 |
update_range!(onset_strength, onset_strength); |
| 276 |
update_range!(spectral_bandwidth, spectral_bandwidth); |
| 277 |
update_range!(centroid_variance, centroid_variance); |
| 278 |
update_range!(crest_factor, crest_factor); |
| 279 |
update_range!(attack_time, attack_time); |
| 280 |
|
| 281 |
ranges |
| 282 |
} |
| 283 |
|
| 284 |
|
| 285 |
|
| 286 |
|
| 287 |
struct SimilarityEntry { |
| 288 |
hash: String, |
| 289 |
features: FeatureVector, |
| 290 |
} |
| 291 |
|
| 292 |
|
| 293 |
fn entry_distance(a: &SimilarityEntry, b: &SimilarityEntry) -> f64 { |
| 294 |
feature_distance(&a.features, &b.features, &DEFAULT_WEIGHTS) |
| 295 |
} |
| 296 |
|
| 297 |
|
| 298 |
const DEFAULT_WEIGHTS: FeatureWeights = FeatureWeights { |
| 299 |
bpm: 1.0, |
| 300 |
duration: 1.0, |
| 301 |
lufs: 1.0, |
| 302 |
spectral_centroid: 1.0, |
| 303 |
spectral_flatness: 1.0, |
| 304 |
spectral_rolloff: 1.0, |
| 305 |
zero_crossing_rate: 1.0, |
| 306 |
onset_strength: 1.0, |
| 307 |
spectral_bandwidth: 1.0, |
| 308 |
centroid_variance: 1.0, |
| 309 |
crest_factor: 1.0, |
| 310 |
attack_time: 1.0, |
| 311 |
}; |
| 312 |
|
| 313 |
|
| 314 |
|
| 315 |
|
| 316 |
|
| 317 |
|
| 318 |
|
| 319 |
|
| 320 |
|
| 321 |
|
| 322 |
|
| 323 |
pub struct SimilarityIndex { |
| 324 |
tree: VpTree<SimilarityEntry>, |
| 325 |
ranges: NormRanges, |
| 326 |
} |
| 327 |
|
| 328 |
impl SimilarityIndex { |
| 329 |
|
| 330 |
#[instrument(skip_all)] |
| 331 |
|
| 332 |
pub fn load_data(db: &Database) -> Result<Vec<(String, FeatureVector)>> { |
| 333 |
let mut stmt = db.conn().prepare( |
| 334 |
"SELECT hash, bpm, duration, lufs, spectral_centroid, spectral_flatness, |
| 335 |
spectral_rolloff, zero_crossing_rate, onset_strength, |
| 336 |
spectral_bandwidth, centroid_variance, crest_factor, attack_time |
| 337 |
FROM audio_analysis", |
| 338 |
)?; |
| 339 |
let all: Vec<(String, FeatureVector)> = stmt |
| 340 |
.query_map([], |row| { |
| 341 |
Ok(( |
| 342 |
row.get::<_, String>(0)?, |
| 343 |
FeatureVector { |
| 344 |
bpm: row.get(1)?, |
| 345 |
duration: row.get(2)?, |
| 346 |
lufs: row.get(3)?, |
| 347 |
spectral_centroid: row.get(4)?, |
| 348 |
spectral_flatness: row.get(5)?, |
| 349 |
spectral_rolloff: row.get(6)?, |
| 350 |
zero_crossing_rate: row.get(7)?, |
| 351 |
onset_strength: row.get(8)?, |
| 352 |
spectral_bandwidth: row.get(9)?, |
| 353 |
centroid_variance: row.get(10)?, |
| 354 |
crest_factor: row.get(11)?, |
| 355 |
attack_time: row.get(12)?, |
| 356 |
}, |
| 357 |
)) |
| 358 |
})? |
| 359 |
.collect::<std::result::Result<Vec<_>, _>>()?; |
| 360 |
Ok(all) |
| 361 |
} |
| 362 |
|
| 363 |
|
| 364 |
pub fn build_from_data(all: Vec<(String, FeatureVector)>) -> Self { |
| 365 |
if all.is_empty() { |
| 366 |
return Self { |
| 367 |
tree: VpTree::build(vec![], entry_distance), |
| 368 |
ranges: NormRanges::default(), |
| 369 |
}; |
| 370 |
} |
| 371 |
|
| 372 |
let ranges = compute_ranges_all(&all); |
| 373 |
|
| 374 |
let entries: Vec<SimilarityEntry> = all |
| 375 |
.into_iter() |
| 376 |
.map(|(hash, fv)| SimilarityEntry { |
| 377 |
hash, |
| 378 |
features: normalize(&fv, &ranges), |
| 379 |
}) |
| 380 |
.collect(); |
| 381 |
|
| 382 |
let tree = VpTree::build(entries, entry_distance); |
| 383 |
|
| 384 |
Self { tree, ranges } |
| 385 |
} |
| 386 |
|
| 387 |
pub fn build(db: &Database) -> Result<Self> { |
| 388 |
let all = Self::load_data(db)?; |
| 389 |
Ok(Self::build_from_data(all)) |
| 390 |
} |
| 391 |
|
| 392 |
|
| 393 |
pub fn len(&self) -> usize { |
| 394 |
self.tree.len() |
| 395 |
} |
| 396 |
|
| 397 |
|
| 398 |
pub fn is_empty(&self) -> bool { |
| 399 |
self.tree.is_empty() |
| 400 |
} |
| 401 |
|
| 402 |
|
| 403 |
#[instrument(skip_all)] |
| 404 |
pub fn find_similar( |
| 405 |
&self, |
| 406 |
hash: &str, |
| 407 |
features: &FeatureVector, |
| 408 |
limit: usize, |
| 409 |
) -> Vec<SimilarResult> { |
| 410 |
let query = SimilarityEntry { |
| 411 |
hash: hash.to_string(), |
| 412 |
features: normalize(features, &self.ranges), |
| 413 |
}; |
| 414 |
|
| 415 |
|
| 416 |
let candidates = self.tree.find_nearest(&query, limit + 1, entry_distance); |
| 417 |
|
| 418 |
candidates |
| 419 |
.into_iter() |
| 420 |
.filter(|c| self.tree.get(c.index).hash != hash) |
| 421 |
.take(limit) |
| 422 |
.map(|c| SimilarResult { |
| 423 |
hash: self.tree.get(c.index).hash.clone(), |
| 424 |
distance: c.distance, |
| 425 |
}) |
| 426 |
.collect() |
| 427 |
} |
| 428 |
} |
| 429 |
|
| 430 |
|
| 431 |
fn compute_ranges_all(samples: &[(String, FeatureVector)]) -> NormRanges { |
| 432 |
let mut ranges = NormRanges::default(); |
| 433 |
|
| 434 |
macro_rules! update_range { |
| 435 |
($field:ident, $range_field:ident) => { |
| 436 |
let mut min = f64::MAX; |
| 437 |
let mut max = f64::MIN; |
| 438 |
for (_, fv) in samples { |
| 439 |
if let Some(v) = fv.$field { |
| 440 |
min = min.min(v); |
| 441 |
max = max.max(v); |
| 442 |
} |
| 443 |
} |
| 444 |
if min < f64::MAX { |
| 445 |
ranges.$range_field = (min, max); |
| 446 |
} |
| 447 |
}; |
| 448 |
} |
| 449 |
|
| 450 |
update_range!(bpm, bpm); |
| 451 |
update_range!(duration, duration); |
| 452 |
update_range!(lufs, lufs); |
| 453 |
update_range!(spectral_centroid, spectral_centroid); |
| 454 |
update_range!(spectral_flatness, spectral_flatness); |
| 455 |
update_range!(spectral_rolloff, spectral_rolloff); |
| 456 |
update_range!(zero_crossing_rate, zero_crossing_rate); |
| 457 |
update_range!(onset_strength, onset_strength); |
| 458 |
update_range!(spectral_bandwidth, spectral_bandwidth); |
| 459 |
update_range!(centroid_variance, centroid_variance); |
| 460 |
update_range!(crest_factor, crest_factor); |
| 461 |
update_range!(attack_time, attack_time); |
| 462 |
|
| 463 |
ranges |
| 464 |
} |
| 465 |
|
| 466 |
#[cfg(test)]
mod tests {
    use super::*;
    use crate::analysis::{self, AnalysisResult};
    use crate::test_helpers::insert_fake_sample;

    /// Inserts a fake sample plus an analysis row for `hash`.
    ///
    /// Only `bpm` and `duration` vary between calls; every other analysed
    /// feature is a fixed constant, so samples differ in those two
    /// dimensions only.
    fn insert_with_features(db: &Database, hash: &str, bpm: f64, duration: f64) {
        insert_fake_sample(db, hash);
        let result = AnalysisResult {
            hash: hash.to_string(),
            duration,
            sample_rate: 44100,
            channels: 1,
            peak_db: None,
            rms_db: None,
            lufs: Some(-14.0),
            bpm: Some(bpm),
            musical_key: None,
            is_loop: None,
            spectral_centroid: Some(1000.0),
            spectral_flatness: Some(0.5),
            spectral_rolloff: Some(5000.0),
            zero_crossing_rate: Some(0.1),
            onset_strength: Some(20.0),
            classification: None,
            fingerprint: None,
            spectral_bandwidth: Some(2000.0),
            centroid_variance: Some(50000.0),
            crest_factor: Some(3.0),
            attack_time: Some(0.01),
            classification_confidence: None,
        };
        analysis::save_analysis(db, &result).unwrap();
    }

    /// Opens an in-memory database and inserts one sample per
    /// `(hash, bpm, duration)` triple, in the given order.
    fn db_with_samples(samples: &[(&str, f64, f64)]) -> Database {
        let db = Database::open_in_memory().unwrap();
        for &(hash, bpm, duration) in samples {
            insert_with_features(&db, hash, bpm, duration);
        }
        db
    }

    #[test]
    fn normalize_values() {
        let raw = FeatureVector {
            bpm: Some(120.0),
            duration: Some(2.0),
            ..Default::default()
        };
        let ranges = NormRanges {
            bpm: (100.0, 200.0),
            duration: (1.0, 3.0),
            ..Default::default()
        };

        let normed = normalize(&raw, &ranges);

        assert!((normed.bpm.unwrap() - 0.2).abs() < 1e-10);
        assert!((normed.duration.unwrap() - 0.5).abs() < 1e-10);
    }

    #[test]
    fn distance_zero_for_identical() {
        let half = Some(0.5);
        let fv = FeatureVector {
            bpm: half,
            duration: half,
            lufs: half,
            spectral_centroid: half,
            spectral_flatness: half,
            spectral_rolloff: half,
            zero_crossing_rate: half,
            onset_strength: half,
            spectral_bandwidth: half,
            centroid_variance: half,
            crest_factor: half,
            attack_time: half,
        };

        let d = feature_distance(&fv, &fv, &FeatureWeights::default());

        assert!((d - 0.0).abs() < f64::EPSILON);
    }

    #[test]
    fn distance_symmetric() {
        let a = FeatureVector {
            bpm: Some(0.0),
            duration: Some(1.0),
            ..Default::default()
        };
        let b = FeatureVector {
            bpm: Some(1.0),
            duration: Some(0.0),
            ..Default::default()
        };
        let w = FeatureWeights::default();

        let forward = feature_distance(&a, &b, &w);
        let backward = feature_distance(&b, &a, &w);

        assert!((forward - backward).abs() < f64::EPSILON);
    }

    #[test]
    fn ranking_correctness() {
        let db = db_with_samples(&[("ref", 120.0, 1.0), ("close", 122.0, 1.1), ("far", 200.0, 10.0)]);

        let results = find_similar(&db, "ref", 10).unwrap();

        assert_eq!(results.len(), 2);
        assert_eq!(results[0].hash, "close");
        assert_eq!(results[1].hash, "far");
        assert!(results[0].distance < results[1].distance);
    }

    #[test]
    fn limit_respected() {
        let db = db_with_samples(&[
            ("ref", 120.0, 1.0),
            ("a", 121.0, 1.0),
            ("b", 122.0, 1.0),
            ("c", 123.0, 1.0),
        ]);

        let results = find_similar(&db, "ref", 2).unwrap();

        assert_eq!(results.len(), 2);
    }

    #[test]
    fn missing_hash_errors() {
        let db = Database::open_in_memory().unwrap();

        let result = find_similar(&db, "nonexistent", 10);

        assert!(result.is_err());
    }

    #[test]
    fn index_build_empty() {
        let db = Database::open_in_memory().unwrap();

        let idx = SimilarityIndex::build(&db).unwrap();

        assert!(idx.is_empty());
        assert_eq!(idx.len(), 0);
    }

    #[test]
    fn index_ranking_matches_linear() {
        let db = db_with_samples(&[("ref", 120.0, 1.0), ("close", 122.0, 1.1), ("far", 200.0, 10.0)]);

        let linear = find_similar(&db, "ref", 10).unwrap();
        let idx = SimilarityIndex::build(&db).unwrap();
        let ref_features = load_features(&db, "ref").unwrap();
        let indexed = idx.find_similar("ref", &ref_features, 10);

        // Both strategies must agree on which samples are returned and in which order.
        assert_eq!(linear.len(), indexed.len());
        for (l, i) in linear.iter().zip(indexed.iter()) {
            assert_eq!(l.hash, i.hash, "Ranking order differs");
        }
    }

    #[test]
    fn index_limit_respected() {
        let db = db_with_samples(&[
            ("ref", 120.0, 1.0),
            ("a", 121.0, 1.0),
            ("b", 122.0, 1.0),
            ("c", 123.0, 1.0),
        ]);

        let idx = SimilarityIndex::build(&db).unwrap();
        let ref_features = load_features(&db, "ref").unwrap();
        let results = idx.find_similar("ref", &ref_features, 2);

        assert_eq!(results.len(), 2);
    }

    #[test]
    fn index_excludes_self() {
        let db = db_with_samples(&[("only", 120.0, 1.0)]);

        let idx = SimilarityIndex::build(&db).unwrap();
        let features = load_features(&db, "only").unwrap();
        let results = idx.find_similar("only", &features, 10);

        assert!(results.is_empty());
    }

    #[test]
    fn index_sorted_by_distance() {
        let db = db_with_samples(&[
            ("ref", 120.0, 1.0),
            ("a", 125.0, 2.0),
            ("b", 130.0, 3.0),
            ("c", 140.0, 5.0),
            ("d", 200.0, 10.0),
        ]);

        let idx = SimilarityIndex::build(&db).unwrap();
        let ref_features = load_features(&db, "ref").unwrap();
        let results = idx.find_similar("ref", &ref_features, 10);

        for pair in results.windows(2) {
            assert!(
                pair[0].distance <= pair[1].distance,
                "Results not sorted: {} > {}",
                pair[0].distance,
                pair[1].distance
            );
        }
    }
}
| 658 |
|