max / audiofiles
4 files changed,
+84 insertions,
-19 deletions
| @@ -539,13 +539,20 @@ impl Backend for DirectBackend { | |||
| 539 | 539 | hash: &str, | |
| 540 | 540 | limit: usize, | |
| 541 | 541 | ) -> BackendResult<Vec<similarity::SimilarResult>> { | |
| 542 | - | let db = self.db.lock(); | |
| 543 | 542 | // Build VP-tree index lazily on first query. | |
| 543 | + | // Load data under DB lock, release lock, then build tree (CPU-intensive). | |
| 544 | 544 | let mut idx = self.similarity_index.lock(); | |
| 545 | 545 | if idx.is_none() { | |
| 546 | - | *idx = Some(similarity::SimilarityIndex::build(&db)?); | |
| 546 | + | let data = { | |
| 547 | + | let db = self.db.lock(); | |
| 548 | + | similarity::SimilarityIndex::load_data(&db)? | |
| 549 | + | }; | |
| 550 | + | *idx = Some(similarity::SimilarityIndex::build_from_data(data)); | |
| 547 | 551 | } | |
| 548 | - | let features = similarity::load_features(&db, hash)?; | |
| 552 | + | let features = { | |
| 553 | + | let db = self.db.lock(); | |
| 554 | + | similarity::load_features(&db, hash)? | |
| 555 | + | }; | |
| 549 | 556 | Ok(idx.as_ref().unwrap().find_similar(hash, &features, limit)) | |
| 550 | 557 | } | |
| 551 | 558 | ||
| @@ -554,13 +561,20 @@ impl Backend for DirectBackend { | |||
| 554 | 561 | hash: &str, | |
| 555 | 562 | limit: usize, | |
| 556 | 563 | ) -> BackendResult<Vec<fingerprint::DuplicateResult>> { | |
| 557 | - | let db = self.db.lock(); | |
| 558 | 564 | // Build VP-tree index lazily on first query. | |
| 565 | + | // Load data under DB lock, release lock, then build tree (CPU-intensive). | |
| 559 | 566 | let mut idx = self.fingerprint_index.lock(); | |
| 560 | 567 | if idx.is_none() { | |
| 561 | - | *idx = Some(fingerprint::FingerprintIndex::build(&db)?); | |
| 568 | + | let entries = { | |
| 569 | + | let db = self.db.lock(); | |
| 570 | + | fingerprint::FingerprintIndex::load_data(&db)? | |
| 571 | + | }; | |
| 572 | + | *idx = Some(fingerprint::FingerprintIndex::build_from_data(entries)); | |
| 562 | 573 | } | |
| 563 | - | let reference = fingerprint::load_fingerprint(&db, hash)?; | |
| 574 | + | let reference = { | |
| 575 | + | let db = self.db.lock(); | |
| 576 | + | fingerprint::load_fingerprint(&db, hash)? | |
| 577 | + | }; | |
| 564 | 578 | Ok(idx | |
| 565 | 579 | .as_ref() | |
| 566 | 580 | .unwrap() |
| @@ -202,9 +202,42 @@ fn map_export_item(row: &rusqlite::Row) -> rusqlite::Result<Option<ExportItem>> | |||
| 202 | 202 | ||
| 203 | 203 | /// Populate the `tags` field on each export item by querying the database. | |
| 204 | 204 | pub fn enrich_with_tags(db: &Database, items: &mut [ExportItem]) { | |
| 205 | + | if items.is_empty() { | |
| 206 | + | return; | |
| 207 | + | } | |
| 208 | + | ||
| 209 | + | // Batch query: fetch all tags for all hashes in one statement. | |
| 210 | + | let hashes: Vec<String> = items.iter().map(|i| i.hash.to_string()).collect(); | |
| 211 | + | let mut tag_map = std::collections::HashMap::<String, Vec<String>>::new(); | |
| 212 | + | ||
| 213 | + | // SQLite variable limit is 999 in older builds; chunk to stay safe. | |
| 214 | + | for chunk in hashes.chunks(500) { | |
| 215 | + | let placeholders: String = chunk.iter().enumerate() | |
| 216 | + | .map(|(i, _)| format!("?{}", i + 1)) | |
| 217 | + | .collect::<Vec<_>>() | |
| 218 | + | .join(", "); | |
| 219 | + | let sql = format!( | |
| 220 | + | "SELECT sample_hash, tag FROM tags WHERE sample_hash IN ({}) ORDER BY tag", | |
| 221 | + | placeholders, | |
| 222 | + | ); | |
| 223 | + | if let Ok(mut stmt) = db.conn().prepare(&sql) { | |
| 224 | + | let params: Vec<&dyn rusqlite::types::ToSql> = chunk | |
| 225 | + | .iter() | |
| 226 | + | .map(|h| h as &dyn rusqlite::types::ToSql) | |
| 227 | + | .collect(); | |
| 228 | + | if let Ok(rows) = stmt.query_map(params.as_slice(), |row| { | |
| 229 | + | Ok((row.get::<_, String>(0)?, row.get::<_, String>(1)?)) | |
| 230 | + | }) { | |
| 231 | + | for row in rows.flatten() { | |
| 232 | + | tag_map.entry(row.0).or_default().push(row.1); | |
| 233 | + | } | |
| 234 | + | } | |
| 235 | + | } | |
| 236 | + | } | |
| 237 | + | ||
| 205 | 238 | for item in items.iter_mut() { | |
| 206 | - | if let Ok(t) = tags::get_sample_tags(db, &item.hash) { | |
| 207 | - | item.tags = t; | |
| 239 | + | if let Some(tags) = tag_map.remove(item.hash.as_str()) { | |
| 240 | + | item.tags = tags; | |
| 208 | 241 | } | |
| 209 | 242 | } | |
| 210 | 243 | } |
| @@ -249,10 +249,10 @@ const SUMMARY_BINS: usize = 16; | |||
| 249 | 249 | const SUMMARY_SEARCH_RADIUS: f64 = 1.0; | |
| 250 | 250 | ||
| 251 | 251 | /// Entry stored in the VP-tree: hash + full envelope + compact summary features. | |
| 252 | - | struct FingerprintEntry { | |
| 253 | - | hash: String, | |
| 254 | - | envelope: Vec<u8>, | |
| 255 | - | features: [f64; SUMMARY_BINS], | |
| 252 | + | pub struct FingerprintEntry { | |
| 253 | + | pub(crate) hash: String, | |
| 254 | + | pub(crate) envelope: Vec<u8>, | |
| 255 | + | pub(crate) features: [f64; SUMMARY_BINS], | |
| 256 | 256 | } | |
| 257 | 257 | ||
| 258 | 258 | /// Compute compact features from an envelope: 16 mean-amplitude bins in [0, 1]. | |
| @@ -302,7 +302,8 @@ pub struct FingerprintIndex { | |||
| 302 | 302 | impl FingerprintIndex { | |
| 303 | 303 | /// Build an index from all fingerprints in the database. | |
| 304 | 304 | #[instrument(skip_all)] | |
| 305 | - | pub fn build(db: &Database) -> Result<Self> { | |
| 305 | + | /// Load raw fingerprint data from the database (fast, just I/O). | |
| 306 | + | pub fn load_data(db: &Database) -> Result<Vec<FingerprintEntry>> { | |
| 306 | 307 | let mut stmt = db.conn().prepare( | |
| 307 | 308 | "SELECT hash, envelope FROM fingerprints", | |
| 308 | 309 | )?; | |
| @@ -318,10 +319,18 @@ impl FingerprintIndex { | |||
| 318 | 319 | }) | |
| 319 | 320 | })? | |
| 320 | 321 | .collect::<std::result::Result<Vec<_>, _>>()?; | |
| 322 | + | Ok(entries) | |
| 323 | + | } | |
| 321 | 324 | ||
| 325 | + | /// Build the index from pre-loaded data (CPU-intensive, no DB needed). | |
| 326 | + | pub fn build_from_data(entries: Vec<FingerprintEntry>) -> Self { | |
| 322 | 327 | let tree = VpTree::build(entries, summary_distance); | |
| 328 | + | Self { tree } | |
| 329 | + | } | |
| 323 | 330 | ||
| 324 | - | Ok(Self { tree }) | |
| 331 | + | pub fn build(db: &Database) -> Result<Self> { | |
| 332 | + | let entries = Self::load_data(db)?; | |
| 333 | + | Ok(Self::build_from_data(entries)) | |
| 325 | 334 | } | |
| 326 | 335 | ||
| 327 | 336 | /// Number of fingerprints in the index. |
| @@ -328,7 +328,8 @@ pub struct SimilarityIndex { | |||
| 328 | 328 | impl SimilarityIndex { | |
| 329 | 329 | /// Build an index from all analysed samples in the database. | |
| 330 | 330 | #[instrument(skip_all)] | |
| 331 | - | pub fn build(db: &Database) -> Result<Self> { | |
| 331 | + | /// Load raw feature data from the database (fast, just I/O). | |
| 332 | + | pub fn load_data(db: &Database) -> Result<Vec<(String, FeatureVector)>> { | |
| 332 | 333 | let mut stmt = db.conn().prepare( | |
| 333 | 334 | "SELECT hash, bpm, duration, lufs, spectral_centroid, spectral_flatness, | |
| 334 | 335 | spectral_rolloff, zero_crossing_rate, onset_strength, | |
| @@ -356,15 +357,18 @@ impl SimilarityIndex { | |||
| 356 | 357 | )) | |
| 357 | 358 | })? | |
| 358 | 359 | .collect::<std::result::Result<Vec<_>, _>>()?; | |
| 360 | + | Ok(all) | |
| 361 | + | } | |
| 359 | 362 | ||
| 363 | + | /// Build the index from pre-loaded data (CPU-intensive, no DB needed). | |
| 364 | + | pub fn build_from_data(all: Vec<(String, FeatureVector)>) -> Self { | |
| 360 | 365 | if all.is_empty() { | |
| 361 | - | return Ok(Self { | |
| 366 | + | return Self { | |
| 362 | 367 | tree: VpTree::build(vec![], entry_distance), | |
| 363 | 368 | ranges: NormRanges::default(), | |
| 364 | - | }); | |
| 369 | + | }; | |
| 365 | 370 | } | |
| 366 | 371 | ||
| 367 | - | // Compute ranges from the full dataset (fixed, not per-query). | |
| 368 | 372 | let ranges = compute_ranges_all(&all); | |
| 369 | 373 | ||
| 370 | 374 | let entries: Vec<SimilarityEntry> = all | |
| @@ -377,7 +381,12 @@ impl SimilarityIndex { | |||
| 377 | 381 | ||
| 378 | 382 | let tree = VpTree::build(entries, entry_distance); | |
| 379 | 383 | ||
| 380 | - | Ok(Self { tree, ranges }) | |
| 384 | + | Self { tree, ranges } | |
| 385 | + | } | |
| 386 | + | ||
| 387 | + | pub fn build(db: &Database) -> Result<Self> { | |
| 388 | + | let all = Self::load_data(db)?; | |
| 389 | + | Ok(Self::build_from_data(all)) | |
| 381 | 390 | } | |
| 382 | 391 | ||
| 383 | 392 | /// Number of samples in the index. |