Skip to main content

max / tagtree

57.9 KB · 1871 lines History Blame Raw
1 //! Hierarchical dot-notation tag standard.
2 //!
3 //! Tags are lowercase strings with dot-separated segments representing a hierarchy:
4 //! `genre.electronic.house`, `work.meeting.standup`, `news.tech.rust`.
5 //!
6 //! # Format (invariants enforced by all configs)
7 //!
8 //! - Segments: lowercase alphanumeric and hyphens (`[a-z0-9-]`)
9 //! - Separator: `.` (dot)
10 //! - No empty segments, no leading/trailing dots, no consecutive dots
11 //!
12 //! # Per-app configuration
13 //!
14 //! Each app provides a [`TagConfig`] that controls:
15 //! - **`max_depth`** — maximum number of segments (e.g., 5 for AF, 3 for GO)
16 //! - **`max_length`** — maximum character length of the entire tag string
17 //! - **`semantic_depth`** — how many leading segments carry dispatch meaning
18 //! (e.g., 1 means the first segment is a namespace like `genre`, `mood`)
19 //!
20 //! # SQL
21 //!
22 //! Hierarchy queries use `LIKE` prefix matching, which works identically on SQLite
23 //! and PostgreSQL. Use [`like_descendant_pattern`] to build the pattern and
24 //! [`escape_like`] to sanitize user input embedded in patterns.
25
26 use std::fmt;
27
28 /// Separator between tag levels.
29 pub const SEPARATOR: char = '.';
30
31 // ---------------------------------------------------------------------------
32 // Configuration
33 // ---------------------------------------------------------------------------
34
35 /// Per-app tag rules. Each consumer defines one of these as a constant.
36 ///
37 /// ```
38 /// use tagtree::TagConfig;
39 ///
40 /// // audiofiles: deep hierarchy, namespace-driven
41 /// const AF_TAGS: TagConfig = TagConfig { max_depth: 5, max_length: 100, semantic_depth: 1 };
42 ///
43 /// // goingson: shallow tags, no required prefix
44 /// const GO_TAGS: TagConfig = TagConfig { max_depth: 3, max_length: 60, semantic_depth: 0 };
45 /// ```
46 #[derive(Debug, Clone, Copy, PartialEq, Eq)]
47 pub struct TagConfig {
48 /// Maximum number of segments allowed (e.g., 5 means `a.b.c.d.e` is valid).
49 pub max_depth: usize,
50 /// Maximum character length of the entire tag string.
51 pub max_length: usize,
52 /// Number of leading segments that carry app-specific dispatch meaning.
53 ///
54 /// - `0` — no semantic prefix; all segments are free-form hierarchy
55 /// - `1` — first segment is a namespace (e.g., `genre`, `mood`, `source`)
56 /// - `2` — first two segments are structured (e.g., `type.subtype`)
57 ///
58 /// When > 0, [`validate_with`] enforces that the tag has at least this many
59 /// segments, and [`semantic_prefix`] / [`free_suffix`] can split the tag.
60 pub semantic_depth: usize,
61 }
62
63 /// Error returned when a tag string is invalid.
64 #[derive(Debug, Clone, PartialEq, Eq)]
65 pub struct TagError(pub String);
66
67 impl fmt::Display for TagError {
68 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
69 write!(f, "invalid tag: {}", self.0)
70 }
71 }
72
73 impl std::error::Error for TagError {}
74
75 // ---------------------------------------------------------------------------
76 // Validation
77 // ---------------------------------------------------------------------------
78
79 /// Validate a tag against an app-specific [`TagConfig`].
80 ///
81 /// Checks format invariants (charset, no empty segments) plus the config's
82 /// depth, length, and semantic-depth constraints.
83 ///
84 /// ```
85 /// use tagtree::{TagConfig, validate_with};
86 ///
87 /// const CFG: TagConfig = TagConfig { max_depth: 3, max_length: 50, semantic_depth: 1 };
88 ///
89 /// assert!(validate_with("genre.rock", &CFG).is_ok());
90 /// assert!(validate_with("genre.rock.punk", &CFG).is_ok()); // depth 3
91 /// assert!(validate_with("genre.rock.punk.oi", &CFG).is_err()); // depth 4 > max 3
92 /// assert!(validate_with("rock", &CFG).is_err()); // semantic_depth=1 requires 2+ segments
93 /// ```
94 pub fn validate_with(tag: &str, config: &TagConfig) -> Result<(), TagError> {
95 if tag.is_empty() {
96 return Err(TagError("tag cannot be empty".into()));
97 }
98 if tag.len() > config.max_length {
99 return Err(TagError(format!(
100 "tag exceeds {} characters",
101 config.max_length
102 )));
103 }
104 if tag.starts_with(SEPARATOR) || tag.ends_with(SEPARATOR) {
105 return Err(TagError("tag cannot start or end with a dot".into()));
106 }
107 if tag.contains("..") {
108 return Err(TagError("tag cannot contain consecutive dots".into()));
109 }
110 for ch in tag.chars() {
111 if !(ch.is_ascii_lowercase() || ch.is_ascii_digit() || ch == '-' || ch == SEPARATOR) {
112 return Err(TagError(format!(
113 "invalid character '{ch}': only lowercase alphanumeric, hyphens, and dots allowed"
114 )));
115 }
116 }
117 let d = depth(tag);
118 if d > config.max_depth {
119 return Err(TagError(format!(
120 "tag has {} levels, maximum is {}",
121 d, config.max_depth
122 )));
123 }
124 if config.semantic_depth > 0 && d < config.semantic_depth + 1 {
125 return Err(TagError(format!(
126 "tag must have at least {} segments (semantic prefix requires {} + at least 1 value)",
127 config.semantic_depth + 1,
128 config.semantic_depth
129 )));
130 }
131 Ok(())
132 }
133
134 /// Validate a tag with default limits (max_depth=5, max_length=100, no semantic prefix).
135 ///
136 /// Convenience wrapper for quick validation without a config.
137 ///
138 /// ```
139 /// # use tagtree::validate;
140 /// assert!(validate("genre.electronic.house").is_ok());
141 /// assert!(validate("a.b.c.d.e.f").is_err()); // 6 levels > default max 5
142 /// ```
143 pub fn validate(tag: &str) -> Result<(), TagError> {
144 const DEFAULT: TagConfig = TagConfig {
145 max_depth: 5,
146 max_length: 100,
147 semantic_depth: 0,
148 };
149 validate_with(tag, &DEFAULT)
150 }
151
152 // ---------------------------------------------------------------------------
153 // Hierarchy parsing
154 // ---------------------------------------------------------------------------
155
156 /// Get the parent path of a tag, or `None` if it's a root-level tag.
157 ///
158 /// ```
159 /// # use tagtree::parent;
160 /// assert_eq!(parent("genre.electronic.house"), Some("genre.electronic"));
161 /// assert_eq!(parent("genre"), None);
162 /// ```
163 pub fn parent(tag: &str) -> Option<&str> {
164 tag.rfind(SEPARATOR).map(|pos| &tag[..pos])
165 }
166
167 /// Get the leaf (last segment) of a tag.
168 ///
169 /// ```
170 /// # use tagtree::leaf;
171 /// assert_eq!(leaf("genre.electronic.house"), "house");
172 /// assert_eq!(leaf("genre"), "genre");
173 /// ```
174 pub fn leaf(tag: &str) -> &str {
175 match tag.rfind(SEPARATOR) {
176 Some(pos) => &tag[pos + 1..],
177 None => tag,
178 }
179 }
180
181 /// Count the depth (number of segments) in a tag.
182 ///
183 /// ```
184 /// # use tagtree::depth;
185 /// assert_eq!(depth("genre"), 1);
186 /// assert_eq!(depth("genre.electronic"), 2);
187 /// assert_eq!(depth("genre.electronic.house"), 3);
188 /// ```
189 pub fn depth(tag: &str) -> usize {
190 if tag.is_empty() {
191 return 0;
192 }
193 tag.chars().filter(|&c| c == SEPARATOR).count() + 1
194 }
195
196 /// Get the Nth segment (0-indexed) from a tag, or `None` if out of bounds.
197 ///
198 /// ```
199 /// # use tagtree::segment;
200 /// assert_eq!(segment("genre.electronic.house", 0), Some("genre"));
201 /// assert_eq!(segment("genre.electronic.house", 1), Some("electronic"));
202 /// assert_eq!(segment("genre.electronic.house", 2), Some("house"));
203 /// assert_eq!(segment("genre.electronic.house", 3), None);
204 /// ```
205 pub fn segment(tag: &str, index: usize) -> Option<&str> {
206 tag.split(SEPARATOR).nth(index)
207 }
208
209 /// Get the path formed by the first `n` segments, or `None` if the tag has fewer.
210 ///
211 /// ```
212 /// # use tagtree::prefix_at_depth;
213 /// assert_eq!(prefix_at_depth("genre.electronic.house", 1), Some("genre"));
214 /// assert_eq!(prefix_at_depth("genre.electronic.house", 2), Some("genre.electronic"));
215 /// assert_eq!(prefix_at_depth("genre.electronic.house", 3), Some("genre.electronic.house"));
216 /// assert_eq!(prefix_at_depth("genre.electronic.house", 4), None);
217 /// ```
218 pub fn prefix_at_depth(tag: &str, n: usize) -> Option<&str> {
219 if n == 0 {
220 return None;
221 }
222 let mut dots_seen = 0;
223 for (i, ch) in tag.bytes().enumerate() {
224 if ch == SEPARATOR as u8 {
225 dots_seen += 1;
226 if dots_seen == n {
227 return Some(&tag[..i]);
228 }
229 }
230 }
231 // If we've seen fewer dots than n-1, the tag doesn't have n segments
232 if dots_seen == n - 1 {
233 Some(tag)
234 } else {
235 None
236 }
237 }
238
239 /// Get all ancestor paths from root to parent (excludes the tag itself).
240 ///
241 /// ```
242 /// # use tagtree::ancestors;
243 /// assert_eq!(ancestors("genre.electronic.house"), vec!["genre", "genre.electronic"]);
244 /// assert!(ancestors("genre").is_empty());
245 /// ```
246 pub fn ancestors(tag: &str) -> Vec<&str> {
247 let mut result = Vec::new();
248 let mut remaining = tag;
249 while let Some(pos) = remaining.rfind(SEPARATOR) {
250 remaining = &remaining[..pos];
251 result.push(remaining);
252 }
253 result.reverse();
254 result
255 }
256
257 /// Check whether `ancestor` is an ancestor of `descendant`.
258 ///
259 /// A tag is NOT considered its own ancestor.
260 ///
261 /// ```
262 /// # use tagtree::is_ancestor_of;
263 /// assert!(is_ancestor_of("genre", "genre.electronic"));
264 /// assert!(is_ancestor_of("genre", "genre.electronic.house"));
265 /// assert!(!is_ancestor_of("genre.electronic", "genre"));
266 /// assert!(!is_ancestor_of("genre", "genre")); // not self
267 /// assert!(!is_ancestor_of("gen", "genre.rock")); // not a segment boundary
268 /// ```
269 pub fn is_ancestor_of(ancestor: &str, descendant: &str) -> bool {
270 descendant.len() > ancestor.len()
271 && descendant.starts_with(ancestor)
272 && descendant.as_bytes()[ancestor.len()] == SEPARATOR as u8
273 }
274
275 /// Find the longest common ancestor of two tags, or `None` if they share no prefix.
276 ///
277 /// ```
278 /// # use tagtree::common_ancestor;
279 /// assert_eq!(common_ancestor("genre.electronic.house", "genre.electronic.techno"), Some("genre.electronic"));
280 /// assert_eq!(common_ancestor("genre.rock", "genre.electronic"), Some("genre"));
281 /// assert_eq!(common_ancestor("genre.rock", "mood.dark"), None);
282 /// assert_eq!(common_ancestor("genre", "genre"), None); // same tag, no ancestor
283 /// ```
284 pub fn common_ancestor<'a>(a: &'a str, b: &str) -> Option<&'a str> {
285 let mut last_sep = None;
286 for (i, (ca, cb)) in a.bytes().zip(b.bytes()).enumerate() {
287 if ca != cb {
288 break;
289 }
290 if ca == SEPARATOR as u8 {
291 last_sep = Some(i);
292 }
293 }
294 // If one is a prefix of the other and ends at a separator boundary,
295 // the shorter one is the common ancestor.
296 let min_len = a.len().min(b.len());
297 if a.len() != b.len()
298 && a.as_bytes()[..min_len] == b.as_bytes()[..min_len]
299 && (min_len == a.len() || min_len == b.len())
300 {
301 let longer = if a.len() > b.len() { a } else { b };
302 if longer.as_bytes()[min_len] == SEPARATOR as u8 {
303 return Some(&a[..min_len]);
304 }
305 }
306 last_sep.map(|pos| &a[..pos])
307 }
308
309 // ---------------------------------------------------------------------------
310 // Semantic prefix helpers
311 // ---------------------------------------------------------------------------
312
313 /// Extract the semantic prefix (first `n` segments) from a tag.
314 ///
315 /// Returns `None` if the tag has fewer than `n` segments.
316 /// This is equivalent to `prefix_at_depth(tag, n)` but named for clarity
317 /// when used with [`TagConfig::semantic_depth`].
318 ///
319 /// ```
320 /// # use tagtree::semantic_prefix;
321 /// // AF uses semantic_depth=1: first segment is the namespace
322 /// assert_eq!(semantic_prefix("genre.electronic.house", 1), Some("genre"));
323 ///
324 /// // A hypothetical app with semantic_depth=2
325 /// assert_eq!(semantic_prefix("type.audio.podcast", 2), Some("type.audio"));
326 ///
327 /// // No semantic prefix
328 /// assert_eq!(semantic_prefix("rock", 0), None);
329 /// ```
330 pub fn semantic_prefix(tag: &str, semantic_depth: usize) -> Option<&str> {
331 if semantic_depth == 0 {
332 return None;
333 }
334 prefix_at_depth(tag, semantic_depth)
335 }
336
337 /// Extract the free-form suffix after the semantic prefix.
338 ///
339 /// Returns `None` if the tag has no segments beyond the semantic prefix.
340 ///
341 /// ```
342 /// # use tagtree::free_suffix;
343 /// assert_eq!(free_suffix("genre.electronic.house", 1), Some("electronic.house"));
344 /// assert_eq!(free_suffix("genre.rock", 1), Some("rock"));
345 /// assert_eq!(free_suffix("genre", 1), None); // no suffix beyond prefix
346 ///
347 /// // semantic_depth=0 means the whole tag is free-form
348 /// assert_eq!(free_suffix("genre.rock", 0), Some("genre.rock"));
349 /// ```
350 pub fn free_suffix(tag: &str, semantic_depth: usize) -> Option<&str> {
351 if semantic_depth == 0 {
352 if tag.is_empty() {
353 return None;
354 }
355 return Some(tag);
356 }
357 let prefix = prefix_at_depth(tag, semantic_depth)?;
358 if prefix.len() == tag.len() {
359 // Tag is exactly the semantic prefix, no suffix
360 None
361 } else {
362 Some(&tag[prefix.len() + 1..]) // skip the dot
363 }
364 }
365
366 // ---------------------------------------------------------------------------
367 // Tree operations on tag sets (in-memory)
368 // ---------------------------------------------------------------------------
369
370 /// List the immediate child segment names under a prefix.
371 ///
372 /// Given a set of tags and a prefix, returns the unique next-level segment names
373 /// (not full paths). Pass an empty string for root-level children.
374 ///
375 /// ```
376 /// # use tagtree::children_at_prefix;
377 /// let tags = ["genre.electronic.house", "genre.electronic.techno", "genre.rock", "mood.dark"];
378 /// assert_eq!(children_at_prefix("", &tags), vec!["genre", "mood"]);
379 /// assert_eq!(children_at_prefix("genre", &tags), vec!["electronic", "rock"]);
380 /// assert_eq!(children_at_prefix("genre.electronic", &tags), vec!["house", "techno"]);
381 /// ```
382 pub fn children_at_prefix(prefix: &str, tags: &[impl AsRef<str>]) -> Vec<String> {
383 let strip_len = if prefix.is_empty() {
384 0
385 } else {
386 prefix.len() + 1 // prefix + dot
387 };
388
389 let mut children: Vec<String> = Vec::new();
390 for tag in tags {
391 let tag = tag.as_ref();
392 let matches = if prefix.is_empty() {
393 true
394 } else {
395 tag.starts_with(prefix)
396 && tag.len() > prefix.len()
397 && tag.as_bytes()[prefix.len()] == SEPARATOR as u8
398 };
399 if !matches || tag.len() <= strip_len {
400 continue;
401 }
402 let suffix = &tag[strip_len..];
403 let child = match suffix.find(SEPARATOR) {
404 Some(pos) => &suffix[..pos],
405 None => suffix,
406 };
407 if !child.is_empty() && !children.iter().any(|c| c == child) {
408 children.push(child.to_string());
409 }
410 }
411 children.sort();
412 children
413 }
414
415 /// Get all tags that are descendants of a prefix (including the prefix itself if present).
416 ///
417 /// ```
418 /// # use tagtree::subtree;
419 /// let tags = ["genre.electronic.house", "genre.electronic.techno", "genre.rock", "mood.dark"];
420 /// let sub = subtree("genre.electronic", &tags);
421 /// assert_eq!(sub, vec!["genre.electronic.house", "genre.electronic.techno"]);
422 /// ```
423 pub fn subtree<'a>(prefix: &str, tags: &'a [impl AsRef<str>]) -> Vec<&'a str> {
424 tags.iter()
425 .map(|t| t.as_ref())
426 .filter(|tag| {
427 *tag == prefix
428 || (tag.starts_with(prefix)
429 && tag.len() > prefix.len()
430 && tag.as_bytes()[prefix.len()] == SEPARATOR as u8)
431 })
432 .collect()
433 }
434
435 // ---------------------------------------------------------------------------
436 // SQL helpers
437 // ---------------------------------------------------------------------------
438
439 /// Escape special characters in a string for use in SQL `LIKE` patterns.
440 ///
441 /// Handles `\`, `%`, and `_`. Use with `ESCAPE '\'` in the SQL query.
442 ///
443 /// ```
444 /// # use tagtree::escape_like;
445 /// assert_eq!(escape_like("100%"), r"100\%");
446 /// assert_eq!(escape_like("a_b"), r"a\_b");
447 /// ```
448 pub fn escape_like(s: &str) -> String {
449 s.replace('\\', "\\\\")
450 .replace('%', "\\%")
451 .replace('_', "\\_")
452 }
453
454 /// Build a `LIKE` pattern that matches all descendants of a prefix (not the prefix itself).
455 ///
456 /// For use in SQL: `WHERE tag = ?1 OR tag LIKE ?2 ESCAPE '\'`
457 /// where `?1` is the prefix itself and `?2` is this pattern.
458 ///
459 /// ```
460 /// # use tagtree::like_descendant_pattern;
461 /// assert_eq!(like_descendant_pattern("genre"), r"genre.%");
462 /// assert_eq!(like_descendant_pattern("a-b"), r"a-b.%");
463 /// ```
464 pub fn like_descendant_pattern(prefix: &str) -> String {
465 format!("{}.%", escape_like(prefix))
466 }
467
468 // ---------------------------------------------------------------------------
469 // Rename / move
470 // ---------------------------------------------------------------------------
471
472 /// Rename a prefix within a tag. Returns `Some(new_tag)` if the tag starts with
473 /// `old_prefix` at a segment boundary, or `None` if it doesn't match.
474 ///
475 /// ```
476 /// # use tagtree::rename_prefix;
477 /// assert_eq!(
478 /// rename_prefix("genre.electronic", "genre.dance", "genre.electronic.house"),
479 /// Some("genre.dance.house".to_string())
480 /// );
481 /// assert_eq!(
482 /// rename_prefix("genre.electronic", "genre.dance", "genre.electronic"),
483 /// Some("genre.dance".to_string())
484 /// );
485 /// assert_eq!(rename_prefix("gen", "xxx", "genre.rock"), None);
486 /// ```
487 pub fn rename_prefix(old_prefix: &str, new_prefix: &str, tag: &str) -> Option<String> {
488 if tag == old_prefix {
489 Some(new_prefix.to_string())
490 } else if is_ancestor_of(old_prefix, tag) {
491 Some(format!("{}{}", new_prefix, &tag[old_prefix.len()..]))
492 } else {
493 None
494 }
495 }
496
497 // ---------------------------------------------------------------------------
498 // Edit distance (private)
499 // ---------------------------------------------------------------------------
500
501 /// Levenshtein distance with early termination. Returns `None` if the distance
502 /// exceeds `max`. Uses single-row DP (O(min(m,n)) space).
503 fn edit_distance(a: &str, b: &str, max: usize) -> Option<usize> {
504 let a = a.as_bytes();
505 let b = b.as_bytes();
506 let (a, b) = if a.len() > b.len() { (b, a) } else { (a, b) };
507 // Short-circuit: length difference alone exceeds threshold
508 if b.len() - a.len() > max {
509 return None;
510 }
511 let mut row: Vec<usize> = (0..=a.len()).collect();
512 for j in 1..=b.len() {
513 let mut prev = row[0];
514 row[0] = j;
515 let mut row_min = row[0];
516 for i in 1..=a.len() {
517 let cost = if a[i - 1] == b[j - 1] { 0 } else { 1 };
518 let val = (row[i] + 1).min(row[i - 1] + 1).min(prev + cost);
519 prev = row[i];
520 row[i] = val;
521 row_min = row_min.min(val);
522 }
523 if row_min > max {
524 return None;
525 }
526 }
527 let d = row[a.len()];
528 if d <= max { Some(d) } else { None }
529 }
530
531 // ---------------------------------------------------------------------------
532 // Bulk tag operations
533 // ---------------------------------------------------------------------------
534
535 /// Rename a prefix across all tags in an index. Returns the number of tags modified.
536 ///
537 /// ```
538 /// # use tagtree::{TagIndex, rename_prefix_bulk};
539 /// let mut idx = TagIndex::new(vec![
540 /// "genre.electronic.house".into(),
541 /// "genre.electronic.techno".into(),
542 /// "genre.rock".into(),
543 /// ]);
544 /// assert_eq!(rename_prefix_bulk("genre.electronic", "genre.dance", &mut idx), 2);
545 /// assert!(idx.contains("genre.dance.house"));
546 /// assert!(idx.contains("genre.dance.techno"));
547 /// assert!(!idx.contains("genre.electronic.house"));
548 /// ```
549 pub fn rename_prefix_bulk(old_prefix: &str, new_prefix: &str, index: &mut TagIndex) -> usize {
550 let mut count = 0;
551 let new_tags: Vec<String> = index
552 .tags
553 .iter()
554 .map(|tag| {
555 if let Some(renamed) = rename_prefix(old_prefix, new_prefix, tag) {
556 count += 1;
557 renamed
558 } else {
559 tag.clone()
560 }
561 })
562 .collect();
563 if count > 0 {
564 index.rebuild(new_tags);
565 }
566 count
567 }
568
569 /// Remove all tags matching a prefix (the prefix itself and all descendants).
570 /// Returns the number of tags removed.
571 ///
572 /// ```
573 /// # use tagtree::{TagIndex, remove_subtree};
574 /// let mut idx = TagIndex::new(vec![
575 /// "genre.electronic.house".into(),
576 /// "genre.electronic.techno".into(),
577 /// "genre.rock".into(),
578 /// ]);
579 /// assert_eq!(remove_subtree("genre.electronic", &mut idx), 2);
580 /// assert_eq!(idx.len(), 1);
581 /// assert!(idx.contains("genre.rock"));
582 /// ```
583 pub fn remove_subtree(prefix: &str, index: &mut TagIndex) -> usize {
584 let before = index.tags.len();
585 let remaining: Vec<String> = index
586 .tags
587 .iter()
588 .filter(|tag| tag.as_str() != prefix && !is_ancestor_of(prefix, tag))
589 .cloned()
590 .collect();
591 let removed = before - remaining.len();
592 if removed > 0 {
593 index.rebuild(remaining);
594 }
595 removed
596 }
597
598 /// Merge one tag into another: all occurrences of `source` become `target`.
599 /// If `target` already exists, `source` is simply removed (deduplication).
600 /// Returns the number of tags affected (0 or 1).
601 ///
602 /// ```
603 /// # use tagtree::{TagIndex, merge_tags};
604 /// let mut idx = TagIndex::new(vec![
605 /// "genre.electronic".into(),
606 /// "genre.dance".into(),
607 /// ]);
608 /// assert_eq!(merge_tags("genre.electronic", "genre.dance", &mut idx), 1);
609 /// assert_eq!(idx.len(), 1);
610 /// assert!(idx.contains("genre.dance"));
611 /// ```
612 pub fn merge_tags(source: &str, target: &str, index: &mut TagIndex) -> usize {
613 if !index.contains(source) {
614 return 0;
615 }
616 index.remove(source);
617 if !index.contains(target) {
618 index.insert(target.to_string());
619 }
620 1
621 }
622
623 /// Apply a batch of prefix-rename operations in order. Returns total tags modified.
624 ///
625 /// ```
626 /// # use tagtree::{TagIndex, batch_rename};
627 /// let mut idx = TagIndex::new(vec![
628 /// "genre.electronic.house".into(),
629 /// "mood.dark".into(),
630 /// ]);
631 /// let ops = [("genre.electronic", "genre.dance"), ("mood", "vibe")];
632 /// let total = batch_rename(&ops, &mut idx);
633 /// assert_eq!(total, 2);
634 /// assert!(idx.contains("genre.dance.house"));
635 /// assert!(idx.contains("vibe.dark"));
636 /// ```
637 pub fn batch_rename(operations: &[(&str, &str)], index: &mut TagIndex) -> usize {
638 let mut total = 0;
639 for &(old, new) in operations {
640 total += rename_prefix_bulk(old, new, index);
641 }
642 total
643 }
644
645 // ---------------------------------------------------------------------------
646 // In-memory suggestion index
647 // ---------------------------------------------------------------------------
648
649 /// In-memory tag index for keystroke-speed autocomplete.
650 ///
651 /// Backed by a sorted `Vec<String>` — binary search for path-prefix matching
652 /// (O(log n) to find the start, then linear scan for results), with a segment-prefix
653 /// fallback for mid-tag matches. At typical corpus sizes (hundreds to low thousands
654 /// of tags), the entire `suggest` call completes in single-digit microseconds.
655 ///
656 /// ```
657 /// use tagtree::TagIndex;
658 ///
659 /// let mut idx = TagIndex::new(vec![
660 /// "genre.electronic.house".into(),
661 /// "genre.electronic.techno".into(),
662 /// "genre.rock".into(),
663 /// "mood.dark".into(),
664 /// "mood.upbeat".into(),
665 /// ]);
666 ///
667 /// // Path prefix: binary search
668 /// assert_eq!(idx.suggest("genre.el", 10), vec![
669 /// "genre.electronic.house",
670 /// "genre.electronic.techno",
671 /// ]);
672 ///
673 /// // Segment prefix: finds "mood.dark" because "dark" starts with "dar"
674 /// assert_eq!(idx.suggest("dar", 10), vec!["mood.dark"]);
675 ///
676 /// // Mixed: path prefix hits first, segment hits fill remainder
677 /// assert_eq!(idx.suggest("mood", 10), vec!["mood.dark", "mood.upbeat"]);
678 ///
679 /// // Incremental update
680 /// idx.insert("mood.chill".into());
681 /// assert_eq!(idx.suggest("mood", 10), vec!["mood.chill", "mood.dark", "mood.upbeat"]);
682 /// ```
683 #[derive(Debug, Clone)]
684 pub struct TagIndex {
685 /// All known tags, sorted lexicographically and deduplicated.
686 tags: Vec<String>,
687 /// All unique segments across all tags, sorted. Used as a fast gate for
688 /// tier-2 segment matching: if no segment starts with the input, the full
689 /// tag scan is skipped entirely. This turns the no-match worst case from
690 /// O(n * avg_segments) to O(log m) where m = unique segment count.
691 segments: Vec<String>,
692 }
693
694 impl TagIndex {
695 /// Build an index from an iterable of tag strings.
696 ///
697 /// Duplicates are removed. The input does not need to be sorted.
698 pub fn new(tags: impl IntoIterator<Item = String>) -> Self {
699 let mut tags: Vec<String> = tags.into_iter().collect();
700 tags.sort_unstable();
701 tags.dedup();
702 let segments = Self::build_segments(&tags);
703 Self { tags, segments }
704 }
705
706 /// Build an empty index.
707 pub fn empty() -> Self {
708 Self {
709 tags: Vec::new(),
710 segments: Vec::new(),
711 }
712 }
713
714 fn build_segments(tags: &[String]) -> Vec<String> {
715 let mut segs: Vec<String> = tags
716 .iter()
717 .flat_map(|t| t.split(SEPARATOR).map(|s| s.to_string()))
718 .collect();
719 segs.sort_unstable();
720 segs.dedup();
721 segs
722 }
723
724 /// Number of unique tags in the index.
725 pub fn len(&self) -> usize {
726 self.tags.len()
727 }
728
729 /// Whether the index is empty.
730 pub fn is_empty(&self) -> bool {
731 self.tags.is_empty()
732 }
733
734 /// Check if a tag exists in the index.
735 pub fn contains(&self, tag: &str) -> bool {
736 self.tags.binary_search_by(|t| t.as_str().cmp(tag)).is_ok()
737 }
738
739 /// Insert a tag into the index. Maintains sorted order. Idempotent.
740 pub fn insert(&mut self, tag: String) {
741 if let Err(pos) = self.tags.binary_search(&tag) {
742 // Add new segments
743 for seg in tag.split(SEPARATOR) {
744 let s = seg.to_string();
745 if let Err(sp) = self.segments.binary_search(&s) {
746 self.segments.insert(sp, s);
747 }
748 }
749 self.tags.insert(pos, tag);
750 }
751 }
752
753 /// Remove a tag from the index. Returns `true` if it was present.
754 ///
755 /// Note: orphaned segments are not eagerly pruned. Call [`rebuild`](Self::rebuild)
756 /// if exact segment accuracy matters after many removals.
757 pub fn remove(&mut self, tag: &str) -> bool {
758 if let Ok(pos) = self.tags.binary_search_by(|t| t.as_str().cmp(tag)) {
759 self.tags.remove(pos);
760 true
761 } else {
762 false
763 }
764 }
765
766 /// Replace the entire tag set. Useful when reloading from a database.
767 pub fn rebuild(&mut self, tags: impl IntoIterator<Item = String>) {
768 self.tags = tags.into_iter().collect();
769 self.tags.sort_unstable();
770 self.tags.dedup();
771 self.segments = Self::build_segments(&self.tags);
772 }
773
774 /// Iterate all tags in sorted order.
775 pub fn iter(&self) -> impl Iterator<Item = &str> {
776 self.tags.iter().map(|s| s.as_str())
777 }
778
779 /// Check if any segment in the index starts with `prefix` (binary search, O(log m)).
780 fn any_segment_starts_with(&self, prefix: &str) -> bool {
781 let start = self.segments.partition_point(|s| s.as_str() < prefix);
782 start < self.segments.len() && self.segments[start].starts_with(prefix)
783 }
784
785 /// Suggest tags matching `input`, returning up to `limit` results.
786 ///
787 /// **Matching strategy (two tiers, in order):**
788 ///
789 /// 1. **Path prefix** — tags whose full path starts with `input`.
790 /// Found via binary search, so this is O(log n + k).
791 /// Example: `"genre.el"` matches `"genre.electronic.house"`.
792 ///
793 /// 2. **Segment prefix** — tags where any individual segment starts with `input`.
794 /// Only runs if tier 1 didn't fill the limit AND `input` contains no dots.
795 /// A segment index gates entry: if no known segment starts with `input`,
796 /// the scan is skipped in O(log m). Otherwise, linear scan over tags.
797 /// Example: `"hou"` matches `"genre.electronic.house"` (segment `"house"`).
798 ///
799 /// Results are returned in sorted order within each tier, with tier-1 matches
800 /// always before tier-2 matches.
801 pub fn suggest(&self, input: &str, limit: usize) -> Vec<&str> {
802 if input.is_empty() || limit == 0 {
803 return Vec::new();
804 }
805
806 let mut results = Vec::with_capacity(limit.min(32));
807
808 // Tier 1: path prefix via binary search
809 let start = self.tags.partition_point(|t| t.as_str() < input);
810 for tag in &self.tags[start..] {
811 if results.len() >= limit {
812 return results;
813 }
814 if !tag.starts_with(input) {
815 break;
816 }
817 results.push(tag.as_str());
818 }
819
820 // Tier 2: segment prefix (only for single-segment input)
821 if results.len() < limit
822 && !input.contains(SEPARATOR)
823 && self.any_segment_starts_with(input)
824 {
825 for tag in &self.tags {
826 if results.len() >= limit {
827 break;
828 }
829 // Skip anything already matched by tier 1
830 if tag.starts_with(input) {
831 continue;
832 }
833 // Check if any non-first segment starts with input.
834 let mut first = true;
835 for seg in tag.split(SEPARATOR) {
836 if !first && seg.starts_with(input) {
837 results.push(tag.as_str());
838 break;
839 }
840 first = false;
841 }
842 }
843 }
844
845 results
846 }
847
848 /// Like [`suggest`](Self::suggest), but also returns exact-match status.
849 ///
850 /// Returns `(suggestions, exact_exists)` where `exact_exists` is `true` if
851 /// `input` itself is already a tag in the index. This lets the UI distinguish
852 /// between "this tag exists, you're reusing it" and "this is a new tag."
853 pub fn suggest_with_status(&self, input: &str, limit: usize) -> (Vec<&str>, bool) {
854 let exact = self.contains(input);
855 let suggestions = self.suggest(input, limit);
856 (suggestions, exact)
857 }
858
859 /// Suggest tags tolerating typos. Returns up to `limit` results scored by
860 /// edit distance (exact/prefix matches first, then fuzzy, sorted by distance).
861 ///
862 /// Runs the same two tiers as [`suggest`](Self::suggest), then adds a third
863 /// fuzzy tier using segment-level Levenshtein distance (threshold ≤ 2).
864 ///
865 /// ```
866 /// use tagtree::TagIndex;
867 ///
868 /// let idx = TagIndex::new(vec![
869 /// "genre.electronic.house".into(),
870 /// "genre.rock".into(),
871 /// "mood.dark".into(),
872 /// ]);
873 ///
874 /// // Typo: "genr" → matches "genre" segment (distance 1)
875 /// assert!(idx.suggest_fuzzy("genr", 10).contains(&"genre.rock"));
876 ///
877 /// // Exact prefix still returned first
878 /// let results = idx.suggest_fuzzy("genre", 10);
879 /// assert_eq!(results[0], "genre.electronic.house");
880 /// ```
881 pub fn suggest_fuzzy(&self, input: &str, limit: usize) -> Vec<&str> {
882 if input.is_empty() || limit == 0 {
883 return Vec::new();
884 }
885
886 // Tiers 1+2: reuse existing suggest
887 let mut results = self.suggest(input, limit);
888 if results.len() >= limit {
889 return results;
890 }
891
892 // Tier 3: fuzzy segment matching via Levenshtein
893 let mut fuzzy_hits: Vec<(&str, usize)> = Vec::new();
894 for tag in &self.tags {
895 if results.contains(&tag.as_str()) {
896 continue;
897 }
898 let mut best = usize::MAX;
899 for seg in tag.split(SEPARATOR) {
900 if let Some(d) = edit_distance(input, seg, 2) {
901 best = best.min(d);
902 if best == 0 {
903 break;
904 }
905 }
906 }
907 if best <= 2 {
908 fuzzy_hits.push((tag.as_str(), best));
909 }
910 }
911 fuzzy_hits.sort_by(|a, b| a.1.cmp(&b.1).then_with(|| a.0.cmp(b.0)));
912 for (tag, _) in fuzzy_hits {
913 if results.len() >= limit {
914 break;
915 }
916 results.push(tag);
917 }
918
919 results
920 }
921 }
922
923 impl Default for TagIndex {
924 fn default() -> Self {
925 Self::empty()
926 }
927 }
928
929 // ---------------------------------------------------------------------------
930 // Tests
931 // ---------------------------------------------------------------------------
932
933 #[cfg(test)]
934 mod tests {
935 use super::*;
936
937 // --- TagConfig & validate_with ---
938
939 const STRICT: TagConfig = TagConfig {
940 max_depth: 3,
941 max_length: 30,
942 semantic_depth: 1,
943 };
944
945 const LOOSE: TagConfig = TagConfig {
946 max_depth: 8,
947 max_length: 200,
948 semantic_depth: 0,
949 };
950
951 #[test]
952 fn config_enforces_max_depth() {
953 let cfg = TagConfig {
954 max_depth: 3,
955 max_length: 100,
956 semantic_depth: 0,
957 };
958 assert!(validate_with("a.b.c", &cfg).is_ok());
959 assert!(validate_with("a.b.c.d", &cfg).is_err());
960 }
961
962 #[test]
963 fn config_enforces_max_length() {
964 let cfg = TagConfig {
965 max_depth: 10,
966 max_length: 10,
967 semantic_depth: 0,
968 };
969 assert!(validate_with("abcdefghij", &cfg).is_ok()); // 10 chars
970 assert!(validate_with("abcdefghijk", &cfg).is_err()); // 11 chars
971 }
972
973 #[test]
974 fn config_enforces_semantic_depth() {
975 // semantic_depth=1 requires at least 2 segments
976 assert!(validate_with("genre.rock", &STRICT).is_ok());
977 assert!(validate_with("genre.rock.punk", &STRICT).is_ok());
978 assert!(validate_with("rock", &STRICT).is_err()); // only 1 segment
979
980 // semantic_depth=0 allows single segments
981 assert!(validate_with("rock", &LOOSE).is_ok());
982 }
983
984 #[test]
985 fn config_semantic_depth_2() {
986 let cfg = TagConfig {
987 max_depth: 5,
988 max_length: 100,
989 semantic_depth: 2,
990 };
991 assert!(validate_with("a.b.c", &cfg).is_ok()); // 3 segments >= 2+1
992 assert!(validate_with("a.b", &cfg).is_err()); // 2 segments < 2+1
993 assert!(validate_with("a", &cfg).is_err());
994 }
995
996 // --- Default validate (backwards compat) ---
997
998 #[test]
999 fn valid_tags() {
1000 assert!(validate("genre").is_ok());
1001 assert!(validate("genre.electronic").is_ok());
1002 assert!(validate("genre.electronic.house").is_ok());
1003 assert!(validate("instrument.drum.kick.808").is_ok());
1004 assert!(validate("source.field-recording").is_ok());
1005 assert!(validate("a.b.c.d.e").is_ok());
1006 assert!(validate("a").is_ok());
1007 assert!(validate("123").is_ok());
1008 assert!(validate("a-b-c").is_ok());
1009 }
1010
1011 #[test]
1012 fn reject_empty() {
1013 assert!(validate("").is_err());
1014 }
1015
1016 #[test]
1017 fn reject_uppercase() {
1018 assert!(validate("Genre").is_err());
1019 assert!(validate("genre.Electronic").is_err());
1020 assert!(validate("LOUD").is_err());
1021 }
1022
1023 #[test]
1024 fn reject_spaces() {
1025 assert!(validate("genre electronic").is_err());
1026 assert!(validate(" genre").is_err());
1027 }
1028
1029 #[test]
1030 fn reject_special_chars() {
1031 assert!(validate("genre/electronic").is_err());
1032 assert!(validate("genre:electronic").is_err());
1033 assert!(validate("genre_electronic").is_err());
1034 assert!(validate("genre@electronic").is_err());
1035 }
1036
1037 #[test]
1038 fn reject_leading_trailing_dots() {
1039 assert!(validate(".genre").is_err());
1040 assert!(validate("genre.").is_err());
1041 assert!(validate(".").is_err());
1042 }
1043
1044 #[test]
1045 fn reject_consecutive_dots() {
1046 assert!(validate("genre..electronic").is_err());
1047 }
1048
1049 #[test]
1050 fn reject_too_many_levels_default() {
1051 assert!(validate("a.b.c.d.e.f").is_err()); // 6 levels
1052 assert!(validate("a.b.c.d.e").is_ok()); // 5 levels
1053 }
1054
1055 #[test]
1056 fn reject_too_long_default() {
1057 let long = "a".repeat(101);
1058 assert!(validate(&long).is_err());
1059 let ok = "a".repeat(100);
1060 assert!(validate(&ok).is_ok());
1061 }
1062
1063 // --- segment ---
1064
1065 #[test]
1066 fn segment_indices() {
1067 assert_eq!(segment("genre.electronic.house", 0), Some("genre"));
1068 assert_eq!(segment("genre.electronic.house", 1), Some("electronic"));
1069 assert_eq!(segment("genre.electronic.house", 2), Some("house"));
1070 assert_eq!(segment("genre.electronic.house", 3), None);
1071 }
1072
1073 #[test]
1074 fn segment_single() {
1075 assert_eq!(segment("genre", 0), Some("genre"));
1076 assert_eq!(segment("genre", 1), None);
1077 }
1078
1079 // --- prefix_at_depth ---
1080
1081 #[test]
1082 fn prefix_at_depth_values() {
1083 assert_eq!(prefix_at_depth("a.b.c.d", 0), None);
1084 assert_eq!(prefix_at_depth("a.b.c.d", 1), Some("a"));
1085 assert_eq!(prefix_at_depth("a.b.c.d", 2), Some("a.b"));
1086 assert_eq!(prefix_at_depth("a.b.c.d", 3), Some("a.b.c"));
1087 assert_eq!(prefix_at_depth("a.b.c.d", 4), Some("a.b.c.d"));
1088 assert_eq!(prefix_at_depth("a.b.c.d", 5), None);
1089 }
1090
1091 #[test]
1092 fn prefix_at_depth_single() {
1093 assert_eq!(prefix_at_depth("genre", 1), Some("genre"));
1094 assert_eq!(prefix_at_depth("genre", 2), None);
1095 }
1096
1097 // --- semantic_prefix / free_suffix ---
1098
1099 #[test]
1100 fn semantic_prefix_depth_1() {
1101 assert_eq!(semantic_prefix("genre.electronic.house", 1), Some("genre"));
1102 assert_eq!(free_suffix("genre.electronic.house", 1), Some("electronic.house"));
1103 }
1104
1105 #[test]
1106 fn semantic_prefix_depth_2() {
1107 assert_eq!(semantic_prefix("type.audio.podcast", 2), Some("type.audio"));
1108 assert_eq!(free_suffix("type.audio.podcast", 2), Some("podcast"));
1109 }
1110
1111 #[test]
1112 fn semantic_prefix_depth_0() {
1113 assert_eq!(semantic_prefix("genre.rock", 0), None);
1114 assert_eq!(free_suffix("genre.rock", 0), Some("genre.rock"));
1115 }
1116
1117 #[test]
1118 fn free_suffix_at_boundary() {
1119 // Tag is exactly the semantic prefix length — no suffix
1120 assert_eq!(free_suffix("genre", 1), None);
1121 assert_eq!(free_suffix("type.audio", 2), None);
1122 }
1123
1124 #[test]
1125 fn free_suffix_empty_tag() {
1126 assert_eq!(free_suffix("", 0), None);
1127 }
1128
1129 // --- Parent ---
1130
1131 #[test]
1132 fn parent_of_nested() {
1133 assert_eq!(parent("a.b.c"), Some("a.b"));
1134 assert_eq!(parent("a.b"), Some("a"));
1135 }
1136
1137 #[test]
1138 fn parent_of_root() {
1139 assert_eq!(parent("a"), None);
1140 }
1141
1142 // --- Leaf ---
1143
1144 #[test]
1145 fn leaf_of_nested() {
1146 assert_eq!(leaf("a.b.c"), "c");
1147 assert_eq!(leaf("genre.electronic.house"), "house");
1148 }
1149
1150 #[test]
1151 fn leaf_of_root() {
1152 assert_eq!(leaf("genre"), "genre");
1153 }
1154
1155 // --- Depth ---
1156
1157 #[test]
1158 fn depth_levels() {
1159 assert_eq!(depth(""), 0);
1160 assert_eq!(depth("a"), 1);
1161 assert_eq!(depth("a.b"), 2);
1162 assert_eq!(depth("a.b.c"), 3);
1163 assert_eq!(depth("a.b.c.d.e"), 5);
1164 }
1165
1166 // --- Ancestors ---
1167
1168 #[test]
1169 fn ancestors_chain() {
1170 assert_eq!(
1171 ancestors("genre.electronic.house"),
1172 vec!["genre", "genre.electronic"]
1173 );
1174 }
1175
1176 #[test]
1177 fn ancestors_single_level() {
1178 let result: Vec<&str> = ancestors("genre");
1179 assert!(result.is_empty());
1180 }
1181
1182 #[test]
1183 fn ancestors_two_levels() {
1184 assert_eq!(ancestors("genre.rock"), vec!["genre"]);
1185 }
1186
1187 // --- is_ancestor_of ---
1188
1189 #[test]
1190 fn ancestor_relationship() {
1191 assert!(is_ancestor_of("genre", "genre.electronic"));
1192 assert!(is_ancestor_of("genre", "genre.electronic.house"));
1193 assert!(is_ancestor_of("genre.electronic", "genre.electronic.house"));
1194 }
1195
1196 #[test]
1197 fn not_ancestor_of_self() {
1198 assert!(!is_ancestor_of("genre", "genre"));
1199 }
1200
1201 #[test]
1202 fn not_ancestor_reversed() {
1203 assert!(!is_ancestor_of("genre.electronic", "genre"));
1204 }
1205
1206 #[test]
1207 fn not_ancestor_partial_segment() {
1208 assert!(!is_ancestor_of("gen", "genre.rock"));
1209 assert!(!is_ancestor_of("genre.electro", "genre.electronic.house"));
1210 }
1211
1212 #[test]
1213 fn not_ancestor_unrelated() {
1214 assert!(!is_ancestor_of("genre", "mood.dark"));
1215 }
1216
1217 // --- common_ancestor ---
1218
1219 #[test]
1220 fn common_ancestor_siblings() {
1221 assert_eq!(
1222 common_ancestor("genre.electronic.house", "genre.electronic.techno"),
1223 Some("genre.electronic")
1224 );
1225 }
1226
1227 #[test]
1228 fn common_ancestor_cousins() {
1229 assert_eq!(
1230 common_ancestor("genre.rock.punk", "genre.electronic.house"),
1231 Some("genre")
1232 );
1233 }
1234
1235 #[test]
1236 fn common_ancestor_parent_child() {
1237 assert_eq!(common_ancestor("genre", "genre.rock"), Some("genre"));
1238 assert_eq!(common_ancestor("genre.rock", "genre"), Some("genre"));
1239 }
1240
1241 #[test]
1242 fn common_ancestor_none() {
1243 assert_eq!(common_ancestor("genre.rock", "mood.dark"), None);
1244 }
1245
1246 #[test]
1247 fn common_ancestor_same_tag() {
1248 assert_eq!(common_ancestor("genre", "genre"), None);
1249 }
1250
1251 #[test]
1252 fn common_ancestor_partial_segment_no_match() {
1253 assert_eq!(common_ancestor("genre.rock", "genetic.code"), None);
1254 }
1255
1256 // --- children_at_prefix ---
1257
1258 #[test]
1259 fn children_at_root() {
1260 let tags = [
1261 "genre.electronic.house",
1262 "genre.rock",
1263 "mood.dark",
1264 "instrument.kick",
1265 ];
1266 assert_eq!(
1267 children_at_prefix("", &tags),
1268 vec!["genre", "instrument", "mood"]
1269 );
1270 }
1271
1272 #[test]
1273 fn children_one_level() {
1274 let tags = [
1275 "genre.electronic.house",
1276 "genre.electronic.techno",
1277 "genre.rock",
1278 ];
1279 assert_eq!(
1280 children_at_prefix("genre", &tags),
1281 vec!["electronic", "rock"]
1282 );
1283 }
1284
1285 #[test]
1286 fn children_two_levels() {
1287 let tags = [
1288 "genre.electronic.house",
1289 "genre.electronic.techno",
1290 ];
1291 assert_eq!(
1292 children_at_prefix("genre.electronic", &tags),
1293 vec!["house", "techno"]
1294 );
1295 }
1296
1297 #[test]
1298 fn children_at_leaf() {
1299 let tags = ["genre.electronic.house"];
1300 let result = children_at_prefix("genre.electronic.house", &tags);
1301 assert!(result.is_empty());
1302 }
1303
1304 #[test]
1305 fn children_no_partial_segment_match() {
1306 let tags = ["genre.rock"];
1307 let result = children_at_prefix("gen", &tags);
1308 assert!(result.is_empty());
1309 }
1310
1311 #[test]
1312 fn children_with_string_vec() {
1313 let tags: Vec<String> = vec![
1314 "genre.rock".to_string(),
1315 "genre.electronic".to_string(),
1316 ];
1317 assert_eq!(children_at_prefix("genre", &tags), vec!["electronic", "rock"]);
1318 }
1319
1320 // --- subtree ---
1321
1322 #[test]
1323 fn subtree_includes_self_and_descendants() {
1324 let tags = [
1325 "genre.electronic",
1326 "genre.electronic.house",
1327 "genre.electronic.techno",
1328 "genre.rock",
1329 "mood.dark",
1330 ];
1331 assert_eq!(
1332 subtree("genre.electronic", &tags),
1333 vec![
1334 "genre.electronic",
1335 "genre.electronic.house",
1336 "genre.electronic.techno",
1337 ]
1338 );
1339 }
1340
1341 #[test]
1342 fn subtree_no_partial_segment() {
1343 let tags = ["genre.rock", "gen.something"];
1344 assert_eq!(subtree("gen", &tags), vec!["gen.something"]);
1345 }
1346
1347 #[test]
1348 fn subtree_empty() {
1349 let tags = ["genre.rock", "mood.dark"];
1350 let result = subtree("instrument", &tags);
1351 assert!(result.is_empty());
1352 }
1353
1354 // --- escape_like ---
1355
1356 #[test]
1357 fn escape_like_special_chars() {
1358 assert_eq!(escape_like("100%"), r"100\%");
1359 assert_eq!(escape_like("a_b"), r"a\_b");
1360 assert_eq!(escape_like(r"a\b"), r"a\\b");
1361 assert_eq!(escape_like("normal"), "normal");
1362 }
1363
1364 // --- like_descendant_pattern ---
1365
1366 #[test]
1367 fn like_descendant_pattern_simple() {
1368 assert_eq!(like_descendant_pattern("genre"), "genre.%");
1369 assert_eq!(like_descendant_pattern("genre.electronic"), "genre.electronic.%");
1370 }
1371
1372 #[test]
1373 fn like_descendant_pattern_escapes() {
1374 assert_eq!(like_descendant_pattern("a%b"), r"a\%b.%");
1375 }
1376
1377 // --- rename_prefix ---
1378
1379 #[test]
1380 fn rename_descendant() {
1381 assert_eq!(
1382 rename_prefix("genre.electronic", "genre.dance", "genre.electronic.house"),
1383 Some("genre.dance.house".to_string())
1384 );
1385 }
1386
1387 #[test]
1388 fn rename_exact_match() {
1389 assert_eq!(
1390 rename_prefix("genre.electronic", "genre.dance", "genre.electronic"),
1391 Some("genre.dance".to_string())
1392 );
1393 }
1394
1395 #[test]
1396 fn rename_no_match() {
1397 assert_eq!(
1398 rename_prefix("genre.electronic", "genre.dance", "mood.dark"),
1399 None
1400 );
1401 }
1402
1403 #[test]
1404 fn rename_partial_segment_no_match() {
1405 assert_eq!(
1406 rename_prefix("gen", "xxx", "genre.rock"),
1407 None
1408 );
1409 }
1410
1411 #[test]
1412 fn rename_preserves_deep_suffix() {
1413 assert_eq!(
1414 rename_prefix("a", "x", "a.b.c.d"),
1415 Some("x.b.c.d".to_string())
1416 );
1417 }
1418
1419 // --- TagIndex ---
1420
1421 fn test_index() -> TagIndex {
1422 TagIndex::new(vec![
1423 "genre.electronic.house".into(),
1424 "genre.electronic.techno".into(),
1425 "genre.rock".into(),
1426 "genre.rock.punk".into(),
1427 "instrument.drum.kick".into(),
1428 "instrument.drum.snare".into(),
1429 "instrument.synth".into(),
1430 "mood.dark".into(),
1431 "mood.upbeat".into(),
1432 "source.field-recording".into(),
1433 ])
1434 }
1435
1436 #[test]
1437 fn index_new_deduplicates_and_sorts() {
1438 let idx = TagIndex::new(vec![
1439 "b".into(),
1440 "a".into(),
1441 "b".into(),
1442 "c".into(),
1443 ]);
1444 assert_eq!(idx.len(), 3);
1445 assert_eq!(idx.iter().collect::<Vec<_>>(), vec!["a", "b", "c"]);
1446 }
1447
1448 #[test]
1449 fn index_contains() {
1450 let idx = test_index();
1451 assert!(idx.contains("genre.rock"));
1452 assert!(idx.contains("mood.dark"));
1453 assert!(!idx.contains("genre"));
1454 assert!(!idx.contains("nonexistent"));
1455 }
1456
1457 #[test]
1458 fn index_insert_and_remove() {
1459 let mut idx = TagIndex::empty();
1460 assert!(idx.is_empty());
1461
1462 idx.insert("genre.rock".into());
1463 assert_eq!(idx.len(), 1);
1464 assert!(idx.contains("genre.rock"));
1465
1466 // Idempotent insert
1467 idx.insert("genre.rock".into());
1468 assert_eq!(idx.len(), 1);
1469
1470 // Remove
1471 assert!(idx.remove("genre.rock"));
1472 assert!(!idx.contains("genre.rock"));
1473 assert!(idx.is_empty());
1474
1475 // Remove nonexistent
1476 assert!(!idx.remove("nope"));
1477 }
1478
1479 #[test]
1480 fn index_rebuild() {
1481 let mut idx = test_index();
1482 assert_eq!(idx.len(), 10);
1483
1484 idx.rebuild(vec!["a".into(), "b".into()]);
1485 assert_eq!(idx.len(), 2);
1486 assert_eq!(idx.iter().collect::<Vec<_>>(), vec!["a", "b"]);
1487 }
1488
1489 // --- suggest: tier 1 (path prefix) ---
1490
1491 #[test]
1492 fn suggest_full_path_prefix() {
1493 let idx = test_index();
1494 assert_eq!(
1495 idx.suggest("genre.electronic", 10),
1496 vec!["genre.electronic.house", "genre.electronic.techno"]
1497 );
1498 }
1499
1500 #[test]
1501 fn suggest_partial_segment_in_path() {
1502 let idx = test_index();
1503 // "genre.el" matches "genre.electronic.*"
1504 assert_eq!(
1505 idx.suggest("genre.el", 10),
1506 vec!["genre.electronic.house", "genre.electronic.techno"]
1507 );
1508 }
1509
1510 #[test]
1511 fn suggest_root_prefix() {
1512 let idx = test_index();
1513 let results = idx.suggest("genre", 10);
1514 assert_eq!(results, vec![
1515 "genre.electronic.house",
1516 "genre.electronic.techno",
1517 "genre.rock",
1518 "genre.rock.punk",
1519 ]);
1520 }
1521
1522 #[test]
1523 fn suggest_exact_tag() {
1524 let idx = test_index();
1525 // "mood.dark" is an exact tag — should still appear
1526 assert_eq!(idx.suggest("mood.dark", 10), vec!["mood.dark"]);
1527 }
1528
1529 #[test]
1530 fn suggest_no_match() {
1531 let idx = test_index();
1532 assert!(idx.suggest("zzz", 10).is_empty());
1533 }
1534
1535 #[test]
1536 fn suggest_empty_input() {
1537 let idx = test_index();
1538 assert!(idx.suggest("", 10).is_empty());
1539 }
1540
1541 #[test]
1542 fn suggest_zero_limit() {
1543 let idx = test_index();
1544 assert!(idx.suggest("genre", 0).is_empty());
1545 }
1546
1547 #[test]
1548 fn suggest_respects_limit() {
1549 let idx = test_index();
1550 let results = idx.suggest("genre", 2);
1551 assert_eq!(results.len(), 2);
1552 assert_eq!(results, vec!["genre.electronic.house", "genre.electronic.techno"]);
1553 }
1554
1555 // --- suggest: tier 2 (segment prefix) ---
1556
1557 #[test]
1558 fn suggest_segment_prefix_mid_tag() {
1559 let idx = test_index();
1560 // "hou" doesn't match any tag path-prefix, but "house" starts with "hou"
1561 assert_eq!(idx.suggest("hou", 10), vec!["genre.electronic.house"]);
1562 }
1563
1564 #[test]
1565 fn suggest_segment_prefix_finds_multiple() {
1566 let idx = test_index();
1567 // "drum" matches instrument.drum.kick and instrument.drum.snare via segment
1568 // But it also matches as a path prefix of "instrument.drum" — wait, no.
1569 // "drum" as path prefix: tags starting with "drum" — none.
1570 // "drum" as segment prefix: "instrument.drum.kick" has segment "drum".
1571 assert_eq!(
1572 idx.suggest("drum", 10),
1573 vec!["instrument.drum.kick", "instrument.drum.snare"]
1574 );
1575 }
1576
1577 #[test]
1578 fn suggest_segment_prefix_partial() {
1579 let idx = test_index();
1580 // "dar" matches "mood.dark" via segment "dark" starting with "dar"
1581 assert_eq!(idx.suggest("dar", 10), vec!["mood.dark"]);
1582 }
1583
1584 #[test]
1585 fn suggest_no_segment_scan_when_input_has_dot() {
1586 let idx = test_index();
1587 // "electronic.hou" is a dotted input — tier 2 should NOT run
1588 // This means only path-prefix matching, which finds nothing
1589 // (no tag starts with "electronic.hou")
1590 assert!(idx.suggest("electronic.hou", 10).is_empty());
1591 }
1592
1593 #[test]
1594 fn suggest_path_prefix_before_segment_prefix() {
1595 let idx = TagIndex::new(vec![
1596 "rock.classic".into(),
1597 "genre.rock".into(),
1598 "genre.rock.punk".into(),
1599 ]);
1600 // "rock" matches "rock.classic" as path prefix (tier 1)
1601 // and "genre.rock", "genre.rock.punk" as segment prefix (tier 2)
1602 let results = idx.suggest("rock", 10);
1603 assert_eq!(results, vec![
1604 "rock.classic", // tier 1
1605 "genre.rock", // tier 2
1606 "genre.rock.punk", // tier 2
1607 ]);
1608 }
1609
1610 #[test]
1611 fn suggest_tier2_skips_tier1_matches() {
1612 let idx = TagIndex::new(vec![
1613 "mood.dark".into(),
1614 "mood.dark.brooding".into(),
1615 ]);
1616 // "mood" matches both via path prefix. Tier 2 should not duplicate.
1617 let results = idx.suggest("mood", 10);
1618 assert_eq!(results, vec!["mood.dark", "mood.dark.brooding"]);
1619 }
1620
1621 #[test]
1622 fn suggest_segment_only_matches_segment_start() {
1623 let idx = test_index();
1624 // "ock" should NOT match "genre.rock" — we match segment starts, not substrings
1625 assert!(idx.suggest("ock", 10).is_empty());
1626 }
1627
1628 // --- suggest_with_status ---
1629
1630 #[test]
1631 fn suggest_with_status_exact_exists() {
1632 let idx = test_index();
1633 let (suggestions, exact) = idx.suggest_with_status("genre.rock", 10);
1634 assert!(exact);
1635 assert!(suggestions.contains(&"genre.rock"));
1636 }
1637
1638 #[test]
1639 fn suggest_with_status_exact_missing() {
1640 let idx = test_index();
1641 let (suggestions, exact) = idx.suggest_with_status("genre.el", 10);
1642 assert!(!exact);
1643 assert!(!suggestions.is_empty());
1644 }
1645
1646 #[test]
1647 fn suggest_with_status_novel_tag() {
1648 let idx = test_index();
1649 let (_suggestions, exact) = idx.suggest_with_status("brand-new-tag", 10);
1650 assert!(!exact);
1651 }
1652
1653 // --- edit_distance ---
1654
1655 #[test]
1656 fn edit_distance_identical() {
1657 assert_eq!(edit_distance("genre", "genre", 2), Some(0));
1658 }
1659
1660 #[test]
1661 fn edit_distance_single_substitution() {
1662 assert_eq!(edit_distance("rock", "rack", 2), Some(1));
1663 }
1664
1665 #[test]
1666 fn edit_distance_single_insertion() {
1667 assert_eq!(edit_distance("genr", "genre", 2), Some(1));
1668 }
1669
1670 #[test]
1671 fn edit_distance_single_deletion() {
1672 assert_eq!(edit_distance("genre", "genr", 2), Some(1));
1673 }
1674
1675 #[test]
1676 fn edit_distance_transposition_as_two_ops() {
1677 // "ab" → "ba" is 2 (sub+sub), not 1 (no Damerau)
1678 assert_eq!(edit_distance("ab", "ba", 2), Some(2));
1679 }
1680
1681 #[test]
1682 fn edit_distance_exceeds_threshold() {
1683 assert_eq!(edit_distance("abc", "xyz", 2), None);
1684 }
1685
1686 #[test]
1687 fn edit_distance_length_shortcircuit() {
1688 assert_eq!(edit_distance("a", "abcd", 2), None);
1689 }
1690
1691 #[test]
1692 fn edit_distance_empty_strings() {
1693 assert_eq!(edit_distance("", "", 2), Some(0));
1694 assert_eq!(edit_distance("", "ab", 2), Some(2));
1695 assert_eq!(edit_distance("ab", "", 2), Some(2));
1696 assert_eq!(edit_distance("", "abc", 2), None);
1697 }
1698
1699 // --- suggest_fuzzy ---
1700
1701 #[test]
1702 fn suggest_fuzzy_typo_in_root_segment() {
1703 let idx = test_index();
1704 // "genr" → "genre" (distance 1)
1705 let results = idx.suggest_fuzzy("genr", 10);
1706 assert!(results.contains(&"genre.electronic.house"));
1707 assert!(results.contains(&"genre.rock"));
1708 }
1709
1710 #[test]
1711 fn suggest_fuzzy_typo_in_leaf_segment() {
1712 let idx = test_index();
1713 // "hous" → "house" (distance 1)
1714 let results = idx.suggest_fuzzy("hous", 10);
1715 assert!(results.contains(&"genre.electronic.house"));
1716 }
1717
1718 #[test]
1719 fn suggest_fuzzy_exact_prefix_first() {
1720 let idx = test_index();
1721 // "mood" matches exactly via tier 1 path prefix
1722 let results = idx.suggest_fuzzy("mood", 10);
1723 assert_eq!(results[0], "mood.dark");
1724 assert_eq!(results[1], "mood.upbeat");
1725 }
1726
1727 #[test]
1728 fn suggest_fuzzy_no_false_positives() {
1729 let idx = test_index();
1730 // "zzz" is too far from anything
1731 assert!(idx.suggest_fuzzy("zzz", 10).is_empty());
1732 }
1733
1734 #[test]
1735 fn suggest_fuzzy_respects_limit() {
1736 let idx = test_index();
1737 let results = idx.suggest_fuzzy("genr", 2);
1738 assert_eq!(results.len(), 2);
1739 }
1740
1741 #[test]
1742 fn suggest_fuzzy_distance_2() {
1743 let idx = test_index();
1744 // "drak" → "dark" (distance 2: transposition = sub+sub)
1745 let results = idx.suggest_fuzzy("drak", 10);
1746 assert!(results.contains(&"mood.dark"));
1747 }
1748
1749 #[test]
1750 fn suggest_fuzzy_empty_input() {
1751 let idx = test_index();
1752 assert!(idx.suggest_fuzzy("", 10).is_empty());
1753 }
1754
1755 #[test]
1756 fn suggest_fuzzy_sorted_by_distance() {
1757 let idx = TagIndex::new(vec![
1758 "mood.dark".into(),
1759 "mood.dork".into(),
1760 "mood.dxxx".into(),
1761 ]);
1762 // "dark" → segment "dark"=0 (tier 2 hit), "dork"=1, "dxxx"=3 (exceeds threshold)
1763 let results = idx.suggest_fuzzy("dark", 10);
1764 // tier 2 picks up mood.dark (segment "dark" starts_with "dark")
1765 // tier 3 fuzzy adds mood.dork (distance 1)
1766 // mood.dxxx not matched (distance > 2)
1767 assert_eq!(results.len(), 2);
1768 assert_eq!(results[0], "mood.dark");
1769 assert_eq!(results[1], "mood.dork");
1770 }
1771
1772 // --- rename_prefix_bulk ---
1773
1774 #[test]
1775 fn rename_prefix_bulk_renames_matching() {
1776 let mut idx = TagIndex::new(vec![
1777 "genre.electronic.house".into(),
1778 "genre.electronic.techno".into(),
1779 "genre.rock".into(),
1780 ]);
1781 assert_eq!(rename_prefix_bulk("genre.electronic", "genre.dance", &mut idx), 2);
1782 assert!(idx.contains("genre.dance.house"));
1783 assert!(idx.contains("genre.dance.techno"));
1784 assert!(idx.contains("genre.rock"));
1785 assert!(!idx.contains("genre.electronic.house"));
1786 }
1787
1788 #[test]
1789 fn rename_prefix_bulk_no_match() {
1790 let mut idx = TagIndex::new(vec!["genre.rock".into()]);
1791 assert_eq!(rename_prefix_bulk("mood", "vibe", &mut idx), 0);
1792 assert!(idx.contains("genre.rock"));
1793 }
1794
1795 // --- remove_subtree ---
1796
1797 #[test]
1798 fn remove_subtree_removes_prefix_and_descendants() {
1799 let mut idx = TagIndex::new(vec![
1800 "genre.electronic.house".into(),
1801 "genre.electronic.techno".into(),
1802 "genre.electronic".into(),
1803 "genre.rock".into(),
1804 ]);
1805 assert_eq!(remove_subtree("genre.electronic", &mut idx), 3);
1806 assert_eq!(idx.len(), 1);
1807 assert!(idx.contains("genre.rock"));
1808 }
1809
1810 #[test]
1811 fn remove_subtree_no_match() {
1812 let mut idx = TagIndex::new(vec!["genre.rock".into()]);
1813 assert_eq!(remove_subtree("mood", &mut idx), 0);
1814 assert_eq!(idx.len(), 1);
1815 }
1816
1817 // --- merge_tags ---
1818
1819 #[test]
1820 fn merge_tags_replaces_source() {
1821 let mut idx = TagIndex::new(vec![
1822 "genre.electronic".into(),
1823 "genre.rock".into(),
1824 ]);
1825 assert_eq!(merge_tags("genre.electronic", "genre.dance", &mut idx), 1);
1826 assert!(idx.contains("genre.dance"));
1827 assert!(idx.contains("genre.rock"));
1828 assert!(!idx.contains("genre.electronic"));
1829 }
1830
1831 #[test]
1832 fn merge_tags_deduplicates() {
1833 let mut idx = TagIndex::new(vec![
1834 "genre.electronic".into(),
1835 "genre.dance".into(),
1836 ]);
1837 assert_eq!(merge_tags("genre.electronic", "genre.dance", &mut idx), 1);
1838 assert_eq!(idx.len(), 1);
1839 assert!(idx.contains("genre.dance"));
1840 }
1841
1842 #[test]
1843 fn merge_tags_source_missing() {
1844 let mut idx = TagIndex::new(vec!["genre.rock".into()]);
1845 assert_eq!(merge_tags("genre.electronic", "genre.dance", &mut idx), 0);
1846 assert_eq!(idx.len(), 1);
1847 }
1848
1849 // --- batch_rename ---
1850
1851 #[test]
1852 fn batch_rename_multiple_ops() {
1853 let mut idx = TagIndex::new(vec![
1854 "genre.electronic.house".into(),
1855 "mood.dark".into(),
1856 ]);
1857 let ops = [("genre.electronic", "genre.dance"), ("mood", "vibe")];
1858 assert_eq!(batch_rename(&ops, &mut idx), 2);
1859 assert!(idx.contains("genre.dance.house"));
1860 assert!(idx.contains("vibe.dark"));
1861 }
1862
1863 #[test]
1864 fn batch_rename_chained() {
1865 let mut idx = TagIndex::new(vec!["a.b.c".into()]);
1866 let ops = [("a", "x"), ("x.b", "x.y")];
1867 assert_eq!(batch_rename(&ops, &mut idx), 2);
1868 assert!(idx.contains("x.y.c"));
1869 }
1870 }
1871