//! Hierarchical dot-notation tag standard. //! //! Tags are lowercase strings with dot-separated segments representing a hierarchy: //! `genre.electronic.house`, `work.meeting.standup`, `news.tech.rust`. //! //! # Format (invariants enforced by all configs) //! //! - Segments: lowercase alphanumeric and hyphens (`[a-z0-9-]`) //! - Separator: `.` (dot) //! - No empty segments, no leading/trailing dots, no consecutive dots //! //! # Per-app configuration //! //! Each app provides a [`TagConfig`] that controls: //! - **`max_depth`** — maximum number of segments (e.g., 5 for AF, 3 for GO) //! - **`max_length`** — maximum character length of the entire tag string //! - **`semantic_depth`** — how many leading segments carry dispatch meaning //! (e.g., 1 means the first segment is a namespace like `genre`, `mood`) //! //! # SQL //! //! Hierarchy queries use `LIKE` prefix matching, which works identically on SQLite //! and PostgreSQL. Use [`like_descendant_pattern`] to build the pattern and //! [`escape_like`] to sanitize user input embedded in patterns. use std::fmt; /// Separator between tag levels. pub const SEPARATOR: char = '.'; // --------------------------------------------------------------------------- // Configuration // --------------------------------------------------------------------------- /// Per-app tag rules. Each consumer defines one of these as a constant. /// /// ``` /// use tagtree::TagConfig; /// /// // audiofiles: deep hierarchy, namespace-driven /// const AF_TAGS: TagConfig = TagConfig { max_depth: 5, max_length: 100, semantic_depth: 1 }; /// /// // goingson: shallow tags, no required prefix /// const GO_TAGS: TagConfig = TagConfig { max_depth: 3, max_length: 60, semantic_depth: 0 }; /// ``` #[derive(Debug, Clone, Copy, PartialEq, Eq)] pub struct TagConfig { /// Maximum number of segments allowed (e.g., 5 means `a.b.c.d.e` is valid). pub max_depth: usize, /// Maximum character length of the entire tag string. pub max_length: usize, /// Number of leading segments that carry app-specific dispatch meaning. /// /// - `0` — no semantic prefix; all segments are free-form hierarchy /// - `1` — first segment is a namespace (e.g., `genre`, `mood`, `source`) /// - `2` — first two segments are structured (e.g., `type.subtype`) /// /// When > 0, [`validate_with`] enforces that the tag has at least this many /// segments, and [`semantic_prefix`] / [`free_suffix`] can split the tag. pub semantic_depth: usize, } /// Error returned when a tag string is invalid. #[derive(Debug, Clone, PartialEq, Eq)] pub struct TagError(pub String); impl fmt::Display for TagError { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { write!(f, "invalid tag: {}", self.0) } } impl std::error::Error for TagError {} // --------------------------------------------------------------------------- // Validation // --------------------------------------------------------------------------- /// Validate a tag against an app-specific [`TagConfig`]. /// /// Checks format invariants (charset, no empty segments) plus the config's /// depth, length, and semantic-depth constraints. /// /// ``` /// use tagtree::{TagConfig, validate_with}; /// /// const CFG: TagConfig = TagConfig { max_depth: 3, max_length: 50, semantic_depth: 1 }; /// /// assert!(validate_with("genre.rock", &CFG).is_ok()); /// assert!(validate_with("genre.rock.punk", &CFG).is_ok()); // depth 3 /// assert!(validate_with("genre.rock.punk.oi", &CFG).is_err()); // depth 4 > max 3 /// assert!(validate_with("rock", &CFG).is_err()); // semantic_depth=1 requires 2+ segments /// ``` pub fn validate_with(tag: &str, config: &TagConfig) -> Result<(), TagError> { if tag.is_empty() { return Err(TagError("tag cannot be empty".into())); } if tag.len() > config.max_length { return Err(TagError(format!( "tag exceeds {} characters", config.max_length ))); } if tag.starts_with(SEPARATOR) || tag.ends_with(SEPARATOR) { return Err(TagError("tag cannot start or end with a dot".into())); } if tag.contains("..") { return Err(TagError("tag cannot contain consecutive dots".into())); } for ch in tag.chars() { if !(ch.is_ascii_lowercase() || ch.is_ascii_digit() || ch == '-' || ch == SEPARATOR) { return Err(TagError(format!( "invalid character '{ch}': only lowercase alphanumeric, hyphens, and dots allowed" ))); } } let d = depth(tag); if d > config.max_depth { return Err(TagError(format!( "tag has {} levels, maximum is {}", d, config.max_depth ))); } if config.semantic_depth > 0 && d < config.semantic_depth + 1 { return Err(TagError(format!( "tag must have at least {} segments (semantic prefix requires {} + at least 1 value)", config.semantic_depth + 1, config.semantic_depth ))); } Ok(()) } /// Validate a tag with default limits (max_depth=5, max_length=100, no semantic prefix). /// /// Convenience wrapper for quick validation without a config. /// /// ``` /// # use tagtree::validate; /// assert!(validate("genre.electronic.house").is_ok()); /// assert!(validate("a.b.c.d.e.f").is_err()); // 6 levels > default max 5 /// ``` pub fn validate(tag: &str) -> Result<(), TagError> { const DEFAULT: TagConfig = TagConfig { max_depth: 5, max_length: 100, semantic_depth: 0, }; validate_with(tag, &DEFAULT) } // --------------------------------------------------------------------------- // Hierarchy parsing // --------------------------------------------------------------------------- /// Get the parent path of a tag, or `None` if it's a root-level tag. /// /// ``` /// # use tagtree::parent; /// assert_eq!(parent("genre.electronic.house"), Some("genre.electronic")); /// assert_eq!(parent("genre"), None); /// ``` pub fn parent(tag: &str) -> Option<&str> { tag.rfind(SEPARATOR).map(|pos| &tag[..pos]) } /// Get the leaf (last segment) of a tag. /// /// ``` /// # use tagtree::leaf; /// assert_eq!(leaf("genre.electronic.house"), "house"); /// assert_eq!(leaf("genre"), "genre"); /// ``` pub fn leaf(tag: &str) -> &str { match tag.rfind(SEPARATOR) { Some(pos) => &tag[pos + 1..], None => tag, } } /// Count the depth (number of segments) in a tag. /// /// ``` /// # use tagtree::depth; /// assert_eq!(depth("genre"), 1); /// assert_eq!(depth("genre.electronic"), 2); /// assert_eq!(depth("genre.electronic.house"), 3); /// ``` pub fn depth(tag: &str) -> usize { if tag.is_empty() { return 0; } tag.chars().filter(|&c| c == SEPARATOR).count() + 1 } /// Get the Nth segment (0-indexed) from a tag, or `None` if out of bounds. /// /// ``` /// # use tagtree::segment; /// assert_eq!(segment("genre.electronic.house", 0), Some("genre")); /// assert_eq!(segment("genre.electronic.house", 1), Some("electronic")); /// assert_eq!(segment("genre.electronic.house", 2), Some("house")); /// assert_eq!(segment("genre.electronic.house", 3), None); /// ``` pub fn segment(tag: &str, index: usize) -> Option<&str> { tag.split(SEPARATOR).nth(index) } /// Get the path formed by the first `n` segments, or `None` if the tag has fewer. /// /// ``` /// # use tagtree::prefix_at_depth; /// assert_eq!(prefix_at_depth("genre.electronic.house", 1), Some("genre")); /// assert_eq!(prefix_at_depth("genre.electronic.house", 2), Some("genre.electronic")); /// assert_eq!(prefix_at_depth("genre.electronic.house", 3), Some("genre.electronic.house")); /// assert_eq!(prefix_at_depth("genre.electronic.house", 4), None); /// ``` pub fn prefix_at_depth(tag: &str, n: usize) -> Option<&str> { if n == 0 { return None; } let mut dots_seen = 0; for (i, ch) in tag.bytes().enumerate() { if ch == SEPARATOR as u8 { dots_seen += 1; if dots_seen == n { return Some(&tag[..i]); } } } // If we've seen fewer dots than n-1, the tag doesn't have n segments if dots_seen == n - 1 { Some(tag) } else { None } } /// Get all ancestor paths from root to parent (excludes the tag itself). /// /// ``` /// # use tagtree::ancestors; /// assert_eq!(ancestors("genre.electronic.house"), vec!["genre", "genre.electronic"]); /// assert!(ancestors("genre").is_empty()); /// ``` pub fn ancestors(tag: &str) -> Vec<&str> { let mut result = Vec::new(); let mut remaining = tag; while let Some(pos) = remaining.rfind(SEPARATOR) { remaining = &remaining[..pos]; result.push(remaining); } result.reverse(); result } /// Check whether `ancestor` is an ancestor of `descendant`. /// /// A tag is NOT considered its own ancestor. /// /// ``` /// # use tagtree::is_ancestor_of; /// assert!(is_ancestor_of("genre", "genre.electronic")); /// assert!(is_ancestor_of("genre", "genre.electronic.house")); /// assert!(!is_ancestor_of("genre.electronic", "genre")); /// assert!(!is_ancestor_of("genre", "genre")); // not self /// assert!(!is_ancestor_of("gen", "genre.rock")); // not a segment boundary /// ``` pub fn is_ancestor_of(ancestor: &str, descendant: &str) -> bool { descendant.len() > ancestor.len() && descendant.starts_with(ancestor) && descendant.as_bytes()[ancestor.len()] == SEPARATOR as u8 } /// Find the longest common ancestor of two tags, or `None` if they share no prefix. /// /// ``` /// # use tagtree::common_ancestor; /// assert_eq!(common_ancestor("genre.electronic.house", "genre.electronic.techno"), Some("genre.electronic")); /// assert_eq!(common_ancestor("genre.rock", "genre.electronic"), Some("genre")); /// assert_eq!(common_ancestor("genre.rock", "mood.dark"), None); /// assert_eq!(common_ancestor("genre", "genre"), None); // same tag, no ancestor /// ``` pub fn common_ancestor<'a>(a: &'a str, b: &str) -> Option<&'a str> { let mut last_sep = None; for (i, (ca, cb)) in a.bytes().zip(b.bytes()).enumerate() { if ca != cb { break; } if ca == SEPARATOR as u8 { last_sep = Some(i); } } // If one is a prefix of the other and ends at a separator boundary, // the shorter one is the common ancestor. let min_len = a.len().min(b.len()); if a.len() != b.len() && a.as_bytes()[..min_len] == b.as_bytes()[..min_len] && (min_len == a.len() || min_len == b.len()) { let longer = if a.len() > b.len() { a } else { b }; if longer.as_bytes()[min_len] == SEPARATOR as u8 { return Some(&a[..min_len]); } } last_sep.map(|pos| &a[..pos]) } // --------------------------------------------------------------------------- // Semantic prefix helpers // --------------------------------------------------------------------------- /// Extract the semantic prefix (first `n` segments) from a tag. /// /// Returns `None` if the tag has fewer than `n` segments. /// This is equivalent to `prefix_at_depth(tag, n)` but named for clarity /// when used with [`TagConfig::semantic_depth`]. /// /// ``` /// # use tagtree::semantic_prefix; /// // AF uses semantic_depth=1: first segment is the namespace /// assert_eq!(semantic_prefix("genre.electronic.house", 1), Some("genre")); /// /// // A hypothetical app with semantic_depth=2 /// assert_eq!(semantic_prefix("type.audio.podcast", 2), Some("type.audio")); /// /// // No semantic prefix /// assert_eq!(semantic_prefix("rock", 0), None); /// ``` pub fn semantic_prefix(tag: &str, semantic_depth: usize) -> Option<&str> { if semantic_depth == 0 { return None; } prefix_at_depth(tag, semantic_depth) } /// Extract the free-form suffix after the semantic prefix. /// /// Returns `None` if the tag has no segments beyond the semantic prefix. /// /// ``` /// # use tagtree::free_suffix; /// assert_eq!(free_suffix("genre.electronic.house", 1), Some("electronic.house")); /// assert_eq!(free_suffix("genre.rock", 1), Some("rock")); /// assert_eq!(free_suffix("genre", 1), None); // no suffix beyond prefix /// /// // semantic_depth=0 means the whole tag is free-form /// assert_eq!(free_suffix("genre.rock", 0), Some("genre.rock")); /// ``` pub fn free_suffix(tag: &str, semantic_depth: usize) -> Option<&str> { if semantic_depth == 0 { if tag.is_empty() { return None; } return Some(tag); } let prefix = prefix_at_depth(tag, semantic_depth)?; if prefix.len() == tag.len() { // Tag is exactly the semantic prefix, no suffix None } else { Some(&tag[prefix.len() + 1..]) // skip the dot } } // --------------------------------------------------------------------------- // Tree operations on tag sets (in-memory) // --------------------------------------------------------------------------- /// List the immediate child segment names under a prefix. /// /// Given a set of tags and a prefix, returns the unique next-level segment names /// (not full paths). Pass an empty string for root-level children. /// /// ``` /// # use tagtree::children_at_prefix; /// let tags = ["genre.electronic.house", "genre.electronic.techno", "genre.rock", "mood.dark"]; /// assert_eq!(children_at_prefix("", &tags), vec!["genre", "mood"]); /// assert_eq!(children_at_prefix("genre", &tags), vec!["electronic", "rock"]); /// assert_eq!(children_at_prefix("genre.electronic", &tags), vec!["house", "techno"]); /// ``` pub fn children_at_prefix(prefix: &str, tags: &[impl AsRef]) -> Vec { let strip_len = if prefix.is_empty() { 0 } else { prefix.len() + 1 // prefix + dot }; let mut children: Vec = Vec::new(); for tag in tags { let tag = tag.as_ref(); let matches = if prefix.is_empty() { true } else { tag.starts_with(prefix) && tag.len() > prefix.len() && tag.as_bytes()[prefix.len()] == SEPARATOR as u8 }; if !matches || tag.len() <= strip_len { continue; } let suffix = &tag[strip_len..]; let child = match suffix.find(SEPARATOR) { Some(pos) => &suffix[..pos], None => suffix, }; if !child.is_empty() && !children.iter().any(|c| c == child) { children.push(child.to_string()); } } children.sort(); children } /// Get all tags that are descendants of a prefix (including the prefix itself if present). /// /// ``` /// # use tagtree::subtree; /// let tags = ["genre.electronic.house", "genre.electronic.techno", "genre.rock", "mood.dark"]; /// let sub = subtree("genre.electronic", &tags); /// assert_eq!(sub, vec!["genre.electronic.house", "genre.electronic.techno"]); /// ``` pub fn subtree<'a>(prefix: &str, tags: &'a [impl AsRef]) -> Vec<&'a str> { tags.iter() .map(|t| t.as_ref()) .filter(|tag| { *tag == prefix || (tag.starts_with(prefix) && tag.len() > prefix.len() && tag.as_bytes()[prefix.len()] == SEPARATOR as u8) }) .collect() } // --------------------------------------------------------------------------- // SQL helpers // --------------------------------------------------------------------------- /// Escape special characters in a string for use in SQL `LIKE` patterns. /// /// Handles `\`, `%`, and `_`. Use with `ESCAPE '\'` in the SQL query. /// /// ``` /// # use tagtree::escape_like; /// assert_eq!(escape_like("100%"), r"100\%"); /// assert_eq!(escape_like("a_b"), r"a\_b"); /// ``` pub fn escape_like(s: &str) -> String { s.replace('\\', "\\\\") .replace('%', "\\%") .replace('_', "\\_") } /// Build a `LIKE` pattern that matches all descendants of a prefix (not the prefix itself). /// /// For use in SQL: `WHERE tag = ?1 OR tag LIKE ?2 ESCAPE '\'` /// where `?1` is the prefix itself and `?2` is this pattern. /// /// ``` /// # use tagtree::like_descendant_pattern; /// assert_eq!(like_descendant_pattern("genre"), r"genre.%"); /// assert_eq!(like_descendant_pattern("a-b"), r"a-b.%"); /// ``` pub fn like_descendant_pattern(prefix: &str) -> String { format!("{}.%", escape_like(prefix)) } // --------------------------------------------------------------------------- // Rename / move // --------------------------------------------------------------------------- /// Rename a prefix within a tag. Returns `Some(new_tag)` if the tag starts with /// `old_prefix` at a segment boundary, or `None` if it doesn't match. /// /// ``` /// # use tagtree::rename_prefix; /// assert_eq!( /// rename_prefix("genre.electronic", "genre.dance", "genre.electronic.house"), /// Some("genre.dance.house".to_string()) /// ); /// assert_eq!( /// rename_prefix("genre.electronic", "genre.dance", "genre.electronic"), /// Some("genre.dance".to_string()) /// ); /// assert_eq!(rename_prefix("gen", "xxx", "genre.rock"), None); /// ``` pub fn rename_prefix(old_prefix: &str, new_prefix: &str, tag: &str) -> Option { if tag == old_prefix { Some(new_prefix.to_string()) } else if is_ancestor_of(old_prefix, tag) { Some(format!("{}{}", new_prefix, &tag[old_prefix.len()..])) } else { None } } // --------------------------------------------------------------------------- // Edit distance (private) // --------------------------------------------------------------------------- /// Levenshtein distance with early termination. Returns `None` if the distance /// exceeds `max`. Uses single-row DP (O(min(m,n)) space). fn edit_distance(a: &str, b: &str, max: usize) -> Option { let a = a.as_bytes(); let b = b.as_bytes(); let (a, b) = if a.len() > b.len() { (b, a) } else { (a, b) }; // Short-circuit: length difference alone exceeds threshold if b.len() - a.len() > max { return None; } let mut row: Vec = (0..=a.len()).collect(); for j in 1..=b.len() { let mut prev = row[0]; row[0] = j; let mut row_min = row[0]; for i in 1..=a.len() { let cost = if a[i - 1] == b[j - 1] { 0 } else { 1 }; let val = (row[i] + 1).min(row[i - 1] + 1).min(prev + cost); prev = row[i]; row[i] = val; row_min = row_min.min(val); } if row_min > max { return None; } } let d = row[a.len()]; if d <= max { Some(d) } else { None } } // --------------------------------------------------------------------------- // Bulk tag operations // --------------------------------------------------------------------------- /// Rename a prefix across all tags in an index. Returns the number of tags modified. /// /// ``` /// # use tagtree::{TagIndex, rename_prefix_bulk}; /// let mut idx = TagIndex::new(vec![ /// "genre.electronic.house".into(), /// "genre.electronic.techno".into(), /// "genre.rock".into(), /// ]); /// assert_eq!(rename_prefix_bulk("genre.electronic", "genre.dance", &mut idx), 2); /// assert!(idx.contains("genre.dance.house")); /// assert!(idx.contains("genre.dance.techno")); /// assert!(!idx.contains("genre.electronic.house")); /// ``` pub fn rename_prefix_bulk(old_prefix: &str, new_prefix: &str, index: &mut TagIndex) -> usize { let mut count = 0; let new_tags: Vec = index .tags .iter() .map(|tag| { if let Some(renamed) = rename_prefix(old_prefix, new_prefix, tag) { count += 1; renamed } else { tag.clone() } }) .collect(); if count > 0 { index.rebuild(new_tags); } count } /// Remove all tags matching a prefix (the prefix itself and all descendants). /// Returns the number of tags removed. /// /// ``` /// # use tagtree::{TagIndex, remove_subtree}; /// let mut idx = TagIndex::new(vec![ /// "genre.electronic.house".into(), /// "genre.electronic.techno".into(), /// "genre.rock".into(), /// ]); /// assert_eq!(remove_subtree("genre.electronic", &mut idx), 2); /// assert_eq!(idx.len(), 1); /// assert!(idx.contains("genre.rock")); /// ``` pub fn remove_subtree(prefix: &str, index: &mut TagIndex) -> usize { let before = index.tags.len(); let remaining: Vec = index .tags .iter() .filter(|tag| tag.as_str() != prefix && !is_ancestor_of(prefix, tag)) .cloned() .collect(); let removed = before - remaining.len(); if removed > 0 { index.rebuild(remaining); } removed } /// Merge one tag into another: all occurrences of `source` become `target`. /// If `target` already exists, `source` is simply removed (deduplication). /// Returns the number of tags affected (0 or 1). /// /// ``` /// # use tagtree::{TagIndex, merge_tags}; /// let mut idx = TagIndex::new(vec![ /// "genre.electronic".into(), /// "genre.dance".into(), /// ]); /// assert_eq!(merge_tags("genre.electronic", "genre.dance", &mut idx), 1); /// assert_eq!(idx.len(), 1); /// assert!(idx.contains("genre.dance")); /// ``` pub fn merge_tags(source: &str, target: &str, index: &mut TagIndex) -> usize { if !index.contains(source) { return 0; } index.remove(source); if !index.contains(target) { index.insert(target.to_string()); } 1 } /// Apply a batch of prefix-rename operations in order. Returns total tags modified. /// /// ``` /// # use tagtree::{TagIndex, batch_rename}; /// let mut idx = TagIndex::new(vec![ /// "genre.electronic.house".into(), /// "mood.dark".into(), /// ]); /// let ops = [("genre.electronic", "genre.dance"), ("mood", "vibe")]; /// let total = batch_rename(&ops, &mut idx); /// assert_eq!(total, 2); /// assert!(idx.contains("genre.dance.house")); /// assert!(idx.contains("vibe.dark")); /// ``` pub fn batch_rename(operations: &[(&str, &str)], index: &mut TagIndex) -> usize { let mut total = 0; for &(old, new) in operations { total += rename_prefix_bulk(old, new, index); } total } // --------------------------------------------------------------------------- // In-memory suggestion index // --------------------------------------------------------------------------- /// In-memory tag index for keystroke-speed autocomplete. /// /// Backed by a sorted `Vec` — binary search for path-prefix matching /// (O(log n) to find the start, then linear scan for results), with a segment-prefix /// fallback for mid-tag matches. At typical corpus sizes (hundreds to low thousands /// of tags), the entire `suggest` call completes in single-digit microseconds. /// /// ``` /// use tagtree::TagIndex; /// /// let mut idx = TagIndex::new(vec![ /// "genre.electronic.house".into(), /// "genre.electronic.techno".into(), /// "genre.rock".into(), /// "mood.dark".into(), /// "mood.upbeat".into(), /// ]); /// /// // Path prefix: binary search /// assert_eq!(idx.suggest("genre.el", 10), vec![ /// "genre.electronic.house", /// "genre.electronic.techno", /// ]); /// /// // Segment prefix: finds "mood.dark" because "dark" starts with "dar" /// assert_eq!(idx.suggest("dar", 10), vec!["mood.dark"]); /// /// // Mixed: path prefix hits first, segment hits fill remainder /// assert_eq!(idx.suggest("mood", 10), vec!["mood.dark", "mood.upbeat"]); /// /// // Incremental update /// idx.insert("mood.chill".into()); /// assert_eq!(idx.suggest("mood", 10), vec!["mood.chill", "mood.dark", "mood.upbeat"]); /// ``` #[derive(Debug, Clone)] pub struct TagIndex { /// All known tags, sorted lexicographically and deduplicated. tags: Vec, /// All unique segments across all tags, sorted. Used as a fast gate for /// tier-2 segment matching: if no segment starts with the input, the full /// tag scan is skipped entirely. This turns the no-match worst case from /// O(n * avg_segments) to O(log m) where m = unique segment count. segments: Vec, } impl TagIndex { /// Build an index from an iterable of tag strings. /// /// Duplicates are removed. The input does not need to be sorted. pub fn new(tags: impl IntoIterator) -> Self { let mut tags: Vec = tags.into_iter().collect(); tags.sort_unstable(); tags.dedup(); let segments = Self::build_segments(&tags); Self { tags, segments } } /// Build an empty index. pub fn empty() -> Self { Self { tags: Vec::new(), segments: Vec::new(), } } fn build_segments(tags: &[String]) -> Vec { let mut segs: Vec = tags .iter() .flat_map(|t| t.split(SEPARATOR).map(|s| s.to_string())) .collect(); segs.sort_unstable(); segs.dedup(); segs } /// Number of unique tags in the index. pub fn len(&self) -> usize { self.tags.len() } /// Whether the index is empty. pub fn is_empty(&self) -> bool { self.tags.is_empty() } /// Check if a tag exists in the index. pub fn contains(&self, tag: &str) -> bool { self.tags.binary_search_by(|t| t.as_str().cmp(tag)).is_ok() } /// Insert a tag into the index. Maintains sorted order. Idempotent. pub fn insert(&mut self, tag: String) { if let Err(pos) = self.tags.binary_search(&tag) { // Add new segments for seg in tag.split(SEPARATOR) { let s = seg.to_string(); if let Err(sp) = self.segments.binary_search(&s) { self.segments.insert(sp, s); } } self.tags.insert(pos, tag); } } /// Remove a tag from the index. Returns `true` if it was present. /// /// Note: orphaned segments are not eagerly pruned. Call [`rebuild`](Self::rebuild) /// if exact segment accuracy matters after many removals. pub fn remove(&mut self, tag: &str) -> bool { if let Ok(pos) = self.tags.binary_search_by(|t| t.as_str().cmp(tag)) { self.tags.remove(pos); true } else { false } } /// Replace the entire tag set. Useful when reloading from a database. pub fn rebuild(&mut self, tags: impl IntoIterator) { self.tags = tags.into_iter().collect(); self.tags.sort_unstable(); self.tags.dedup(); self.segments = Self::build_segments(&self.tags); } /// Iterate all tags in sorted order. pub fn iter(&self) -> impl Iterator { self.tags.iter().map(|s| s.as_str()) } /// Check if any segment in the index starts with `prefix` (binary search, O(log m)). fn any_segment_starts_with(&self, prefix: &str) -> bool { let start = self.segments.partition_point(|s| s.as_str() < prefix); start < self.segments.len() && self.segments[start].starts_with(prefix) } /// Suggest tags matching `input`, returning up to `limit` results. /// /// **Matching strategy (two tiers, in order):** /// /// 1. **Path prefix** — tags whose full path starts with `input`. /// Found via binary search, so this is O(log n + k). /// Example: `"genre.el"` matches `"genre.electronic.house"`. /// /// 2. **Segment prefix** — tags where any individual segment starts with `input`. /// Only runs if tier 1 didn't fill the limit AND `input` contains no dots. /// A segment index gates entry: if no known segment starts with `input`, /// the scan is skipped in O(log m). Otherwise, linear scan over tags. /// Example: `"hou"` matches `"genre.electronic.house"` (segment `"house"`). /// /// Results are returned in sorted order within each tier, with tier-1 matches /// always before tier-2 matches. pub fn suggest(&self, input: &str, limit: usize) -> Vec<&str> { if input.is_empty() || limit == 0 { return Vec::new(); } let mut results = Vec::with_capacity(limit.min(32)); // Tier 1: path prefix via binary search let start = self.tags.partition_point(|t| t.as_str() < input); for tag in &self.tags[start..] { if results.len() >= limit { return results; } if !tag.starts_with(input) { break; } results.push(tag.as_str()); } // Tier 2: segment prefix (only for single-segment input) if results.len() < limit && !input.contains(SEPARATOR) && self.any_segment_starts_with(input) { for tag in &self.tags { if results.len() >= limit { break; } // Skip anything already matched by tier 1 if tag.starts_with(input) { continue; } // Check if any non-first segment starts with input. let mut first = true; for seg in tag.split(SEPARATOR) { if !first && seg.starts_with(input) { results.push(tag.as_str()); break; } first = false; } } } results } /// Like [`suggest`](Self::suggest), but also returns exact-match status. /// /// Returns `(suggestions, exact_exists)` where `exact_exists` is `true` if /// `input` itself is already a tag in the index. This lets the UI distinguish /// between "this tag exists, you're reusing it" and "this is a new tag." pub fn suggest_with_status(&self, input: &str, limit: usize) -> (Vec<&str>, bool) { let exact = self.contains(input); let suggestions = self.suggest(input, limit); (suggestions, exact) } /// Suggest tags tolerating typos. Returns up to `limit` results scored by /// edit distance (exact/prefix matches first, then fuzzy, sorted by distance). /// /// Runs the same two tiers as [`suggest`](Self::suggest), then adds a third /// fuzzy tier using segment-level Levenshtein distance (threshold ≤ 2). /// /// ``` /// use tagtree::TagIndex; /// /// let idx = TagIndex::new(vec![ /// "genre.electronic.house".into(), /// "genre.rock".into(), /// "mood.dark".into(), /// ]); /// /// // Typo: "genr" → matches "genre" segment (distance 1) /// assert!(idx.suggest_fuzzy("genr", 10).contains(&"genre.rock")); /// /// // Exact prefix still returned first /// let results = idx.suggest_fuzzy("genre", 10); /// assert_eq!(results[0], "genre.electronic.house"); /// ``` pub fn suggest_fuzzy(&self, input: &str, limit: usize) -> Vec<&str> { if input.is_empty() || limit == 0 { return Vec::new(); } // Tiers 1+2: reuse existing suggest let mut results = self.suggest(input, limit); if results.len() >= limit { return results; } // Tier 3: fuzzy segment matching via Levenshtein let mut fuzzy_hits: Vec<(&str, usize)> = Vec::new(); for tag in &self.tags { if results.contains(&tag.as_str()) { continue; } let mut best = usize::MAX; for seg in tag.split(SEPARATOR) { if let Some(d) = edit_distance(input, seg, 2) { best = best.min(d); if best == 0 { break; } } } if best <= 2 { fuzzy_hits.push((tag.as_str(), best)); } } fuzzy_hits.sort_by(|a, b| a.1.cmp(&b.1).then_with(|| a.0.cmp(b.0))); for (tag, _) in fuzzy_hits { if results.len() >= limit { break; } results.push(tag); } results } } impl Default for TagIndex { fn default() -> Self { Self::empty() } } // --------------------------------------------------------------------------- // Tests // --------------------------------------------------------------------------- #[cfg(test)] mod tests { use super::*; // --- TagConfig & validate_with --- const STRICT: TagConfig = TagConfig { max_depth: 3, max_length: 30, semantic_depth: 1, }; const LOOSE: TagConfig = TagConfig { max_depth: 8, max_length: 200, semantic_depth: 0, }; #[test] fn config_enforces_max_depth() { let cfg = TagConfig { max_depth: 3, max_length: 100, semantic_depth: 0, }; assert!(validate_with("a.b.c", &cfg).is_ok()); assert!(validate_with("a.b.c.d", &cfg).is_err()); } #[test] fn config_enforces_max_length() { let cfg = TagConfig { max_depth: 10, max_length: 10, semantic_depth: 0, }; assert!(validate_with("abcdefghij", &cfg).is_ok()); // 10 chars assert!(validate_with("abcdefghijk", &cfg).is_err()); // 11 chars } #[test] fn config_enforces_semantic_depth() { // semantic_depth=1 requires at least 2 segments assert!(validate_with("genre.rock", &STRICT).is_ok()); assert!(validate_with("genre.rock.punk", &STRICT).is_ok()); assert!(validate_with("rock", &STRICT).is_err()); // only 1 segment // semantic_depth=0 allows single segments assert!(validate_with("rock", &LOOSE).is_ok()); } #[test] fn config_semantic_depth_2() { let cfg = TagConfig { max_depth: 5, max_length: 100, semantic_depth: 2, }; assert!(validate_with("a.b.c", &cfg).is_ok()); // 3 segments >= 2+1 assert!(validate_with("a.b", &cfg).is_err()); // 2 segments < 2+1 assert!(validate_with("a", &cfg).is_err()); } // --- Default validate (backwards compat) --- #[test] fn valid_tags() { assert!(validate("genre").is_ok()); assert!(validate("genre.electronic").is_ok()); assert!(validate("genre.electronic.house").is_ok()); assert!(validate("instrument.drum.kick.808").is_ok()); assert!(validate("source.field-recording").is_ok()); assert!(validate("a.b.c.d.e").is_ok()); assert!(validate("a").is_ok()); assert!(validate("123").is_ok()); assert!(validate("a-b-c").is_ok()); } #[test] fn reject_empty() { assert!(validate("").is_err()); } #[test] fn reject_uppercase() { assert!(validate("Genre").is_err()); assert!(validate("genre.Electronic").is_err()); assert!(validate("LOUD").is_err()); } #[test] fn reject_spaces() { assert!(validate("genre electronic").is_err()); assert!(validate(" genre").is_err()); } #[test] fn reject_special_chars() { assert!(validate("genre/electronic").is_err()); assert!(validate("genre:electronic").is_err()); assert!(validate("genre_electronic").is_err()); assert!(validate("genre@electronic").is_err()); } #[test] fn reject_leading_trailing_dots() { assert!(validate(".genre").is_err()); assert!(validate("genre.").is_err()); assert!(validate(".").is_err()); } #[test] fn reject_consecutive_dots() { assert!(validate("genre..electronic").is_err()); } #[test] fn reject_too_many_levels_default() { assert!(validate("a.b.c.d.e.f").is_err()); // 6 levels assert!(validate("a.b.c.d.e").is_ok()); // 5 levels } #[test] fn reject_too_long_default() { let long = "a".repeat(101); assert!(validate(&long).is_err()); let ok = "a".repeat(100); assert!(validate(&ok).is_ok()); } // --- segment --- #[test] fn segment_indices() { assert_eq!(segment("genre.electronic.house", 0), Some("genre")); assert_eq!(segment("genre.electronic.house", 1), Some("electronic")); assert_eq!(segment("genre.electronic.house", 2), Some("house")); assert_eq!(segment("genre.electronic.house", 3), None); } #[test] fn segment_single() { assert_eq!(segment("genre", 0), Some("genre")); assert_eq!(segment("genre", 1), None); } // --- prefix_at_depth --- #[test] fn prefix_at_depth_values() { assert_eq!(prefix_at_depth("a.b.c.d", 0), None); assert_eq!(prefix_at_depth("a.b.c.d", 1), Some("a")); assert_eq!(prefix_at_depth("a.b.c.d", 2), Some("a.b")); assert_eq!(prefix_at_depth("a.b.c.d", 3), Some("a.b.c")); assert_eq!(prefix_at_depth("a.b.c.d", 4), Some("a.b.c.d")); assert_eq!(prefix_at_depth("a.b.c.d", 5), None); } #[test] fn prefix_at_depth_single() { assert_eq!(prefix_at_depth("genre", 1), Some("genre")); assert_eq!(prefix_at_depth("genre", 2), None); } // --- semantic_prefix / free_suffix --- #[test] fn semantic_prefix_depth_1() { assert_eq!(semantic_prefix("genre.electronic.house", 1), Some("genre")); assert_eq!(free_suffix("genre.electronic.house", 1), Some("electronic.house")); } #[test] fn semantic_prefix_depth_2() { assert_eq!(semantic_prefix("type.audio.podcast", 2), Some("type.audio")); assert_eq!(free_suffix("type.audio.podcast", 2), Some("podcast")); } #[test] fn semantic_prefix_depth_0() { assert_eq!(semantic_prefix("genre.rock", 0), None); assert_eq!(free_suffix("genre.rock", 0), Some("genre.rock")); } #[test] fn free_suffix_at_boundary() { // Tag is exactly the semantic prefix length — no suffix assert_eq!(free_suffix("genre", 1), None); assert_eq!(free_suffix("type.audio", 2), None); } #[test] fn free_suffix_empty_tag() { assert_eq!(free_suffix("", 0), None); } // --- Parent --- #[test] fn parent_of_nested() { assert_eq!(parent("a.b.c"), Some("a.b")); assert_eq!(parent("a.b"), Some("a")); } #[test] fn parent_of_root() { assert_eq!(parent("a"), None); } // --- Leaf --- #[test] fn leaf_of_nested() { assert_eq!(leaf("a.b.c"), "c"); assert_eq!(leaf("genre.electronic.house"), "house"); } #[test] fn leaf_of_root() { assert_eq!(leaf("genre"), "genre"); } // --- Depth --- #[test] fn depth_levels() { assert_eq!(depth(""), 0); assert_eq!(depth("a"), 1); assert_eq!(depth("a.b"), 2); assert_eq!(depth("a.b.c"), 3); assert_eq!(depth("a.b.c.d.e"), 5); } // --- Ancestors --- #[test] fn ancestors_chain() { assert_eq!( ancestors("genre.electronic.house"), vec!["genre", "genre.electronic"] ); } #[test] fn ancestors_single_level() { let result: Vec<&str> = ancestors("genre"); assert!(result.is_empty()); } #[test] fn ancestors_two_levels() { assert_eq!(ancestors("genre.rock"), vec!["genre"]); } // --- is_ancestor_of --- #[test] fn ancestor_relationship() { assert!(is_ancestor_of("genre", "genre.electronic")); assert!(is_ancestor_of("genre", "genre.electronic.house")); assert!(is_ancestor_of("genre.electronic", "genre.electronic.house")); } #[test] fn not_ancestor_of_self() { assert!(!is_ancestor_of("genre", "genre")); } #[test] fn not_ancestor_reversed() { assert!(!is_ancestor_of("genre.electronic", "genre")); } #[test] fn not_ancestor_partial_segment() { assert!(!is_ancestor_of("gen", "genre.rock")); assert!(!is_ancestor_of("genre.electro", "genre.electronic.house")); } #[test] fn not_ancestor_unrelated() { assert!(!is_ancestor_of("genre", "mood.dark")); } // --- common_ancestor --- #[test] fn common_ancestor_siblings() { assert_eq!( common_ancestor("genre.electronic.house", "genre.electronic.techno"), Some("genre.electronic") ); } #[test] fn common_ancestor_cousins() { assert_eq!( common_ancestor("genre.rock.punk", "genre.electronic.house"), Some("genre") ); } #[test] fn common_ancestor_parent_child() { assert_eq!(common_ancestor("genre", "genre.rock"), Some("genre")); assert_eq!(common_ancestor("genre.rock", "genre"), Some("genre")); } #[test] fn common_ancestor_none() { assert_eq!(common_ancestor("genre.rock", "mood.dark"), None); } #[test] fn common_ancestor_same_tag() { assert_eq!(common_ancestor("genre", "genre"), None); } #[test] fn common_ancestor_partial_segment_no_match() { assert_eq!(common_ancestor("genre.rock", "genetic.code"), None); } // --- children_at_prefix --- #[test] fn children_at_root() { let tags = [ "genre.electronic.house", "genre.rock", "mood.dark", "instrument.kick", ]; assert_eq!( children_at_prefix("", &tags), vec!["genre", "instrument", "mood"] ); } #[test] fn children_one_level() { let tags = [ "genre.electronic.house", "genre.electronic.techno", "genre.rock", ]; assert_eq!( children_at_prefix("genre", &tags), vec!["electronic", "rock"] ); } #[test] fn children_two_levels() { let tags = [ "genre.electronic.house", "genre.electronic.techno", ]; assert_eq!( children_at_prefix("genre.electronic", &tags), vec!["house", "techno"] ); } #[test] fn children_at_leaf() { let tags = ["genre.electronic.house"]; let result = children_at_prefix("genre.electronic.house", &tags); assert!(result.is_empty()); } #[test] fn children_no_partial_segment_match() { let tags = ["genre.rock"]; let result = children_at_prefix("gen", &tags); assert!(result.is_empty()); } #[test] fn children_with_string_vec() { let tags: Vec = vec![ "genre.rock".to_string(), "genre.electronic".to_string(), ]; assert_eq!(children_at_prefix("genre", &tags), vec!["electronic", "rock"]); } // --- subtree --- #[test] fn subtree_includes_self_and_descendants() { let tags = [ "genre.electronic", "genre.electronic.house", "genre.electronic.techno", "genre.rock", "mood.dark", ]; assert_eq!( subtree("genre.electronic", &tags), vec![ "genre.electronic", "genre.electronic.house", "genre.electronic.techno", ] ); } #[test] fn subtree_no_partial_segment() { let tags = ["genre.rock", "gen.something"]; assert_eq!(subtree("gen", &tags), vec!["gen.something"]); } #[test] fn subtree_empty() { let tags = ["genre.rock", "mood.dark"]; let result = subtree("instrument", &tags); assert!(result.is_empty()); } // --- escape_like --- #[test] fn escape_like_special_chars() { assert_eq!(escape_like("100%"), r"100\%"); assert_eq!(escape_like("a_b"), r"a\_b"); assert_eq!(escape_like(r"a\b"), r"a\\b"); assert_eq!(escape_like("normal"), "normal"); } // --- like_descendant_pattern --- #[test] fn like_descendant_pattern_simple() { assert_eq!(like_descendant_pattern("genre"), "genre.%"); assert_eq!(like_descendant_pattern("genre.electronic"), "genre.electronic.%"); } #[test] fn like_descendant_pattern_escapes() { assert_eq!(like_descendant_pattern("a%b"), r"a\%b.%"); } // --- rename_prefix --- #[test] fn rename_descendant() { assert_eq!( rename_prefix("genre.electronic", "genre.dance", "genre.electronic.house"), Some("genre.dance.house".to_string()) ); } #[test] fn rename_exact_match() { assert_eq!( rename_prefix("genre.electronic", "genre.dance", "genre.electronic"), Some("genre.dance".to_string()) ); } #[test] fn rename_no_match() { assert_eq!( rename_prefix("genre.electronic", "genre.dance", "mood.dark"), None ); } #[test] fn rename_partial_segment_no_match() { assert_eq!( rename_prefix("gen", "xxx", "genre.rock"), None ); } #[test] fn rename_preserves_deep_suffix() { assert_eq!( rename_prefix("a", "x", "a.b.c.d"), Some("x.b.c.d".to_string()) ); } // --- TagIndex --- fn test_index() -> TagIndex { TagIndex::new(vec![ "genre.electronic.house".into(), "genre.electronic.techno".into(), "genre.rock".into(), "genre.rock.punk".into(), "instrument.drum.kick".into(), "instrument.drum.snare".into(), "instrument.synth".into(), "mood.dark".into(), "mood.upbeat".into(), "source.field-recording".into(), ]) } #[test] fn index_new_deduplicates_and_sorts() { let idx = TagIndex::new(vec![ "b".into(), "a".into(), "b".into(), "c".into(), ]); assert_eq!(idx.len(), 3); assert_eq!(idx.iter().collect::>(), vec!["a", "b", "c"]); } #[test] fn index_contains() { let idx = test_index(); assert!(idx.contains("genre.rock")); assert!(idx.contains("mood.dark")); assert!(!idx.contains("genre")); assert!(!idx.contains("nonexistent")); } #[test] fn index_insert_and_remove() { let mut idx = TagIndex::empty(); assert!(idx.is_empty()); idx.insert("genre.rock".into()); assert_eq!(idx.len(), 1); assert!(idx.contains("genre.rock")); // Idempotent insert idx.insert("genre.rock".into()); assert_eq!(idx.len(), 1); // Remove assert!(idx.remove("genre.rock")); assert!(!idx.contains("genre.rock")); assert!(idx.is_empty()); // Remove nonexistent assert!(!idx.remove("nope")); } #[test] fn index_rebuild() { let mut idx = test_index(); assert_eq!(idx.len(), 10); idx.rebuild(vec!["a".into(), "b".into()]); assert_eq!(idx.len(), 2); assert_eq!(idx.iter().collect::>(), vec!["a", "b"]); } // --- suggest: tier 1 (path prefix) --- #[test] fn suggest_full_path_prefix() { let idx = test_index(); assert_eq!( idx.suggest("genre.electronic", 10), vec!["genre.electronic.house", "genre.electronic.techno"] ); } #[test] fn suggest_partial_segment_in_path() { let idx = test_index(); // "genre.el" matches "genre.electronic.*" assert_eq!( idx.suggest("genre.el", 10), vec!["genre.electronic.house", "genre.electronic.techno"] ); } #[test] fn suggest_root_prefix() { let idx = test_index(); let results = idx.suggest("genre", 10); assert_eq!(results, vec![ "genre.electronic.house", "genre.electronic.techno", "genre.rock", "genre.rock.punk", ]); } #[test] fn suggest_exact_tag() { let idx = test_index(); // "mood.dark" is an exact tag — should still appear assert_eq!(idx.suggest("mood.dark", 10), vec!["mood.dark"]); } #[test] fn suggest_no_match() { let idx = test_index(); assert!(idx.suggest("zzz", 10).is_empty()); } #[test] fn suggest_empty_input() { let idx = test_index(); assert!(idx.suggest("", 10).is_empty()); } #[test] fn suggest_zero_limit() { let idx = test_index(); assert!(idx.suggest("genre", 0).is_empty()); } #[test] fn suggest_respects_limit() { let idx = test_index(); let results = idx.suggest("genre", 2); assert_eq!(results.len(), 2); assert_eq!(results, vec!["genre.electronic.house", "genre.electronic.techno"]); } // --- suggest: tier 2 (segment prefix) --- #[test] fn suggest_segment_prefix_mid_tag() { let idx = test_index(); // "hou" doesn't match any tag path-prefix, but "house" starts with "hou" assert_eq!(idx.suggest("hou", 10), vec!["genre.electronic.house"]); } #[test] fn suggest_segment_prefix_finds_multiple() { let idx = test_index(); // "drum" matches instrument.drum.kick and instrument.drum.snare via segment // But it also matches as a path prefix of "instrument.drum" — wait, no. // "drum" as path prefix: tags starting with "drum" — none. // "drum" as segment prefix: "instrument.drum.kick" has segment "drum". assert_eq!( idx.suggest("drum", 10), vec!["instrument.drum.kick", "instrument.drum.snare"] ); } #[test] fn suggest_segment_prefix_partial() { let idx = test_index(); // "dar" matches "mood.dark" via segment "dark" starting with "dar" assert_eq!(idx.suggest("dar", 10), vec!["mood.dark"]); } #[test] fn suggest_no_segment_scan_when_input_has_dot() { let idx = test_index(); // "electronic.hou" is a dotted input — tier 2 should NOT run // This means only path-prefix matching, which finds nothing // (no tag starts with "electronic.hou") assert!(idx.suggest("electronic.hou", 10).is_empty()); } #[test] fn suggest_path_prefix_before_segment_prefix() { let idx = TagIndex::new(vec![ "rock.classic".into(), "genre.rock".into(), "genre.rock.punk".into(), ]); // "rock" matches "rock.classic" as path prefix (tier 1) // and "genre.rock", "genre.rock.punk" as segment prefix (tier 2) let results = idx.suggest("rock", 10); assert_eq!(results, vec![ "rock.classic", // tier 1 "genre.rock", // tier 2 "genre.rock.punk", // tier 2 ]); } #[test] fn suggest_tier2_skips_tier1_matches() { let idx = TagIndex::new(vec![ "mood.dark".into(), "mood.dark.brooding".into(), ]); // "mood" matches both via path prefix. Tier 2 should not duplicate. let results = idx.suggest("mood", 10); assert_eq!(results, vec!["mood.dark", "mood.dark.brooding"]); } #[test] fn suggest_segment_only_matches_segment_start() { let idx = test_index(); // "ock" should NOT match "genre.rock" — we match segment starts, not substrings assert!(idx.suggest("ock", 10).is_empty()); } // --- suggest_with_status --- #[test] fn suggest_with_status_exact_exists() { let idx = test_index(); let (suggestions, exact) = idx.suggest_with_status("genre.rock", 10); assert!(exact); assert!(suggestions.contains(&"genre.rock")); } #[test] fn suggest_with_status_exact_missing() { let idx = test_index(); let (suggestions, exact) = idx.suggest_with_status("genre.el", 10); assert!(!exact); assert!(!suggestions.is_empty()); } #[test] fn suggest_with_status_novel_tag() { let idx = test_index(); let (_suggestions, exact) = idx.suggest_with_status("brand-new-tag", 10); assert!(!exact); } // --- edit_distance --- #[test] fn edit_distance_identical() { assert_eq!(edit_distance("genre", "genre", 2), Some(0)); } #[test] fn edit_distance_single_substitution() { assert_eq!(edit_distance("rock", "rack", 2), Some(1)); } #[test] fn edit_distance_single_insertion() { assert_eq!(edit_distance("genr", "genre", 2), Some(1)); } #[test] fn edit_distance_single_deletion() { assert_eq!(edit_distance("genre", "genr", 2), Some(1)); } #[test] fn edit_distance_transposition_as_two_ops() { // "ab" → "ba" is 2 (sub+sub), not 1 (no Damerau) assert_eq!(edit_distance("ab", "ba", 2), Some(2)); } #[test] fn edit_distance_exceeds_threshold() { assert_eq!(edit_distance("abc", "xyz", 2), None); } #[test] fn edit_distance_length_shortcircuit() { assert_eq!(edit_distance("a", "abcd", 2), None); } #[test] fn edit_distance_empty_strings() { assert_eq!(edit_distance("", "", 2), Some(0)); assert_eq!(edit_distance("", "ab", 2), Some(2)); assert_eq!(edit_distance("ab", "", 2), Some(2)); assert_eq!(edit_distance("", "abc", 2), None); } // --- suggest_fuzzy --- #[test] fn suggest_fuzzy_typo_in_root_segment() { let idx = test_index(); // "genr" → "genre" (distance 1) let results = idx.suggest_fuzzy("genr", 10); assert!(results.contains(&"genre.electronic.house")); assert!(results.contains(&"genre.rock")); } #[test] fn suggest_fuzzy_typo_in_leaf_segment() { let idx = test_index(); // "hous" → "house" (distance 1) let results = idx.suggest_fuzzy("hous", 10); assert!(results.contains(&"genre.electronic.house")); } #[test] fn suggest_fuzzy_exact_prefix_first() { let idx = test_index(); // "mood" matches exactly via tier 1 path prefix let results = idx.suggest_fuzzy("mood", 10); assert_eq!(results[0], "mood.dark"); assert_eq!(results[1], "mood.upbeat"); } #[test] fn suggest_fuzzy_no_false_positives() { let idx = test_index(); // "zzz" is too far from anything assert!(idx.suggest_fuzzy("zzz", 10).is_empty()); } #[test] fn suggest_fuzzy_respects_limit() { let idx = test_index(); let results = idx.suggest_fuzzy("genr", 2); assert_eq!(results.len(), 2); } #[test] fn suggest_fuzzy_distance_2() { let idx = test_index(); // "drak" → "dark" (distance 2: transposition = sub+sub) let results = idx.suggest_fuzzy("drak", 10); assert!(results.contains(&"mood.dark")); } #[test] fn suggest_fuzzy_empty_input() { let idx = test_index(); assert!(idx.suggest_fuzzy("", 10).is_empty()); } #[test] fn suggest_fuzzy_sorted_by_distance() { let idx = TagIndex::new(vec![ "mood.dark".into(), "mood.dork".into(), "mood.dxxx".into(), ]); // "dark" → segment "dark"=0 (tier 2 hit), "dork"=1, "dxxx"=3 (exceeds threshold) let results = idx.suggest_fuzzy("dark", 10); // tier 2 picks up mood.dark (segment "dark" starts_with "dark") // tier 3 fuzzy adds mood.dork (distance 1) // mood.dxxx not matched (distance > 2) assert_eq!(results.len(), 2); assert_eq!(results[0], "mood.dark"); assert_eq!(results[1], "mood.dork"); } // --- rename_prefix_bulk --- #[test] fn rename_prefix_bulk_renames_matching() { let mut idx = TagIndex::new(vec![ "genre.electronic.house".into(), "genre.electronic.techno".into(), "genre.rock".into(), ]); assert_eq!(rename_prefix_bulk("genre.electronic", "genre.dance", &mut idx), 2); assert!(idx.contains("genre.dance.house")); assert!(idx.contains("genre.dance.techno")); assert!(idx.contains("genre.rock")); assert!(!idx.contains("genre.electronic.house")); } #[test] fn rename_prefix_bulk_no_match() { let mut idx = TagIndex::new(vec!["genre.rock".into()]); assert_eq!(rename_prefix_bulk("mood", "vibe", &mut idx), 0); assert!(idx.contains("genre.rock")); } // --- remove_subtree --- #[test] fn remove_subtree_removes_prefix_and_descendants() { let mut idx = TagIndex::new(vec![ "genre.electronic.house".into(), "genre.electronic.techno".into(), "genre.electronic".into(), "genre.rock".into(), ]); assert_eq!(remove_subtree("genre.electronic", &mut idx), 3); assert_eq!(idx.len(), 1); assert!(idx.contains("genre.rock")); } #[test] fn remove_subtree_no_match() { let mut idx = TagIndex::new(vec!["genre.rock".into()]); assert_eq!(remove_subtree("mood", &mut idx), 0); assert_eq!(idx.len(), 1); } // --- merge_tags --- #[test] fn merge_tags_replaces_source() { let mut idx = TagIndex::new(vec![ "genre.electronic".into(), "genre.rock".into(), ]); assert_eq!(merge_tags("genre.electronic", "genre.dance", &mut idx), 1); assert!(idx.contains("genre.dance")); assert!(idx.contains("genre.rock")); assert!(!idx.contains("genre.electronic")); } #[test] fn merge_tags_deduplicates() { let mut idx = TagIndex::new(vec![ "genre.electronic".into(), "genre.dance".into(), ]); assert_eq!(merge_tags("genre.electronic", "genre.dance", &mut idx), 1); assert_eq!(idx.len(), 1); assert!(idx.contains("genre.dance")); } #[test] fn merge_tags_source_missing() { let mut idx = TagIndex::new(vec!["genre.rock".into()]); assert_eq!(merge_tags("genre.electronic", "genre.dance", &mut idx), 0); assert_eq!(idx.len(), 1); } // --- batch_rename --- #[test] fn batch_rename_multiple_ops() { let mut idx = TagIndex::new(vec![ "genre.electronic.house".into(), "mood.dark".into(), ]); let ops = [("genre.electronic", "genre.dance"), ("mood", "vibe")]; assert_eq!(batch_rename(&ops, &mut idx), 2); assert!(idx.contains("genre.dance.house")); assert!(idx.contains("vibe.dark")); } #[test] fn batch_rename_chained() { let mut idx = TagIndex::new(vec!["a.b.c".into()]); let ops = [("a", "x"), ("x.b", "x.y")]; assert_eq!(batch_rename(&ops, &mut idx), 2); assert!(idx.contains("x.y.c")); } }