Skip to main content

max / tagtree

Initial commit: tagtree v0.3.0 Hierarchical dot-notation tag standard. 99 tests, criterion benchmarks, 2 deps (serde, thiserror). Integrated into AF, GO, BB, MT, MNW. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
Author: Max J. <87768334+MaxJMath@users.noreply.github.com> · 2026-03-22 04:30 UTC
Commit: d589b200eb45166fbfc9406d11b92324bf1b2caa
5 files changed, +1436 insertions, -0 deletions
A .gitignore +2
@@ -0,0 +1,2 @@
1 + /target/
2 + .DS_Store
A Cargo.lock +500
@@ -0,0 +1,561 @@
1 + # This file is automatically @generated by Cargo.
2 + # It is not intended for manual editing.
3 + version = 4
4 +
5 + [[package]]
6 + name = "aho-corasick"
7 + version = "1.1.4"
8 + source = "registry+https://github.com/rust-lang/crates.io-index"
9 + checksum = "ddd31a130427c27518df266943a5308ed92d4b226cc639f5a8f1002816174301"
10 + dependencies = [
11 + "memchr",
12 + ]
13 +
14 + [[package]]
15 + name = "anes"
16 + version = "0.1.6"
17 + source = "registry+https://github.com/rust-lang/crates.io-index"
18 + checksum = "4b46cbb362ab8752921c97e041f5e366ee6297bd428a31275b9fcf1e380f7299"
19 +
20 + [[package]]
21 + name = "anstyle"
22 + version = "1.0.14"
23 + source = "registry+https://github.com/rust-lang/crates.io-index"
24 + checksum = "940b3a0ca603d1eade50a4846a2afffd5ef57a9feac2c0e2ec2e14f9ead76000"
25 +
26 + [[package]]
27 + name = "autocfg"
28 + version = "1.5.0"
29 + source = "registry+https://github.com/rust-lang/crates.io-index"
30 + checksum = "c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8"
31 +
32 + [[package]]
33 + name = "bumpalo"
34 + version = "3.20.2"
35 + source = "registry+https://github.com/rust-lang/crates.io-index"
36 + checksum = "5d20789868f4b01b2f2caec9f5c4e0213b41e3e5702a50157d699ae31ced2fcb"
37 +
38 + [[package]]
39 + name = "cast"
40 + version = "0.3.0"
41 + source = "registry+https://github.com/rust-lang/crates.io-index"
42 + checksum = "37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5"
43 +
44 + [[package]]
45 + name = "cfg-if"
46 + version = "1.0.4"
47 + source = "registry+https://github.com/rust-lang/crates.io-index"
48 + checksum = "9330f8b2ff13f34540b44e946ef35111825727b38d33286ef986142615121801"
49 +
50 + [[package]]
51 + name = "ciborium"
52 + version = "0.2.2"
53 + source = "registry+https://github.com/rust-lang/crates.io-index"
54 + checksum = "42e69ffd6f0917f5c029256a24d0161db17cea3997d185db0d35926308770f0e"
55 + dependencies = [
56 + "ciborium-io",
57 + "ciborium-ll",
58 + "serde",
59 + ]
60 +
61 + [[package]]
62 + name = "ciborium-io"
63 + version = "0.2.2"
64 + source = "registry+https://github.com/rust-lang/crates.io-index"
65 + checksum = "05afea1e0a06c9be33d539b876f1ce3692f4afea2cb41f740e7743225ed1c757"
66 +
67 + [[package]]
68 + name = "ciborium-ll"
69 + version = "0.2.2"
70 + source = "registry+https://github.com/rust-lang/crates.io-index"
71 + checksum = "57663b653d948a338bfb3eeba9bb2fd5fcfaecb9e199e87e1eda4d9e8b240fd9"
72 + dependencies = [
73 + "ciborium-io",
74 + "half",
75 + ]
76 +
77 + [[package]]
78 + name = "clap"
79 + version = "4.6.0"
80 + source = "registry+https://github.com/rust-lang/crates.io-index"
81 + checksum = "b193af5b67834b676abd72466a96c1024e6a6ad978a1f484bd90b85c94041351"
82 + dependencies = [
83 + "clap_builder",
84 + ]
85 +
86 + [[package]]
87 + name = "clap_builder"
88 + version = "4.6.0"
89 + source = "registry+https://github.com/rust-lang/crates.io-index"
90 + checksum = "714a53001bf66416adb0e2ef5ac857140e7dc3a0c48fb28b2f10762fc4b5069f"
91 + dependencies = [
92 + "anstyle",
93 + "clap_lex",
94 + ]
95 +
96 + [[package]]
97 + name = "clap_lex"
98 + version = "1.1.0"
99 + source = "registry+https://github.com/rust-lang/crates.io-index"
100 + checksum = "c8d4a3bb8b1e0c1050499d1815f5ab16d04f0959b233085fb31653fbfc9d98f9"
101 +
102 + [[package]]
103 + name = "criterion"
104 + version = "0.5.1"
105 + source = "registry+https://github.com/rust-lang/crates.io-index"
106 + checksum = "f2b12d017a929603d80db1831cd3a24082f8137ce19c69e6447f54f5fc8d692f"
107 + dependencies = [
108 + "anes",
109 + "cast",
110 + "ciborium",
111 + "clap",
112 + "criterion-plot",
113 + "is-terminal",
114 + "itertools",
115 + "num-traits",
116 + "once_cell",
117 + "oorandom",
118 + "plotters",
119 + "rayon",
120 + "regex",
121 + "serde",
122 + "serde_derive",
123 + "serde_json",
124 + "tinytemplate",
125 + "walkdir",
126 + ]
127 +
128 + [[package]]
129 + name = "criterion-plot"
130 + version = "0.5.0"
131 + source = "registry+https://github.com/rust-lang/crates.io-index"
132 + checksum = "6b50826342786a51a89e2da3a28f1c32b06e387201bc2d19791f622c673706b1"
133 + dependencies = [
134 + "cast",
135 + "itertools",
136 + ]
137 +
138 + [[package]]
139 + name = "crossbeam-deque"
140 + version = "0.8.6"
141 + source = "registry+https://github.com/rust-lang/crates.io-index"
142 + checksum = "9dd111b7b7f7d55b72c0a6ae361660ee5853c9af73f70c3c2ef6858b950e2e51"
143 + dependencies = [
144 + "crossbeam-epoch",
145 + "crossbeam-utils",
146 + ]
147 +
148 + [[package]]
149 + name = "crossbeam-epoch"
150 + version = "0.9.18"
151 + source = "registry+https://github.com/rust-lang/crates.io-index"
152 + checksum = "5b82ac4a3c2ca9c3460964f020e1402edd5753411d7737aa39c3714ad1b5420e"
153 + dependencies = [
154 + "crossbeam-utils",
155 + ]
156 +
157 + [[package]]
158 + name = "crossbeam-utils"
159 + version = "0.8.21"
160 + source = "registry+https://github.com/rust-lang/crates.io-index"
161 + checksum = "d0a5c400df2834b80a4c3327b3aad3a4c4cd4de0629063962b03235697506a28"
162 +
163 + [[package]]
164 + name = "crunchy"
165 + version = "0.2.4"
166 + source = "registry+https://github.com/rust-lang/crates.io-index"
167 + checksum = "460fbee9c2c2f33933d720630a6a0bac33ba7053db5344fac858d4b8952d77d5"
168 +
169 + [[package]]
170 + name = "either"
171 + version = "1.15.0"
172 + source = "registry+https://github.com/rust-lang/crates.io-index"
173 + checksum = "48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719"
174 +
175 + [[package]]
176 + name = "half"
177 + version = "2.7.1"
178 + source = "registry+https://github.com/rust-lang/crates.io-index"
179 + checksum = "6ea2d84b969582b4b1864a92dc5d27cd2b77b622a8d79306834f1be5ba20d84b"
180 + dependencies = [
181 + "cfg-if",
182 + "crunchy",
183 + "zerocopy",
184 + ]
185 +
186 + [[package]]
187 + name = "hermit-abi"
188 + version = "0.5.2"
189 + source = "registry+https://github.com/rust-lang/crates.io-index"
190 + checksum = "fc0fef456e4baa96da950455cd02c081ca953b141298e41db3fc7e36b1da849c"
191 +
192 + [[package]]
193 + name = "is-terminal"
194 + version = "0.4.17"
195 + source = "registry+https://github.com/rust-lang/crates.io-index"
196 + checksum = "3640c1c38b8e4e43584d8df18be5fc6b0aa314ce6ebf51b53313d4306cca8e46"
197 + dependencies = [
198 + "hermit-abi",
199 + "libc",
200 + "windows-sys",
201 + ]
202 +
203 + [[package]]
204 + name = "itertools"
205 + version = "0.10.5"
206 + source = "registry+https://github.com/rust-lang/crates.io-index"
207 + checksum = "b0fd2260e829bddf4cb6ea802289de2f86d6a7a690192fbe91b3f46e0f2c8473"
208 + dependencies = [
209 + "either",
210 + ]
211 +
212 + [[package]]
213 + name = "itoa"
214 + version = "1.0.18"
215 + source = "registry+https://github.com/rust-lang/crates.io-index"
216 + checksum = "8f42a60cbdf9a97f5d2305f08a87dc4e09308d1276d28c869c684d7777685682"
217 +
218 + [[package]]
219 + name = "js-sys"
220 + version = "0.3.91"
221 + source = "registry+https://github.com/rust-lang/crates.io-index"
222 + checksum = "b49715b7073f385ba4bc528e5747d02e66cb39c6146efb66b781f131f0fb399c"
223 + dependencies = [
224 + "once_cell",
225 + "wasm-bindgen",
226 + ]
227 +
228 + [[package]]
229 + name = "libc"
230 + version = "0.2.183"
231 + source = "registry+https://github.com/rust-lang/crates.io-index"
232 + checksum = "b5b646652bf6661599e1da8901b3b9522896f01e736bad5f723fe7a3a27f899d"
233 +
234 + [[package]]
235 + name = "memchr"
236 + version = "2.8.0"
237 + source = "registry+https://github.com/rust-lang/crates.io-index"
238 + checksum = "f8ca58f447f06ed17d5fc4043ce1b10dd205e060fb3ce5b979b8ed8e59ff3f79"
239 +
240 + [[package]]
241 + name = "num-traits"
242 + version = "0.2.19"
243 + source = "registry+https://github.com/rust-lang/crates.io-index"
244 + checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841"
245 + dependencies = [
246 + "autocfg",
247 + ]
248 +
249 + [[package]]
250 + name = "once_cell"
251 + version = "1.21.4"
252 + source = "registry+https://github.com/rust-lang/crates.io-index"
253 + checksum = "9f7c3e4beb33f85d45ae3e3a1792185706c8e16d043238c593331cc7cd313b50"
254 +
255 + [[package]]
256 + name = "oorandom"
257 + version = "11.1.5"
258 + source = "registry+https://github.com/rust-lang/crates.io-index"
259 + checksum = "d6790f58c7ff633d8771f42965289203411a5e5c68388703c06e14f24770b41e"
260 +
261 + [[package]]
262 + name = "plotters"
263 + version = "0.3.7"
264 + source = "registry+https://github.com/rust-lang/crates.io-index"
265 + checksum = "5aeb6f403d7a4911efb1e33402027fc44f29b5bf6def3effcc22d7bb75f2b747"
266 + dependencies = [
267 + "num-traits",
268 + "plotters-backend",
269 + "plotters-svg",
270 + "wasm-bindgen",
271 + "web-sys",
272 + ]
273 +
274 + [[package]]
275 + name = "plotters-backend"
276 + version = "0.3.7"
277 + source = "registry+https://github.com/rust-lang/crates.io-index"
278 + checksum = "df42e13c12958a16b3f7f4386b9ab1f3e7933914ecea48da7139435263a4172a"
279 +
280 + [[package]]
281 + name = "plotters-svg"
282 + version = "0.3.7"
283 + source = "registry+https://github.com/rust-lang/crates.io-index"
284 + checksum = "51bae2ac328883f7acdfea3d66a7c35751187f870bc81f94563733a154d7a670"
285 + dependencies = [
286 + "plotters-backend",
287 + ]
288 +
289 + [[package]]
290 + name = "proc-macro2"
291 + version = "1.0.106"
292 + source = "registry+https://github.com/rust-lang/crates.io-index"
293 + checksum = "8fd00f0bb2e90d81d1044c2b32617f68fcb9fa3bb7640c23e9c748e53fb30934"
294 + dependencies = [
295 + "unicode-ident",
296 + ]
297 +
298 + [[package]]
299 + name = "quote"
300 + version = "1.0.45"
301 + source = "registry+https://github.com/rust-lang/crates.io-index"
302 + checksum = "41f2619966050689382d2b44f664f4bc593e129785a36d6ee376ddf37259b924"
303 + dependencies = [
304 + "proc-macro2",
305 + ]
306 +
307 + [[package]]
308 + name = "rayon"
309 + version = "1.11.0"
310 + source = "registry+https://github.com/rust-lang/crates.io-index"
311 + checksum = "368f01d005bf8fd9b1206fb6fa653e6c4a81ceb1466406b81792d87c5677a58f"
312 + dependencies = [
313 + "either",
314 + "rayon-core",
315 + ]
316 +
317 + [[package]]
318 + name = "rayon-core"
319 + version = "1.13.0"
320 + source = "registry+https://github.com/rust-lang/crates.io-index"
321 + checksum = "22e18b0f0062d30d4230b2e85ff77fdfe4326feb054b9783a3460d8435c8ab91"
322 + dependencies = [
323 + "crossbeam-deque",
324 + "crossbeam-utils",
325 + ]
326 +
327 + [[package]]
328 + name = "regex"
329 + version = "1.12.3"
330 + source = "registry+https://github.com/rust-lang/crates.io-index"
331 + checksum = "e10754a14b9137dd7b1e3e5b0493cc9171fdd105e0ab477f51b72e7f3ac0e276"
332 + dependencies = [
333 + "aho-corasick",
334 + "memchr",
335 + "regex-automata",
336 + "regex-syntax",
337 + ]
338 +
339 + [[package]]
340 + name = "regex-automata"
341 + version = "0.4.14"
342 + source = "registry+https://github.com/rust-lang/crates.io-index"
343 + checksum = "6e1dd4122fc1595e8162618945476892eefca7b88c52820e74af6262213cae8f"
344 + dependencies = [
345 + "aho-corasick",
346 + "memchr",
347 + "regex-syntax",
348 + ]
349 +
350 + [[package]]
351 + name = "regex-syntax"
352 + version = "0.8.10"
353 + source = "registry+https://github.com/rust-lang/crates.io-index"
354 + checksum = "dc897dd8d9e8bd1ed8cdad82b5966c3e0ecae09fb1907d58efaa013543185d0a"
355 +
356 + [[package]]
357 + name = "rustversion"
358 + version = "1.0.22"
359 + source = "registry+https://github.com/rust-lang/crates.io-index"
360 + checksum = "b39cdef0fa800fc44525c84ccb54a029961a8215f9619753635a9c0d2538d46d"
361 +
362 + [[package]]
363 + name = "same-file"
364 + version = "1.0.6"
365 + source = "registry+https://github.com/rust-lang/crates.io-index"
366 + checksum = "93fc1dc3aaa9bfed95e02e6eadabb4baf7e3078b0bd1b4d7b6b0b68378900502"
367 + dependencies = [
368 + "winapi-util",
369 + ]
370 +
371 + [[package]]
372 + name = "serde"
373 + version = "1.0.228"
374 + source = "registry+https://github.com/rust-lang/crates.io-index"
375 + checksum = "9a8e94ea7f378bd32cbbd37198a4a91436180c5bb472411e48b5ec2e2124ae9e"
376 + dependencies = [
377 + "serde_core",
378 + "serde_derive",
379 + ]
380 +
381 + [[package]]
382 + name = "serde_core"
383 + version = "1.0.228"
384 + source = "registry+https://github.com/rust-lang/crates.io-index"
385 + checksum = "41d385c7d4ca58e59fc732af25c3983b67ac852c1a25000afe1175de458b67ad"
386 + dependencies = [
387 + "serde_derive",
388 + ]
389 +
390 + [[package]]
391 + name = "serde_derive"
392 + version = "1.0.228"
393 + source = "registry+https://github.com/rust-lang/crates.io-index"
394 + checksum = "d540f220d3187173da220f885ab66608367b6574e925011a9353e4badda91d79"
395 + dependencies = [
396 + "proc-macro2",
397 + "quote",
398 + "syn",
399 + ]
400 +
401 + [[package]]
402 + name = "serde_json"
403 + version = "1.0.149"
404 + source = "registry+https://github.com/rust-lang/crates.io-index"
405 + checksum = "83fc039473c5595ace860d8c4fafa220ff474b3fc6bfdb4293327f1a37e94d86"
406 + dependencies = [
407 + "itoa",
408 + "memchr",
409 + "serde",
410 + "serde_core",
411 + "zmij",
412 + ]
413 +
414 + [[package]]
415 + name = "syn"
416 + version = "2.0.117"
417 + source = "registry+https://github.com/rust-lang/crates.io-index"
418 + checksum = "e665b8803e7b1d2a727f4023456bbbbe74da67099c585258af0ad9c5013b9b99"
419 + dependencies = [
420 + "proc-macro2",
421 + "quote",
422 + "unicode-ident",
423 + ]
424 +
425 + [[package]]
426 + name = "tagtree"
427 + version = "0.3.0"
428 + dependencies = [
429 + "criterion",
430 + ]
431 +
432 + [[package]]
433 + name = "tinytemplate"
434 + version = "1.2.1"
435 + source = "registry+https://github.com/rust-lang/crates.io-index"
436 + checksum = "be4d6b5f19ff7664e8c98d03e2139cb510db9b0a60b55f8e8709b689d939b6bc"
437 + dependencies = [
438 + "serde",
439 + "serde_json",
440 + ]
441 +
442 + [[package]]
443 + name = "unicode-ident"
444 + version = "1.0.24"
445 + source = "registry+https://github.com/rust-lang/crates.io-index"
446 + checksum = "e6e4313cd5fcd3dad5cafa179702e2b244f760991f45397d14d4ebf38247da75"
447 +
448 + [[package]]
449 + name = "walkdir"
450 + version = "2.5.0"
451 + source = "registry+https://github.com/rust-lang/crates.io-index"
452 + checksum = "29790946404f91d9c5d06f9874efddea1dc06c5efe94541a7d6863108e3a5e4b"
453 + dependencies = [
454 + "same-file",
455 + "winapi-util",
456 + ]
457 +
458 + [[package]]
459 + name = "wasm-bindgen"
460 + version = "0.2.114"
461 + source = "registry+https://github.com/rust-lang/crates.io-index"
462 + checksum = "6532f9a5c1ece3798cb1c2cfdba640b9b3ba884f5db45973a6f442510a87d38e"
463 + dependencies = [
464 + "cfg-if",
465 + "once_cell",
466 + "rustversion",
467 + "wasm-bindgen-macro",
468 + "wasm-bindgen-shared",
469 + ]
470 +
471 + [[package]]
472 + name = "wasm-bindgen-macro"
473 + version = "0.2.114"
474 + source = "registry+https://github.com/rust-lang/crates.io-index"
475 + checksum = "18a2d50fcf105fb33bb15f00e7a77b772945a2ee45dcf454961fd843e74c18e6"
476 + dependencies = [
477 + "quote",
478 + "wasm-bindgen-macro-support",
479 + ]
480 +
481 + [[package]]
482 + name = "wasm-bindgen-macro-support"
483 + version = "0.2.114"
484 + source = "registry+https://github.com/rust-lang/crates.io-index"
485 + checksum = "03ce4caeaac547cdf713d280eda22a730824dd11e6b8c3ca9e42247b25c631e3"
486 + dependencies = [
487 + "bumpalo",
488 + "proc-macro2",
489 + "quote",
490 + "syn",
491 + "wasm-bindgen-shared",
492 + ]
493 +
494 + [[package]]
495 + name = "wasm-bindgen-shared"
496 + version = "0.2.114"
497 + source = "registry+https://github.com/rust-lang/crates.io-index"
498 + checksum = "75a326b8c223ee17883a4251907455a2431acc2791c98c26279376490c378c16"
499 + dependencies = [
500 + "unicode-ident",
Lines truncated
A Cargo.toml +13
@@ -0,0 +1,13 @@
1 + [package]
2 + name = "tagtree"
3 + version = "0.3.0"
4 + edition = "2024"
5 + description = "Hierarchical dot-notation tag standard: validation, parsing, tree operations, SQL helpers"
6 + license-file = "LICENSE"
7 +
8 + [dev-dependencies]
9 + criterion = { version = "0.5", features = ["html_reports"] }
10 +
11 + [[bench]]
12 + name = "tagtree_bench"
13 + harness = false
A benches/tagtree_bench.rs +421
@@ -0,0 +1,421 @@
1 + use criterion::{black_box, criterion_group, criterion_main, Criterion};
2 + use tagtree::{TagConfig, TagIndex};
3 +
4 + const AF_CONFIG: TagConfig = TagConfig {
5 + max_depth: 5,
6 + max_length: 100,
7 + semantic_depth: 0,
8 + };
9 +
10 + const GO_CONFIG: TagConfig = TagConfig {
11 + max_depth: 3,
12 + max_length: 60,
13 + semantic_depth: 0,
14 + };
15 +
16 + fn sample_tags() -> Vec<String> {
17 + vec![
18 + "genre", "genre.electronic", "genre.electronic.house",
19 + "genre.electronic.techno", "genre.electronic.ambient",
20 + "genre.rock", "genre.rock.indie", "genre.rock.post-punk",
21 + "genre.jazz", "genre.jazz.bebop", "genre.jazz.fusion",
22 + "mood", "mood.energetic", "mood.calm", "mood.dark",
23 + "source", "source.vinyl", "source.digital", "source.tape",
24 + "instrument", "instrument.guitar", "instrument.synth",
25 + "instrument.drums", "instrument.bass",
26 + "era", "era.60s", "era.70s", "era.80s", "era.90s", "era.2000s",
27 + "bpm.slow", "bpm.medium", "bpm.fast",
28 + "key.c-major", "key.a-minor", "key.d-major",
29 + "quality.high", "quality.medium", "quality.low",
30 + "format.wav", "format.flac", "format.mp3", "format.aiff",
31 + ]
32 + .into_iter()
33 + .map(String::from)
34 + .collect()
35 + }
36 +
37 + fn bench_validate(c: &mut Criterion) {
38 + let mut group = c.benchmark_group("validate");
39 +
40 + group.bench_function("simple_tag", |b| {
41 + b.iter(|| tagtree::validate_with(black_box("genre"), &AF_CONFIG));
42 + });
43 +
44 + group.bench_function("deep_tag", |b| {
45 + b.iter(|| {
46 + tagtree::validate_with(black_box("genre.electronic.house"), &AF_CONFIG)
47 + });
48 + });
49 +
50 + group.bench_function("max_depth_tag", |b| {
51 + b.iter(|| {
52 + tagtree::validate_with(black_box("a.b.c.d.e"), &AF_CONFIG)
53 + });
54 + });
55 +
56 + group.bench_function("invalid_tag", |b| {
57 + b.iter(|| tagtree::validate_with(black_box("UPPER.case"), &AF_CONFIG));
58 + });
59 +
60 + group.bench_function("long_tag", |b| {
61 + let tag = "abcdefghij.abcdefghij.abcdefghij.abcdefghij.abcdefghij";
62 + b.iter(|| tagtree::validate_with(black_box(tag), &AF_CONFIG));
63 + });
64 +
65 + group.bench_function("go_config", |b| {
66 + b.iter(|| tagtree::validate_with(black_box("work.meeting"), &GO_CONFIG));
67 + });
68 +
69 + group.finish();
70 + }
71 +
72 + fn bench_hierarchy(c: &mut Criterion) {
73 + let mut group = c.benchmark_group("hierarchy");
74 +
75 + group.bench_function("parent", |b| {
76 + b.iter(|| tagtree::parent(black_box("genre.electronic.house")));
77 + });
78 +
79 + group.bench_function("leaf", |b| {
80 + b.iter(|| tagtree::leaf(black_box("genre.electronic.house")));
81 + });
82 +
83 + group.bench_function("depth", |b| {
84 + b.iter(|| tagtree::depth(black_box("genre.electronic.house")));
85 + });
86 +
87 + group.bench_function("ancestors", |b| {
88 + b.iter(|| tagtree::ancestors(black_box("genre.electronic.house")));
89 + });
90 +
91 + group.bench_function("is_ancestor_of", |b| {
92 + b.iter(|| {
93 + tagtree::is_ancestor_of(
94 + black_box("genre.electronic"),
95 + black_box("genre.electronic.house"),
96 + )
97 + });
98 + });
99 +
100 + group.bench_function("common_ancestor", |b| {
101 + b.iter(|| {
102 + tagtree::common_ancestor(
103 + black_box("genre.electronic.house"),
104 + black_box("genre.electronic.techno"),
105 + )
106 + });
107 + });
108 +
109 + group.finish();
110 + }
111 +
112 + fn bench_sql(c: &mut Criterion) {
113 + let mut group = c.benchmark_group("sql");
114 +
115 + group.bench_function("escape_like", |b| {
116 + b.iter(|| tagtree::escape_like(black_box("genre%_electronic")));
117 + });
118 +
119 + group.bench_function("like_descendant_pattern", |b| {
120 + b.iter(|| tagtree::like_descendant_pattern(black_box("genre.electronic")));
121 + });
122 +
123 + group.finish();
124 + }
125 +
126 + fn bench_tree_ops(c: &mut Criterion) {
127 + let tags = sample_tags();
128 + let mut group = c.benchmark_group("tree_ops");
129 +
130 + group.bench_function("children_at_prefix", |b| {
131 + b.iter(|| tagtree::children_at_prefix(black_box("genre"), &tags));
132 + });
133 +
134 + group.bench_function("subtree", |b| {
135 + b.iter(|| tagtree::subtree(black_box("genre"), &tags));
136 + });
137 +
138 + group.bench_function("rename_prefix", |b| {
139 + b.iter(|| {
140 + tagtree::rename_prefix(
141 + black_box("genre.electronic"),
142 + black_box("style.electronic"),
143 + black_box("genre.electronic.house"),
144 + )
145 + });
146 + });
147 +
148 + group.finish();
149 + }
150 +
151 + fn bench_tag_index(c: &mut Criterion) {
152 + let tags = sample_tags();
153 + let mut group = c.benchmark_group("tag_index");
154 +
155 + group.bench_function("build_index", |b| {
156 + b.iter(|| TagIndex::new(black_box(tags.clone())));
157 + });
158 +
159 + let index = TagIndex::new(tags);
160 +
161 + group.bench_function("suggest_prefix", |b| {
162 + b.iter(|| index.suggest(black_box("gen"), 5));
163 + });
164 +
165 + group.bench_function("suggest_path", |b| {
166 + b.iter(|| index.suggest(black_box("genre.e"), 5));
167 + });
168 +
169 + group.bench_function("suggest_exact", |b| {
170 + b.iter(|| index.suggest(black_box("genre.electronic"), 5));
171 + });
172 +
173 + group.bench_function("suggest_no_match", |b| {
174 + b.iter(|| index.suggest(black_box("zzz"), 5));
175 + });
176 +
177 + group.finish();
178 + }
179 +
180 + // ---------------------------------------------------------------------------
181 + // Large-scale benchmarks (1K, 10K, 50K tags)
182 + // ---------------------------------------------------------------------------
183 +
184 + /// Generate a hierarchical tag set of approximately `target` tags.
185 + ///
186 + /// Structure: `{root}.sub-{m}.leaf-{l}`, with the per-node fan-out derived from `target`.
187 + fn generate_tags(target: usize) -> Vec<String> {
188 + let roots = [
189 + "genre", "mood", "source", "instrument", "era", "bpm", "key",
190 + "quality", "format", "region", "label", "artist", "album",
191 + "technique", "effect", "tempo", "scale", "texture", "timbre",
192 + "dynamic",
193 + ];
194 +
195 + let mut tags = Vec::with_capacity(target);
196 +
197 + // How many children per node needed to reach target
198 + // ~20 roots * mid * leaf ≈ target, so mid ≈ sqrt(target / 20)
199 + let fan = ((target as f64 / roots.len() as f64).sqrt().ceil() as usize).max(2);
200 +
201 + for root in &roots {
202 + tags.push(root.to_string());
203 + for m in 0..fan {
204 + let mid = format!("{root}.sub-{m}");
205 + tags.push(mid.clone());
206 + for l in 0..fan {
207 + tags.push(format!("{mid}.leaf-{l}"));
208 + if tags.len() >= target {
209 + return tags;
210 + }
211 + }
212 + }
213 + }
214 + tags
215 + }
216 +
217 + fn bench_large_tree_ops(c: &mut Criterion) {
218 + let tags_1k = generate_tags(1_000);
219 + let tags_10k = generate_tags(10_000);
220 + let tags_50k = generate_tags(50_000);
221 +
222 + let mut group = c.benchmark_group("large_tree_ops");
223 +
224 + // children_at_prefix — scans the full tag list
225 + group.bench_function("children_at_prefix/1k", |b| {
226 + b.iter(|| tagtree::children_at_prefix(black_box("genre"), &tags_1k));
227 + });
228 + group.bench_function("children_at_prefix/10k", |b| {
229 + b.iter(|| tagtree::children_at_prefix(black_box("genre"), &tags_10k));
230 + });
231 + group.bench_function("children_at_prefix/50k", |b| {
232 + b.iter(|| tagtree::children_at_prefix(black_box("genre"), &tags_50k));
233 + });
234 +
235 + // subtree — scans for prefix matches
236 + group.bench_function("subtree/1k", |b| {
237 + b.iter(|| tagtree::subtree(black_box("genre"), &tags_1k));
238 + });
239 + group.bench_function("subtree/10k", |b| {
240 + b.iter(|| tagtree::subtree(black_box("genre"), &tags_10k));
241 + });
242 + group.bench_function("subtree/50k", |b| {
243 + b.iter(|| tagtree::subtree(black_box("genre"), &tags_50k));
244 + });
245 +
246 + group.finish();
247 + }
248 +
249 + fn bench_large_tag_index(c: &mut Criterion) {
250 + let tags_1k = generate_tags(1_000);
251 + let tags_10k = generate_tags(10_000);
252 + let tags_50k = generate_tags(50_000);
253 +
254 + let mut group = c.benchmark_group("large_tag_index");
255 +
256 + // Build time
257 + group.bench_function("build/1k", |b| {
258 + b.iter(|| TagIndex::new(black_box(tags_1k.clone())));
259 + });
260 + group.bench_function("build/10k", |b| {
261 + b.iter(|| TagIndex::new(black_box(tags_10k.clone())));
262 + });
263 + group.bench_function("build/50k", |b| {
264 + b.iter(|| TagIndex::new(black_box(tags_50k.clone())));
265 + });
266 +
267 + // Suggest — build once, query many
268 + let idx_1k = TagIndex::new(tags_1k);
269 + let idx_10k = TagIndex::new(tags_10k);
270 + let idx_50k = TagIndex::new(tags_50k);
271 +
272 + group.bench_function("suggest/1k", |b| {
273 + b.iter(|| idx_1k.suggest(black_box("genre.sub"), 10));
274 + });
275 + group.bench_function("suggest/10k", |b| {
276 + b.iter(|| idx_10k.suggest(black_box("genre.sub"), 10));
277 + });
278 + group.bench_function("suggest/50k", |b| {
279 + b.iter(|| idx_50k.suggest(black_box("genre.sub"), 10));
280 + });
281 +
282 + // Worst case: short prefix that matches many tags
283 + group.bench_function("suggest_broad/1k", |b| {
284 + b.iter(|| idx_1k.suggest(black_box("g"), 10));
285 + });
286 + group.bench_function("suggest_broad/10k", |b| {
287 + b.iter(|| idx_10k.suggest(black_box("g"), 10));
288 + });
289 + group.bench_function("suggest_broad/50k", |b| {
290 + b.iter(|| idx_50k.suggest(black_box("g"), 10));
291 + });
292 +
293 + group.finish();
294 + }
295 +
296 + fn bench_large_validate(c: &mut Criterion) {
297 + let tags_10k = generate_tags(10_000);
298 + let mut group = c.benchmark_group("large_validate");
299 +
300 + // Validate every tag in a 10K set
301 + group.bench_function("validate_all/10k", |b| {
302 + b.iter(|| {
303 + for tag in &tags_10k {
304 + let _ = tagtree::validate_with(black_box(tag), &AF_CONFIG);
305 + }
306 + });
307 + });
308 +
309 + group.finish();
310 + }
311 +
312 + // ---------------------------------------------------------------------------
313 + // Deep tree benchmarks (max depth stress test)
314 + // ---------------------------------------------------------------------------
315 +
316 + /// Generate a single tag chain of `depth` segments: "seg-0.seg-1.seg-2..."
317 + fn deep_tag(depth: usize) -> String {
318 + (0..depth)
319 + .map(|i| format!("seg-{i}"))
320 + .collect::<Vec<_>>()
321 + .join(".")
322 + }
323 +
324 + /// Generate a tree where every node has `fan` children, up to `depth` levels.
325 + fn generate_deep_tree(depth: usize, fan: usize) -> Vec<String> {
326 + let mut tags = Vec::new();
327 + let mut stack: Vec<String> = vec!["root".to_string()];
328 +
329 + while let Some(prefix) = stack.pop() {
330 + tags.push(prefix.clone());
331 + let d = prefix.chars().filter(|&c| c == '.').count() + 1;
332 + if d < depth {
333 + for i in 0..fan {
334 + stack.push(format!("{prefix}.child-{i}"));
335 + }
336 + }
337 + }
338 + tags
339 + }
340 +
341 + fn bench_deep_tree(c: &mut Criterion) {
342 + // Deep validation config (allows up to 20 levels)
343 + const DEEP_CONFIG: TagConfig = TagConfig {
344 + max_depth: 20,
345 + max_length: 500,
346 + semantic_depth: 0,
347 + };
348 +
349 + let tag_5 = deep_tag(5);
350 + let tag_10 = deep_tag(10);
351 + let tag_20 = deep_tag(20);
352 +
353 + let mut group = c.benchmark_group("deep_tree");
354 +
355 + // Validate deep tags
356 + group.bench_function("validate/depth-5", |b| {
357 + b.iter(|| tagtree::validate_with(black_box(&tag_5), &DEEP_CONFIG));
358 + });
359 + group.bench_function("validate/depth-10", |b| {
360 + b.iter(|| tagtree::validate_with(black_box(&tag_10), &DEEP_CONFIG));
361 + });
362 + group.bench_function("validate/depth-20", |b| {
363 + b.iter(|| tagtree::validate_with(black_box(&tag_20), &DEEP_CONFIG));
364 + });
365 +
366 + // Ancestors at deep levels
367 + group.bench_function("ancestors/depth-5", |b| {
368 + b.iter(|| tagtree::ancestors(black_box(&tag_5)));
369 + });
370 + group.bench_function("ancestors/depth-10", |b| {
371 + b.iter(|| tagtree::ancestors(black_box(&tag_10)));
372 + });
373 + group.bench_function("ancestors/depth-20", |b| {
374 + b.iter(|| tagtree::ancestors(black_box(&tag_20)));
375 + });
376 +
377 + // is_ancestor_of with deep tags
378 + group.bench_function("is_ancestor/depth-20", |b| {
379 + let ancestor = &deep_tag(3);
380 + b.iter(|| tagtree::is_ancestor_of(black_box(ancestor), black_box(&tag_20)));
381 + });
382 +
383 + // Deep wide tree: depth 6, fan 5 = 3,906 tags (5^0 + 5^1 + ... + 5^5)
384 + let deep_wide = generate_deep_tree(6, 5);
385 + let deep_wide_len = deep_wide.len();
386 +
387 + group.bench_function(&format!("children_at_prefix/{deep_wide_len}_tags"), |b| {
388 + b.iter(|| tagtree::children_at_prefix(black_box("root"), &deep_wide));
389 + });
390 +
391 + group.bench_function(&format!("subtree/{deep_wide_len}_tags"), |b| {
392 + b.iter(|| tagtree::subtree(black_box("root"), &deep_wide));
393 + });
394 +
395 + // TagIndex with deep wide tree
396 + let deep_idx = TagIndex::new(deep_wide);
397 +
398 + group.bench_function("suggest_deep_path", |b| {
399 + b.iter(|| deep_idx.suggest(black_box("root.child-0.child-1"), 10));
400 + });
401 +
402 + group.bench_function("suggest_root", |b| {
403 + b.iter(|| deep_idx.suggest(black_box("root"), 10));
404 + });
405 +
406 + group.finish();
407 + }
408 +
409 + criterion_group!(
410 + benches,
411 + bench_validate,
412 + bench_hierarchy,
413 + bench_sql,
414 + bench_tree_ops,
415 + bench_tag_index,
416 + bench_large_tree_ops,
417 + bench_large_tag_index,
418 + bench_large_validate,
419 + bench_deep_tree,
420 + );
421 + criterion_main!(benches);
A src/lib.rs +500
@@ -0,0 +1,1441 @@
1 + //! Hierarchical dot-notation tag standard.
2 + //!
3 + //! Tags are lowercase strings with dot-separated segments representing a hierarchy:
4 + //! `genre.electronic.house`, `work.meeting.standup`, `news.tech.rust`.
5 + //!
6 + //! # Format (invariants enforced by all configs)
7 + //!
8 + //! - Segments: lowercase alphanumeric and hyphens (`[a-z0-9-]`)
9 + //! - Separator: `.` (dot)
10 + //! - No empty segments, no leading/trailing dots, no consecutive dots
11 + //!
12 + //! # Per-app configuration
13 + //!
14 + //! Each app provides a [`TagConfig`] that controls:
15 + //! - **`max_depth`** — maximum number of segments (e.g., 5 for AF, 3 for GO)
16 + //! - **`max_length`** — maximum character length of the entire tag string
17 + //! - **`semantic_depth`** — how many leading segments carry dispatch meaning
18 + //! (e.g., 1 means the first segment is a namespace like `genre`, `mood`)
19 + //!
20 + //! # SQL
21 + //!
22 + //! Hierarchy queries use `LIKE` prefix matching, which works identically on SQLite
23 + //! and PostgreSQL. Use [`like_descendant_pattern`] to build the pattern and
24 + //! [`escape_like`] to sanitize user input embedded in patterns.
25 +
26 + use std::fmt;
27 +
/// Separator between tag levels.
///
/// The hierarchy helpers in this file split tags into segments on this
/// character (e.g. `genre.electronic.house` has three segments).
pub const SEPARATOR: char = '.';
30 +
31 + // ---------------------------------------------------------------------------
32 + // Configuration
33 + // ---------------------------------------------------------------------------
34 +
/// Per-app tag rules. Each consumer defines one of these as a constant.
///
/// ```
/// use tagtree::TagConfig;
///
/// // audiofiles: deep hierarchy, namespace-driven
/// const AF_TAGS: TagConfig = TagConfig { max_depth: 5, max_length: 100, semantic_depth: 1 };
///
/// // goingson: shallow tags, no required prefix
/// const GO_TAGS: TagConfig = TagConfig { max_depth: 3, max_length: 60, semantic_depth: 0 };
/// ```
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct TagConfig {
    /// Maximum number of segments allowed (e.g., 5 means `a.b.c.d.e` is valid).
    pub max_depth: usize,
    /// Maximum character length of the entire tag string.
    pub max_length: usize,
    /// Number of leading segments that carry app-specific dispatch meaning.
    ///
    /// - `0` — no semantic prefix; all segments are free-form hierarchy
    /// - `1` — first segment is a namespace (e.g., `genre`, `mood`, `source`)
    /// - `2` — first two segments are structured (e.g., `type.subtype`)
    ///
    /// When > 0, [`validate_with`] enforces that the tag has at least
    /// `semantic_depth + 1` segments (the semantic prefix plus at least one
    /// value segment), and [`semantic_prefix`] / [`free_suffix`] can split
    /// the tag.
    pub semantic_depth: usize,
}
62 +
/// Error produced when a tag string fails validation.
///
/// The wrapped `String` is a human-readable description of the rule that was
/// violated. Its `Display` output is that description prefixed with
/// `invalid tag: `.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct TagError(pub String);

impl std::error::Error for TagError {}

impl fmt::Display for TagError {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let Self(reason) = self;
        f.write_str("invalid tag: ")?;
        f.write_str(reason)
    }
}
74 +
75 + // ---------------------------------------------------------------------------
76 + // Validation
77 + // ---------------------------------------------------------------------------
78 +
79 + /// Validate a tag against an app-specific [`TagConfig`].
80 + ///
81 + /// Checks format invariants (charset, no empty segments) plus the config's
82 + /// depth, length, and semantic-depth constraints.
83 + ///
84 + /// ```
85 + /// use tagtree::{TagConfig, validate_with};
86 + ///
87 + /// const CFG: TagConfig = TagConfig { max_depth: 3, max_length: 50, semantic_depth: 1 };
88 + ///
89 + /// assert!(validate_with("genre.rock", &CFG).is_ok());
90 + /// assert!(validate_with("genre.rock.punk", &CFG).is_ok()); // depth 3
91 + /// assert!(validate_with("genre.rock.punk.oi", &CFG).is_err()); // depth 4 > max 3
92 + /// assert!(validate_with("rock", &CFG).is_err()); // semantic_depth=1 requires 2+ segments
93 + /// ```
94 + pub fn validate_with(tag: &str, config: &TagConfig) -> Result<(), TagError> {
95 + if tag.is_empty() {
96 + return Err(TagError("tag cannot be empty".into()));
97 + }
98 + if tag.len() > config.max_length {
99 + return Err(TagError(format!(
100 + "tag exceeds {} characters",
101 + config.max_length
102 + )));
103 + }
104 + if tag.starts_with(SEPARATOR) || tag.ends_with(SEPARATOR) {
105 + return Err(TagError("tag cannot start or end with a dot".into()));
106 + }
107 + if tag.contains("..") {
108 + return Err(TagError("tag cannot contain consecutive dots".into()));
109 + }
110 + for ch in tag.chars() {
111 + if !(ch.is_ascii_lowercase() || ch.is_ascii_digit() || ch == '-' || ch == SEPARATOR) {
112 + return Err(TagError(format!(
113 + "invalid character '{ch}': only lowercase alphanumeric, hyphens, and dots allowed"
114 + )));
115 + }
116 + }
117 + let d = depth(tag);
118 + if d > config.max_depth {
119 + return Err(TagError(format!(
120 + "tag has {} levels, maximum is {}",
121 + d, config.max_depth
122 + )));
123 + }
124 + if config.semantic_depth > 0 && d < config.semantic_depth + 1 {
125 + return Err(TagError(format!(
126 + "tag must have at least {} segments (semantic prefix requires {} + at least 1 value)",
127 + config.semantic_depth + 1,
128 + config.semantic_depth
129 + )));
130 + }
131 + Ok(())
132 + }
133 +
134 + /// Validate a tag with default limits (max_depth=5, max_length=100, no semantic prefix).
135 + ///
136 + /// Convenience wrapper for quick validation without a config.
137 + ///
138 + /// ```
139 + /// # use tagtree::validate;
140 + /// assert!(validate("genre.electronic.house").is_ok());
141 + /// assert!(validate("a.b.c.d.e.f").is_err()); // 6 levels > default max 5
142 + /// ```
143 + pub fn validate(tag: &str) -> Result<(), TagError> {
144 + const DEFAULT: TagConfig = TagConfig {
145 + max_depth: 5,
146 + max_length: 100,
147 + semantic_depth: 0,
148 + };
149 + validate_with(tag, &DEFAULT)
150 + }
151 +
152 + // ---------------------------------------------------------------------------
153 + // Hierarchy parsing
154 + // ---------------------------------------------------------------------------
155 +
156 + /// Get the parent path of a tag, or `None` if it's a root-level tag.
157 + ///
158 + /// ```
159 + /// # use tagtree::parent;
160 + /// assert_eq!(parent("genre.electronic.house"), Some("genre.electronic"));
161 + /// assert_eq!(parent("genre"), None);
162 + /// ```
163 + pub fn parent(tag: &str) -> Option<&str> {
164 + tag.rfind(SEPARATOR).map(|pos| &tag[..pos])
165 + }
166 +
167 + /// Get the leaf (last segment) of a tag.
168 + ///
169 + /// ```
170 + /// # use tagtree::leaf;
171 + /// assert_eq!(leaf("genre.electronic.house"), "house");
172 + /// assert_eq!(leaf("genre"), "genre");
173 + /// ```
174 + pub fn leaf(tag: &str) -> &str {
175 + match tag.rfind(SEPARATOR) {
176 + Some(pos) => &tag[pos + 1..],
177 + None => tag,
178 + }
179 + }
180 +
181 + /// Count the depth (number of segments) in a tag.
182 + ///
183 + /// ```
184 + /// # use tagtree::depth;
185 + /// assert_eq!(depth("genre"), 1);
186 + /// assert_eq!(depth("genre.electronic"), 2);
187 + /// assert_eq!(depth("genre.electronic.house"), 3);
188 + /// ```
189 + pub fn depth(tag: &str) -> usize {
190 + if tag.is_empty() {
191 + return 0;
192 + }
193 + tag.chars().filter(|&c| c == SEPARATOR).count() + 1
194 + }
195 +
196 + /// Get the Nth segment (0-indexed) from a tag, or `None` if out of bounds.
197 + ///
198 + /// ```
199 + /// # use tagtree::segment;
200 + /// assert_eq!(segment("genre.electronic.house", 0), Some("genre"));
201 + /// assert_eq!(segment("genre.electronic.house", 1), Some("electronic"));
202 + /// assert_eq!(segment("genre.electronic.house", 2), Some("house"));
203 + /// assert_eq!(segment("genre.electronic.house", 3), None);
204 + /// ```
205 + pub fn segment(tag: &str, index: usize) -> Option<&str> {
206 + tag.split(SEPARATOR).nth(index)
207 + }
208 +
209 + /// Get the path formed by the first `n` segments, or `None` if the tag has fewer.
210 + ///
211 + /// ```
212 + /// # use tagtree::prefix_at_depth;
213 + /// assert_eq!(prefix_at_depth("genre.electronic.house", 1), Some("genre"));
214 + /// assert_eq!(prefix_at_depth("genre.electronic.house", 2), Some("genre.electronic"));
215 + /// assert_eq!(prefix_at_depth("genre.electronic.house", 3), Some("genre.electronic.house"));
216 + /// assert_eq!(prefix_at_depth("genre.electronic.house", 4), None);
217 + /// ```
218 + pub fn prefix_at_depth(tag: &str, n: usize) -> Option<&str> {
219 + if n == 0 {
220 + return None;
221 + }
222 + let mut dots_seen = 0;
223 + for (i, ch) in tag.bytes().enumerate() {
224 + if ch == SEPARATOR as u8 {
225 + dots_seen += 1;
226 + if dots_seen == n {
227 + return Some(&tag[..i]);
228 + }
229 + }
230 + }
231 + // If we've seen fewer dots than n-1, the tag doesn't have n segments
232 + if dots_seen == n - 1 {
233 + Some(tag)
234 + } else {
235 + None
236 + }
237 + }
238 +
239 + /// Get all ancestor paths from root to parent (excludes the tag itself).
240 + ///
241 + /// ```
242 + /// # use tagtree::ancestors;
243 + /// assert_eq!(ancestors("genre.electronic.house"), vec!["genre", "genre.electronic"]);
244 + /// assert!(ancestors("genre").is_empty());
245 + /// ```
246 + pub fn ancestors(tag: &str) -> Vec<&str> {
247 + let mut result = Vec::new();
248 + let mut remaining = tag;
249 + while let Some(pos) = remaining.rfind(SEPARATOR) {
250 + remaining = &remaining[..pos];
251 + result.push(remaining);
252 + }
253 + result.reverse();
254 + result
255 + }
256 +
257 + /// Check whether `ancestor` is an ancestor of `descendant`.
258 + ///
259 + /// A tag is NOT considered its own ancestor.
260 + ///
261 + /// ```
262 + /// # use tagtree::is_ancestor_of;
263 + /// assert!(is_ancestor_of("genre", "genre.electronic"));
264 + /// assert!(is_ancestor_of("genre", "genre.electronic.house"));
265 + /// assert!(!is_ancestor_of("genre.electronic", "genre"));
266 + /// assert!(!is_ancestor_of("genre", "genre")); // not self
267 + /// assert!(!is_ancestor_of("gen", "genre.rock")); // not a segment boundary
268 + /// ```
269 + pub fn is_ancestor_of(ancestor: &str, descendant: &str) -> bool {
270 + descendant.len() > ancestor.len()
271 + && descendant.starts_with(ancestor)
272 + && descendant.as_bytes()[ancestor.len()] == SEPARATOR as u8
273 + }
274 +
275 + /// Find the longest common ancestor of two tags, or `None` if they share no prefix.
276 + ///
277 + /// ```
278 + /// # use tagtree::common_ancestor;
279 + /// assert_eq!(common_ancestor("genre.electronic.house", "genre.electronic.techno"), Some("genre.electronic"));
280 + /// assert_eq!(common_ancestor("genre.rock", "genre.electronic"), Some("genre"));
281 + /// assert_eq!(common_ancestor("genre.rock", "mood.dark"), None);
282 + /// assert_eq!(common_ancestor("genre", "genre"), None); // same tag, no ancestor
283 + /// ```
284 + pub fn common_ancestor<'a>(a: &'a str, b: &str) -> Option<&'a str> {
285 + let mut last_sep = None;
286 + for (i, (ca, cb)) in a.bytes().zip(b.bytes()).enumerate() {
287 + if ca != cb {
288 + break;
289 + }
290 + if ca == SEPARATOR as u8 {
291 + last_sep = Some(i);
292 + }
293 + }
294 + // If one is a prefix of the other and ends at a separator boundary,
295 + // the shorter one is the common ancestor.
296 + let min_len = a.len().min(b.len());
297 + if a.len() != b.len()
298 + && a.as_bytes()[..min_len] == b.as_bytes()[..min_len]
299 + && (min_len == a.len() || min_len == b.len())
300 + {
301 + let longer = if a.len() > b.len() { a } else { b };
302 + if longer.as_bytes()[min_len] == SEPARATOR as u8 {
303 + return Some(&a[..min_len]);
304 + }
305 + }
306 + last_sep.map(|pos| &a[..pos])
307 + }
308 +
309 + // ---------------------------------------------------------------------------
310 + // Semantic prefix helpers
311 + // ---------------------------------------------------------------------------
312 +
313 + /// Extract the semantic prefix (first `n` segments) from a tag.
314 + ///
315 + /// Returns `None` if the tag has fewer than `n` segments.
316 + /// This is equivalent to `prefix_at_depth(tag, n)` but named for clarity
317 + /// when used with [`TagConfig::semantic_depth`].
318 + ///
319 + /// ```
320 + /// # use tagtree::semantic_prefix;
321 + /// // AF uses semantic_depth=1: first segment is the namespace
322 + /// assert_eq!(semantic_prefix("genre.electronic.house", 1), Some("genre"));
323 + ///
324 + /// // A hypothetical app with semantic_depth=2
325 + /// assert_eq!(semantic_prefix("type.audio.podcast", 2), Some("type.audio"));
326 + ///
327 + /// // No semantic prefix
328 + /// assert_eq!(semantic_prefix("rock", 0), None);
329 + /// ```
330 + pub fn semantic_prefix(tag: &str, semantic_depth: usize) -> Option<&str> {
331 + if semantic_depth == 0 {
332 + return None;
333 + }
334 + prefix_at_depth(tag, semantic_depth)
335 + }
336 +
337 + /// Extract the free-form suffix after the semantic prefix.
338 + ///
339 + /// Returns `None` if the tag has no segments beyond the semantic prefix.
340 + ///
341 + /// ```
342 + /// # use tagtree::free_suffix;
343 + /// assert_eq!(free_suffix("genre.electronic.house", 1), Some("electronic.house"));
344 + /// assert_eq!(free_suffix("genre.rock", 1), Some("rock"));
345 + /// assert_eq!(free_suffix("genre", 1), None); // no suffix beyond prefix
346 + ///
347 + /// // semantic_depth=0 means the whole tag is free-form
348 + /// assert_eq!(free_suffix("genre.rock", 0), Some("genre.rock"));
349 + /// ```
350 + pub fn free_suffix(tag: &str, semantic_depth: usize) -> Option<&str> {
351 + if semantic_depth == 0 {
352 + if tag.is_empty() {
353 + return None;
354 + }
355 + return Some(tag);
356 + }
357 + let prefix = prefix_at_depth(tag, semantic_depth)?;
358 + if prefix.len() == tag.len() {
359 + // Tag is exactly the semantic prefix, no suffix
360 + None
361 + } else {
362 + Some(&tag[prefix.len() + 1..]) // skip the dot
363 + }
364 + }
365 +
366 + // ---------------------------------------------------------------------------
367 + // Tree operations on tag sets (in-memory)
368 + // ---------------------------------------------------------------------------
369 +
370 + /// List the immediate child segment names under a prefix.
371 + ///
372 + /// Given a set of tags and a prefix, returns the unique next-level segment names
373 + /// (not full paths). Pass an empty string for root-level children.
374 + ///
375 + /// ```
376 + /// # use tagtree::children_at_prefix;
377 + /// let tags = ["genre.electronic.house", "genre.electronic.techno", "genre.rock", "mood.dark"];
378 + /// assert_eq!(children_at_prefix("", &tags), vec!["genre", "mood"]);
379 + /// assert_eq!(children_at_prefix("genre", &tags), vec!["electronic", "rock"]);
380 + /// assert_eq!(children_at_prefix("genre.electronic", &tags), vec!["house", "techno"]);
381 + /// ```
382 + pub fn children_at_prefix(prefix: &str, tags: &[impl AsRef<str>]) -> Vec<String> {
383 + let strip_len = if prefix.is_empty() {
384 + 0
385 + } else {
386 + prefix.len() + 1 // prefix + dot
387 + };
388 +
389 + let mut children: Vec<String> = Vec::new();
390 + for tag in tags {
391 + let tag = tag.as_ref();
392 + let matches = if prefix.is_empty() {
393 + true
394 + } else {
395 + tag.starts_with(prefix)
396 + && tag.len() > prefix.len()
397 + && tag.as_bytes()[prefix.len()] == SEPARATOR as u8
398 + };
399 + if !matches || tag.len() <= strip_len {
400 + continue;
401 + }
402 + let suffix = &tag[strip_len..];
403 + let child = match suffix.find(SEPARATOR) {
404 + Some(pos) => &suffix[..pos],
405 + None => suffix,
406 + };
407 + if !child.is_empty() && !children.iter().any(|c| c == child) {
408 + children.push(child.to_string());
409 + }
410 + }
411 + children.sort();
412 + children
413 + }
414 +
415 + /// Get all tags that are descendants of a prefix (including the prefix itself if present).
416 + ///
417 + /// ```
418 + /// # use tagtree::subtree;
419 + /// let tags = ["genre.electronic.house", "genre.electronic.techno", "genre.rock", "mood.dark"];
420 + /// let sub = subtree("genre.electronic", &tags);
421 + /// assert_eq!(sub, vec!["genre.electronic.house", "genre.electronic.techno"]);
422 + /// ```
423 + pub fn subtree<'a>(prefix: &str, tags: &'a [impl AsRef<str>]) -> Vec<&'a str> {
424 + tags.iter()
425 + .map(|t| t.as_ref())
426 + .filter(|tag| {
427 + *tag == prefix
428 + || (tag.starts_with(prefix)
429 + && tag.len() > prefix.len()
430 + && tag.as_bytes()[prefix.len()] == SEPARATOR as u8)
431 + })
432 + .collect()
433 + }
434 +
435 + // ---------------------------------------------------------------------------
436 + // SQL helpers
437 + // ---------------------------------------------------------------------------
438 +
/// Escape a string so it is matched literally inside a SQL `LIKE` pattern.
///
/// Each `\`, `%` and `_` is prefixed with a backslash. The query must declare
/// the escape character with `ESCAPE '\'`.
///
/// ```
/// # use tagtree::escape_like;
/// assert_eq!(escape_like("100%"), r"100\%");
/// assert_eq!(escape_like("a_b"), r"a\_b");
/// ```
pub fn escape_like(s: &str) -> String {
    let mut escaped = String::with_capacity(s.len());
    for ch in s.chars() {
        if matches!(ch, '\\' | '%' | '_') {
            escaped.push('\\');
        }
        escaped.push(ch);
    }
    escaped
}
453 +
454 + /// Build a `LIKE` pattern that matches all descendants of a prefix (not the prefix itself).
455 + ///
456 + /// For use in SQL: `WHERE tag = ?1 OR tag LIKE ?2 ESCAPE '\'`
457 + /// where `?1` is the prefix itself and `?2` is this pattern.
458 + ///
459 + /// ```
460 + /// # use tagtree::like_descendant_pattern;
461 + /// assert_eq!(like_descendant_pattern("genre"), r"genre.%");
462 + /// assert_eq!(like_descendant_pattern("a-b"), r"a-b.%");
463 + /// ```
464 + pub fn like_descendant_pattern(prefix: &str) -> String {
465 + format!("{}.%", escape_like(prefix))
466 + }
467 +
468 + // ---------------------------------------------------------------------------
469 + // Rename / move
470 + // ---------------------------------------------------------------------------
471 +
472 + /// Rename a prefix within a tag. Returns `Some(new_tag)` if the tag starts with
473 + /// `old_prefix` at a segment boundary, or `None` if it doesn't match.
474 + ///
475 + /// ```
476 + /// # use tagtree::rename_prefix;
477 + /// assert_eq!(
478 + /// rename_prefix("genre.electronic", "genre.dance", "genre.electronic.house"),
479 + /// Some("genre.dance.house".to_string())
480 + /// );
481 + /// assert_eq!(
482 + /// rename_prefix("genre.electronic", "genre.dance", "genre.electronic"),
483 + /// Some("genre.dance".to_string())
484 + /// );
485 + /// assert_eq!(rename_prefix("gen", "xxx", "genre.rock"), None);
486 + /// ```
487 + pub fn rename_prefix(old_prefix: &str, new_prefix: &str, tag: &str) -> Option<String> {
488 + if tag == old_prefix {
489 + Some(new_prefix.to_string())
490 + } else if is_ancestor_of(old_prefix, tag) {
491 + Some(format!("{}{}", new_prefix, &tag[old_prefix.len()..]))
492 + } else {
493 + None
494 + }
495 + }
496 +
497 + // ---------------------------------------------------------------------------
498 + // In-memory suggestion index
499 + // ---------------------------------------------------------------------------
500 +
Lines truncated