Skip to main content

max / tagtree

15.0 KB · 491 lines History Blame Raw
1 use criterion::{black_box, criterion_group, criterion_main, Criterion};
2 use tagtree::{batch_rename, merge_tags, remove_subtree, rename_prefix_bulk, TagConfig, TagIndex};
3
4 const AF_CONFIG: TagConfig = TagConfig {
5 max_depth: 5,
6 max_length: 100,
7 semantic_depth: 0,
8 };
9
10 const GO_CONFIG: TagConfig = TagConfig {
11 max_depth: 3,
12 max_length: 60,
13 semantic_depth: 0,
14 };
15
16 fn sample_tags() -> Vec<String> {
17 vec![
18 "genre", "genre.electronic", "genre.electronic.house",
19 "genre.electronic.techno", "genre.electronic.ambient",
20 "genre.rock", "genre.rock.indie", "genre.rock.post-punk",
21 "genre.jazz", "genre.jazz.bebop", "genre.jazz.fusion",
22 "mood", "mood.energetic", "mood.calm", "mood.dark",
23 "source", "source.vinyl", "source.digital", "source.tape",
24 "instrument", "instrument.guitar", "instrument.synth",
25 "instrument.drums", "instrument.bass",
26 "era", "era.60s", "era.70s", "era.80s", "era.90s", "era.2000s",
27 "bpm.slow", "bpm.medium", "bpm.fast",
28 "key.c-major", "key.a-minor", "key.d-major",
29 "quality.high", "quality.medium", "quality.low",
30 "format.wav", "format.flac", "format.mp3", "format.aiff",
31 ]
32 .into_iter()
33 .map(String::from)
34 .collect()
35 }
36
37 fn bench_validate(c: &mut Criterion) {
38 let mut group = c.benchmark_group("validate");
39
40 group.bench_function("simple_tag", |b| {
41 b.iter(|| tagtree::validate_with(black_box("genre"), &AF_CONFIG));
42 });
43
44 group.bench_function("deep_tag", |b| {
45 b.iter(|| {
46 tagtree::validate_with(black_box("genre.electronic.house"), &AF_CONFIG)
47 });
48 });
49
50 group.bench_function("max_depth_tag", |b| {
51 b.iter(|| {
52 tagtree::validate_with(black_box("a.b.c.d.e"), &AF_CONFIG)
53 });
54 });
55
56 group.bench_function("invalid_tag", |b| {
57 b.iter(|| tagtree::validate_with(black_box("UPPER.case"), &AF_CONFIG));
58 });
59
60 group.bench_function("long_tag", |b| {
61 let tag = "abcdefghij.abcdefghij.abcdefghij.abcdefghij.abcdefghij";
62 b.iter(|| tagtree::validate_with(black_box(tag), &AF_CONFIG));
63 });
64
65 group.bench_function("go_config", |b| {
66 b.iter(|| tagtree::validate_with(black_box("work.meeting"), &GO_CONFIG));
67 });
68
69 group.finish();
70 }
71
72 fn bench_hierarchy(c: &mut Criterion) {
73 let mut group = c.benchmark_group("hierarchy");
74
75 group.bench_function("parent", |b| {
76 b.iter(|| tagtree::parent(black_box("genre.electronic.house")));
77 });
78
79 group.bench_function("leaf", |b| {
80 b.iter(|| tagtree::leaf(black_box("genre.electronic.house")));
81 });
82
83 group.bench_function("depth", |b| {
84 b.iter(|| tagtree::depth(black_box("genre.electronic.house")));
85 });
86
87 group.bench_function("ancestors", |b| {
88 b.iter(|| tagtree::ancestors(black_box("genre.electronic.house")));
89 });
90
91 group.bench_function("is_ancestor_of", |b| {
92 b.iter(|| {
93 tagtree::is_ancestor_of(
94 black_box("genre.electronic"),
95 black_box("genre.electronic.house"),
96 )
97 });
98 });
99
100 group.bench_function("common_ancestor", |b| {
101 b.iter(|| {
102 tagtree::common_ancestor(
103 black_box("genre.electronic.house"),
104 black_box("genre.electronic.techno"),
105 )
106 });
107 });
108
109 group.finish();
110 }
111
112 fn bench_sql(c: &mut Criterion) {
113 let mut group = c.benchmark_group("sql");
114
115 group.bench_function("escape_like", |b| {
116 b.iter(|| tagtree::escape_like(black_box("genre%_electronic")));
117 });
118
119 group.bench_function("like_descendant_pattern", |b| {
120 b.iter(|| tagtree::like_descendant_pattern(black_box("genre.electronic")));
121 });
122
123 group.finish();
124 }
125
126 fn bench_tree_ops(c: &mut Criterion) {
127 let tags = sample_tags();
128 let mut group = c.benchmark_group("tree_ops");
129
130 group.bench_function("children_at_prefix", |b| {
131 b.iter(|| tagtree::children_at_prefix(black_box("genre"), &tags));
132 });
133
134 group.bench_function("subtree", |b| {
135 b.iter(|| tagtree::subtree(black_box("genre"), &tags));
136 });
137
138 group.bench_function("rename_prefix", |b| {
139 b.iter(|| {
140 tagtree::rename_prefix(
141 black_box("genre.electronic"),
142 black_box("style.electronic"),
143 black_box("genre.electronic.house"),
144 )
145 });
146 });
147
148 group.finish();
149 }
150
151 fn bench_tag_index(c: &mut Criterion) {
152 let tags = sample_tags();
153 let mut group = c.benchmark_group("tag_index");
154
155 group.bench_function("build_index", |b| {
156 b.iter(|| TagIndex::new(black_box(tags.clone())));
157 });
158
159 let index = TagIndex::new(tags);
160
161 group.bench_function("suggest_prefix", |b| {
162 b.iter(|| index.suggest(black_box("gen"), 5));
163 });
164
165 group.bench_function("suggest_path", |b| {
166 b.iter(|| index.suggest(black_box("genre.e"), 5));
167 });
168
169 group.bench_function("suggest_exact", |b| {
170 b.iter(|| index.suggest(black_box("genre.electronic"), 5));
171 });
172
173 group.bench_function("suggest_no_match", |b| {
174 b.iter(|| index.suggest(black_box("zzz"), 5));
175 });
176
177 group.bench_function("suggest_fuzzy_typo", |b| {
178 b.iter(|| index.suggest_fuzzy(black_box("genr"), 5));
179 });
180
181 group.bench_function("suggest_fuzzy_no_match", |b| {
182 b.iter(|| index.suggest_fuzzy(black_box("zzz"), 5));
183 });
184
185 group.finish();
186 }
187
188 // ---------------------------------------------------------------------------
189 // Large-scale benchmarks (1K, 10K, 50K tags)
190 // ---------------------------------------------------------------------------
191
192 /// Generate a hierarchical tag set of approximately `target` tags.
193 ///
194 /// Structure: `{root}.{mid}.{leaf}` with configurable fan-out.
195 fn generate_tags(target: usize) -> Vec<String> {
196 let roots = [
197 "genre", "mood", "source", "instrument", "era", "bpm", "key",
198 "quality", "format", "region", "label", "artist", "album",
199 "technique", "effect", "tempo", "scale", "texture", "timbre",
200 "dynamic",
201 ];
202
203 let mut tags = Vec::with_capacity(target);
204
205 // How many children per node needed to reach target
206 // ~20 roots * mid * leaf ≈ target, so mid ≈ sqrt(target / 20)
207 let fan = ((target as f64 / roots.len() as f64).sqrt().ceil() as usize).max(2);
208
209 for root in &roots {
210 tags.push(root.to_string());
211 for m in 0..fan {
212 let mid = format!("{root}.sub-{m}");
213 tags.push(mid.clone());
214 for l in 0..fan {
215 tags.push(format!("{mid}.leaf-{l}"));
216 if tags.len() >= target {
217 return tags;
218 }
219 }
220 }
221 }
222 tags
223 }
224
225 fn bench_large_tree_ops(c: &mut Criterion) {
226 let tags_1k = generate_tags(1_000);
227 let tags_10k = generate_tags(10_000);
228 let tags_50k = generate_tags(50_000);
229
230 let mut group = c.benchmark_group("large_tree_ops");
231
232 // children_at_prefix — scans the full tag list
233 group.bench_function("children_at_prefix/1k", |b| {
234 b.iter(|| tagtree::children_at_prefix(black_box("genre"), &tags_1k));
235 });
236 group.bench_function("children_at_prefix/10k", |b| {
237 b.iter(|| tagtree::children_at_prefix(black_box("genre"), &tags_10k));
238 });
239 group.bench_function("children_at_prefix/50k", |b| {
240 b.iter(|| tagtree::children_at_prefix(black_box("genre"), &tags_50k));
241 });
242
243 // subtree — scans for prefix matches
244 group.bench_function("subtree/1k", |b| {
245 b.iter(|| tagtree::subtree(black_box("genre"), &tags_1k));
246 });
247 group.bench_function("subtree/10k", |b| {
248 b.iter(|| tagtree::subtree(black_box("genre"), &tags_10k));
249 });
250 group.bench_function("subtree/50k", |b| {
251 b.iter(|| tagtree::subtree(black_box("genre"), &tags_50k));
252 });
253
254 group.finish();
255 }
256
257 fn bench_large_tag_index(c: &mut Criterion) {
258 let tags_1k = generate_tags(1_000);
259 let tags_10k = generate_tags(10_000);
260 let tags_50k = generate_tags(50_000);
261
262 let mut group = c.benchmark_group("large_tag_index");
263
264 // Build time
265 group.bench_function("build/1k", |b| {
266 b.iter(|| TagIndex::new(black_box(tags_1k.clone())));
267 });
268 group.bench_function("build/10k", |b| {
269 b.iter(|| TagIndex::new(black_box(tags_10k.clone())));
270 });
271 group.bench_function("build/50k", |b| {
272 b.iter(|| TagIndex::new(black_box(tags_50k.clone())));
273 });
274
275 // Suggest — build once, query many
276 let idx_1k = TagIndex::new(tags_1k);
277 let idx_10k = TagIndex::new(tags_10k);
278 let idx_50k = TagIndex::new(tags_50k);
279
280 group.bench_function("suggest/1k", |b| {
281 b.iter(|| idx_1k.suggest(black_box("genre.sub"), 10));
282 });
283 group.bench_function("suggest/10k", |b| {
284 b.iter(|| idx_10k.suggest(black_box("genre.sub"), 10));
285 });
286 group.bench_function("suggest/50k", |b| {
287 b.iter(|| idx_50k.suggest(black_box("genre.sub"), 10));
288 });
289
290 // Fuzzy suggest at scale
291 group.bench_function("suggest_fuzzy/1k", |b| {
292 b.iter(|| idx_1k.suggest_fuzzy(black_box("genr"), 10));
293 });
294 group.bench_function("suggest_fuzzy/10k", |b| {
295 b.iter(|| idx_10k.suggest_fuzzy(black_box("genr"), 10));
296 });
297 group.bench_function("suggest_fuzzy/50k", |b| {
298 b.iter(|| idx_50k.suggest_fuzzy(black_box("genr"), 10));
299 });
300
301 // Fuzzy worst case: no match, must scan all segments
302 group.bench_function("suggest_fuzzy_miss/10k", |b| {
303 b.iter(|| idx_10k.suggest_fuzzy(black_box("zzz"), 10));
304 });
305
306 // Worst case: short prefix that matches many tags
307 group.bench_function("suggest_broad/1k", |b| {
308 b.iter(|| idx_1k.suggest(black_box("g"), 10));
309 });
310 group.bench_function("suggest_broad/10k", |b| {
311 b.iter(|| idx_10k.suggest(black_box("g"), 10));
312 });
313 group.bench_function("suggest_broad/50k", |b| {
314 b.iter(|| idx_50k.suggest(black_box("g"), 10));
315 });
316
317 group.finish();
318 }
319
320 fn bench_large_validate(c: &mut Criterion) {
321 let tags_10k = generate_tags(10_000);
322 let mut group = c.benchmark_group("large_validate");
323
324 // Validate every tag in a 10K set
325 group.bench_function("validate_all/10k", |b| {
326 b.iter(|| {
327 for tag in &tags_10k {
328 let _ = tagtree::validate_with(black_box(tag), &AF_CONFIG);
329 }
330 });
331 });
332
333 group.finish();
334 }
335
336 // ---------------------------------------------------------------------------
337 // Deep tree benchmarks (max depth stress test)
338 // ---------------------------------------------------------------------------
339
340 /// Generate a single tag chain of depth `n`: "a.b.c.d.e.f..."
341 fn deep_tag(depth: usize) -> String {
342 (0..depth)
343 .map(|i| format!("seg-{i}"))
344 .collect::<Vec<_>>()
345 .join(".")
346 }
347
348 /// Generate a tree where every node has `fan` children, up to `depth` levels.
349 fn generate_deep_tree(depth: usize, fan: usize) -> Vec<String> {
350 let mut tags = Vec::new();
351 let mut stack: Vec<String> = vec!["root".to_string()];
352
353 while let Some(prefix) = stack.pop() {
354 tags.push(prefix.clone());
355 let d = prefix.chars().filter(|&c| c == '.').count() + 1;
356 if d < depth {
357 for i in 0..fan {
358 stack.push(format!("{prefix}.child-{i}"));
359 }
360 }
361 }
362 tags
363 }
364
365 fn bench_deep_tree(c: &mut Criterion) {
366 // Deep validation config (allows up to 20 levels)
367 const DEEP_CONFIG: TagConfig = TagConfig {
368 max_depth: 20,
369 max_length: 500,
370 semantic_depth: 0,
371 };
372
373 let tag_5 = deep_tag(5);
374 let tag_10 = deep_tag(10);
375 let tag_20 = deep_tag(20);
376
377 let mut group = c.benchmark_group("deep_tree");
378
379 // Validate deep tags
380 group.bench_function("validate/depth-5", |b| {
381 b.iter(|| tagtree::validate_with(black_box(&tag_5), &DEEP_CONFIG));
382 });
383 group.bench_function("validate/depth-10", |b| {
384 b.iter(|| tagtree::validate_with(black_box(&tag_10), &DEEP_CONFIG));
385 });
386 group.bench_function("validate/depth-20", |b| {
387 b.iter(|| tagtree::validate_with(black_box(&tag_20), &DEEP_CONFIG));
388 });
389
390 // Ancestors at deep levels
391 group.bench_function("ancestors/depth-5", |b| {
392 b.iter(|| tagtree::ancestors(black_box(&tag_5)));
393 });
394 group.bench_function("ancestors/depth-10", |b| {
395 b.iter(|| tagtree::ancestors(black_box(&tag_10)));
396 });
397 group.bench_function("ancestors/depth-20", |b| {
398 b.iter(|| tagtree::ancestors(black_box(&tag_20)));
399 });
400
401 // is_ancestor_of with deep tags
402 group.bench_function("is_ancestor/depth-20", |b| {
403 let ancestor = &deep_tag(3);
404 b.iter(|| tagtree::is_ancestor_of(black_box(ancestor), black_box(&tag_20)));
405 });
406
407 // Deep wide tree: depth 6, fan 5 = ~19K tags
408 let deep_wide = generate_deep_tree(6, 5);
409 let deep_wide_len = deep_wide.len();
410
411 group.bench_function(format!("children_at_prefix/{deep_wide_len}_tags"), |b| {
412 b.iter(|| tagtree::children_at_prefix(black_box("root"), &deep_wide));
413 });
414
415 group.bench_function(format!("subtree/{deep_wide_len}_tags"), |b| {
416 b.iter(|| tagtree::subtree(black_box("root"), &deep_wide));
417 });
418
419 // TagIndex with deep wide tree
420 let deep_idx = TagIndex::new(deep_wide);
421
422 group.bench_function("suggest_deep_path", |b| {
423 b.iter(|| deep_idx.suggest(black_box("root.child-0.child-1"), 10));
424 });
425
426 group.bench_function("suggest_root", |b| {
427 b.iter(|| deep_idx.suggest(black_box("root"), 10));
428 });
429
430 group.finish();
431 }
432
433 fn bench_bulk_ops(c: &mut Criterion) {
434 let tags_10k = generate_tags(10_000);
435 let mut group = c.benchmark_group("bulk_ops");
436
437 group.bench_function("rename_prefix_bulk/10k", |b| {
438 b.iter_batched(
439 || TagIndex::new(tags_10k.clone()),
440 |mut idx| rename_prefix_bulk(black_box("genre"), black_box("style"), &mut idx),
441 criterion::BatchSize::SmallInput,
442 );
443 });
444
445 group.bench_function("remove_subtree/10k", |b| {
446 b.iter_batched(
447 || TagIndex::new(tags_10k.clone()),
448 |mut idx| remove_subtree(black_box("genre"), &mut idx),
449 criterion::BatchSize::SmallInput,
450 );
451 });
452
453 group.bench_function("merge_tags/10k", |b| {
454 b.iter_batched(
455 || TagIndex::new(tags_10k.clone()),
456 |mut idx| merge_tags(black_box("genre.sub-0"), black_box("mood.sub-0"), &mut idx),
457 criterion::BatchSize::SmallInput,
458 );
459 });
460
461 group.bench_function("batch_rename_3ops/10k", |b| {
462 let ops = [
463 ("genre.sub-0", "style.sub-0"),
464 ("mood.sub-1", "vibe.sub-1"),
465 ("source.sub-2", "origin.sub-2"),
466 ];
467 b.iter_batched(
468 || TagIndex::new(tags_10k.clone()),
469 |mut idx| batch_rename(black_box(&ops), &mut idx),
470 criterion::BatchSize::SmallInput,
471 );
472 });
473
474 group.finish();
475 }
476
477 criterion_group!(
478 benches,
479 bench_validate,
480 bench_hierarchy,
481 bench_sql,
482 bench_tree_ops,
483 bench_tag_index,
484 bench_large_tree_ops,
485 bench_large_tag_index,
486 bench_large_validate,
487 bench_deep_tree,
488 bench_bulk_ops,
489 );
490 criterion_main!(benches);
491