max / docengine

Code review remediation: custom directives, escape module, media URLs, tests Extensible [!TYPE] alerts, [!TABS] code tabs, HTML escape extraction, media URL rewriting. 141 tests. Grade A. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

Author: Max J. <87768334+MaxJMath@users.noreply.github.com> · 2026-04-12 23:39 UTC

Commit: f919b8cf0596b8b48e6ed344dd92062dc1368245

Parent: 7367851

14 files changed, +1203 insertions, -38 deletions

M Cargo.lock +38

			@@ -92,6 +92,7 @@ dependencies = [
92	92		"regex-lite",
93	93		"serde",
94	94		"toml",
	95	+	"tracing",
95	96		"uuid",
96	97		]
97	98
			@@ -498,6 +499,12 @@ dependencies = [
498	499		]
499	500
500	501		[[package]]
	502	+	name = "pin-project-lite"
	503	+	version = "0.2.17"
	504	+	source = "registry+https://github.com/rust-lang/crates.io-index"
	505	+	checksum = "a89322df9ebe1c1578d689c92318e070967d1042b512afbe49518723f4e6d5cd"
	506	+
	507	+	[[package]]
501	508		name = "potential_utf"
502	509		version = "0.1.4"
503	510		source = "registry+https://github.com/rust-lang/crates.io-index"
			@@ -822,6 +829,37 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
822	829		checksum = "5d99f8c9a7727884afe522e9bd5edbfc91a3312b36a77b5fb8926e4c31a41801"
823	830
824	831		[[package]]
	832	+	name = "tracing"
	833	+	version = "0.1.44"
	834	+	source = "registry+https://github.com/rust-lang/crates.io-index"
	835	+	checksum = "63e71662fa4b2a2c3a26f570f037eb95bb1f85397f3cd8076caed2f026a6d100"
	836	+	dependencies = [
	837	+	"pin-project-lite",
	838	+	"tracing-attributes",
	839	+	"tracing-core",
	840	+	]
	841	+
	842	+	[[package]]
	843	+	name = "tracing-attributes"
	844	+	version = "0.1.31"
	845	+	source = "registry+https://github.com/rust-lang/crates.io-index"
	846	+	checksum = "7490cfa5ec963746568740651ac6781f701c9c5ea257c58e057f3ba8cf69e8da"
	847	+	dependencies = [
	848	+	"proc-macro2",
	849	+	"quote",
	850	+	"syn",
	851	+	]
	852	+
	853	+	[[package]]
	854	+	name = "tracing-core"
	855	+	version = "0.1.36"
	856	+	source = "registry+https://github.com/rust-lang/crates.io-index"
	857	+	checksum = "db97caf9d906fbde555dd62fa95ddba9eecfd14cb388e4f491a66d74cd5fb79a"
	858	+	dependencies = [
	859	+	"once_cell",
	860	+	]
	861	+
	862	+	[[package]]
825	863		name = "unicase"
826	864		version = "2.9.0"
827	865		source = "registry+https://github.com/rust-lang/crates.io-index"

M Cargo.toml +6 -3

			@@ -5,11 +5,13 @@ edition = "2021"
5	5
6	6		[features]
7	7		default = []
8		-	doc-loader = ["dep:regex"]
	8	+	doc-loader = ["dep:regex", "dep:tracing"]
	9	+	directives = ["dep:regex-lite"]
9	10		mentions = ["dep:regex-lite"]
10	11		quotes = ["dep:regex-lite", "dep:uuid"]
11		-	frontmatter = ["dep:toml"]
12		-	full = ["doc-loader", "mentions", "quotes", "frontmatter"]
	12	+	frontmatter = ["dep:toml", "dep:tracing"]
	13	+	media-urls = ["dep:regex-lite"]
	14	+	full = ["doc-loader", "directives", "mentions", "quotes", "frontmatter", "media-urls"]
13	15
14	16		[dependencies]
15	17		pulldown-cmark = "0.12"
			@@ -20,3 +22,4 @@ regex = { version = "1", optional = true }
20	22		regex-lite = { version = "0.1", optional = true }
21	23		uuid = { version = "1", features = ["serde", "v4"], optional = true }
22	24		toml = { version = "0.8", optional = true }
	25	+	tracing = { version = "0.1", optional = true }

A README.md +119

		@@ -0,0 +1,119 @@
1	+	# DocEngine
2	+
3	+	Configurable markdown-to-HTML rendering library with sanitization presets. Built on pulldown-cmark (GFM) and ammonia.
4	+
5	+	Used by MNW (site docs, blog posts, user-generated content), Multithreaded (forum posts), and the desktop apps (descriptions, notes).
6	+
7	+	## Presets
8	+
9	+	Four rendering presets, each with different security/feature tradeoffs:
10	+
11	+	\| Preset \| Use case \| Tables \| Images \| Raw HTML \| Dangerous scheme filter \| Sanitization \|
12	+	\|--------\|----------\|:------:\|:------:\|:--------:\|:-----------------------:\|--------------\|
13	+	\| Permissive \| Docs, blog posts (trusted) \| Y \| Y \| Y \| N \| Default ammonia \|
14	+	\| Standard \| App text fields (descriptions) \| Y \| N \| Y \| N \| Default ammonia \|
15	+	\| Strict \| User-generated content (forums) \| N \| N \| N \| Y \| nofollow on links \|
16	+	\| Sanitize-only \| External HTML (RSS feeds) \| -- \| -- \| -- \| -- \| Default ammonia, no markdown parsing \|
17	+
18	+	```rust
19	+	use docengine::{render_permissive, render_standard, render_strict, sanitize_html};
20	+
21	+	// Convenience functions
22	+	let html = render_permissive("# Hello\n\nBold text");
23	+	let html = render_standard("A description with [link](https://example.com)");
24	+	let html = render_strict("User post with @mentions and `code`");
25	+	let html = sanitize_html("<p>Pre-rendered</p><script>stripped</script>");
26	+
27	+	// Builder pattern for custom configurations
28	+	use docengine::{Renderer, SanitizePreset};
29	+
30	+	let html = Renderer::permissive()
31	+	.with_strip_images(true) // override: strip images even in permissive
32	+	.with_footnotes(false)
33	+	.render("# Custom config");
34	+
35	+	// Render with metadata (word count, reading time)
36	+	let result = Renderer::standard().render_with_meta("Some article text...");
37	+	println!("{} words, ~{} min read", result.word_count, result.reading_time_minutes);
38	+	```
39	+
40	+	## Feature Flags
41	+
42	+	All optional features are off by default. Enable what you need:
43	+
44	+	\| Flag \| Dependencies \| Provides \|
45	+	\|------\|-------------\|----------\|
46	+	\| `doc-loader` \| regex \| `DocLoader` -- load a directory of `.md` files into an in-memory page store \|
47	+	\| `directives` \| regex-lite \| `post_process_directives` -- `[!NOTE]`/`[!TIP]`/`[!TABS]` blockquote alerts and code tabs \|
48	+	\| `frontmatter` \| toml \| `parse_frontmatter` -- extract TOML frontmatter delimited by `+++` \|
49	+	\| `mentions` \| regex-lite \| `extract_mentions`, `resolve_mentions` -- `@username` parsing and linking \|
50	+	\| `quotes` \| regex-lite, uuid \| `post_process_quotes` -- replace `[quote:POST_ID:HASH]` markers with author attribution \|
51	+	\| `media-urls` \| regex-lite \| `rewrite_media_paths`, `img_to_video` -- CDN path rewriting and video tag conversion \|
52	+	\| `full` \| all of the above \| Enable everything \|
53	+
54	+	```toml
55	+	# In Cargo.toml
56	+	docengine = { path = "../Shared/docengine" } # Core only
57	+	docengine = { path = "../Shared/docengine", features = ["full"] } # Everything
58	+	```
59	+
60	+	## Core API
61	+
62	+	### Types
63	+
64	+	- `Renderer` -- configurable markdown renderer with builder pattern
65	+	- `RenderResult` -- rendered HTML plus `word_count` and `reading_time_minutes`
66	+	- `SanitizePreset` -- `Permissive`, `Standard`, `Strict`, `Minimal`
67	+	- `TocEntry` -- heading level, text, and anchor for table of contents
68	+
69	+	### Functions
70	+
71	+	\| Function \| Description \|
72	+	\|----------\|-------------\|
73	+	\| `render_permissive(md)` \| Render with full GFM features \|
74	+	\| `render_standard(md)` \| Render without images \|
75	+	\| `render_strict(md)` \| Render with all restrictions (UGC-safe) \|
76	+	\| `sanitize_html(html)` \| Clean pre-rendered HTML without markdown parsing \|
77	+	\| `word_count(text)` \| Count words in raw text \|
78	+	\| `reading_time_minutes(wc)` \| Estimate reading time (200 wpm) \|
79	+	\| `extract_title(md)` \| Pull the first `# Heading` from markdown \|
80	+	\| `strip_first_heading(md)` \| Remove the first `# Heading` (for template-rendered titles) \|
81	+	\| `extract_toc(md)` \| Build a `Vec<TocEntry>` from all headings \|
82	+	\| `render_toc_html(entries)` \| Render TOC entries as a `<nav class="toc">` HTML list \|
83	+
84	+	### Feature-gated
85	+
86	+	\| Function / Type \| Feature \| Description \|
87	+	\|-----------------\|---------\|-------------\|
88	+	\| `DocLoader::load(path, config)` \| `doc-loader` \| Load `.md` files from disk, render to HTML, build searchable index \|
89	+	\| `DocPage`, `DocIndexEntry` \| `doc-loader` \| Page and index entry types \|
90	+	\| `post_process_directives(html)` \| `directives` \| Convert `[!NOTE]`/`[!TIP]`/etc. blockquotes to alert divs, `[!TABS]` to tabbed code blocks \|
91	+	\| `parse_frontmatter(input)` \| `frontmatter` \| Parse `+++`-delimited TOML frontmatter \|
92	+	\| `Frontmatter` \| `frontmatter` \| Struct with `title`, `date`, `tags`, `section`, `draft`, `extra` \|
93	+	\| `extract_mentions(md)` \| `mentions` \| Find unique `@username` mentions (skips code blocks) \|
94	+	\| `resolve_mentions(md, valid, template)` \| `mentions` \| Replace `@user` with `[@user](/path/to/user)` for known usernames \|
95	+	\| `post_process_quotes(html, authors)` \| `quotes` \| Replace `[quote:UUID:HASH]` with clickable attribution \|
96	+	\| `rewrite_media_paths(md, base, user)` \| `media-urls` \| Rewrite relative image paths to absolute CDN URLs \|
97	+	\| `img_to_video(html)` \| `media-urls` \| Convert `<img>` tags pointing to video files into `<video>` elements \|
98	+
99	+	## Consumers
100	+
101	+	\| Project \| Features used \| Preset \|
102	+	\|---------\|--------------\|--------\|
103	+	\| MNW \| `doc-loader`, `directives`, `frontmatter`, `media-urls` \| Permissive (docs/blog), Standard (descriptions) \|
104	+	\| Multithreaded \| `mentions`, `quotes` \| Strict (forum posts) \|
105	+	\| GoingsOn \| core only \| Standard (notes, descriptions) \|
106	+	\| Balanced Breakfast \| core only \| Sanitize-only (RSS feed content) \|
107	+
108	+	## Security
109	+
110	+	All presets sanitize output through ammonia. The strict preset additionally:
111	+	- Strips all raw HTML and images at the parser level (before ammonia)
112	+	- Replaces `javascript:`, `data:`, `vbscript:` URLs with `#`
113	+	- Adds `rel="noopener noreferrer nofollow"` to all links
114	+
115	+	Zero unsafe code.
116	+
117	+	## License
118	+
119	+	PolyForm Noncommercial 1.0.0

A docs/architecture.md +80

		@@ -0,0 +1,80 @@
1	+	# DocEngine Architecture
2	+
3	+	## Overview
4	+
5	+	DocEngine is a markdown rendering library that wraps pulldown-cmark (parsing) and ammonia (sanitization) behind a preset system. Each preset configures which markdown features are enabled and how aggressively the output is sanitized.
6	+
7	+	## Module Map
8	+
9	+	```
10	+	src/
11	+	lib.rs Crate root, re-exports, convenience functions
12	+	render.rs Renderer struct (builder pattern, 4 presets, render/render_with_meta)
13	+	sanitize.rs SanitizePreset enum (Permissive, Standard, Strict, Minimal)
14	+	text.rs Text utilities (word_count, reading_time, extract_title, strip_first_heading)
15	+	toc.rs Table of contents extraction and HTML rendering
16	+	escape.rs HTML entity escaping for safe string interpolation
17	+	code_spans.rs Code span/block byte range detection (used by mentions to skip code)
18	+	directives.rs [directives] Alert/tabs blockquote post-processing
19	+	doc_loader.rs [doc-loader] Load .md files from disk into in-memory page store
20	+	frontmatter.rs [frontmatter] Parse +++delimited TOML frontmatter
21	+	media_urls.rs [media-urls] CDN path rewriting for images, img-to-video conversion
22	+	mentions.rs [mentions] @username extraction and resolution
23	+	quotes.rs [quotes] [quote:UUID:HASH] post-processing for forum attribution
24	+	```
25	+
26	+	## Design Decisions
27	+
28	+	### Presets over configuration
29	+
30	+	Rather than exposing every pulldown-cmark option, DocEngine provides named presets that bundle markdown features with sanitization levels. This prevents misconfiguration -- you can't accidentally enable raw HTML without appropriate sanitization.
31	+
32	+	Custom configurations are still possible via the builder pattern (`Renderer::permissive().with_strip_images(true)`).
33	+
34	+	### Two-phase rendering
35	+
36	+	Rendering happens in two phases:
37	+	1. pulldown-cmark parses markdown to HTML events, with optional filtering (strip images, strip raw HTML, neutralize dangerous URL schemes)
38	+	2. ammonia sanitizes the resulting HTML string
39	+
40	+	This means even the permissive preset strips `<script>` tags -- ammonia always runs.
41	+
42	+	Post-processing steps (directives, mentions, quotes, media URLs) are applied after sanitization by consumers, not built into the render pipeline.
43	+
44	+	### Feature-gated modules
45	+
46	+	DocEngine has zero required dependencies beyond pulldown-cmark, ammonia, and serde. Consumers that only need rendering don't pull in regex, toml, or uuid. The `full` feature enables everything.
47	+
48	+	The `regex` vs `regex-lite` split is intentional -- doc-loader's link rewriting needs the full regex engine while simpler patterns in directives, mentions, quotes, and media-urls use the lighter variant.
49	+
50	+	### DocLoader loads once at startup
51	+
52	+	`DocLoader::load()` reads all `.md` files from disk, renders them to HTML, and stores them in a `HashMap<String, DocPage>`. This happens once at application boot (MNW calls it during startup). Pages are served from memory with no disk I/O on request.
53	+
54	+	Link rewriting converts relative `.md` references to the configured URL prefix (e.g., `./faq.md` becomes `/docs/faq`). Links to unpublished docs are stripped to plain text.
55	+
56	+	### Mention resolution skips code
57	+
58	+	`extract_mentions` and `resolve_mentions` detect inline code (backticks) and fenced code blocks, skipping any @mentions inside them. This prevents false positives from code examples.
59	+
60	+	### Directive post-processing
61	+
62	+	Directives (`[!NOTE]`, `[!TIP]`, `[!TABS]`, etc.) are implemented as HTML post-processing rather than markdown parsing extensions. This keeps the core render pipeline simple and makes directives composable with any preset.
63	+
64	+	## Consumers
65	+
66	+	\| Consumer \| Features \| How it's used \|
67	+	\|----------\|----------\|---------------\|
68	+	\| MNW \| doc-loader, directives, frontmatter, media-urls \| Site docs loaded at boot, blog posts with frontmatter, user descriptions (standard), item markdown (standard), CDN image rewriting \|
69	+	\| Multithreaded \| mentions, quotes \| Forum posts (strict), @username linking, quote attribution \|
70	+	\| GoingsOn \| core \| Task/event descriptions (standard) \|
71	+	\| Balanced Breakfast \| core \| RSS feed content (sanitize_only) \|
72	+	\| audiofiles \| core \| Sample descriptions (standard) \|
73	+
74	+	## Key Paths
75	+
76	+	- `src/render.rs` -- the core rendering logic
77	+	- `src/sanitize.rs` -- ammonia preset configurations
78	+	- `src/directives.rs` -- alert and code tab processing
79	+	- `src/doc_loader.rs` -- document loading and link rewriting
80	+	- `src/media_urls.rs` -- CDN path rewriting

A docs/code_review.md +114

		@@ -0,0 +1,114 @@
1	+	# DocEngine — Code Review
2	+
3	+	Date: 2026-04-12
4	+	Version: 0.3.0
5	+	Reviewer: Claude (Opus 4.6)
6	+	Scope: Full codebase review — all Rust source, Cargo.toml, README, docs
7	+
8	+	## Summary
9	+
10	+	DocEngine is a markdown-to-HTML rendering library (~2,550 source LOC across 13 modules) built on pulldown-cmark and ammonia. Preset-based configuration system bundles markdown features with sanitization levels. 6 cargo feature gates keep the dependency tree minimal. Used by 5 consumers across the ecosystem (MNW, Multithreaded, GO, BB, AF). 141 tests, 0 clippy warnings, 0 unsafe code.
11	+
12	+	Overall: A — clean, well-tested, security-conscious. No bugs found. Findings are documentation gaps and minor observations.
13	+
14	+	---
15	+
16	+	## Findings
17	+
18	+	### [MEDIUM] README and architecture.md missing `directives` and `media-urls` features
19	+
20	+	The README feature flag table (lines 42-51) lists `doc-loader`, `frontmatter`, `mentions`, and `quotes` but omits `directives` and `media-urls`. These features are defined in Cargo.toml, included in `full`, and used by MNW. The `full` description says "all of the above" but the unlisted features make this misleading.
21	+
22	+	Similarly, `architecture.md` module map (lines 9-21) does not include `directives.rs`, `media_urls.rs`, or `escape.rs`. The consumers table also doesn't mention `directives` or `media-urls` for MNW.
23	+
24	+	### [MEDIUM] Missing `docs/todo.md` and `docs/audit_review.md`
25	+
26	+	Per cross-cutting conventions, each project should have `todo.md` and `audit_review.md` in `docs/`. Only `architecture.md` exists.
27	+
28	+	### [LOW] `Permissive` and `Standard` sanitize presets are identical
29	+
30	+	In `sanitize.rs:17`, both `Permissive` and `Standard` map to `ammonia::clean(html)`. The doc comment for `Standard` says "Same as Permissive." The distinction is intentional — they differ in the `Renderer`'s markdown settings (Standard strips images, Permissive doesn't) — but the sanitize preset enum having two identical variants with a comment that says "Same" could confuse contributors. A comment clarifying "Same sanitization; markdown-level differences are configured in the Renderer" would help.
31	+
32	+	### [LOW] `strip_html_tags` in doc_loader.rs does not decode HTML entities
33	+
34	+	The search index generator (`strip_html_tags`, lines 162-182) strips tags but leaves HTML entities (`&amp;`, `&lt;`, etc.) intact. Searching for "A & B" won't match content rendered as "A &amp; B". Low impact since doc search is client-side and search terms are unlikely to contain entities, but worth noting.
35	+
36	+	### [LOW] `render.rs` at 511 lines
37	+
38	+	Technically exceeds the 500-line guideline, but 268 lines are tests. The logic is ~235 lines. Within the spirit of the rule. If the test suite grows further, consider moving tests to a submodule.
39	+
40	+	### [INFO] `rewrite_links` regex is naive about nested brackets
41	+
42	+	The regex `\[([^\]]+)\]\(([^)]+)\)` in doc_loader.rs cannot handle nested brackets in link text or parentheses in URLs. Low risk since doc files are authored by the project owner, but edge cases like `[text](url_(with_parens))` would be rewritten incorrectly.
43	+
44	+	### [INFO] `extract_title` silently fails on frontmatter-prefixed documents
45	+
46	+	If called on raw markdown that starts with `+++` TOML frontmatter, `extract_title` returns `None` because the `+++` line is neither empty nor `---`. This is the correct behavior (frontmatter should be stripped first via `parse_frontmatter`), but the interaction is documented nowhere.
47	+
48	+	### [INFO] `html_escape` uses sequential string replacements
49	+
50	+	Five sequential `.replace()` calls, each allocating a new String. A single-pass approach would be more efficient, but this function is only called in template contexts (TOC, quotes, video tags), not in the hot render path. Negligible impact.
51	+
52	+	---
53	+
54	+	## Strengths
55	+
56	+	- Preset system is the right abstraction. Bundles markdown features with matching sanitization levels, preventing dangerous misconfigurations (e.g., raw HTML without sanitization). Builder pattern still allows per-instance overrides.
57	+	- Feature gate design. Zero unnecessary dependencies for core-only consumers. Smart split of `regex` (doc-loader, complex patterns) vs `regex-lite` (directives/mentions/quotes/media-urls, simple patterns).
58	+	- Security-conscious. All paths go through ammonia sanitization. Dangerous URL schemes detected case-insensitively. Path traversal blocked in media URL rewriting. HTML escaping on all user-supplied strings interpolated into HTML. Zero unsafe code.
59	+	- Test quality. 141 tests at ~1.09:1 test-to-logic ratio. Tests cover happy paths, edge cases, security scenarios. All co-located with implementation.
60	+	- Clean module boundaries. Each module does one thing. No circular dependencies. Feature gates cleanly gate whole modules.
61	+	- Directive system is extensible. Any `[!UPPERCASE]` blockquote becomes an alert div. Code tabs auto-detect language labels. Both are implemented as HTML post-processing, keeping the core render pipeline simple.
62	+
63	+	## Security Checklist
64	+
65	+	\| Check \| Status \|
66	+	\|-------\|--------\|
67	+	\| XSS via raw HTML \| Pass — ammonia sanitization on all presets \|
68	+	\| XSS via markdown \| Pass — strict preset strips raw HTML at parser level + sanitizes \|
69	+	\| javascript:/data:/vbscript: URLs \| Pass — detected case-insensitively, neutralized to `#` \|
70	+	\| Path traversal in media URLs \| Pass — `..` paths rejected \|
71	+	\| User string injection in HTML \| Pass — `html_escape()` applied in quotes, TOC, media tags \|
72	+	\| Unsafe code \| Pass — zero `unsafe` blocks \|
73	+
74	+	## Metrics
75	+
76	+	\| Metric \| Value \|
77	+	\|--------\|-------\|
78	+	\| Source LOC (logic) \| ~1,310 \|
79	+	\| Source LOC (tests) \| ~1,235 \|
80	+	\| Source LOC (total) \| ~2,550 \|
81	+	\| Source files \| 13 \|
82	+	\| Test count \| 141 \|
83	+	\| Tests/KLOC (logic) \| ~108 \|
84	+	\| Clippy warnings \| 0 \|
85	+	\| Unsafe blocks \| 0 \|
86	+	\| Cargo features \| 6 (+full) \|
87	+	\| Direct dependencies \| 7 (3 always, 4 optional) \|
88	+	\| Consumers \| 5 (MNW, Multithreaded, GO, BB, AF) \|
89	+	\| Audit advisories \| 0 (1 allowed warning) \|
90	+
91	+	## Module Heatmap
92	+
93	+	\| Module \| Code \| Test \| Security \| Docs \|
94	+	\|--------\|:----:\|:----:\|:--------:\|:----:\|
95	+	\| render.rs \| A \| A \| A \| A \|
96	+	\| directives.rs \| A \| A \| A- \| B (not in README/arch) \|
97	+	\| doc_loader.rs \| A \| A- \| A \| A- \|
98	+	\| media_urls.rs \| A \| A \| A \| B (not in README/arch) \|
99	+	\| toc.rs \| A \| A- \| A \| A \|
100	+	\| mentions.rs \| A \| A \| A \| A \|
101	+	\| frontmatter.rs \| A \| A \| A \| A \|
102	+	\| code_spans.rs \| A \| A- \| A \| A- \|
103	+	\| sanitize.rs \| A- \| A- \| A \| B+ (confusing "Same" comment) \|
104	+	\| text.rs \| A \| A \| A \| A \|
105	+	\| escape.rs \| A \| A- \| A \| A \|
106	+	\| quotes.rs \| A \| A- \| A \| A \|
107	+	\| lib.rs \| A \| — \| A \| A \|
108	+
109	+	## Action Items
110	+
111	+	1. ~~[MEDIUM] Update README feature flag table to include `directives` and `media-urls`~~ — Done. Feature table, feature-gated API table, and consumers table all updated.
112	+	2. ~~[MEDIUM] Update `architecture.md` module map and consumers table~~ — Done. Added directives.rs, media_urls.rs, escape.rs to module map. Updated consumers, key paths, and added directive design decision.
113	+	3. ~~[LOW] Clarify `Standard` sanitize preset doc comment~~ — Done. Explains the difference is at the Renderer level.
114	+	4. ~~[LOW] Consider HTML entity decoding in `strip_html_tags` for search index accuracy~~ — Done. Decodes the entity forms of `&`, `<`, `>`, `"`, and `'` (both apostrophe spellings) after tag stripping.

A src/directives.rs +443

		@@ -0,0 +1,443 @@
1	+	//! Post-process rendered HTML to convert blockquote-based directives into
2	+	//! styled elements.
3	+	//!
4	+	//! Alerts: `> [!NOTE]`, `> [!TIP]`, `> [!WARNING]`, `> [!CAUTION]`,
5	+	//! `> [!IMPORTANT]`, and any custom `> [!TYPE]` marker become styled
6	+	//! `<div class="alert alert-{type}">` callout elements.
7	+	//!
8	+	//! Code tabs: `> [!TABS]` followed by fenced code blocks become a tabbed
9	+	//! interface with language-labelled tabs.
10	+
11	+	use std::sync::LazyLock;
12	+
13	+	/// Matches any `[!TYPE]` alert marker inside a blockquote paragraph.
14	+	/// Accepts any uppercase word (letters, digits, hyphens, underscores).
15	+	static ALERT_RE: LazyLock<regex_lite::Regex> = LazyLock::new(\|\| {
16	+	regex_lite::Regex::new(
17	+	r"<blockquote>\s<p>\[!([A-Z][A-Z0-9_-])\](?:<br\s/?>)?\s",
18	+	)
19	+	.expect("valid alert regex")
20	+	});
21	+
22	+	/// Process all directives: code tabs first, then alerts.
23	+	pub fn post_process_directives(html: &str) -> String {
24	+	let with_tabs = process_tabs(html);
25	+	process_alerts(&with_tabs)
26	+	}
27	+
28	+	/// Replace alert blockquotes with styled `<div class="alert ...">` elements.
29	+	fn process_alerts(html: &str) -> String {
30	+	// First pass: replace opening markers.
31	+	let opened = ALERT_RE.replace_all(html, \|caps: &regex_lite::Captures\| {
32	+	let kind = &caps[1];
33	+	// Skip TABS — already handled by process_tabs.
34	+	if kind == "TABS" {
35	+	return caps[0].to_string();
36	+	}
37	+	let label = title_case(kind);
38	+	format!(
39	+	"<div class=\"alert alert-{kind}\"><p class=\"alert-title\">{label}</p><p>",
40	+	kind = kind.to_ascii_lowercase(),
41	+	label = label,
42	+	)
43	+	});
44	+
45	+	// Second pass: close any opened alerts.
46	+	let alert_count = ALERT_RE
47	+	.captures_iter(html)
48	+	.filter(\|c\| &c[1] != "TABS")
49	+	.count();
50	+	if alert_count == 0 {
51	+	return opened.into_owned();
52	+	}
53	+
54	+	let mut result = String::with_capacity(opened.len());
55	+	let mut remaining = opened.as_ref();
56	+	let mut replaced = 0;
57	+
58	+	while replaced < alert_count {
59	+	if let Some(pos) = remaining.find("</blockquote>") {
60	+	result.push_str(&remaining[..pos]);
61	+	result.push_str("</div>");
62	+	remaining = &remaining[(pos + "</blockquote>".len())..];
63	+	replaced += 1;
64	+	} else {
65	+	break;
66	+	}
67	+	}
68	+	result.push_str(remaining);
69	+	result
70	+	}
71	+
72	+	/// Process `[!TABS]` blockquotes into tabbed code-block interfaces.
73	+	fn process_tabs(html: &str) -> String {
74	+	if !html.contains("[!TABS]") {
75	+	return html.to_string();
76	+	}
77	+
78	+	let mut result = String::with_capacity(html.len());
79	+	let mut remaining = html;
80	+
81	+	while let Some(bq_pos) = remaining.find("<blockquote>") {
82	+	let after_bq_start = bq_pos + "<blockquote>".len();
83	+
84	+	// Find the closing </blockquote> for this blockquote.
85	+	let close_pos = match remaining[bq_pos..].find("</blockquote>") {
86	+	Some(p) => bq_pos + p,
87	+	None => break,
88	+	};
89	+
90	+	let inner = &remaining[after_bq_start..close_pos];
91	+
92	+	// Check if the first <p> in the blockquote contains [!TABS].
93	+	let is_tabs = {
94	+	let trimmed = inner.trim_start();
95	+	trimmed.starts_with("<p>") && {
96	+	let first_p_end = trimmed.find("</p>").unwrap_or(trimmed.len());
97	+	trimmed[..first_p_end].contains("[!TABS]")
98	+	}
99	+	};
100	+
101	+	if !is_tabs {
102	+	// Not a TABS blockquote — copy through the opening tag and continue.
103	+	result.push_str(&remaining[..after_bq_start]);
104	+	remaining = &remaining[after_bq_start..];
105	+	continue;
106	+	}
107	+
108	+	// Copy everything before this blockquote.
109	+	result.push_str(&remaining[..bq_pos]);
110	+
111	+	// Extract code blocks from the inner HTML.
112	+	let tabs = extract_code_blocks(inner);
113	+
114	+	if tabs.is_empty() {
115	+	// No code blocks found — wrap content in a plain div.
116	+	result.push_str("<div class=\"code-tabs\">");
117	+	result.push_str(inner);
118	+	result.push_str("</div>");
119	+	} else {
120	+	result.push_str(&build_tabs_html(&tabs));
121	+	}
122	+
123	+	remaining = &remaining[close_pos + "</blockquote>".len()..];
124	+	}
125	+
126	+	result.push_str(remaining);
127	+	result
128	+	}
129	+
/// Extract `(language, full_html_block)` pairs from HTML containing
/// `<pre><code>` elements.
///
/// Each returned block spans from `<pre><code` through the matching
/// `</code></pre>`, inclusive. The language is read from a
/// `class="language-X"` attribute on the opening `<code>` tag only, so code
/// text that happens to contain that string cannot influence the label.
/// Blocks without such an attribute are labelled `"code"`. Scanning stops at
/// the first block that has no closing `</code></pre>`.
fn extract_code_blocks(html: &str) -> Vec<(String, String)> {
    const START: &str = "<pre><code";
    const END: &str = "</code></pre>";
    const LANG_ATTR: &str = "class=\"language-";

    let mut blocks = Vec::new();
    let mut cursor = 0;

    while let Some(rel) = html[cursor..].find(START) {
        let start = cursor + rel;
        let Some(end) = html[start..].find(END).map(|p| start + p + END.len()) else {
            break;
        };
        let block = &html[start..end];

        // Opening `<code ...` tag: everything after `<pre>` up to the first `>`.
        let after_pre = &block["<pre>".len()..];
        let open_tag = after_pre.find('>').map_or(after_pre, |i| &after_pre[..i]);

        let lang = open_tag
            .find(LANG_ATTR)
            .and_then(|i| open_tag[i + LANG_ATTR.len()..].split('"').next())
            .unwrap_or("code");

        blocks.push((lang.to_string(), block.to_string()));
        cursor = end;
    }

    blocks
}
161	+
162	+	/// Build tabbed HTML from extracted code blocks.
163	+	fn build_tabs_html(tabs: &[(String, String)]) -> String {
164	+	let mut html = String::from("<div class=\"code-tabs\">\n<div class=\"code-tabs-bar\">");
165	+
166	+	for (i, (lang, _)) in tabs.iter().enumerate() {
167	+	let active = if i == 0 { " active" } else { "" };
168	+	let label = code_language_label(lang);
169	+	html.push_str(&format!(
170	+	"<button class=\"code-tab{active}\" data-tab-index=\"{i}\">{label}</button>"
171	+	));
172	+	}
173	+
174	+	html.push_str("</div>\n");
175	+
176	+	for (i, (_, block)) in tabs.iter().enumerate() {
177	+	let active = if i == 0 { " active" } else { "" };
178	+	html.push_str(&format!(
179	+	"<div class=\"code-tab-panel{active}\" data-tab-index=\"{i}\">{block}</div>\n"
180	+	));
181	+	}
182	+
183	+	html.push_str("</div>");
184	+	html
185	+	}
186	+
187	+	/// Human-readable label for a code language identifier.
188	+	fn code_language_label(lang: &str) -> String {
189	+	match lang {
190	+	"js" \| "javascript" => "JavaScript".into(),
191	+	"ts" \| "typescript" => "TypeScript".into(),
192	+	"sh" \| "bash" \| "zsh" \| "shell" => "Shell".into(),
193	+	"json" => "JSON".into(),
194	+	"html" => "HTML".into(),
195	+	"css" => "CSS".into(),
196	+	"sql" => "SQL".into(),
197	+	"toml" => "TOML".into(),
198	+	"yaml" \| "yml" => "YAML".into(),
199	+	"xml" => "XML".into(),
200	+	other => title_case(other),
201	+	}
202	+	}
203	+
/// Capitalise the first character of `s` and ASCII-lowercase the rest.
///
/// The first character goes through Unicode uppercasing, which may expand to
/// several characters. The remaining characters are lowercased only when they
/// are ASCII; hyphens, digits, and non-ASCII characters pass through
/// unchanged. An empty input yields an empty string.
fn title_case(s: &str) -> String {
    let mut chars = s.chars();
    let Some(first) = chars.next() else {
        return String::new();
    };

    let mut out = String::with_capacity(s.len());
    out.extend(first.to_uppercase());
    out.extend(chars.map(|c| c.to_ascii_lowercase()));
    out
}
215	+
216	+	#[cfg(test)]
217	+	mod tests {
218	+	use super::*;
219	+
220	+	// ===== Alert directives =====
221	+
222	+	#[test]
223	+	fn note_alert() {
224	+	let html = "<blockquote>\n<p>[!NOTE]<br>\nThis is a note.</p>\n</blockquote>";
225	+	let result = post_process_directives(html);
226	+	assert!(result.contains("alert alert-note"));
227	+	assert!(result.contains("<p class=\"alert-title\">Note</p>"));
228	+	assert!(result.contains("This is a note."));
229	+	assert!(!result.contains("<blockquote>"));
230	+	}
231	+
232	+	#[test]
233	+	fn tip_alert() {
234	+	let html = "<blockquote>\n<p>[!TIP]<br>\nHelpful tip here.</p>\n</blockquote>";
235	+	let result = post_process_directives(html);
236	+	assert!(result.contains("alert alert-tip"));
237	+	assert!(result.contains("<p class=\"alert-title\">Tip</p>"));
238	+	}
239	+
240	+	#[test]
241	+	fn important_alert() {
242	+	let html = "<blockquote>\n<p>[!IMPORTANT]<br>\nDo this.</p>\n</blockquote>";
243	+	let result = post_process_directives(html);
244	+	assert!(result.contains("alert alert-important"));
245	+	assert!(result.contains("<p class=\"alert-title\">Important</p>"));
246	+	}
247	+
248	+	#[test]
249	+	fn warning_alert() {
250	+	let html = "<blockquote>\n<p>[!WARNING]<br>\nBe careful.</p>\n</blockquote>";
251	+	let result = post_process_directives(html);
252	+	assert!(result.contains("alert alert-warning"));
253	+	assert!(result.contains("<p class=\"alert-title\">Warning</p>"));
254	+	}
255	+
256	+	#[test]
257	+	fn caution_alert() {
258	+	let html = "<blockquote>\n<p>[!CAUTION]<br/>\nDanger zone.</p>\n</blockquote>";
259	+	let result = post_process_directives(html);
260	+	assert!(result.contains("alert alert-caution"));
261	+	assert!(result.contains("<p class=\"alert-title\">Caution</p>"));
262	+	}
263	+
264	+	#[test]
265	+	fn multi_paragraph_alert() {
266	+	let html = "<blockquote>\n<p>[!NOTE]<br>\nFirst paragraph.</p>\n<p>Second paragraph.</p>\n</blockquote>";
267	+	let result = post_process_directives(html);
268	+	assert!(result.contains("alert alert-note"));
269	+	assert!(result.contains("First paragraph."));
270	+	assert!(result.contains("Second paragraph."));
271	+	assert!(result.contains("</div>"));
272	+	assert!(!result.contains("</blockquote>"));
273	+	}
274	+
275	+	#[test]
276	+	fn regular_blockquote_unchanged() {
277	+	let html = "<blockquote>\n<p>Just a normal quote.</p>\n</blockquote>";
278	+	let result = post_process_directives(html);
279	+	assert_eq!(result, html);
280	+	}
281	+
282	+	#[test]
283	+	fn mixed_alerts_and_blockquotes() {
284	+	let html = concat!(
285	+	"<blockquote>\n<p>[!WARNING]<br>\nWatch out!</p>\n</blockquote>\n",
286	+	"<blockquote>\n<p>Normal quote.</p>\n</blockquote>"
287	+	);
288	+	let result = post_process_directives(html);
289	+	assert!(result.contains("alert alert-warning"));
290	+	assert!(result.contains("Watch out!"));
291	+	// The normal blockquote remains unchanged.
292	+	assert!(result.contains("<blockquote>"));
293	+	assert!(result.contains("Normal quote."));
294	+	}
295	+
296	+	// ===== Custom alert types =====
297	+
298	+	#[test]
299	+	fn custom_example_alert() {
300	+	let html = "<blockquote>\n<p>[!EXAMPLE]<br>\nHere is an example.</p>\n</blockquote>";
301	+	let result = post_process_directives(html);
302	+	assert!(result.contains("alert alert-example"));
303	+	assert!(result.contains("<p class=\"alert-title\">Example</p>"));
304	+	assert!(result.contains("Here is an example."));
305	+	assert!(!result.contains("<blockquote>"));
306	+	}
307	+
308	+	#[test]
309	+	fn custom_definition_alert() {
310	+	let html = "<blockquote>\n<p>[!DEFINITION]<br>\nA term and its meaning.</p>\n</blockquote>";
311	+	let result = post_process_directives(html);
312	+	assert!(result.contains("alert alert-definition"));
313	+	assert!(result.contains("<p class=\"alert-title\">Definition</p>"));
314	+	}
315	+
316	+	#[test]
317	+	fn custom_alert_with_hyphen() {
318	+	let html =
319	+	"<blockquote>\n<p>[!SEE-ALSO]<br>\nRelated topics.</p>\n</blockquote>";
320	+	let result = post_process_directives(html);
321	+	assert!(result.contains("alert alert-see-also"));
322	+	assert!(result.contains("<p class=\"alert-title\">See-also</p>"));
323	+	}
324	+
325	+	// ===== Code tabs =====
326	+
327	+	#[test]
328	+	fn tabs_two_languages() {
329	+	let html = concat!(
330	+	"<blockquote>\n<p>[!TABS]</p>\n",
331	+	"<pre><code class=\"language-rust\">fn main() {}\n</code></pre>\n",
332	+	"<pre><code class=\"language-python\">def main(): pass\n</code></pre>\n",
333	+	"</blockquote>"
334	+	);
335	+	let result = post_process_directives(html);
336	+	assert!(result.contains("code-tabs"));
337	+	assert!(result.contains("code-tabs-bar"));
338	+	assert!(result.contains("Rust"));
339	+	assert!(result.contains("Python"));
340	+	assert!(result.contains("fn main() {}"));
341	+	assert!(result.contains("def main(): pass"));
342	+	assert!(!result.contains("<blockquote>"));
343	+	// First tab is active.
344	+	assert!(result.contains("code-tab active"));
345	+	assert!(result.contains("code-tab-panel active"));
346	+	}
347	+
348	+	#[test]
349	+	fn tabs_three_languages() {
350	+	let html = concat!(
351	+	"<blockquote>\n<p>[!TABS]</p>\n",
352	+	"<pre><code class=\"language-bash\">curl https://api.example.com\n</code></pre>\n",
353	+	"<pre><code class=\"language-js\">fetch('https://api.example.com')\n</code></pre>\n",
354	+	"<pre><code class=\"language-python\">requests.get('https://api.example.com')\n</code></pre>\n",
355	+	"</blockquote>"
356	+	);
357	+	let result = post_process_directives(html);
358	+	assert!(result.contains("Shell")); // bash → Shell
359	+	assert!(result.contains("JavaScript")); // js → JavaScript
360	+	assert!(result.contains("Python"));
361	+	assert!(result.contains("data-tab-index=\"0\""));
362	+	assert!(result.contains("data-tab-index=\"1\""));
363	+	assert!(result.contains("data-tab-index=\"2\""));
364	+	}
365	+
366	+	#[test]
367	+	fn tabs_no_language_specified() {
368	+	let html = concat!(
369	+	"<blockquote>\n<p>[!TABS]</p>\n",
370	+	"<pre><code>some code\n</code></pre>\n",
371	+	"<pre><code class=\"language-rust\">let x = 1;\n</code></pre>\n",
372	+	"</blockquote>"
373	+	);
374	+	let result = post_process_directives(html);
375	+	assert!(result.contains("Code")); // fallback label
376	+	assert!(result.contains("Rust"));
377	+	}
378	+
379	+	#[test]
380	+	fn tabs_with_br_marker() {
381	+	let html = concat!(
382	+	"<blockquote>\n<p>[!TABS]<br>\n</p>\n",
383	+	"<pre><code class=\"language-toml\">[package]\n</code></pre>\n",
384	+	"<pre><code class=\"language-json\">{}\n</code></pre>\n",
385	+	"</blockquote>"
386	+	);
387	+	let result = post_process_directives(html);
388	+	assert!(result.contains("TOML"));
389	+	assert!(result.contains("JSON"));
390	+	}
391	+
392	+	#[test]
393	+	fn tabs_mixed_with_alert_and_blockquote() {
394	+	let html = concat!(
395	+	"<blockquote>\n<p>[!NOTE]<br>\nA note.</p>\n</blockquote>\n",
396	+	"<blockquote>\n<p>[!TABS]</p>\n",
397	+	"<pre><code class=\"language-rust\">let x = 1;\n</code></pre>\n",
398	+	"</blockquote>\n",
399	+	"<blockquote>\n<p>Normal quote.</p>\n</blockquote>"
400	+	);
401	+	let result = post_process_directives(html);
402	+	// Alert processed.
403	+	assert!(result.contains("alert alert-note"));
404	+	// Tabs processed.
405	+	assert!(result.contains("code-tabs"));
406	+	assert!(result.contains("Rust"));
407	+	// Normal blockquote unchanged.
408	+	assert!(result.contains("<blockquote>"));
409	+	assert!(result.contains("Normal quote."));
410	+	}
411	+
412	+	#[test]
413	+	fn tabs_no_code_blocks() {
414	+	let html = concat!(
415	+	"<blockquote>\n<p>[!TABS]</p>\n",
416	+	"<p>Just text, no code.</p>\n",
417	+	"</blockquote>"
418	+	);
419	+	let result = post_process_directives(html);
420	+	assert!(result.contains("code-tabs"));
421	+	assert!(result.contains("Just text, no code."));
422	+	assert!(!result.contains("<blockquote>"));
423	+	}
424	+
425	+	// ===== Language label mapping =====
426	+
427	+	#[test]
428	+	fn language_labels() {
429	+	assert_eq!(code_language_label("js"), "JavaScript");
430	+	assert_eq!(code_language_label("typescript"), "TypeScript");
431	+	assert_eq!(code_language_label("bash"), "Shell");
432	+	assert_eq!(code_language_label("json"), "JSON");
433	+	assert_eq!(code_language_label("html"), "HTML");
434	+	assert_eq!(code_language_label("css"), "CSS");
435	+	assert_eq!(code_language_label("sql"), "SQL");
436	+	assert_eq!(code_language_label("toml"), "TOML");
437	+	assert_eq!(code_language_label("yaml"), "YAML");
438	+	assert_eq!(code_language_label("xml"), "XML");
439	+	assert_eq!(code_language_label("rust"), "Rust");
440	+	assert_eq!(code_language_label("python"), "Python");
441	+	assert_eq!(code_language_label("go"), "Go");
442	+	}
443	+	}

M src/doc_loader.rs +103 -9

			@@ -35,6 +35,15 @@ pub struct DocIndexEntry {
35	35		pub section: String,
36	36		}
37	37
	38	+	/// Entry in the full-text search index, serialised to JSON for client-side search.
	39	+	#[derive(Clone, Debug, serde::Serialize)]
	40	+	pub struct DocSearchEntry {
	41	+	pub slug: String,
	42	+	pub title: String,
	43	+	pub section: String,
	44	+	pub body_text: String,
	45	+	}
	46	+
38	47		/// In-memory store of rendered documentation pages, built once at startup.
39	48		#[derive(Clone, Debug)]
40	49		pub struct DocLoader {
			@@ -56,9 +65,15 @@ impl DocLoader {
56	65		continue;
57	66		}
58	67
59		-	let mut entries: Vec<_> = std::fs::read_dir(&section_path)
60		-	.into_iter()
61		-	.flatten()
	68	+	let read_dir = match std::fs::read_dir(&section_path) {
	69	+	Ok(rd) => rd,
	70	+	Err(e) => {
	71	+	tracing::warn!(path = %section_path.display(), error = %e, "Failed to read docs section directory");
	72	+	continue;
	73	+	}
	74	+	};
	75	+
	76	+	let mut entries: Vec<_> = read_dir
62	77		.filter_map(|e| e.ok())
63	78		.filter(|e| {
64	79		e.path()
			@@ -92,21 +107,24 @@ impl DocLoader {
92	107		);
93	108		let md_without_title = crate::text::strip_first_heading(&rewritten_md);
94	109		let html_content = crate::render_permissive(&md_without_title);
	110	+	#[cfg(feature = "directives")]
	111	+	let html_content = crate::directives::post_process_directives(&html_content);
95	112
96	113		let page = DocPage {
97		-	title: title.clone(),
98		-	slug: slug.clone(),
	114	+	title,
	115	+	slug,
99	116		section: section_display.clone(),
100	117		html_content,
101	118		};
102	119
103	120		index.push(DocIndexEntry {
104		-	title: title.clone(),
105		-	slug: slug.clone(),
106		-	section: section_display.clone(),
	121	+	title: page.title.clone(),
	122	+	slug: page.slug.clone(),
	123	+	section: page.section.clone(),
107	124		});
108	125
109		-	pages.insert(slug, page);
	126	+	let slug_key = page.slug.clone();
	127	+	pages.insert(slug_key, page);
110	128		}
111	129		}
112	130
			@@ -122,6 +140,53 @@ impl DocLoader {
122	140		pub fn index(&self) -> &[DocIndexEntry] {
123	141		&self.index
124	142		}
	143	+
	144	+	/// Build a search index with HTML stripped to plain text.
	145	+	pub fn search_index(&self) -> Vec<DocSearchEntry> {
	146	+	self.index
	147	+	.iter()
	148	+	.filter_map(|entry| {
	149	+	let page = self.pages.get(&entry.slug)?;
	150	+	Some(DocSearchEntry {
	151	+	slug: entry.slug.clone(),
	152	+	title: entry.title.clone(),
	153	+	section: entry.section.clone(),
	154	+	body_text: strip_html_tags(&page.html_content),
	155	+	})
	156	+	})
	157	+	.collect()
	158	+	}
	159	+	}
	160	+
	161	+	/// Strip HTML tags from a string, returning plain text.
	162	+	/// Decodes common HTML entities so search indexes match plain-text queries.
	163	+	fn strip_html_tags(html: &str) -> String {
	164	+	let mut out = String::with_capacity(html.len());
	165	+	let mut in_tag = false;
	166	+	for ch in html.chars() {
	167	+	match ch {
	168	+	'<' => in_tag = true,
	169	+	'>' => {
	170	+	in_tag = false;
	171	+	// Add a space after closing tags to separate words.
	172	+	if !out.ends_with(' ') {
	173	+	out.push(' ');
	174	+	}
	175	+	}
	176	+	_ if !in_tag => out.push(ch),
	177	+	_ => {}
	178	+	}
	179	+	}
	180	+	// Collapse runs of whitespace.
	181	+	let collapsed: String = out.split_whitespace().collect::<Vec<_>>().join(" ");
	182	+	// Decode common HTML entities for search index accuracy.
	183	+	collapsed
	184	+	.replace("&amp;", "&")
	185	+	.replace("&lt;", "<")
	186	+	.replace("&gt;", ">")
	187	+	.replace("&quot;", "\"")
	188	+	.replace("&#x27;", "'")
	189	+	.replace("&#39;", "'")
125	190		}
126	191
127	192		/// Rewrite relative `.md` links to the configured prefix.
			@@ -257,4 +322,33 @@ mod tests {
257	322		let result = rewrite_links(md, "/docs", None);
258	323		assert_eq!(result, md);
259	324		}
	325	+
	326	+	#[test]
	327	+	fn strip_html_tags_removes_tags() {
	328	+	let html = "<p>Hello <strong>world</strong></p>";
	329	+	assert_eq!(strip_html_tags(html), "Hello world");
	330	+	}
	331	+
	332	+	#[test]
	333	+	fn strip_html_tags_empty_input() {
	334	+	assert_eq!(strip_html_tags(""), "");
	335	+	}
	336	+
	337	+	#[test]
	338	+	fn strip_html_tags_decodes_entities() {
	339	+	let html = "<p>Price: $10 &amp; free</p>";
	340	+	assert_eq!(strip_html_tags(html), "Price: $10 & free");
	341	+
	342	+	let html2 = "<p>a &lt; b &gt; c</p>";
	343	+	assert_eq!(strip_html_tags(html2), "a < b > c");
	344	+
	345	+	let html3 = "<p>&quot;hello&quot; &amp; &#x27;world&#x27;</p>";
	346	+	assert_eq!(strip_html_tags(html3), "\"hello\" & 'world'");
	347	+	}
	348	+
	349	+	#[test]
	350	+	fn strip_html_tags_nested_tags() {
	351	+	let html = "<div><p>A <em>nested <strong>deep</strong></em> tag</p></div>";
	352	+	assert_eq!(strip_html_tags(html), "A nested deep tag");
	353	+	}
260	354		}

A src/escape.rs +33

		@@ -0,0 +1,33 @@
1	+	/// HTML-escape a string for safe interpolation into element content or attributes.
2	+	///
3	+	/// Escapes all five HTML-significant characters: `& < > " '`.
4	+	pub(crate) fn html_escape(s: &str) -> String {
5	+	s.replace('&', "&amp;")
6	+	.replace('<', "&lt;")
7	+	.replace('>', "&gt;")
8	+	.replace('"', "&quot;")
9	+	.replace('\'', "&#x27;")
10	+	}
11	+
12	+	#[cfg(test)]
13	+	mod tests {
14	+	use super::*;
15	+
16	+	#[test]
17	+	fn escapes_all_five_chars() {
18	+	assert_eq!(
19	+	html_escape("A & B < C > D \" E ' F"),
20	+	"A &amp; B &lt; C &gt; D &quot; E &#x27; F"
21	+	);
22	+	}
23	+
24	+	#[test]
25	+	fn no_change_for_safe_string() {
26	+	assert_eq!(html_escape("hello world"), "hello world");
27	+	}
28	+
29	+	#[test]
30	+	fn empty_string() {
31	+	assert_eq!(html_escape(""), "");
32	+	}
33	+	}

M src/frontmatter.rs +4 -1

			@@ -40,7 +40,10 @@ pub fn parse_frontmatter(input: &str) -> (Option<Frontmatter>, &str) {
40	40
41	41		match toml::from_str::<Frontmatter>(toml_content) {
42	42		Ok(fm) => (Some(fm), rest_slice),
43		-	Err(_) => (None, input),
	43	+	Err(e) => {
	44	+	tracing::warn!(error = %e, "Failed to parse TOML frontmatter");
	45	+	(None, input)
	46	+	}
44	47		}
45	48		} else {
46	49		(None, input)

M src/lib.rs +21 -1

			@@ -1,10 +1,24 @@
	1	+	//! Configurable markdown-to-HTML rendering with sanitization presets.
	2	+	//!
	3	+	//! Provides four rendering presets for different trust levels:
	4	+	//! - Permissive -- full GFM (tables, footnotes, images, raw HTML). For trusted content.
	5	+	//! - Standard -- GFM without images. For app text fields.
	6	+	//! - Strict -- no images, no raw HTML, dangerous scheme filtering, nofollow. For UGC.
	7	+	//! - Sanitize-only -- ammonia cleaning without markdown parsing. For external HTML.
	8	+	//!
	9	+	//! Optional features add document loading, TOML frontmatter, @mention resolution,
	10	+	//! and quote attribution post-processing.
	11	+
1	12		#[cfg(any(feature = "mentions", test))]
2	13		mod code_spans;
	14	+	mod escape;
3	15		mod render;
4	16		mod sanitize;
5	17		mod text;
6	18		mod toc;
7	19
	20	+	#[cfg(feature = "directives")]
	21	+	mod directives;
8	22		#[cfg(feature = "doc-loader")]
9	23		mod doc_loader;
10	24		#[cfg(feature = "frontmatter")]
			@@ -13,6 +27,8 @@ mod frontmatter;
13	27		mod mentions;
14	28		#[cfg(feature = "quotes")]
15	29		mod quotes;
	30	+	#[cfg(feature = "media-urls")]
	31	+	mod media_urls;
16	32
17	33		// Re-export core types
18	34		pub use render::{RenderResult, Renderer};
			@@ -21,14 +37,18 @@ pub use text::{extract_title, reading_time_minutes, strip_first_heading, word_co
21	37		pub use toc::{TocEntry, extract_toc, render_toc_html};
22	38
23	39		// Re-export feature-gated types
	40	+	#[cfg(feature = "directives")]
	41	+	pub use directives::post_process_directives;
24	42		#[cfg(feature = "doc-loader")]
25		-	pub use doc_loader::{DocIndexEntry, DocLoader, DocLoaderConfig, DocPage};
	43	+	pub use doc_loader::{DocIndexEntry, DocLoader, DocLoaderConfig, DocPage, DocSearchEntry};
26	44		#[cfg(feature = "frontmatter")]
27	45		pub use frontmatter::{Frontmatter, parse_frontmatter};
28	46		#[cfg(feature = "mentions")]
29	47		pub use mentions::{extract_mentions, resolve_mentions};
30	48		#[cfg(feature = "quotes")]
31	49		pub use quotes::{QuoteAuthor, post_process_quotes};
	50	+	#[cfg(feature = "media-urls")]
	51	+	pub use media_urls::{img_to_video, rewrite_media_paths};
32	52
33	53		/// Render markdown with the permissive preset (GFM features, default ammonia).
34	54		pub fn render_permissive(markdown: &str) -> String {

A src/media_urls.rs +235

		@@ -0,0 +1,235 @@
1	+	//! Pre-process and post-process markdown/HTML for media file references.
2	+	//!
3	+	//! Two-stage pipeline:
4	+	//! 1. Pre-process markdown — rewrite `![alt](folder/file.png)` to
5	+	//! `![alt](https://cdn.makenot.work/{user_id}/media/folder/file.png)`.
6	+	//! 2. Post-process HTML — convert `<img src="...file.mp4">` to
7	+	//! `<video controls src="..."></video>`.
8	+
9	+	use std::sync::LazyLock;
10	+
11	+	/// Matches markdown image syntax: `![alt text](url)`
12	+	/// Captures: group 1 = alt text, group 2 = URL path
13	+	static MD_IMAGE_RE: LazyLock<regex_lite::Regex> = LazyLock::new(|| {
14	+	regex_lite::Regex::new(r"!\[([^\]]*)\]\(([^)]+)\)").expect("valid markdown image regex")
15	+	});
16	+
17	+	/// Matches `<img` tags with a src pointing to a video extension.
18	+	static IMG_VIDEO_RE: LazyLock<regex_lite::Regex> = LazyLock::new(|| {
19	+	regex_lite::Regex::new(
20	+	r#"<img\s+([^>]*?)src="([^"]*\.(?:mp4|webm|mov))"([^>]*?)\s*/?>"#,
21	+	)
22	+	.expect("valid img video regex")
23	+	});
24	+
25	+	/// Matches `alt="..."` in an img tag's attributes.
26	+	static ALT_RE: LazyLock<regex_lite::Regex> = LazyLock::new(|| {
27	+	regex_lite::Regex::new(r#"alt="([^"]*)""#).expect("valid alt regex")
28	+	});
29	+
30	+	/// Rewrite relative image paths in markdown to absolute CDN URLs.
31	+	///
32	+	/// Skips:
33	+	/// - Absolute URLs (`http://`, `https://`, `data:`)
34	+	/// - Absolute paths starting with `/`
35	+	/// - Paths containing `..` (path traversal)
36	+	///
37	+	/// Rewrites relative paths to: `{cdn_base}/{user_id}/media/{path}`
38	+	pub fn rewrite_media_paths(markdown: &str, cdn_base: &str, user_id: &str) -> String {
39	+	let cdn_base = cdn_base.trim_end_matches('/');
40	+
41	+	MD_IMAGE_RE
42	+	.replace_all(markdown, |caps: &regex_lite::Captures| {
43	+	let alt = &caps[1];
44	+	let path = &caps[2];
45	+
46	+	// Skip absolute URLs and data URIs
47	+	if path.starts_with("http://")
48	+	|| path.starts_with("https://")
49	+	|| path.starts_with("data:")
50	+	|| path.starts_with('/')
51	+	{
52	+	return caps[0].to_string();
53	+	}
54	+
55	+	// Reject path traversal
56	+	if path.contains("..") {
57	+	return caps[0].to_string();
58	+	}
59	+
60	+	format!("![{}]({}/{}/media/{})", alt, cdn_base, user_id, path)
61	+	})
62	+	.into_owned()
63	+	}
64	+
65	+	/// Convert `<img>` tags with video extensions (.mp4, .webm, .mov) to `<video>` elements.
66	+	///
67	+	/// Preserves alt text as fallback content inside the `<video>` tag.
68	+	pub fn img_to_video(html: &str) -> String {
69	+	IMG_VIDEO_RE
70	+	.replace_all(html, |caps: &regex_lite::Captures| {
71	+	let before_src = &caps[1];
72	+	let src = &caps[2];
73	+	let after_src = &caps[3];
74	+
75	+	// Extract alt text if present
76	+	let attrs = format!("{}{}", before_src, after_src);
77	+	let alt = ALT_RE
78	+	.captures(&attrs)
79	+	.map(|c| c[1].to_string())
80	+	.unwrap_or_default();
81	+
82	+	if alt.is_empty() {
83	+	format!(r#"<video controls src="{}">Your browser does not support video.</video>"#, src)
84	+	} else {
85	+	format!(
86	+	r#"<video controls src="{}">{}</video>"#,
87	+	src,
88	+	crate::escape::html_escape(&alt)
89	+	)
90	+	}
91	+	})
92	+	.into_owned()
93	+	}
94	+
95	+	#[cfg(test)]
96	+	mod tests {
97	+	use super::*;
98	+
99	+	#[test]
100	+	fn relative_path_rewritten() {
101	+	let md = "![Screenshot](screenshots/demo.png)";
102	+	let result = rewrite_media_paths(md, "https://cdn.makenot.work", "user-123");
103	+	assert_eq!(
104	+	result,
105	+	"![Screenshot](https://cdn.makenot.work/user-123/media/screenshots/demo.png)"
106	+	);
107	+	}
108	+
109	+	#[test]
110	+	fn absolute_url_unchanged() {
111	+	let md = "![Logo](https://example.com/logo.png)";
112	+	let result = rewrite_media_paths(md, "https://cdn.makenot.work", "user-123");
113	+	assert_eq!(result, md);
114	+	}
115	+
116	+	#[test]
117	+	fn http_url_unchanged() {
118	+	let md = "![Logo](http://example.com/logo.png)";
119	+	let result = rewrite_media_paths(md, "https://cdn.makenot.work", "user-123");
120	+	assert_eq!(result, md);
121	+	}
122	+
123	+	#[test]
124	+	fn data_uri_unchanged() {
125	+	let md = "![Pixel](data:image/png;base64,abc)";
126	+	let result = rewrite_media_paths(md, "https://cdn.makenot.work", "user-123");
127	+	assert_eq!(result, md);
128	+	}
129	+
130	+	#[test]
131	+	fn absolute_path_unchanged() {
132	+	let md = "![Doc](/static/images/docs/setup.png)";
133	+	let result = rewrite_media_paths(md, "https://cdn.makenot.work", "user-123");
134	+	assert_eq!(result, md);
135	+	}
136	+
137	+	#[test]
138	+	fn path_traversal_unchanged() {
139	+	let md = "![Hack](../../../etc/passwd)";
140	+	let result = rewrite_media_paths(md, "https://cdn.makenot.work", "user-123");
141	+	assert_eq!(result, md);
142	+	}
143	+
144	+	#[test]
145	+	fn root_folder_file() {
146	+	let md = "![Photo](photo.jpg)";
147	+	let result = rewrite_media_paths(md, "https://cdn.makenot.work", "user-123");
148	+	assert_eq!(
149	+	result,
150	+	"![Photo](https://cdn.makenot.work/user-123/media/photo.jpg)"
151	+	);
152	+	}
153	+
154	+	#[test]
155	+	fn cdn_base_trailing_slash_stripped() {
156	+	let md = "![Img](img.png)";
157	+	let result = rewrite_media_paths(md, "https://cdn.makenot.work/", "user-123");
158	+	assert_eq!(
159	+	result,
160	+	"![Img](https://cdn.makenot.work/user-123/media/img.png)"
161	+	);
162	+	}
163	+
164	+	#[test]
165	+	fn video_extension_to_video_tag() {
166	+	let html = r#"<img src="https://cdn.makenot.work/u/media/demo.mp4" alt="Demo">"#;
167	+	let result = img_to_video(html);
168	+	assert!(result.contains("<video controls"));
169	+	assert!(result.contains(r#"src="https://cdn.makenot.work/u/media/demo.mp4""#));
170	+	assert!(result.contains("Demo"));
171	+	assert!(result.contains("</video>"));
172	+	assert!(!result.contains("<img"));
173	+	}
174	+
175	+	#[test]
176	+	fn non_video_image_unchanged() {
177	+	let html = r#"<img src="https://cdn.makenot.work/u/media/photo.png" alt="Photo">"#;
178	+	let result = img_to_video(html);
179	+	assert_eq!(result, html);
180	+	}
181	+
182	+	#[test]
183	+	fn webm_converted() {
184	+	let html = r#"<img src="clip.webm">"#;
185	+	let result = img_to_video(html);
186	+	assert!(result.contains("<video controls"));
187	+	assert!(result.contains("</video>"));
188	+	}
189	+
190	+	#[test]
191	+	fn mov_converted() {
192	+	let html = r#"<img src="clip.mov" alt="Clip">"#;
193	+	let result = img_to_video(html);
194	+	assert!(result.contains("<video controls"));
195	+	assert!(result.contains("Clip"));
196	+	}
197	+
198	+	#[test]
199	+	fn mixed_content() {
200	+	let md = "Text before\n\n![Img](folder/img.png)\n\nMore text\n\n![Vid](folder/vid.mp4)\n\n![External](https://example.com/pic.jpg)";
201	+	let rewritten = rewrite_media_paths(md, "https://cdn.makenot.work", "u1");
202	+	assert!(rewritten.contains("https://cdn.makenot.work/u1/media/folder/img.png"));
203	+	assert!(rewritten.contains("https://cdn.makenot.work/u1/media/folder/vid.mp4"));
204	+	assert!(rewritten.contains("https://example.com/pic.jpg"));
205	+	}
206	+
207	+	#[test]
208	+	fn empty_alt_text() {
209	+	let md = "![](photo.jpg)";
210	+	let result = rewrite_media_paths(md, "https://cdn.makenot.work", "u1");
211	+	assert_eq!(
212	+	result,
213	+	"![](https://cdn.makenot.work/u1/media/photo.jpg)"
214	+	);
215	+	}
216	+
217	+	#[test]
218	+	fn video_tag_no_alt() {
219	+	let html = r#"<img src="demo.mp4">"#;
220	+	let result = img_to_video(html);
221	+	assert!(result.contains("Your browser does not support video."));
222	+	}
223	+
224	+	#[test]
225	+	fn multiple_images_in_html() {
226	+	let html = r#"<img src="a.png" alt="A"><img src="b.mp4" alt="B"><img src="c.webm">"#;
227	+	let result = img_to_video(html);
228	+	// a.png stays as img
229	+	assert!(result.contains(r#"<img src="a.png""#));
230	+	// b.mp4 becomes video
231	+	assert!(result.contains(r#"<video controls src="b.mp4">B</video>"#));
232	+	// c.webm becomes video
233	+	assert!(result.contains(r#"<video controls src="c.webm">"#));
234	+	}
235	+	}

M src/quotes.rs +2 -9

			@@ -1,5 +1,7 @@
1	1		use std::collections::HashMap;
2	2
	3	+	use crate::escape::html_escape;
	4	+
3	5		/// Quote author info for attribution rendering.
4	6		pub struct QuoteAuthor {
5	7		pub username: String,
			@@ -7,15 +9,6 @@ pub struct QuoteAuthor {
7	9		pub is_removed: bool,
8	10		}
9	11
10		-	/// HTML-escape a string for safe interpolation into raw HTML.
11		-	fn html_escape(s: &str) -> String {
12		-	s.replace('&', "&amp;")
13		-	.replace('<', "&lt;")
14		-	.replace('>', "&gt;")
15		-	.replace('"', "&quot;")
16		-	.replace('\'', "&#x27;")
17		-	}
18		-
19	12		/// Post-process rendered HTML to replace `[quote:POST_ID:HASH]` markers with
20	13		/// clickable author attribution.
21	14		pub fn post_process_quotes(

M src/sanitize.rs +2 -1

			@@ -3,7 +3,8 @@
3	3		pub enum SanitizePreset {
4	4		/// Default ammonia settings. Allows most safe HTML.
5	5		Permissive,
6		-	/// Default ammonia settings. Same as Permissive.
	6	+	/// Default ammonia settings (same sanitization as Permissive; the difference
	7	+	/// is at the Renderer level — Standard strips images, Permissive doesn't).
7	8		Standard,
8	9		/// Adds `rel="noopener noreferrer nofollow"` to all links.
9	10		Strict,

M src/toc.rs +3 -14

			@@ -1,5 +1,7 @@
1	1		use pulldown_cmark::{Event, Options, Parser, Tag, TagEnd};
2	2
	3	+	use crate::escape::html_escape;
	4	+
3	5		/// A single entry in a table of contents.
4	6		#[derive(Debug, Clone, PartialEq, Eq)]
5	7		pub struct TocEntry {
			@@ -57,7 +59,7 @@ pub fn render_toc_html(entries: &[TocEntry]) -> String {
57	59		html.push_str(&format!(
58	60		"<li class=\"toc-h{}\"><a href=\"#{}\">{}</a></li>\n",
59	61		entry.level,
60		-	html_escape_attr(&entry.anchor),
	62	+	html_escape(&entry.anchor),
61	63		html_escape(&entry.text),
62	64		));
63	65		}
			@@ -75,19 +77,6 @@ fn make_anchor(text: &str) -> String {
75	77		.collect()
76	78		}
77	79
78		-	fn html_escape(s: &str) -> String {
79		-	s.replace('&', "&amp;")
80		-	.replace('<', "&lt;")
81		-	.replace('>', "&gt;")
82		-	}
83		-
84		-	fn html_escape_attr(s: &str) -> String {
85		-	s.replace('&', "&amp;")
86		-	.replace('"', "&quot;")
87		-	.replace('<', "&lt;")
88		-	.replace('>', "&gt;")
89		-	}
90		-
91	80		#[cfg(test)]
92	81		mod tests {
93	82		use super::*;