Skip to main content

max / docengine

Initial commit: DocEngine shared markdown library. Feature-gated modules: render presets (permissive/standard/strict/sanitize-only), TOC generation, TOML frontmatter, @mentions, quote attribution, doc loader. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
Author: Max J. <87768334+MaxJMath@users.noreply.github.com> · 2026-03-22 05:01 UTC
Commit: 73678516b64b7a1cd4705fe9309c74cee9f31861
13 files changed, +2261 insertions, -0 deletions
A .gitignore +2
@@ -0,0 +1,2 @@
1 + /target/
2 + .DS_Store
A Cargo.lock +500
@@ -0,0 +1,1183 @@
1 + # This file is automatically @generated by Cargo.
2 + # It is not intended for manual editing.
3 + version = 4
4 +
5 + [[package]]
6 + name = "aho-corasick"
7 + version = "1.1.4"
8 + source = "registry+https://github.com/rust-lang/crates.io-index"
9 + checksum = "ddd31a130427c27518df266943a5308ed92d4b226cc639f5a8f1002816174301"
10 + dependencies = [
11 + "memchr",
12 + ]
13 +
14 + [[package]]
15 + name = "ammonia"
16 + version = "4.1.2"
17 + source = "registry+https://github.com/rust-lang/crates.io-index"
18 + checksum = "17e913097e1a2124b46746c980134e8c954bc17a6a59bb3fde96f088d126dde6"
19 + dependencies = [
20 + "cssparser",
21 + "html5ever",
22 + "maplit",
23 + "tendril",
24 + "url",
25 + ]
26 +
27 + [[package]]
28 + name = "anyhow"
29 + version = "1.0.102"
30 + source = "registry+https://github.com/rust-lang/crates.io-index"
31 + checksum = "7f202df86484c868dbad7eaa557ef785d5c66295e41b460ef922eca0723b842c"
32 +
33 + [[package]]
34 + name = "bitflags"
35 + version = "2.11.0"
36 + source = "registry+https://github.com/rust-lang/crates.io-index"
37 + checksum = "843867be96c8daad0d758b57df9392b6d8d271134fce549de6ce169ff98a92af"
38 +
39 + [[package]]
40 + name = "bumpalo"
41 + version = "3.20.2"
42 + source = "registry+https://github.com/rust-lang/crates.io-index"
43 + checksum = "5d20789868f4b01b2f2caec9f5c4e0213b41e3e5702a50157d699ae31ced2fcb"
44 +
45 + [[package]]
46 + name = "cfg-if"
47 + version = "1.0.4"
48 + source = "registry+https://github.com/rust-lang/crates.io-index"
49 + checksum = "9330f8b2ff13f34540b44e946ef35111825727b38d33286ef986142615121801"
50 +
51 + [[package]]
52 + name = "cssparser"
53 + version = "0.35.0"
54 + source = "registry+https://github.com/rust-lang/crates.io-index"
55 + checksum = "4e901edd733a1472f944a45116df3f846f54d37e67e68640ac8bb69689aca2aa"
56 + dependencies = [
57 + "cssparser-macros",
58 + "dtoa-short",
59 + "itoa",
60 + "phf",
61 + "smallvec",
62 + ]
63 +
64 + [[package]]
65 + name = "cssparser-macros"
66 + version = "0.6.1"
67 + source = "registry+https://github.com/rust-lang/crates.io-index"
68 + checksum = "13b588ba4ac1a99f7f2964d24b3d896ddc6bf847ee3855dbd4366f058cfcd331"
69 + dependencies = [
70 + "quote",
71 + "syn",
72 + ]
73 +
74 + [[package]]
75 + name = "displaydoc"
76 + version = "0.2.5"
77 + source = "registry+https://github.com/rust-lang/crates.io-index"
78 + checksum = "97369cbbc041bc366949bc74d34658d6cda5621039731c6310521892a3a20ae0"
79 + dependencies = [
80 + "proc-macro2",
81 + "quote",
82 + "syn",
83 + ]
84 +
85 + [[package]]
86 + name = "docengine"
87 + version = "0.3.0"
88 + dependencies = [
89 + "ammonia",
90 + "pulldown-cmark",
91 + "regex",
92 + "regex-lite",
93 + "serde",
94 + "toml",
95 + "uuid",
96 + ]
97 +
98 + [[package]]
99 + name = "dtoa"
100 + version = "1.0.11"
101 + source = "registry+https://github.com/rust-lang/crates.io-index"
102 + checksum = "4c3cf4824e2d5f025c7b531afcb2325364084a16806f6d47fbc1f5fbd9960590"
103 +
104 + [[package]]
105 + name = "dtoa-short"
106 + version = "0.3.5"
107 + source = "registry+https://github.com/rust-lang/crates.io-index"
108 + checksum = "cd1511a7b6a56299bd043a9c167a6d2bfb37bf84a6dfceaba651168adfb43c87"
109 + dependencies = [
110 + "dtoa",
111 + ]
112 +
113 + [[package]]
114 + name = "equivalent"
115 + version = "1.0.2"
116 + source = "registry+https://github.com/rust-lang/crates.io-index"
117 + checksum = "877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f"
118 +
119 + [[package]]
120 + name = "foldhash"
121 + version = "0.1.5"
122 + source = "registry+https://github.com/rust-lang/crates.io-index"
123 + checksum = "d9c4f5dac5e15c24eb999c26181a6ca40b39fe946cbe4c263c7209467bc83af2"
124 +
125 + [[package]]
126 + name = "form_urlencoded"
127 + version = "1.2.2"
128 + source = "registry+https://github.com/rust-lang/crates.io-index"
129 + checksum = "cb4cb245038516f5f85277875cdaa4f7d2c9a0fa0468de06ed190163b1581fcf"
130 + dependencies = [
131 + "percent-encoding",
132 + ]
133 +
134 + [[package]]
135 + name = "futf"
136 + version = "0.1.5"
137 + source = "registry+https://github.com/rust-lang/crates.io-index"
138 + checksum = "df420e2e84819663797d1ec6544b13c5be84629e7bb00dc960d6917db2987843"
139 + dependencies = [
140 + "mac",
141 + "new_debug_unreachable",
142 + ]
143 +
144 + [[package]]
145 + name = "getopts"
146 + version = "0.2.24"
147 + source = "registry+https://github.com/rust-lang/crates.io-index"
148 + checksum = "cfe4fbac503b8d1f88e6676011885f34b7174f46e59956bba534ba83abded4df"
149 + dependencies = [
150 + "unicode-width",
151 + ]
152 +
153 + [[package]]
154 + name = "getrandom"
155 + version = "0.4.2"
156 + source = "registry+https://github.com/rust-lang/crates.io-index"
157 + checksum = "0de51e6874e94e7bf76d726fc5d13ba782deca734ff60d5bb2fb2607c7406555"
158 + dependencies = [
159 + "cfg-if",
160 + "libc",
161 + "r-efi",
162 + "wasip2",
163 + "wasip3",
164 + ]
165 +
166 + [[package]]
167 + name = "hashbrown"
168 + version = "0.15.5"
169 + source = "registry+https://github.com/rust-lang/crates.io-index"
170 + checksum = "9229cfe53dfd69f0609a49f65461bd93001ea1ef889cd5529dd176593f5338a1"
171 + dependencies = [
172 + "foldhash",
173 + ]
174 +
175 + [[package]]
176 + name = "hashbrown"
177 + version = "0.16.1"
178 + source = "registry+https://github.com/rust-lang/crates.io-index"
179 + checksum = "841d1cc9bed7f9236f321df977030373f4a4163ae1a7dbfe1a51a2c1a51d9100"
180 +
181 + [[package]]
182 + name = "heck"
183 + version = "0.5.0"
184 + source = "registry+https://github.com/rust-lang/crates.io-index"
185 + checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea"
186 +
187 + [[package]]
188 + name = "html5ever"
189 + version = "0.35.0"
190 + source = "registry+https://github.com/rust-lang/crates.io-index"
191 + checksum = "55d958c2f74b664487a2035fe1dadb032c48718a03b63f3ab0b8537db8549ed4"
192 + dependencies = [
193 + "log",
194 + "markup5ever",
195 + "match_token",
196 + ]
197 +
198 + [[package]]
199 + name = "icu_collections"
200 + version = "2.1.1"
201 + source = "registry+https://github.com/rust-lang/crates.io-index"
202 + checksum = "4c6b649701667bbe825c3b7e6388cb521c23d88644678e83c0c4d0a621a34b43"
203 + dependencies = [
204 + "displaydoc",
205 + "potential_utf",
206 + "yoke",
207 + "zerofrom",
208 + "zerovec",
209 + ]
210 +
211 + [[package]]
212 + name = "icu_locale_core"
213 + version = "2.1.1"
214 + source = "registry+https://github.com/rust-lang/crates.io-index"
215 + checksum = "edba7861004dd3714265b4db54a3c390e880ab658fec5f7db895fae2046b5bb6"
216 + dependencies = [
217 + "displaydoc",
218 + "litemap",
219 + "tinystr",
220 + "writeable",
221 + "zerovec",
222 + ]
223 +
224 + [[package]]
225 + name = "icu_normalizer"
226 + version = "2.1.1"
227 + source = "registry+https://github.com/rust-lang/crates.io-index"
228 + checksum = "5f6c8828b67bf8908d82127b2054ea1b4427ff0230ee9141c54251934ab1b599"
229 + dependencies = [
230 + "icu_collections",
231 + "icu_normalizer_data",
232 + "icu_properties",
233 + "icu_provider",
234 + "smallvec",
235 + "zerovec",
236 + ]
237 +
238 + [[package]]
239 + name = "icu_normalizer_data"
240 + version = "2.1.1"
241 + source = "registry+https://github.com/rust-lang/crates.io-index"
242 + checksum = "7aedcccd01fc5fe81e6b489c15b247b8b0690feb23304303a9e560f37efc560a"
243 +
244 + [[package]]
245 + name = "icu_properties"
246 + version = "2.1.2"
247 + source = "registry+https://github.com/rust-lang/crates.io-index"
248 + checksum = "020bfc02fe870ec3a66d93e677ccca0562506e5872c650f893269e08615d74ec"
249 + dependencies = [
250 + "icu_collections",
251 + "icu_locale_core",
252 + "icu_properties_data",
253 + "icu_provider",
254 + "zerotrie",
255 + "zerovec",
256 + ]
257 +
258 + [[package]]
259 + name = "icu_properties_data"
260 + version = "2.1.2"
261 + source = "registry+https://github.com/rust-lang/crates.io-index"
262 + checksum = "616c294cf8d725c6afcd8f55abc17c56464ef6211f9ed59cccffe534129c77af"
263 +
264 + [[package]]
265 + name = "icu_provider"
266 + version = "2.1.1"
267 + source = "registry+https://github.com/rust-lang/crates.io-index"
268 + checksum = "85962cf0ce02e1e0a629cc34e7ca3e373ce20dda4c4d7294bbd0bf1fdb59e614"
269 + dependencies = [
270 + "displaydoc",
271 + "icu_locale_core",
272 + "writeable",
273 + "yoke",
274 + "zerofrom",
275 + "zerotrie",
276 + "zerovec",
277 + ]
278 +
279 + [[package]]
280 + name = "id-arena"
281 + version = "2.3.0"
282 + source = "registry+https://github.com/rust-lang/crates.io-index"
283 + checksum = "3d3067d79b975e8844ca9eb072e16b31c3c1c36928edf9c6789548c524d0d954"
284 +
285 + [[package]]
286 + name = "idna"
287 + version = "1.1.0"
288 + source = "registry+https://github.com/rust-lang/crates.io-index"
289 + checksum = "3b0875f23caa03898994f6ddc501886a45c7d3d62d04d2d90788d47be1b1e4de"
290 + dependencies = [
291 + "idna_adapter",
292 + "smallvec",
293 + "utf8_iter",
294 + ]
295 +
296 + [[package]]
297 + name = "idna_adapter"
298 + version = "1.2.1"
299 + source = "registry+https://github.com/rust-lang/crates.io-index"
300 + checksum = "3acae9609540aa318d1bc588455225fb2085b9ed0c4f6bd0d9d5bcd86f1a0344"
301 + dependencies = [
302 + "icu_normalizer",
303 + "icu_properties",
304 + ]
305 +
306 + [[package]]
307 + name = "indexmap"
308 + version = "2.13.0"
309 + source = "registry+https://github.com/rust-lang/crates.io-index"
310 + checksum = "7714e70437a7dc3ac8eb7e6f8df75fd8eb422675fc7678aff7364301092b1017"
311 + dependencies = [
312 + "equivalent",
313 + "hashbrown 0.16.1",
314 + "serde",
315 + "serde_core",
316 + ]
317 +
318 + [[package]]
319 + name = "itoa"
320 + version = "1.0.18"
321 + source = "registry+https://github.com/rust-lang/crates.io-index"
322 + checksum = "8f42a60cbdf9a97f5d2305f08a87dc4e09308d1276d28c869c684d7777685682"
323 +
324 + [[package]]
325 + name = "js-sys"
326 + version = "0.3.91"
327 + source = "registry+https://github.com/rust-lang/crates.io-index"
328 + checksum = "b49715b7073f385ba4bc528e5747d02e66cb39c6146efb66b781f131f0fb399c"
329 + dependencies = [
330 + "once_cell",
331 + "wasm-bindgen",
332 + ]
333 +
334 + [[package]]
335 + name = "leb128fmt"
336 + version = "0.1.0"
337 + source = "registry+https://github.com/rust-lang/crates.io-index"
338 + checksum = "09edd9e8b54e49e587e4f6295a7d29c3ea94d469cb40ab8ca70b288248a81db2"
339 +
340 + [[package]]
341 + name = "libc"
342 + version = "0.2.183"
343 + source = "registry+https://github.com/rust-lang/crates.io-index"
344 + checksum = "b5b646652bf6661599e1da8901b3b9522896f01e736bad5f723fe7a3a27f899d"
345 +
346 + [[package]]
347 + name = "litemap"
348 + version = "0.8.1"
349 + source = "registry+https://github.com/rust-lang/crates.io-index"
350 + checksum = "6373607a59f0be73a39b6fe456b8192fcc3585f602af20751600e974dd455e77"
351 +
352 + [[package]]
353 + name = "lock_api"
354 + version = "0.4.14"
355 + source = "registry+https://github.com/rust-lang/crates.io-index"
356 + checksum = "224399e74b87b5f3557511d98dff8b14089b3dadafcab6bb93eab67d3aace965"
357 + dependencies = [
358 + "scopeguard",
359 + ]
360 +
361 + [[package]]
362 + name = "log"
363 + version = "0.4.29"
364 + source = "registry+https://github.com/rust-lang/crates.io-index"
365 + checksum = "5e5032e24019045c762d3c0f28f5b6b8bbf38563a65908389bf7978758920897"
366 +
367 + [[package]]
368 + name = "mac"
369 + version = "0.1.1"
370 + source = "registry+https://github.com/rust-lang/crates.io-index"
371 + checksum = "c41e0c4fef86961ac6d6f8a82609f55f31b05e4fce149ac5710e439df7619ba4"
372 +
373 + [[package]]
374 + name = "maplit"
375 + version = "1.0.2"
376 + source = "registry+https://github.com/rust-lang/crates.io-index"
377 + checksum = "3e2e65a1a2e43cfcb47a895c4c8b10d1f4a61097f9f254f183aee60cad9c651d"
378 +
379 + [[package]]
380 + name = "markup5ever"
381 + version = "0.35.0"
382 + source = "registry+https://github.com/rust-lang/crates.io-index"
383 + checksum = "311fe69c934650f8f19652b3946075f0fc41ad8757dbb68f1ca14e7900ecc1c3"
384 + dependencies = [
385 + "log",
386 + "tendril",
387 + "web_atoms",
388 + ]
389 +
390 + [[package]]
391 + name = "match_token"
392 + version = "0.35.0"
393 + source = "registry+https://github.com/rust-lang/crates.io-index"
394 + checksum = "ac84fd3f360fcc43dc5f5d186f02a94192761a080e8bc58621ad4d12296a58cf"
395 + dependencies = [
396 + "proc-macro2",
397 + "quote",
398 + "syn",
399 + ]
400 +
401 + [[package]]
402 + name = "memchr"
403 + version = "2.8.0"
404 + source = "registry+https://github.com/rust-lang/crates.io-index"
405 + checksum = "f8ca58f447f06ed17d5fc4043ce1b10dd205e060fb3ce5b979b8ed8e59ff3f79"
406 +
407 + [[package]]
408 + name = "new_debug_unreachable"
409 + version = "1.0.6"
410 + source = "registry+https://github.com/rust-lang/crates.io-index"
411 + checksum = "650eef8c711430f1a879fdd01d4745a7deea475becfb90269c06775983bbf086"
412 +
413 + [[package]]
414 + name = "once_cell"
415 + version = "1.21.4"
416 + source = "registry+https://github.com/rust-lang/crates.io-index"
417 + checksum = "9f7c3e4beb33f85d45ae3e3a1792185706c8e16d043238c593331cc7cd313b50"
418 +
419 + [[package]]
420 + name = "parking_lot"
421 + version = "0.12.5"
422 + source = "registry+https://github.com/rust-lang/crates.io-index"
423 + checksum = "93857453250e3077bd71ff98b6a65ea6621a19bb0f559a85248955ac12c45a1a"
424 + dependencies = [
425 + "lock_api",
426 + "parking_lot_core",
427 + ]
428 +
429 + [[package]]
430 + name = "parking_lot_core"
431 + version = "0.9.12"
432 + source = "registry+https://github.com/rust-lang/crates.io-index"
433 + checksum = "2621685985a2ebf1c516881c026032ac7deafcda1a2c9b7850dc81e3dfcb64c1"
434 + dependencies = [
435 + "cfg-if",
436 + "libc",
437 + "redox_syscall",
438 + "smallvec",
439 + "windows-link",
440 + ]
441 +
442 + [[package]]
443 + name = "percent-encoding"
444 + version = "2.3.2"
445 + source = "registry+https://github.com/rust-lang/crates.io-index"
446 + checksum = "9b4f627cb1b25917193a259e49bdad08f671f8d9708acfd5fe0a8c1455d87220"
447 +
448 + [[package]]
449 + name = "phf"
450 + version = "0.11.3"
451 + source = "registry+https://github.com/rust-lang/crates.io-index"
452 + checksum = "1fd6780a80ae0c52cc120a26a1a42c1ae51b247a253e4e06113d23d2c2edd078"
453 + dependencies = [
454 + "phf_macros",
455 + "phf_shared",
456 + ]
457 +
458 + [[package]]
459 + name = "phf_codegen"
460 + version = "0.11.3"
461 + source = "registry+https://github.com/rust-lang/crates.io-index"
462 + checksum = "aef8048c789fa5e851558d709946d6d79a8ff88c0440c587967f8e94bfb1216a"
463 + dependencies = [
464 + "phf_generator",
465 + "phf_shared",
466 + ]
467 +
468 + [[package]]
469 + name = "phf_generator"
470 + version = "0.11.3"
471 + source = "registry+https://github.com/rust-lang/crates.io-index"
472 + checksum = "3c80231409c20246a13fddb31776fb942c38553c51e871f8cbd687a4cfb5843d"
473 + dependencies = [
474 + "phf_shared",
475 + "rand",
476 + ]
477 +
478 + [[package]]
479 + name = "phf_macros"
480 + version = "0.11.3"
481 + source = "registry+https://github.com/rust-lang/crates.io-index"
482 + checksum = "f84ac04429c13a7ff43785d75ad27569f2951ce0ffd30a3321230db2fc727216"
483 + dependencies = [
484 + "phf_generator",
485 + "phf_shared",
486 + "proc-macro2",
487 + "quote",
488 + "syn",
489 + ]
490 +
491 + [[package]]
492 + name = "phf_shared"
493 + version = "0.11.3"
494 + source = "registry+https://github.com/rust-lang/crates.io-index"
495 + checksum = "67eabc2ef2a60eb7faa00097bd1ffdb5bd28e62bf39990626a582201b7a754e5"
496 + dependencies = [
497 + "siphasher",
498 + ]
499 +
500 + [[package]]
Lines truncated
A Cargo.toml +22
@@ -0,0 +1,22 @@
1 + [package]
2 + name = "docengine"
3 + version = "0.3.0"
4 + edition = "2021"
5 +
6 + [features]
7 + default = []
8 + doc-loader = ["dep:regex"]
9 + mentions = ["dep:regex-lite"]
10 + quotes = ["dep:regex-lite", "dep:uuid"]
11 + frontmatter = ["dep:toml"]
12 + full = ["doc-loader", "mentions", "quotes", "frontmatter"]
13 +
14 + [dependencies]
15 + pulldown-cmark = "0.12"
16 + ammonia = "4"
17 + serde = { version = "1", features = ["derive"] }
18 +
19 + regex = { version = "1", optional = true }
20 + regex-lite = { version = "0.1", optional = true }
21 + uuid = { version = "1", features = ["serde", "v4"], optional = true }
22 + toml = { version = "0.8", optional = true }
@@ -0,0 +1,132 @@
/// Blank out backtick-delimited code, keeping every other byte where it was.
///
/// A run of N backticks opens a span that is closed by the next run of exactly
/// N backticks, which covers both inline code and backtick fences. An opener
/// that is never closed extends to the end of `input`. Tilde fences and
/// indented code blocks are not recognised.
///
/// Every byte of a span, delimiters included, is replaced by one ASCII space.
/// The returned string therefore always has the same byte length as `input`,
/// and byte offsets computed on the result are valid for the original text.
pub fn strip_code_spans(input: &str) -> String {
    /// Number of consecutive backticks at the start of `bytes`.
    fn backtick_run(bytes: &[u8]) -> usize {
        bytes.iter().take_while(|&&b| b == b'`').count()
    }

    let bytes = input.as_bytes();
    let len = bytes.len();
    let mut out = String::with_capacity(len);
    // Start of the plain text that has not been copied to `out` yet.
    let mut plain_start = 0;
    let mut i = 0;

    while i < len {
        if bytes[i] != b'`' {
            i += 1;
            continue;
        }

        let span_start = i;
        let tick_count = backtick_run(&bytes[i..]);
        i += tick_count;

        // Scan for a closing run of the same length; default to "unclosed".
        let mut span_end = len;
        while i < len {
            if bytes[i] == b'`' {
                let close_count = backtick_run(&bytes[i..]);
                i += close_count;
                if close_count == tick_count {
                    span_end = i;
                    break;
                }
            } else {
                i += 1;
            }
        }

        // Span boundaries always sit next to an ASCII backtick (or at the end
        // of the input), so slicing here can never split a UTF-8 sequence.
        out.push_str(&input[plain_start..span_start]);
        for _ in span_start..span_end {
            out.push(' ');
        }
        plain_start = span_end;
    }

    out.push_str(&input[plain_start..]);
    out
}
37 +
/// Locate every backtick-delimited region of `input`.
///
/// Each result is a half-open `(start, end)` pair of byte offsets that covers
/// the opening backtick run, the content, and the closing run. A run of N
/// backticks is closed only by a later run of exactly N backticks; when no
/// such run exists the region extends to `input.len()`.
pub fn code_span_ranges(input: &str) -> Vec<(usize, usize)> {
    let raw = input.as_bytes();
    let total = raw.len();
    // Length of the backtick run that begins at `pos`.
    let run_at = |pos: usize| raw[pos..].iter().take_while(|&&b| b == b'`').count();

    let mut spans = Vec::new();
    let mut cursor = 0;

    while cursor < total {
        if raw[cursor] != b'`' {
            cursor += 1;
            continue;
        }

        let opened_at = cursor;
        let opener = run_at(cursor);
        cursor += opener;

        // Walk forward until a run of matching length is consumed.
        let mut closed_at = None;
        while cursor < total && closed_at.is_none() {
            if raw[cursor] != b'`' {
                cursor += 1;
                continue;
            }
            let closer = run_at(cursor);
            cursor += closer;
            if closer == opener {
                closed_at = Some(cursor);
            }
        }

        spans.push((opened_at, closed_at.unwrap_or(total)));
    }

    spans
}
79 +
#[cfg(test)]
mod tests {
    use super::*;

    /// Inline code disappears from the stripped text; the prose around it stays.
    #[test]
    fn strip_inline_code() {
        let stripped = strip_code_spans("hello `code` world");
        assert!(!stripped.contains("code"));
        for kept in ["hello", "world"] {
            assert!(stripped.contains(kept));
        }
    }

    /// A backtick fence and its body are removed as a single span.
    #[test]
    fn strip_fenced_code() {
        let stripped = strip_code_spans("text\n```\ncode block\n```\nmore");
        assert!(!stripped.contains("code block"));
        for kept in ["text", "more"] {
            assert!(stripped.contains(kept));
        }
    }

    /// The reported range covers the delimiters as well as the content.
    #[test]
    fn ranges_inline_code() {
        let source = "hello `code` world";
        let spans = code_span_ranges(source);
        assert_eq!(spans.len(), 1);
        let (from, to) = spans[0];
        assert_eq!(&source[from..to], "`code`");
    }

    /// A fenced block yields one range that starts and ends on the fence.
    #[test]
    fn ranges_fenced_code() {
        let source = "text\n```\ncode\n```\nmore";
        let spans = code_span_ranges(source);
        assert_eq!(spans.len(), 1);
        let (from, to) = spans[0];
        let fenced = &source[from..to];
        assert!(fenced.starts_with("```"));
        assert!(fenced.ends_with("```"));
    }

    /// An opener without a closer extends to the end of the input.
    #[test]
    fn ranges_unclosed_backtick() {
        let source = "hello `unclosed";
        let spans = code_span_ranges(source);
        assert_eq!(spans.len(), 1);
        assert_eq!(spans[0], (6, source.len()));
    }

    /// Text without backticks is reported as span-free and returned unchanged.
    #[test]
    fn no_code_spans() {
        let plain = "no code here";
        assert!(code_span_ranges(plain).is_empty());
        assert_eq!(strip_code_spans(plain), "no code here");
    }
}
@@ -0,0 +1,260 @@
1 + use std::collections::HashMap;
2 + use std::path::Path;
3 + use std::sync::LazyLock;
4 +
5 + use regex::Regex;
6 +
7 + static LINK_RE: LazyLock<Regex> = LazyLock::new(|| {
8 + Regex::new(r"\[([^\]]+)\]\(([^)]+)\)").expect("valid regex")
9 + });
10 +
/// Configuration for the doc loader.
///
/// Derives `Clone` and `Debug` like the other public types in this module so
/// a configuration can be stored, duplicated, and logged by callers.
#[derive(Clone, Debug)]
pub struct DocLoaderConfig {
    /// Sections as `(directory_name, display_name)` pairs in display order.
    pub sections: Vec<(String, String)>,
    /// URL prefix for rewritten links (e.g., "/docs").
    pub link_prefix: String,
    /// Pattern that identifies unpublished links to strip (e.g., "unpublished/").
    ///
    /// `None` disables stripping, so such links are rewritten like any other.
    pub unpublished_pattern: Option<String>,
}
20 +
/// A rendered documentation page.
///
/// Implements `PartialEq`/`Eq` so pages can be compared directly.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct DocPage {
    /// Page title: the title extracted from the markdown, or the slug when the
    /// document has none.
    pub title: String,
    /// File stem of the source `.md` file; the key used to look the page up.
    pub slug: String,
    /// Display name of the section the page was loaded from.
    pub section: String,
    /// HTML rendered from the markdown after link rewriting and removal of the
    /// first heading.
    pub html_content: String,
}
29 +
/// Ordered entry for the docs index page.
///
/// Implements `PartialEq`/`Eq` so entries can be compared directly.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct DocIndexEntry {
    /// Title of the page this entry points to.
    pub title: String,
    /// Slug of the page this entry points to.
    pub slug: String,
    /// Display name of the section the page belongs to.
    pub section: String,
}
37 +
38 + /// In-memory store of rendered documentation pages, built once at startup.
39 + #[derive(Clone, Debug)]
40 + pub struct DocLoader {
41 + pages: HashMap<String, DocPage>,
42 + index: Vec<DocIndexEntry>,
43 + }
44 +
45 + impl DocLoader {
46 + /// Load all `.md` files from `base_path`, rendering them into HTML.
47 + ///
48 + /// Expects subdirectories matching the configured sections.
49 + pub fn load(base_path: &Path, config: &DocLoaderConfig) -> Self {
50 + let mut pages = HashMap::new();
51 + let mut index = Vec::new();
52 +
53 + for (dir_name, section_display) in &config.sections {
54 + let section_path = base_path.join(dir_name);
55 + if !section_path.is_dir() {
56 + continue;
57 + }
58 +
59 + let mut entries: Vec<_> = std::fs::read_dir(&section_path)
60 + .into_iter()
61 + .flatten()
62 + .filter_map(|e| e.ok())
63 + .filter(|e| {
64 + e.path()
65 + .extension()
66 + .map(|ext| ext == "md")
67 + .unwrap_or(false)
68 + })
69 + .collect();
70 +
71 + entries.sort_by_key(|e| e.file_name());
72 +
73 + for entry in entries {
74 + let path = entry.path();
75 + let slug = path
76 + .file_stem()
77 + .and_then(|s| s.to_str())
78 + .unwrap_or_default()
79 + .to_string();
80 +
81 + let raw_md = match std::fs::read_to_string(&path) {
82 + Ok(content) => content,
83 + Err(_) => continue,
84 + };
85 +
86 + let title =
87 + crate::text::extract_title(&raw_md).unwrap_or_else(|| slug.clone());
88 + let rewritten_md = rewrite_links(
89 + &raw_md,
90 + &config.link_prefix,
91 + config.unpublished_pattern.as_deref(),
92 + );
93 + let md_without_title = crate::text::strip_first_heading(&rewritten_md);
94 + let html_content = crate::render_permissive(&md_without_title);
95 +
96 + let page = DocPage {
97 + title: title.clone(),
98 + slug: slug.clone(),
99 + section: section_display.clone(),
100 + html_content,
101 + };
102 +
103 + index.push(DocIndexEntry {
104 + title: title.clone(),
105 + slug: slug.clone(),
106 + section: section_display.clone(),
107 + });
108 +
109 + pages.insert(slug, page);
110 + }
111 + }
112 +
113 + DocLoader { pages, index }
114 + }
115 +
116 + /// Look up a rendered page by slug.
117 + pub fn get(&self, slug: &str) -> Option<&DocPage> {
118 + self.pages.get(slug)
119 + }
120 +
121 + /// Get the full ordered index.
122 + pub fn index(&self) -> &[DocIndexEntry] {
123 + &self.index
124 + }
125 + }
126 +
127 + /// Rewrite relative `.md` links to the configured prefix.
128 + fn rewrite_links(markdown: &str, link_prefix: &str, unpublished_pattern: Option<&str>) -> String {
129 + LINK_RE
130 + .replace_all(markdown, |caps: &regex::Captures| {
131 + let text = &caps[1];
132 + let url = &caps[2];
133 +
134 + // Preserve absolute URLs, mailto, and internal routes.
135 + if url.starts_with("http://")
136 + || url.starts_with("https://")
137 + || url.starts_with("mailto:")
138 + || url.starts_with('/')
139 + {
140 + return caps[0].to_string();
141 + }
142 +
143 + // Unpublished docs: strip link, keep text.
144 + if let Some(pattern) = unpublished_pattern {
145 + if url.contains(pattern) {
146 + return text.to_string();
147 + }
148 + }
149 +
150 + // Only rewrite links containing .md
151 + if !url.contains(".md") {
152 + return caps[0].to_string();
153 + }
154 +
155 + // Split off any #anchor.
156 + let (path_part, anchor): (&str, Option<&str>) = match url.split_once('#') {
157 + Some((p, a)) => (p, Some(a)),
158 + None => (url, None),
159 + };
160 +
161 + // Extract slug from filename: ../support/faq.md -> faq
162 + let filename = path_part
163 + .rsplit('/')
164 + .next()
165 + .unwrap_or(path_part)
166 + .trim_end_matches(".md");
167 +
168 + let mut new_url = format!("{link_prefix}/{filename}");
169 + if let Some(anchor) = anchor {
170 + new_url.push('#');
171 + new_url.push_str(anchor);
172 + }
173 +
174 + format!("[{text}]({new_url})")
175 + })
176 + .to_string()
177 + }
178 +
#[cfg(test)]
mod tests {
    use super::*;

    /// Rewrite with the configuration most tests share: `/docs` prefix and
    /// `unpublished/` as the unpublished marker.
    fn rewrite_docs(source: &str) -> String {
        rewrite_links(source, "/docs", Some("unpublished/"))
    }

    #[test]
    fn rewrite_same_section_link() {
        let rewritten = rewrite_docs("See [SLA](./guarantees.md) for details.");
        assert_eq!(rewritten, "See [SLA](/docs/guarantees) for details.");
    }

    #[test]
    fn rewrite_cross_section_link() {
        let rewritten = rewrite_docs("Check [FAQ](../support/faq.md) for more.");
        assert_eq!(rewritten, "Check [FAQ](/docs/faq) for more.");
    }

    #[test]
    fn rewrite_unpublished_link_becomes_plain_text() {
        let rewritten = rewrite_docs(
            "See [Content Moderation](../../unpublished/legal/moderation.md) for details.",
        );
        assert_eq!(rewritten, "See Content Moderation for details.");
    }

    #[test]
    fn rewrite_preserves_absolute_urls() {
        let source = "Visit [our site](https://example.com) today.";
        assert_eq!(rewrite_docs(source), source);
    }

    #[test]
    fn rewrite_preserves_mailto() {
        let source = "Email [us](mailto:test@example.com)";
        assert_eq!(rewrite_docs(source), source);
    }

    #[test]
    fn rewrite_preserves_internal_routes() {
        let source = "Go to [pricing](/pricing) page.";
        assert_eq!(rewrite_docs(source), source);
    }

    #[test]
    fn rewrite_link_with_anchor() {
        let rewritten = rewrite_docs("See [section](./faq.md#billing).");
        assert_eq!(rewritten, "See [section](/docs/faq#billing).");
    }

    #[test]
    fn rewrite_public_cross_ref() {
        let rewritten = rewrite_docs("See [Acceptable Use](../../public/legal/acceptable-use.md).");
        assert_eq!(rewritten, "See [Acceptable Use](/docs/acceptable-use).");
    }

    #[test]
    fn rewrite_custom_prefix() {
        let rewritten = rewrite_links("See [FAQ](./faq.md) here.", "/help", None);
        assert_eq!(rewritten, "See [FAQ](/help/faq) here.");
    }

    #[test]
    fn rewrite_no_unpublished_pattern() {
        let rewritten = rewrite_links("See [doc](../../unpublished/foo.md).", "/docs", None);
        // With no pattern configured the link is rewritten like any other.
        assert_eq!(rewritten, "See [doc](/docs/foo).");
    }

    #[test]
    fn rewrite_non_md_link_preserved() {
        let source = "See [image](./photo.png) here.";
        assert_eq!(rewrite_links(source, "/docs", None), source);
    }
}
@@ -0,0 +1,130 @@
1 + use std::collections::HashMap;
2 +
3 + use serde::Deserialize;
4 +
5 + /// Parsed TOML frontmatter from a markdown document.
6 + #[derive(Debug, Clone, Default, Deserialize)]
7 + pub struct Frontmatter {
8 + pub title: Option<String>,
9 + pub date: Option<String>,
10 + pub tags: Option<Vec<String>>,
11 + pub section: Option<String>,
12 + pub draft: Option<bool>,
13 + #[serde(flatten)]
14 + pub extra: HashMap<String, toml::Value>,
15 + }
16 +
17 + /// Parse TOML frontmatter delimited by `+++` from the beginning of a document.
18 + ///
19 + /// Returns the parsed frontmatter (if present) and the remaining markdown
20 + /// content.
21 + pub fn parse_frontmatter(input: &str) -> (Option<Frontmatter>, &str) {
22 + let trimmed = input.trim_start();
23 + if !trimmed.starts_with("+++") {
24 + return (None, input);
25 + }
26 +
27 + // Find the closing +++
28 + let after_opening = &trimmed[3..];
29 + let after_opening = after_opening.strip_prefix('\n').unwrap_or(after_opening);
30 +
31 + if let Some(end_pos) = after_opening.find("\n+++") {
32 + let toml_content = &after_opening[..end_pos];
33 + let rest_start = end_pos + 4; // skip \n+++
34 + let rest = &after_opening[rest_start..];
35 + let rest = rest.strip_prefix('\n').unwrap_or(rest);
36 +
37 + // Calculate the actual offset into the original input
38 + let rest_offset = input.len() - rest.len();
39 + let rest_slice = &input[rest_offset..];
40 +
41 + match toml::from_str::<Frontmatter>(toml_content) {
42 + Ok(fm) => (Some(fm), rest_slice),
43 + Err(_) => (None, input),
44 + }
45 + } else {
46 + (None, input)
47 + }
48 + }
49 +
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn parse_basic_frontmatter() {
        let doc = "+++\ntitle = \"Hello\"\ndate = \"2026-01-01\"\n+++\n\n# Body";
        let (parsed, body) = parse_frontmatter(doc);
        let meta = parsed.unwrap();
        assert_eq!(meta.title.as_deref(), Some("Hello"));
        assert_eq!(meta.date.as_deref(), Some("2026-01-01"));
        assert!(body.contains("# Body"));
    }

    #[test]
    fn parse_with_tags() {
        let doc = "+++\ntitle = \"Post\"\ntags = [\"rust\", \"web\"]\n+++\nContent";
        let (parsed, _) = parse_frontmatter(doc);
        let expected = vec!["rust".to_string(), "web".to_string()];
        assert_eq!(parsed.unwrap().tags, Some(expected));
    }

    #[test]
    fn parse_with_draft() {
        let (parsed, _) = parse_frontmatter("+++\ndraft = true\n+++\nContent");
        let meta = parsed.unwrap();
        assert_eq!(meta.draft, Some(true));
    }

    /// Keys without a dedicated field end up in `extra`.
    #[test]
    fn parse_with_extra_fields() {
        let doc = "+++\ntitle = \"Test\"\ncustom_key = \"custom_value\"\n+++\nBody";
        let (parsed, _) = parse_frontmatter(doc);
        let meta = parsed.unwrap();
        assert_eq!(meta.title.as_deref(), Some("Test"));
        let custom = meta.extra.get("custom_key").and_then(|value| value.as_str());
        assert_eq!(custom, Some("custom_value"));
    }

    #[test]
    fn no_frontmatter() {
        let doc = "# Just Markdown\n\nBody text";
        let (parsed, body) = parse_frontmatter(doc);
        assert!(parsed.is_none());
        assert_eq!(body, doc);
    }

    #[test]
    fn unclosed_frontmatter() {
        let doc = "+++\ntitle = \"Oops\"\nNo closing delimiter";
        let (parsed, body) = parse_frontmatter(doc);
        assert!(parsed.is_none());
        assert_eq!(body, doc);
    }

    /// Invalid TOML is treated as "no frontmatter" and the input is returned whole.
    #[test]
    fn invalid_toml_returns_none() {
        let doc = "+++\nnot valid toml {{{\n+++\nBody";
        let (parsed, body) = parse_frontmatter(doc);
        assert!(parsed.is_none());
        assert_eq!(body, doc);
    }

    #[test]
    fn empty_frontmatter() {
        let (parsed, body) = parse_frontmatter("+++\n\n+++\nBody");
        let meta = parsed.unwrap();
        assert!(meta.title.is_none());
        assert!(body.contains("Body"));
    }

    #[test]
    fn frontmatter_with_section() {
        let (parsed, _) = parse_frontmatter("+++\nsection = \"guide\"\n+++\nContent");
        let meta = parsed.unwrap();
        assert_eq!(meta.section.as_deref(), Some("guide"));
    }
}
A src/lib.rs +51
@@ -0,0 +1,51 @@
1 + #[cfg(any(feature = "mentions", test))]
2 + mod code_spans;
3 + mod render;
4 + mod sanitize;
5 + mod text;
6 + mod toc;
7 +
8 + #[cfg(feature = "doc-loader")]
9 + mod doc_loader;
10 + #[cfg(feature = "frontmatter")]
11 + mod frontmatter;
12 + #[cfg(feature = "mentions")]
13 + mod mentions;
14 + #[cfg(feature = "quotes")]
15 + mod quotes;
16 +
17 + // Re-export core types
18 + pub use render::{RenderResult, Renderer};
19 + pub use sanitize::SanitizePreset;
20 + pub use text::{extract_title, reading_time_minutes, strip_first_heading, word_count};
21 + pub use toc::{TocEntry, extract_toc, render_toc_html};
22 +
23 + // Re-export feature-gated types
24 + #[cfg(feature = "doc-loader")]
25 + pub use doc_loader::{DocIndexEntry, DocLoader, DocLoaderConfig, DocPage};
26 + #[cfg(feature = "frontmatter")]
27 + pub use frontmatter::{Frontmatter, parse_frontmatter};
28 + #[cfg(feature = "mentions")]
29 + pub use mentions::{extract_mentions, resolve_mentions};
30 + #[cfg(feature = "quotes")]
31 + pub use quotes::{QuoteAuthor, post_process_quotes};
32 +
33 + /// Render markdown with the permissive preset (GFM features, default ammonia).
34 + pub fn render_permissive(markdown: &str) -> String {
35 + Renderer::permissive().render(markdown)
36 + }
37 +
38 + /// Render markdown with the standard preset (GFM features, no images).
39 + pub fn render_standard(markdown: &str) -> String {
40 + Renderer::standard().render(markdown)
41 + }
42 +
43 + /// Render markdown with the strict preset (no images, no raw HTML, nofollow).
44 + pub fn render_strict(markdown: &str) -> String {
45 + Renderer::strict().render(markdown)
46 + }
47 +
48 + /// Sanitize HTML without markdown parsing.
49 + pub fn sanitize_html(html: &str) -> String {
50 + Renderer::sanitize_only().sanitize_html(html)
51 + }
@@ -0,0 +1,144 @@
1 + use std::collections::HashSet;
2 +
3 + use crate::code_spans::code_span_ranges;
4 +
5 + /// Extract unique `@username` mentions from raw markdown input.
6 + /// Skips mentions inside inline code (backtick-wrapped).
7 + pub fn extract_mentions(input: &str) -> Vec<String> {
8 + static MENTION_RE: std::sync::LazyLock<regex_lite::Regex> =
9 + std::sync::LazyLock::new(|| regex_lite::Regex::new(r"@([A-Za-z0-9_-]+)").unwrap());
10 +
11 + let stripped = crate::code_spans::strip_code_spans(input);
12 + let mut seen = HashSet::new();
13 + let mut result = Vec::new();
14 + for caps in MENTION_RE.captures_iter(&stripped) {
15 + let username = caps[1].to_string();
16 + if seen.insert(username.clone()) {
17 + result.push(username);
18 + }
19 + }
20 + result
21 + }
22 +
23 + /// Replace `@username` with markdown profile links for valid usernames.
24 + ///
25 + /// `url_template` uses `{username}` as placeholder. For example:
26 + /// `/p/my-community/u/{username}` becomes `/p/my-community/u/alice`.
27 + ///
28 + /// Unknown usernames are left as plain text.
29 + pub fn resolve_mentions(
30 + input: &str,
31 + valid_usernames: &HashSet<String>,
32 + url_template: &str,
33 + ) -> String {
34 + static MENTION_RE: std::sync::LazyLock<regex_lite::Regex> =
35 + std::sync::LazyLock::new(|| regex_lite::Regex::new(r"@([A-Za-z0-9_-]+)").unwrap());
36 +
37 + let mut result = String::with_capacity(input.len());
38 + let mut pos = 0;
39 +
40 + for (code_start, code_end) in code_span_ranges(input) {
41 + let before = &input[pos..code_start];
42 + result.push_str(&replace_mentions(before, valid_usernames, url_template, &MENTION_RE));
43 + result.push_str(&input[code_start..code_end]);
44 + pos = code_end;
45 + }
46 + let tail = &input[pos..];
47 + result.push_str(&replace_mentions(tail, valid_usernames, url_template, &MENTION_RE));
48 +
49 + result
50 + }
51 +
52 + fn replace_mentions(
53 + text: &str,
54 + valid_usernames: &HashSet<String>,
55 + url_template: &str,
56 + re: &regex_lite::Regex,
57 + ) -> String {
58 + re.replace_all(text, |caps: &regex_lite::Captures| {
59 + let username = &caps[1];
60 + if valid_usernames.contains(username) {
61 + let url = url_template.replace("{username}", username);
62 + format!("[@{username}]({url})")
63 + } else {
64 + caps[0].to_string()
65 + }
66 + })
67 + .to_string()
68 + }
69 +
#[cfg(test)]
mod tests {
    use super::*;

    /// Build an owned username set from string literals.
    fn known(names: &[&str]) -> HashSet<String> {
        names.iter().map(|name| name.to_string()).collect()
    }

    // ----- extract_mentions -----

    #[test]
    fn extract_basic() {
        assert_eq!(extract_mentions("Hello @alice and @bob!"), vec!["alice", "bob"]);
    }

    #[test]
    fn extract_deduplicates() {
        assert_eq!(extract_mentions("@alice said @alice agrees"), vec!["alice"]);
    }

    #[test]
    fn extract_skips_inline_code() {
        assert_eq!(extract_mentions("Hello `@notreal` and @real"), vec!["real"]);
    }

    #[test]
    fn extract_skips_fenced_code() {
        let found = extract_mentions("text\n```\n@inside\n```\n@outside");
        assert_eq!(found, vec!["outside"]);
    }

    #[test]
    fn extract_empty() {
        assert!(extract_mentions("no mentions here").is_empty());
    }

    #[test]
    fn extract_with_hyphens_underscores() {
        let found = extract_mentions("@user-name @user_name");
        assert_eq!(found, vec!["user-name", "user_name"]);
    }

    // ----- resolve_mentions -----

    #[test]
    fn resolve_valid_replaced() {
        let resolved = resolve_mentions(
            "Hello @alice!",
            &known(&["alice"]),
            "/p/test-community/u/{username}",
        );
        assert_eq!(resolved, "Hello [@alice](/p/test-community/u/alice)!");
    }

    #[test]
    fn resolve_unknown_left_alone() {
        let nobody: HashSet<String> = HashSet::new();
        let resolved = resolve_mentions("Hello @unknown!", &nobody, "/u/{username}");
        assert_eq!(resolved, "Hello @unknown!");
    }

    #[test]
    fn resolve_in_code_not_replaced() {
        let resolved = resolve_mentions("Use `@alice` in code", &known(&["alice"]), "/u/{username}");
        assert_eq!(resolved, "Use `@alice` in code");
    }

    #[test]
    fn resolve_mixed_valid_invalid() {
        let resolved =
            resolve_mentions("@alice and @unknown", &known(&["alice"]), "/p/slug/u/{username}");
        assert_eq!(resolved, "[@alice](/p/slug/u/alice) and @unknown");
    }

    #[test]
    fn resolve_custom_url_template() {
        let resolved = resolve_mentions("Hi @bob", &known(&["bob"]), "/users/{username}/profile");
        assert_eq!(resolved, "Hi [@bob](/users/bob/profile)");
    }
}
A src/quotes.rs +130
@@ -0,0 +1,130 @@
1 + use std::collections::HashMap;
2 +
/// Quote author info for attribution rendering.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct QuoteAuthor {
    /// Handle rendered after an `@` in the attribution.
    pub username: String,
    /// Human-readable name rendered in the attribution.
    pub display_name: String,
    /// When true the attribution shows a "post removed" notice instead of
    /// the author's name.
    pub is_removed: bool,
}
9 +
/// Escape `&`, `<`, `>`, `"` and `'` so the string can be interpolated into
/// raw HTML text or attribute values.
fn html_escape(s: &str) -> String {
    let mut escaped = String::with_capacity(s.len());
    for ch in s.chars() {
        match ch {
            '&' => escaped.push_str("&amp;"),
            '<' => escaped.push_str("&lt;"),
            '>' => escaped.push_str("&gt;"),
            '"' => escaped.push_str("&quot;"),
            '\'' => escaped.push_str("&#x27;"),
            other => escaped.push(other),
        }
    }
    escaped
}
18 +
19 + /// Post-process rendered HTML to replace `[quote:POST_ID:HASH]` markers with
20 + /// clickable author attribution.
21 + pub fn post_process_quotes(
22 + html: &str,
23 + quote_authors: &HashMap<uuid::Uuid, QuoteAuthor>,
24 + ) -> String {
25 + static QUOTE_RE: std::sync::LazyLock<regex_lite::Regex> = std::sync::LazyLock::new(|| {
26 + regex_lite::Regex::new(r"\[quote:([0-9a-f\-]{36}):([0-9a-f]{8})\]").unwrap()
27 + });
28 + QUOTE_RE
29 + .replace_all(html, |caps: &regex_lite::Captures| {
30 + let post_id_str = &caps[1];
31 + let resolved = uuid::Uuid::parse_str(post_id_str)
32 + .ok()
33 + .and_then(|post_id| quote_authors.get(&post_id));
34 +
35 + if let Some(author) = resolved {
36 + if author.is_removed {
37 + format!(
38 + "<cite class=\"quote-attribution\"><a href=\"#post-{}\">(original post removed)</a></cite>",
39 + post_id_str
40 + )
41 + } else {
42 + format!(
43 + "<cite class=\"quote-attribution\"><a href=\"#post-{}\">— {} (@{})</a></cite>",
44 + post_id_str,
45 + html_escape(&author.display_name),
46 + html_escape(&author.username),
47 + )
48 + }
49 + } else {
50 + caps[0].to_string()
51 + }
52 + })
53 + .to_string()
54 + }
55 +
#[cfg(test)]
mod tests {
    use super::*;

    /// Create a fresh post id, a quote marker referring to it, and an author
    /// map holding exactly one entry for that post.
    fn single_quote(
        username: &str,
        display_name: &str,
        is_removed: bool,
    ) -> (String, HashMap<uuid::Uuid, QuoteAuthor>) {
        let post_id = uuid::Uuid::new_v4();
        let marker = format!("[quote:{}:abcd1234]", post_id);
        let author = QuoteAuthor {
            username: username.to_string(),
            display_name: display_name.to_string(),
            is_removed,
        };
        (marker, HashMap::from([(post_id, author)]))
    }

    #[test]
    fn replaces_quote_marker_with_attribution() {
        let (marker, authors) = single_quote("alice", "Alice Smith", false);
        let rendered = post_process_quotes(&marker, &authors);
        for needle in ["Alice Smith", "@alice", "quote-attribution"] {
            assert!(rendered.contains(needle));
        }
    }

    #[test]
    fn removed_post_shows_removed_text() {
        let (marker, authors) = single_quote("bob", "Bob", true);
        let rendered = post_process_quotes(&marker, &authors);
        assert!(rendered.contains("original post removed"));
        assert!(!rendered.contains("Bob"));
    }

    #[test]
    fn unknown_post_id_left_unchanged() {
        let marker = "[quote:00000000-0000-0000-0000-000000000000:abcd1234]";
        let nobody = HashMap::new();
        assert_eq!(post_process_quotes(marker, &nobody), marker);
    }

    #[test]
    fn non_quote_text_unchanged() {
        let plain = "<p>Hello world</p>";
        let nobody = HashMap::new();
        assert_eq!(post_process_quotes(plain, &nobody), plain);
    }

    #[test]
    fn html_escapes_display_name() {
        let (marker, authors) = single_quote("user", "A <B> & C", false);
        let rendered = post_process_quotes(&marker, &authors);
        assert!(rendered.contains("A &lt;B&gt; &amp; C"));
        assert!(!rendered.contains("<B>"));
    }
}
A src/render.rs +500
@@ -0,0 +1,511 @@
1 + use pulldown_cmark::{CowStr, Event, Options, Parser, Tag, TagEnd, html};
2 +
3 + use crate::sanitize::SanitizePreset;
4 +
/// Decide whether `url` carries a scheme outside the safe allowlist.
///
/// Allowed schemes are `http`, `https`, `mailto` and `ftp`, compared
/// ASCII-case-insensitively after trimming surrounding whitespace. A URL with
/// no `:` at all, or whose first `:` comes after a `/`, `#` or `?`, is treated
/// as relative and therefore safe.
fn has_dangerous_scheme(url: &str) -> bool {
    const SAFE_SCHEMES: [&str; 4] = ["http", "https", "mailto", "ftp"];

    let Some((prefix, _rest)) = url.trim().split_once(':') else {
        // No colon at all: cannot have a scheme.
        return false;
    };

    // A path, fragment or query delimiter before the colon means the colon
    // belongs to a relative reference, not to a scheme.
    if prefix.contains(['/', '#', '?']) {
        return false;
    }

    !SAFE_SCHEMES
        .iter()
        .any(|allowed| prefix.eq_ignore_ascii_case(allowed))
}
24 +
/// Rendered HTML bundled with simple reading statistics.
#[derive(Debug, Clone)]
pub struct RenderResult {
    /// The sanitized HTML output.
    pub html: String,
    /// Number of words counted for the document.
    pub word_count: u32,
    /// Estimated reading time, in whole minutes.
    pub reading_time_minutes: u32,
}
32 +
33 + /// Configurable markdown renderer with builder pattern.
34 + pub struct Renderer {
35 + tables: bool,
36 + strikethrough: bool,
37 + footnotes: bool,
38 + smart_punctuation: bool,
39 + tasklists: bool,
40 + strip_images: bool,
41 + strip_raw_html: bool,
42 + dangerous_scheme_filter: bool,
43 + sanitize: SanitizePreset,
44 + }
45 +
46 + impl Renderer {
47 + /// GFM features, default ammonia sanitization. Suitable for trusted content
48 + /// like docs and blog posts.
49 + pub fn permissive() -> Self {
50 + Self {
51 + tables: true,
52 + strikethrough: true,
53 + footnotes: true,
54 + smart_punctuation: true,
55 + tasklists: true,
56 + strip_images: false,
57 + strip_raw_html: false,
58 + dangerous_scheme_filter: false,
59 + sanitize: SanitizePreset::Permissive,
60 + }
61 + }
62 +
63 + /// GFM features, no images. Suitable for app text fields (descriptions,
64 + /// notes).
65 + pub fn standard() -> Self {
66 + Self {
67 + tables: true,
68 + strikethrough: true,
69 + footnotes: false,
70 + smart_punctuation: true,
71 + tasklists: true,
72 + strip_images: true,
73 + strip_raw_html: false,
74 + dangerous_scheme_filter: false,
75 + sanitize: SanitizePreset::Standard,
76 + }
77 + }
78 +
79 + /// No images, no raw HTML, dangerous scheme blocking, nofollow on links.
80 + /// Suitable for user-generated content (forum posts).
81 + pub fn strict() -> Self {
82 + Self {
83 + tables: false,
84 + strikethrough: false,
85 + footnotes: false,
86 + smart_punctuation: false,
87 + tasklists: false,
88 + strip_images: true,
89 + strip_raw_html: true,
90 + dangerous_scheme_filter: true,
91 + sanitize: SanitizePreset::Strict,
92 + }
93 + }
94 +
95 + /// No markdown parsing, just ammonia sanitization. Suitable for HTML from
96 + /// external sources (RSS feeds).
97 + pub fn sanitize_only() -> Self {
98 + Self {
99 + tables: false,
100 + strikethrough: false,
101 + footnotes: false,
102 + smart_punctuation: false,
103 + tasklists: false,
104 + strip_images: false,
105 + strip_raw_html: false,
106 + dangerous_scheme_filter: false,
107 + sanitize: SanitizePreset::Permissive,
108 + }
109 + }
110 +
111 + pub fn with_tables(mut self, enabled: bool) -> Self {
112 + self.tables = enabled;
113 + self
114 + }
115 +
116 + pub fn with_strikethrough(mut self, enabled: bool) -> Self {
117 + self.strikethrough = enabled;
118 + self
119 + }
120 +
121 + pub fn with_footnotes(mut self, enabled: bool) -> Self {
122 + self.footnotes = enabled;
123 + self
124 + }
125 +
126 + pub fn with_smart_punctuation(mut self, enabled: bool) -> Self {
127 + self.smart_punctuation = enabled;
128 + self
129 + }
130 +
131 + pub fn with_tasklists(mut self, enabled: bool) -> Self {
132 + self.tasklists = enabled;
133 + self
134 + }
135 +
136 + pub fn with_strip_images(mut self, enabled: bool) -> Self {
137 + self.strip_images = enabled;
138 + self
139 + }
140 +
141 + pub fn with_strip_raw_html(mut self, enabled: bool) -> Self {
142 + self.strip_raw_html = enabled;
143 + self
144 + }
145 +
146 + pub fn with_dangerous_scheme_filter(mut self, enabled: bool) -> Self {
147 + self.dangerous_scheme_filter = enabled;
148 + self
149 + }
150 +
151 + pub fn with_sanitize(mut self, preset: SanitizePreset) -> Self {
152 + self.sanitize = preset;
153 + self
154 + }
155 +
156 + fn build_options(&self) -> Options {
157 + let mut opts = Options::empty();
158 + if self.tables {
159 + opts.insert(Options::ENABLE_TABLES);
160 + }
161 + if self.strikethrough {
162 + opts.insert(Options::ENABLE_STRIKETHROUGH);
163 + }
164 + if self.footnotes {
165 + opts.insert(Options::ENABLE_FOOTNOTES);
166 + }
167 + if self.smart_punctuation {
168 + opts.insert(Options::ENABLE_SMART_PUNCTUATION);
169 + }
170 + if self.tasklists {
171 + opts.insert(Options::ENABLE_TASKLISTS);
172 + }
173 + opts
174 + }
175 +
176 + /// Render markdown to sanitized HTML.
177 + pub fn render(&self, input: &str) -> String {
178 + if input.is_empty() {
179 + return String::new();
180 + }
181 + let html_output = self.render_raw(input);
182 + self.sanitize.clean(&html_output)
183 + }
184 +
185 + /// Render markdown to sanitized HTML with metadata.
186 + pub fn render_with_meta(&self, input: &str) -> RenderResult {
187 + let html = self.render(input);
188 + let wc = crate::text::word_count(input);
189 + RenderResult {
190 + html,
191 + word_count: wc,
192 + reading_time_minutes: crate::text::reading_time_minutes(wc),
193 + }
194 + }
195 +
196 + /// Sanitize pre-rendered HTML without markdown parsing.
197 + pub fn sanitize_html(&self, html: &str) -> String {
198 + self.sanitize.clean(html)
199 + }
200 +
201 + fn render_raw(&self, input: &str) -> String {
202 + let options = self.build_options();
203 + let parser = Parser::new_ext(input, options);
204 +
205 + let strip_images = self.strip_images;
206 + let strip_raw_html = self.strip_raw_html;
207 + let scheme_filter = self.dangerous_scheme_filter;
208 +
209 + let filtered = parser.filter_map(move |event| match event {
210 + // Strip raw HTML events
211 + Event::Html(_) | Event::InlineHtml(_) if strip_raw_html => None,
212 + // Neutralize dangerous schemes on links
213 + Event::Start(Tag::Link {
214 + link_type,
215 + dest_url,
216 + title,
217 + id,
218 + }) if scheme_filter && has_dangerous_scheme(&dest_url) => {
219 + Some(Event::Start(Tag::Link {
220 + link_type,
221 + dest_url: CowStr::Borrowed("#"),
222 + title,
223 + id,
224 + }))
225 + }
226 + // Strip images entirely (alt text passes through as plain text)
227 + Event::Start(Tag::Image { .. }) | Event::End(TagEnd::Image) if strip_images => None,
228 + other => Some(other),
229 + });
230 +
231 + let mut output = String::new();
232 + html::push_html(&mut output, filtered);
233 + output
234 + }
235 + }
236 +
237 + #[cfg(test)]
238 + mod tests {
239 + use super::*;
240 +
241 + // ===== has_dangerous_scheme =====
242 +
/// Every allowlisted scheme is reported as safe.
#[test]
fn safe_schemes() {
    for url in [
        "https://example.com",
        "http://example.com",
        "mailto:user@example.com",
        "ftp://files.example.com",
    ] {
        assert!(!has_dangerous_scheme(url));
    }
}
250 +
/// Script-capable schemes are flagged.
#[test]
fn dangerous_schemes() {
    for url in ["javascript:alert(1)", "data:text/html,<script>", "vbscript:msgbox"] {
        assert!(has_dangerous_scheme(url));
    }
}
257 +
/// Scheme matching ignores ASCII case.
#[test]
fn case_insensitive_schemes() {
    for url in ["JaVaScRiPt:alert(1)", "DATA:text/html,x"] {
        assert!(has_dangerous_scheme(url));
    }
}
263 +
/// Relative references, including ones with a colon after a `/`, are safe.
#[test]
fn relative_urls_are_safe() {
    for url in ["/about", "#heading", "page.html", "path/to:file"] {
        assert!(!has_dangerous_scheme(url));
    }
}
271 +
272 + // ===== Permissive preset =====
273 +
/// Headings and emphasis render under the permissive preset.
#[test]
fn permissive_basic_markdown() {
    let rendered = Renderer::permissive().render("# Hello\n\nThis is a **test**.");
    for fragment in ["<h1>Hello</h1>", "<strong>test</strong>"] {
        assert!(rendered.contains(fragment));
    }
}
281 +
/// GFM tables are enabled in the permissive preset.
#[test]
fn permissive_tables() {
    let rendered = Renderer::permissive().render("| A | B |\n|---|---|\n| 1 | 2 |");
    for fragment in ["<table>", "<td>"] {
        assert!(rendered.contains(fragment));
    }
}
289 +
/// Smart punctuation turns straight double quotes into curly ones.
///
/// Both curly quotes must be present. Accepting a plain `"` as well would let
/// this test pass with smart punctuation switched off.
#[test]
fn permissive_smart_punctuation() {
    let r = Renderer::permissive();
    let html = r.render("It's a \"test\"");
    assert!(
        html.contains('\u{201c}') && html.contains('\u{201d}'),
        "expected curly double quotes in {html:?}"
    );
}
298 +
/// Even the permissive preset removes `<script>` tags.
#[test]
fn permissive_strips_script() {
    let rendered = Renderer::permissive().render("Hello <script>alert('xss')</script> world");
    assert!(!rendered.contains("<script>"));
}
305 +
/// Images survive under the permissive preset.
#[test]
fn permissive_keeps_images() {
    let rendered = Renderer::permissive().render("![alt](https://example.com/img.png)");
    assert!(rendered.contains("<img"));
}
312 +
/// Empty markdown renders to an empty string.
#[test]
fn permissive_empty_input() {
    let rendered = Renderer::permissive().render("");
    assert_eq!(rendered, "");
}
317 +
318 + // ===== Standard preset =====
319 +
/// The standard preset drops the image tag but keeps its alt text.
#[test]
fn standard_strips_images() {
    let rendered = Renderer::standard().render("![alt text](https://example.com/img.png)");
    assert!(rendered.contains("alt text"));
    assert!(!rendered.contains("<img"));
}
327 +
/// Tables remain enabled in the standard preset.
#[test]
fn standard_keeps_tables() {
    let rendered = Renderer::standard().render("| A |\n|---|\n| 1 |");
    assert!(rendered.contains("<table>"));
}
334 +
335 + // ===== Strict preset =====
336 +
/// Block-level raw HTML is removed entirely by the strict preset.
#[test]
fn strict_strips_raw_html() {
    let rendered = Renderer::strict().render("<script>alert('xss')</script>");
    for tag in ["<script>", "</script>"] {
        assert!(!rendered.contains(tag));
    }
}
344 +
/// Inline HTML tags are removed while the surrounding text is kept.
#[test]
fn strict_strips_inline_html() {
    let rendered = Renderer::strict().render("hello <b>bold</b> world");
    assert!(!rendered.contains("<b>"));
    for word in ["hello", "world"] {
        assert!(rendered.contains(word));
    }
}
353 +
/// The strict preset drops the image tag but keeps its alt text.
#[test]
fn strict_strips_images() {
    let rendered = Renderer::strict().render("![alt text](https://example.com/img.png)");
    assert!(rendered.contains("alt text"));
    assert!(!rendered.contains("<img"));
}
361 +
/// A `javascript:` link keeps its text but is redirected to `#`.
#[test]
fn strict_neutralizes_javascript_urls() {
    let rendered = Renderer::strict().render("[click me](javascript:alert(1))");
    assert!(!rendered.contains("javascript:"));
    assert!(rendered.contains(r##"href="#""##));
    assert!(rendered.contains("click me"));
}
370 +
/// Mixed-case `javascript:` schemes are neutralized as well.
#[test]
fn strict_neutralizes_case_insensitive() {
    let rendered = Renderer::strict().render("[xss](JaVaScRiPt:alert(1))");
    for scheme in ["javascript:", "JaVaScRiPt:"] {
        assert!(!rendered.contains(scheme));
    }
}
378 +
/// `data:` link targets do not survive the strict preset.
#[test]
fn strict_neutralizes_data_urls() {
    let rendered =
        Renderer::strict().render("[xss](data:text/html,<script>alert(1)</script>)");
    assert!(!rendered.contains("data:text"));
}
385 +
/// `vbscript:` link targets do not survive the strict preset.
#[test]
fn strict_neutralizes_vbscript() {
    let rendered = Renderer::strict().render("[xss](vbscript:msgbox)");
    assert!(!rendered.contains("vbscript:"));
}
392 +
/// Allowlisted schemes pass through the strict preset unchanged.
#[test]
fn strict_preserves_safe_urls() {
    let renderer = Renderer::strict();
    let cases = [
        ("[link](https://example.com)", r#"href="https://example.com""#),
        ("[mail](mailto:user@example.com)", r#"href="mailto:user@example.com""#),
    ];
    for (markdown, expected_href) in cases {
        assert!(renderer.render(markdown).contains(expected_href));
    }
}
402 +
/// Relative paths and fragment links pass through the strict preset unchanged.
#[test]
fn strict_preserves_relative_urls() {
    let renderer = Renderer::strict();
    let cases = [
        ("[page](/about)", r#"href="/about""#),
        ("[section](#heading)", r##"href="#heading""##),
    ];
    for (markdown, expected_href) in cases {
        assert!(renderer.render(markdown).contains(expected_href));
    }
}
412 +
/// Links rendered by the strict preset carry `nofollow` and `noopener`.
#[test]
fn strict_links_have_nofollow() {
    let rendered = Renderer::strict().render("[example](https://example.com)");
    for token in ["nofollow", "noopener"] {
        assert!(result_has_rel(&rendered, token));
    }
}
420 +
/// Basic emphasis still works under the strict preset.
#[test]
fn strict_bold_italic() {
    let rendered = Renderer::strict().render("**bold** and *italic*");
    for fragment in ["<strong>bold</strong>", "<em>italic</em>"] {
        assert!(rendered.contains(fragment));
    }
}
428 +
/// Inline code spans render under the strict preset.
#[test]
fn strict_inline_code() {
    let rendered = Renderer::strict().render("use `foo()` here");
    assert!(rendered.contains("<code>foo()</code>"));
}
435 +
/// Fenced code blocks render as `<pre><code>` under the strict preset.
#[test]
fn strict_code_block() {
    let rendered = Renderer::strict().render("```\nlet x = 1;\n```");
    for fragment in ["<pre><code>", "let x = 1;"] {
        assert!(rendered.contains(fragment));
    }
}
443 +
/// Blockquotes render under the strict preset.
#[test]
fn strict_blockquote() {
    let rendered = Renderer::strict().render("> quoted text");
    for fragment in ["<blockquote>", "quoted text"] {
        assert!(rendered.contains(fragment));
    }
}
451 +
/// Bullet lists render under the strict preset.
#[test]
fn strict_unordered_list() {
    let rendered = Renderer::strict().render("- item one\n- item two");
    for fragment in ["<ul>", "<li>item one</li>"] {
        assert!(rendered.contains(fragment));
    }
}
459 +
/// Headings render under the strict preset.
#[test]
fn strict_heading() {
    let rendered = Renderer::strict().render("## Section Title");
    assert!(rendered.contains("<h2>Section Title</h2>"));
}
466 +
/// Plain text becomes a single paragraph followed by a newline.
#[test]
fn strict_plain_text() {
    let rendered = Renderer::strict().render("hello world");
    assert_eq!(rendered, "<p>hello world</p>\n");
}
472 +
/// Empty markdown renders to an empty string under the strict preset.
#[test]
fn strict_empty_input() {
    let rendered = Renderer::strict().render("");
    assert_eq!(rendered, "");
}
477 +
478 + // ===== Sanitize-only preset =====
479 +
/// `sanitize_html` keeps safe markup and removes `<script>`.
#[test]
fn sanitize_only_cleans_html() {
    let cleaned = Renderer::sanitize_only().sanitize_html("<p>Hello</p><script>bad</script>");
    assert!(!cleaned.contains("<script>"));
    assert!(cleaned.contains("<p>Hello</p>"));
}
487 +
488 + // ===== Builder methods =====
489 +
/// A builder call overrides the preset's default for that switch.
#[test]
fn builder_override() {
    let renderer = Renderer::strict().with_strip_images(false);
    let rendered = renderer.render("![alt](https://example.com/img.png)");
    assert!(rendered.contains("<img"));
}
496 +
497 + // ===== render_with_meta =====
498 +
499 + #[test]
500 + fn render_with_meta_includes_counts() {
Lines truncated
@@ -0,0 +1,79 @@
/// Selects how HTML is cleaned by ammonia.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum SanitizePreset {
    /// Ammonia's default configuration; most safe HTML is allowed.
    Permissive,
    /// Ammonia's default configuration; currently identical to `Permissive`.
    Standard,
    /// Default configuration plus `rel="noopener noreferrer nofollow"` on
    /// every link.
    Strict,
    /// Only the tags `p`, `em`, `strong`, `code`, `br` and `pre` are allowed.
    Minimal,
}
13 +
14 + impl SanitizePreset {
15 + pub(crate) fn clean(&self, html: &str) -> String {
16 + match self {
17 + SanitizePreset::Permissive | SanitizePreset::Standard => ammonia::clean(html),
18 + SanitizePreset::Strict => ammonia::Builder::default()
19 + .link_rel(Some("noopener noreferrer nofollow"))
20 + .clean(html)
21 + .to_string(),
22 + SanitizePreset::Minimal => {
23 + let tags: std::collections::HashSet<&str> =
24 + ["p", "em", "strong", "code", "br", "pre"]
25 + .iter()
26 + .copied()
27 + .collect();
28 + ammonia::Builder::default()
29 + .tags(tags)
30 + .clean(html)
31 + .to_string()
32 + }
33 + }
34 + }
35 + }
36 +
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn permissive_allows_safe_html() {
        let cleaned = SanitizePreset::Permissive.clean("<p>Hello <strong>world</strong></p>");
        assert!(cleaned.contains("<strong>world</strong>"));
    }

    #[test]
    fn permissive_strips_script() {
        let cleaned = SanitizePreset::Permissive.clean("<p>Hello</p><script>alert(1)</script>");
        assert!(!cleaned.contains("<script>"));
    }

    #[test]
    fn strict_adds_nofollow() {
        let cleaned = SanitizePreset::Strict.clean(r#"<a href="https://example.com">link</a>"#);
        for token in ["nofollow", "noopener"] {
            assert!(cleaned.contains(token));
        }
    }

    #[test]
    fn minimal_strips_extra_tags() {
        let cleaned =
            SanitizePreset::Minimal.clean(r#"<p>Hello <a href="x">link</a> <em>italic</em></p>"#);
        // The anchor tag goes away, its text stays, allowed tags are kept.
        assert!(!cleaned.contains("<a"));
        assert!(cleaned.contains("link"));
        assert!(cleaned.contains("<em>italic</em>"));
    }

    #[test]
    fn minimal_keeps_code() {
        let cleaned = SanitizePreset::Minimal.clean("<pre><code>fn main()</code></pre>");
        for tag in ["<code>", "<pre>"] {
            assert!(cleaned.contains(tag));
        }
    }
}
A src/text.rs +131
@@ -0,0 +1,131 @@
/// Count the whitespace-separated words in `text`.
///
/// The count saturates at `u32::MAX` instead of silently wrapping if the
/// input ever contains more words than fit in a `u32`.
pub fn word_count(text: &str) -> u32 {
    u32::try_from(text.split_whitespace().count()).unwrap_or(u32::MAX)
}
5 +
/// Estimate reading time in whole minutes, rounding up.
///
/// Uses an average reading speed of 200 words per minute; zero words take
/// zero minutes.
pub fn reading_time_minutes(word_count: u32) -> u32 {
    const WORDS_PER_MINUTE: u32 = 200;
    let full_minutes = word_count / WORDS_PER_MINUTE;
    // Any leftover words cost one additional minute.
    let partial_minute = u32::from(word_count % WORDS_PER_MINUTE != 0);
    full_minutes + partial_minute
}
12 +
/// Return the text of a leading `# Heading`, if the document starts with one.
///
/// Blank lines and `---` lines before the heading are skipped. If the first
/// other line is not an H1, there is no title and `None` is returned.
pub fn extract_title(markdown: &str) -> Option<String> {
    markdown
        .lines()
        .map(str::trim)
        // The first line that is neither blank nor `---` decides the outcome.
        .find(|line| !line.is_empty() && *line != "---")
        .and_then(|line| line.strip_prefix("# "))
        .map(|title| title.trim().to_owned())
}
26 +
/// Strip the leading `# Heading` line so templates can render it separately.
///
/// Only a heading that opens the document is removed: blank lines and `---`
/// lines before it are skipped, and the search stops at the first other line.
/// This is the same heading `extract_title` looks for, so a `# ...` line
/// further down (for example a shell comment inside a fenced code block, or
/// an H1 after introductory text) is left in place.
///
/// The remaining lines are joined with `\n`.
pub fn strip_first_heading(markdown: &str) -> String {
    // True while we are still in the leading blank/`---` region.
    let mut searching = true;
    markdown
        .lines()
        .filter(|line| {
            if !searching {
                return true;
            }
            let trimmed = line.trim();
            if trimmed.starts_with("# ") {
                searching = false;
                return false;
            }
            if !trimmed.is_empty() && trimmed != "---" {
                // Real content came first: there is no leading title to strip.
                searching = false;
            }
            true
        })
        .collect::<Vec<_>>()
        .join("\n")
}
45 +
#[cfg(test)]
mod tests {
    use super::*;

    // ----- word_count -----

    #[test]
    fn word_count_basic() {
        let cases = [("Hello world", 2), ("One two three four five", 5), ("", 0)];
        for (text, expected) in cases {
            assert_eq!(word_count(text), expected);
        }
    }

    #[test]
    fn word_count_whitespace() {
        let cases = [("  spaced   out  ", 2), ("\ttabbed\nlines", 2)];
        for (text, expected) in cases {
            assert_eq!(word_count(text), expected);
        }
    }

    // ----- reading_time_minutes -----

    #[test]
    fn reading_time_basic() {
        let cases = [(200, 1), (400, 2), (250, 2), (0, 0)];
        for (words, minutes) in cases {
            assert_eq!(reading_time_minutes(words), minutes);
        }
    }

    #[test]
    fn reading_time_rounds_up() {
        let cases = [(1, 1), (201, 2)];
        for (words, minutes) in cases {
            assert_eq!(reading_time_minutes(words), minutes);
        }
    }

    // ----- extract_title -----

    #[test]
    fn extract_title_basic() {
        let title = extract_title("# Hello World\n\nBody");
        assert_eq!(title, Some("Hello World".to_string()));
    }

    #[test]
    fn extract_title_with_leading_blank_lines() {
        let title = extract_title("\n# Title\n");
        assert_eq!(title, Some("Title".to_string()));
    }

    #[test]
    fn extract_title_none_when_missing() {
        assert!(extract_title("No heading here").is_none());
    }

    #[test]
    fn extract_title_ignores_h2() {
        assert!(extract_title("## Not H1").is_none());
    }

    #[test]
    fn extract_title_skips_horizontal_rules() {
        let title = extract_title("---\n# After Rule");
        assert_eq!(title, Some("After Rule".to_string()));
    }

    // ----- strip_first_heading -----

    #[test]
    fn strip_first_heading_removes_h1() {
        let body = strip_first_heading("# Title\n\nBody text\n## Subheading");
        assert!(!body.contains("# Title"));
        for kept in ["Body text", "## Subheading"] {
            assert!(body.contains(kept));
        }
    }

    #[test]
    fn strip_first_heading_only_removes_first() {
        let body = strip_first_heading("# First\n\n# Second");
        assert!(body.contains("# Second"));
        assert!(!body.contains("# First"));
    }

    #[test]
    fn strip_first_heading_no_h1() {
        let source = "## Only H2\n\nBody";
        assert_eq!(strip_first_heading(source), source);
    }
}
A src/toc.rs +180
@@ -0,0 +1,180 @@
1 + use pulldown_cmark::{Event, Options, Parser, Tag, TagEnd};
2 +
/// One heading of a document, as listed in its table of contents.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct TocEntry {
    /// Heading level: 1 for a `#` heading, 2 for `##`, and so on.
    pub level: u8,
    /// Plain-text content of the heading; inline code is included without
    /// its backticks.
    pub text: String,
    /// Fragment identifier used as the link target, without the leading `#`.
    pub anchor: String,
}
10 +
11 + /// Extract a table of contents from markdown headings.
12 + pub fn extract_toc(markdown: &str) -> Vec<TocEntry> {
13 + let mut options = Options::empty();
14 + options.insert(Options::ENABLE_TABLES);
15 + options.insert(Options::ENABLE_STRIKETHROUGH);
16 +
17 + let parser = Parser::new_ext(markdown, options);
18 + let mut entries = Vec::new();
19 + let mut in_heading: Option<u8> = None;
20 + let mut heading_text = String::new();
21 +
22 + for event in parser {
23 + match event {
24 + Event::Start(Tag::Heading { level, .. }) => {
25 + in_heading = Some(level as u8);
26 + heading_text.clear();
27 + }
28 + Event::Text(text) if in_heading.is_some() => {
29 + heading_text.push_str(&text);
30 + }
31 + Event::Code(code) if in_heading.is_some() => {
32 + heading_text.push_str(&code);
33 + }
34 + Event::End(TagEnd::Heading(_)) => {
35 + if let Some(level) = in_heading.take() {
36 + let anchor = make_anchor(&heading_text);
37 + entries.push(TocEntry {
38 + level,
39 + text: heading_text.clone(),
40 + anchor,
41 + });
42 + }
43 + }
44 + _ => {}
45 + }
46 + }
47 + entries
48 + }
49 +
50 + /// Render TOC entries as an HTML nested list.
51 + pub fn render_toc_html(entries: &[TocEntry]) -> String {
52 + if entries.is_empty() {
53 + return String::new();
54 + }
55 + let mut html = String::from("<nav class=\"toc\"><ul>\n");
56 + for entry in entries {
57 + html.push_str(&format!(
58 + "<li class=\"toc-h{}\"><a href=\"#{}\">{}</a></li>\n",
59 + entry.level,
60 + html_escape_attr(&entry.anchor),
61 + html_escape(&entry.text),
62 + ));
63 + }
64 + html.push_str("</ul></nav>");
65 + html
66 + }
67 +
/// Build a GitHub-style anchor slug from heading text.
///
/// The text is lowercased, every space becomes a hyphen, existing hyphens are
/// kept, and any other character that is not alphanumeric is dropped. This
/// includes underscores, punctuation, and whitespace other than the plain
/// space character.
fn make_anchor(text: &str) -> String {
    let lowered = text.to_lowercase();
    let mut slug = String::with_capacity(lowered.len());
    for ch in lowered.chars() {
        match ch {
            ' ' | '-' => slug.push('-'),
            keep if keep.is_alphanumeric() => slug.push(keep),
            _ => {}
        }
    }
    slug
}
77 +
/// Escape `&`, `<` and `>` so the string can be placed in HTML text content.
///
/// Quote characters are passed through unchanged.
fn html_escape(s: &str) -> String {
    let mut escaped = String::with_capacity(s.len());
    for ch in s.chars() {
        match ch {
            '&' => escaped.push_str("&amp;"),
            '<' => escaped.push_str("&lt;"),
            '>' => escaped.push_str("&gt;"),
            other => escaped.push(other),
        }
    }
    escaped
}
83 +
/// Escape `&`, `"`, `<` and `>` so the string can be placed inside a
/// double-quoted HTML attribute value.
///
/// Single quotes are passed through unchanged.
fn html_escape_attr(s: &str) -> String {
    let mut escaped = String::with_capacity(s.len());
    for ch in s.chars() {
        match ch {
            '&' => escaped.push_str("&amp;"),
            '"' => escaped.push_str("&quot;"),
            '<' => escaped.push_str("&lt;"),
            '>' => escaped.push_str("&gt;"),
            other => escaped.push(other),
        }
    }
    escaped
}
90 +
#[cfg(test)]
mod tests {
    use super::*;

    /// Shorthand for building a `TocEntry` from string slices.
    fn entry(level: u8, text: &str, anchor: &str) -> TocEntry {
        TocEntry {
            level,
            text: text.to_string(),
            anchor: anchor.to_string(),
        }
    }

    /// Project a parsed table of contents onto its heading levels.
    fn levels(toc: &[TocEntry]) -> Vec<u8> {
        toc.iter().map(|e| e.level).collect()
    }

    #[test]
    fn extract_basic_headings() {
        let toc = extract_toc("# Title\n\n## Section One\n\nBody\n\n## Section Two\n\nMore");
        assert_eq!(toc.len(), 3);
        assert_eq!(toc[0], entry(1, "Title", "title"));
        assert_eq!(toc[1], entry(2, "Section One", "section-one"));
        // The third entry is checked on level and text only.
        assert_eq!((toc[2].level, toc[2].text.as_str()), (2, "Section Two"));
    }

    #[test]
    fn anchor_strips_special_chars() {
        // Each pair is (heading text, expected anchor).
        let cases = [
            ("Hello, World!", "hello-world"),
            ("C++ & Rust", "c--rust"),
            ("Version 2.0", "version-20"),
        ];
        for (heading, expected) in cases {
            assert_eq!(make_anchor(heading), expected);
        }
    }

    #[test]
    fn extract_empty() {
        assert!(extract_toc("No headings here, just text.").is_empty());
    }

    #[test]
    fn extract_nested_levels() {
        let toc = extract_toc("# H1\n## H2\n### H3\n#### H4");
        assert_eq!(levels(&toc), [1, 2, 3, 4]);
    }

    #[test]
    fn heading_with_inline_code() {
        let toc = extract_toc("## Using `render()` function");
        let [only] = toc.as_slice() else {
            panic!("expected exactly one entry, got {}", toc.len());
        };
        assert_eq!(only.text, "Using render() function");
        assert_eq!(only.anchor, "using-render-function");
    }

    #[test]
    fn render_toc_html_basic() {
        let html = render_toc_html(&[
            entry(1, "Title", "title"),
            entry(2, "Section", "section"),
        ]);
        let expected_fragments = [
            "<nav class=\"toc\">",
            "toc-h1",
            "toc-h2",
            r##"href="#title""##,
            r##"href="#section""##,
        ];
        for fragment in expected_fragments {
            assert!(html.contains(fragment));
        }
    }

    #[test]
    fn render_toc_empty() {
        assert!(render_toc_html(&[]).is_empty());
    }

    #[test]
    fn toc_escapes_html_in_text() {
        let html = render_toc_html(&[entry(2, "A & B <C>", "a--b-c")]);
        assert!(html.contains("A &amp; B &lt;C&gt;"));
    }
}