max / docengine
13 files changed,
+2261 insertions,
-0 deletions
| @@ -0,0 +1,2 @@ | |||
| 1 | + | /target/ | |
| 2 | + | .DS_Store |
| @@ -0,0 +1,1183 @@ | |||
| 1 | + | # This file is automatically @generated by Cargo. | |
| 2 | + | # It is not intended for manual editing. | |
| 3 | + | version = 4 | |
| 4 | + | ||
| 5 | + | [[package]] | |
| 6 | + | name = "aho-corasick" | |
| 7 | + | version = "1.1.4" | |
| 8 | + | source = "registry+https://github.com/rust-lang/crates.io-index" | |
| 9 | + | checksum = "ddd31a130427c27518df266943a5308ed92d4b226cc639f5a8f1002816174301" | |
| 10 | + | dependencies = [ | |
| 11 | + | "memchr", | |
| 12 | + | ] | |
| 13 | + | ||
| 14 | + | [[package]] | |
| 15 | + | name = "ammonia" | |
| 16 | + | version = "4.1.2" | |
| 17 | + | source = "registry+https://github.com/rust-lang/crates.io-index" | |
| 18 | + | checksum = "17e913097e1a2124b46746c980134e8c954bc17a6a59bb3fde96f088d126dde6" | |
| 19 | + | dependencies = [ | |
| 20 | + | "cssparser", | |
| 21 | + | "html5ever", | |
| 22 | + | "maplit", | |
| 23 | + | "tendril", | |
| 24 | + | "url", | |
| 25 | + | ] | |
| 26 | + | ||
| 27 | + | [[package]] | |
| 28 | + | name = "anyhow" | |
| 29 | + | version = "1.0.102" | |
| 30 | + | source = "registry+https://github.com/rust-lang/crates.io-index" | |
| 31 | + | checksum = "7f202df86484c868dbad7eaa557ef785d5c66295e41b460ef922eca0723b842c" | |
| 32 | + | ||
| 33 | + | [[package]] | |
| 34 | + | name = "bitflags" | |
| 35 | + | version = "2.11.0" | |
| 36 | + | source = "registry+https://github.com/rust-lang/crates.io-index" | |
| 37 | + | checksum = "843867be96c8daad0d758b57df9392b6d8d271134fce549de6ce169ff98a92af" | |
| 38 | + | ||
| 39 | + | [[package]] | |
| 40 | + | name = "bumpalo" | |
| 41 | + | version = "3.20.2" | |
| 42 | + | source = "registry+https://github.com/rust-lang/crates.io-index" | |
| 43 | + | checksum = "5d20789868f4b01b2f2caec9f5c4e0213b41e3e5702a50157d699ae31ced2fcb" | |
| 44 | + | ||
| 45 | + | [[package]] | |
| 46 | + | name = "cfg-if" | |
| 47 | + | version = "1.0.4" | |
| 48 | + | source = "registry+https://github.com/rust-lang/crates.io-index" | |
| 49 | + | checksum = "9330f8b2ff13f34540b44e946ef35111825727b38d33286ef986142615121801" | |
| 50 | + | ||
| 51 | + | [[package]] | |
| 52 | + | name = "cssparser" | |
| 53 | + | version = "0.35.0" | |
| 54 | + | source = "registry+https://github.com/rust-lang/crates.io-index" | |
| 55 | + | checksum = "4e901edd733a1472f944a45116df3f846f54d37e67e68640ac8bb69689aca2aa" | |
| 56 | + | dependencies = [ | |
| 57 | + | "cssparser-macros", | |
| 58 | + | "dtoa-short", | |
| 59 | + | "itoa", | |
| 60 | + | "phf", | |
| 61 | + | "smallvec", | |
| 62 | + | ] | |
| 63 | + | ||
| 64 | + | [[package]] | |
| 65 | + | name = "cssparser-macros" | |
| 66 | + | version = "0.6.1" | |
| 67 | + | source = "registry+https://github.com/rust-lang/crates.io-index" | |
| 68 | + | checksum = "13b588ba4ac1a99f7f2964d24b3d896ddc6bf847ee3855dbd4366f058cfcd331" | |
| 69 | + | dependencies = [ | |
| 70 | + | "quote", | |
| 71 | + | "syn", | |
| 72 | + | ] | |
| 73 | + | ||
| 74 | + | [[package]] | |
| 75 | + | name = "displaydoc" | |
| 76 | + | version = "0.2.5" | |
| 77 | + | source = "registry+https://github.com/rust-lang/crates.io-index" | |
| 78 | + | checksum = "97369cbbc041bc366949bc74d34658d6cda5621039731c6310521892a3a20ae0" | |
| 79 | + | dependencies = [ | |
| 80 | + | "proc-macro2", | |
| 81 | + | "quote", | |
| 82 | + | "syn", | |
| 83 | + | ] | |
| 84 | + | ||
| 85 | + | [[package]] | |
| 86 | + | name = "docengine" | |
| 87 | + | version = "0.3.0" | |
| 88 | + | dependencies = [ | |
| 89 | + | "ammonia", | |
| 90 | + | "pulldown-cmark", | |
| 91 | + | "regex", | |
| 92 | + | "regex-lite", | |
| 93 | + | "serde", | |
| 94 | + | "toml", | |
| 95 | + | "uuid", | |
| 96 | + | ] | |
| 97 | + | ||
| 98 | + | [[package]] | |
| 99 | + | name = "dtoa" | |
| 100 | + | version = "1.0.11" | |
| 101 | + | source = "registry+https://github.com/rust-lang/crates.io-index" | |
| 102 | + | checksum = "4c3cf4824e2d5f025c7b531afcb2325364084a16806f6d47fbc1f5fbd9960590" | |
| 103 | + | ||
| 104 | + | [[package]] | |
| 105 | + | name = "dtoa-short" | |
| 106 | + | version = "0.3.5" | |
| 107 | + | source = "registry+https://github.com/rust-lang/crates.io-index" | |
| 108 | + | checksum = "cd1511a7b6a56299bd043a9c167a6d2bfb37bf84a6dfceaba651168adfb43c87" | |
| 109 | + | dependencies = [ | |
| 110 | + | "dtoa", | |
| 111 | + | ] | |
| 112 | + | ||
| 113 | + | [[package]] | |
| 114 | + | name = "equivalent" | |
| 115 | + | version = "1.0.2" | |
| 116 | + | source = "registry+https://github.com/rust-lang/crates.io-index" | |
| 117 | + | checksum = "877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f" | |
| 118 | + | ||
| 119 | + | [[package]] | |
| 120 | + | name = "foldhash" | |
| 121 | + | version = "0.1.5" | |
| 122 | + | source = "registry+https://github.com/rust-lang/crates.io-index" | |
| 123 | + | checksum = "d9c4f5dac5e15c24eb999c26181a6ca40b39fe946cbe4c263c7209467bc83af2" | |
| 124 | + | ||
| 125 | + | [[package]] | |
| 126 | + | name = "form_urlencoded" | |
| 127 | + | version = "1.2.2" | |
| 128 | + | source = "registry+https://github.com/rust-lang/crates.io-index" | |
| 129 | + | checksum = "cb4cb245038516f5f85277875cdaa4f7d2c9a0fa0468de06ed190163b1581fcf" | |
| 130 | + | dependencies = [ | |
| 131 | + | "percent-encoding", | |
| 132 | + | ] | |
| 133 | + | ||
| 134 | + | [[package]] | |
| 135 | + | name = "futf" | |
| 136 | + | version = "0.1.5" | |
| 137 | + | source = "registry+https://github.com/rust-lang/crates.io-index" | |
| 138 | + | checksum = "df420e2e84819663797d1ec6544b13c5be84629e7bb00dc960d6917db2987843" | |
| 139 | + | dependencies = [ | |
| 140 | + | "mac", | |
| 141 | + | "new_debug_unreachable", | |
| 142 | + | ] | |
| 143 | + | ||
| 144 | + | [[package]] | |
| 145 | + | name = "getopts" | |
| 146 | + | version = "0.2.24" | |
| 147 | + | source = "registry+https://github.com/rust-lang/crates.io-index" | |
| 148 | + | checksum = "cfe4fbac503b8d1f88e6676011885f34b7174f46e59956bba534ba83abded4df" | |
| 149 | + | dependencies = [ | |
| 150 | + | "unicode-width", | |
| 151 | + | ] | |
| 152 | + | ||
| 153 | + | [[package]] | |
| 154 | + | name = "getrandom" | |
| 155 | + | version = "0.4.2" | |
| 156 | + | source = "registry+https://github.com/rust-lang/crates.io-index" | |
| 157 | + | checksum = "0de51e6874e94e7bf76d726fc5d13ba782deca734ff60d5bb2fb2607c7406555" | |
| 158 | + | dependencies = [ | |
| 159 | + | "cfg-if", | |
| 160 | + | "libc", | |
| 161 | + | "r-efi", | |
| 162 | + | "wasip2", | |
| 163 | + | "wasip3", | |
| 164 | + | ] | |
| 165 | + | ||
| 166 | + | [[package]] | |
| 167 | + | name = "hashbrown" | |
| 168 | + | version = "0.15.5" | |
| 169 | + | source = "registry+https://github.com/rust-lang/crates.io-index" | |
| 170 | + | checksum = "9229cfe53dfd69f0609a49f65461bd93001ea1ef889cd5529dd176593f5338a1" | |
| 171 | + | dependencies = [ | |
| 172 | + | "foldhash", | |
| 173 | + | ] | |
| 174 | + | ||
| 175 | + | [[package]] | |
| 176 | + | name = "hashbrown" | |
| 177 | + | version = "0.16.1" | |
| 178 | + | source = "registry+https://github.com/rust-lang/crates.io-index" | |
| 179 | + | checksum = "841d1cc9bed7f9236f321df977030373f4a4163ae1a7dbfe1a51a2c1a51d9100" | |
| 180 | + | ||
| 181 | + | [[package]] | |
| 182 | + | name = "heck" | |
| 183 | + | version = "0.5.0" | |
| 184 | + | source = "registry+https://github.com/rust-lang/crates.io-index" | |
| 185 | + | checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" | |
| 186 | + | ||
| 187 | + | [[package]] | |
| 188 | + | name = "html5ever" | |
| 189 | + | version = "0.35.0" | |
| 190 | + | source = "registry+https://github.com/rust-lang/crates.io-index" | |
| 191 | + | checksum = "55d958c2f74b664487a2035fe1dadb032c48718a03b63f3ab0b8537db8549ed4" | |
| 192 | + | dependencies = [ | |
| 193 | + | "log", | |
| 194 | + | "markup5ever", | |
| 195 | + | "match_token", | |
| 196 | + | ] | |
| 197 | + | ||
| 198 | + | [[package]] | |
| 199 | + | name = "icu_collections" | |
| 200 | + | version = "2.1.1" | |
| 201 | + | source = "registry+https://github.com/rust-lang/crates.io-index" | |
| 202 | + | checksum = "4c6b649701667bbe825c3b7e6388cb521c23d88644678e83c0c4d0a621a34b43" | |
| 203 | + | dependencies = [ | |
| 204 | + | "displaydoc", | |
| 205 | + | "potential_utf", | |
| 206 | + | "yoke", | |
| 207 | + | "zerofrom", | |
| 208 | + | "zerovec", | |
| 209 | + | ] | |
| 210 | + | ||
| 211 | + | [[package]] | |
| 212 | + | name = "icu_locale_core" | |
| 213 | + | version = "2.1.1" | |
| 214 | + | source = "registry+https://github.com/rust-lang/crates.io-index" | |
| 215 | + | checksum = "edba7861004dd3714265b4db54a3c390e880ab658fec5f7db895fae2046b5bb6" | |
| 216 | + | dependencies = [ | |
| 217 | + | "displaydoc", | |
| 218 | + | "litemap", | |
| 219 | + | "tinystr", | |
| 220 | + | "writeable", | |
| 221 | + | "zerovec", | |
| 222 | + | ] | |
| 223 | + | ||
| 224 | + | [[package]] | |
| 225 | + | name = "icu_normalizer" | |
| 226 | + | version = "2.1.1" | |
| 227 | + | source = "registry+https://github.com/rust-lang/crates.io-index" | |
| 228 | + | checksum = "5f6c8828b67bf8908d82127b2054ea1b4427ff0230ee9141c54251934ab1b599" | |
| 229 | + | dependencies = [ | |
| 230 | + | "icu_collections", | |
| 231 | + | "icu_normalizer_data", | |
| 232 | + | "icu_properties", | |
| 233 | + | "icu_provider", | |
| 234 | + | "smallvec", | |
| 235 | + | "zerovec", | |
| 236 | + | ] | |
| 237 | + | ||
| 238 | + | [[package]] | |
| 239 | + | name = "icu_normalizer_data" | |
| 240 | + | version = "2.1.1" | |
| 241 | + | source = "registry+https://github.com/rust-lang/crates.io-index" | |
| 242 | + | checksum = "7aedcccd01fc5fe81e6b489c15b247b8b0690feb23304303a9e560f37efc560a" | |
| 243 | + | ||
| 244 | + | [[package]] | |
| 245 | + | name = "icu_properties" | |
| 246 | + | version = "2.1.2" | |
| 247 | + | source = "registry+https://github.com/rust-lang/crates.io-index" | |
| 248 | + | checksum = "020bfc02fe870ec3a66d93e677ccca0562506e5872c650f893269e08615d74ec" | |
| 249 | + | dependencies = [ | |
| 250 | + | "icu_collections", | |
| 251 | + | "icu_locale_core", | |
| 252 | + | "icu_properties_data", | |
| 253 | + | "icu_provider", | |
| 254 | + | "zerotrie", | |
| 255 | + | "zerovec", | |
| 256 | + | ] | |
| 257 | + | ||
| 258 | + | [[package]] | |
| 259 | + | name = "icu_properties_data" | |
| 260 | + | version = "2.1.2" | |
| 261 | + | source = "registry+https://github.com/rust-lang/crates.io-index" | |
| 262 | + | checksum = "616c294cf8d725c6afcd8f55abc17c56464ef6211f9ed59cccffe534129c77af" | |
| 263 | + | ||
| 264 | + | [[package]] | |
| 265 | + | name = "icu_provider" | |
| 266 | + | version = "2.1.1" | |
| 267 | + | source = "registry+https://github.com/rust-lang/crates.io-index" | |
| 268 | + | checksum = "85962cf0ce02e1e0a629cc34e7ca3e373ce20dda4c4d7294bbd0bf1fdb59e614" | |
| 269 | + | dependencies = [ | |
| 270 | + | "displaydoc", | |
| 271 | + | "icu_locale_core", | |
| 272 | + | "writeable", | |
| 273 | + | "yoke", | |
| 274 | + | "zerofrom", | |
| 275 | + | "zerotrie", | |
| 276 | + | "zerovec", | |
| 277 | + | ] | |
| 278 | + | ||
| 279 | + | [[package]] | |
| 280 | + | name = "id-arena" | |
| 281 | + | version = "2.3.0" | |
| 282 | + | source = "registry+https://github.com/rust-lang/crates.io-index" | |
| 283 | + | checksum = "3d3067d79b975e8844ca9eb072e16b31c3c1c36928edf9c6789548c524d0d954" | |
| 284 | + | ||
| 285 | + | [[package]] | |
| 286 | + | name = "idna" | |
| 287 | + | version = "1.1.0" | |
| 288 | + | source = "registry+https://github.com/rust-lang/crates.io-index" | |
| 289 | + | checksum = "3b0875f23caa03898994f6ddc501886a45c7d3d62d04d2d90788d47be1b1e4de" | |
| 290 | + | dependencies = [ | |
| 291 | + | "idna_adapter", | |
| 292 | + | "smallvec", | |
| 293 | + | "utf8_iter", | |
| 294 | + | ] | |
| 295 | + | ||
| 296 | + | [[package]] | |
| 297 | + | name = "idna_adapter" | |
| 298 | + | version = "1.2.1" | |
| 299 | + | source = "registry+https://github.com/rust-lang/crates.io-index" | |
| 300 | + | checksum = "3acae9609540aa318d1bc588455225fb2085b9ed0c4f6bd0d9d5bcd86f1a0344" | |
| 301 | + | dependencies = [ | |
| 302 | + | "icu_normalizer", | |
| 303 | + | "icu_properties", | |
| 304 | + | ] | |
| 305 | + | ||
| 306 | + | [[package]] | |
| 307 | + | name = "indexmap" | |
| 308 | + | version = "2.13.0" | |
| 309 | + | source = "registry+https://github.com/rust-lang/crates.io-index" | |
| 310 | + | checksum = "7714e70437a7dc3ac8eb7e6f8df75fd8eb422675fc7678aff7364301092b1017" | |
| 311 | + | dependencies = [ | |
| 312 | + | "equivalent", | |
| 313 | + | "hashbrown 0.16.1", | |
| 314 | + | "serde", | |
| 315 | + | "serde_core", | |
| 316 | + | ] | |
| 317 | + | ||
| 318 | + | [[package]] | |
| 319 | + | name = "itoa" | |
| 320 | + | version = "1.0.18" | |
| 321 | + | source = "registry+https://github.com/rust-lang/crates.io-index" | |
| 322 | + | checksum = "8f42a60cbdf9a97f5d2305f08a87dc4e09308d1276d28c869c684d7777685682" | |
| 323 | + | ||
| 324 | + | [[package]] | |
| 325 | + | name = "js-sys" | |
| 326 | + | version = "0.3.91" | |
| 327 | + | source = "registry+https://github.com/rust-lang/crates.io-index" | |
| 328 | + | checksum = "b49715b7073f385ba4bc528e5747d02e66cb39c6146efb66b781f131f0fb399c" | |
| 329 | + | dependencies = [ | |
| 330 | + | "once_cell", | |
| 331 | + | "wasm-bindgen", | |
| 332 | + | ] | |
| 333 | + | ||
| 334 | + | [[package]] | |
| 335 | + | name = "leb128fmt" | |
| 336 | + | version = "0.1.0" | |
| 337 | + | source = "registry+https://github.com/rust-lang/crates.io-index" | |
| 338 | + | checksum = "09edd9e8b54e49e587e4f6295a7d29c3ea94d469cb40ab8ca70b288248a81db2" | |
| 339 | + | ||
| 340 | + | [[package]] | |
| 341 | + | name = "libc" | |
| 342 | + | version = "0.2.183" | |
| 343 | + | source = "registry+https://github.com/rust-lang/crates.io-index" | |
| 344 | + | checksum = "b5b646652bf6661599e1da8901b3b9522896f01e736bad5f723fe7a3a27f899d" | |
| 345 | + | ||
| 346 | + | [[package]] | |
| 347 | + | name = "litemap" | |
| 348 | + | version = "0.8.1" | |
| 349 | + | source = "registry+https://github.com/rust-lang/crates.io-index" | |
| 350 | + | checksum = "6373607a59f0be73a39b6fe456b8192fcc3585f602af20751600e974dd455e77" | |
| 351 | + | ||
| 352 | + | [[package]] | |
| 353 | + | name = "lock_api" | |
| 354 | + | version = "0.4.14" | |
| 355 | + | source = "registry+https://github.com/rust-lang/crates.io-index" | |
| 356 | + | checksum = "224399e74b87b5f3557511d98dff8b14089b3dadafcab6bb93eab67d3aace965" | |
| 357 | + | dependencies = [ | |
| 358 | + | "scopeguard", | |
| 359 | + | ] | |
| 360 | + | ||
| 361 | + | [[package]] | |
| 362 | + | name = "log" | |
| 363 | + | version = "0.4.29" | |
| 364 | + | source = "registry+https://github.com/rust-lang/crates.io-index" | |
| 365 | + | checksum = "5e5032e24019045c762d3c0f28f5b6b8bbf38563a65908389bf7978758920897" | |
| 366 | + | ||
| 367 | + | [[package]] | |
| 368 | + | name = "mac" | |
| 369 | + | version = "0.1.1" | |
| 370 | + | source = "registry+https://github.com/rust-lang/crates.io-index" | |
| 371 | + | checksum = "c41e0c4fef86961ac6d6f8a82609f55f31b05e4fce149ac5710e439df7619ba4" | |
| 372 | + | ||
| 373 | + | [[package]] | |
| 374 | + | name = "maplit" | |
| 375 | + | version = "1.0.2" | |
| 376 | + | source = "registry+https://github.com/rust-lang/crates.io-index" | |
| 377 | + | checksum = "3e2e65a1a2e43cfcb47a895c4c8b10d1f4a61097f9f254f183aee60cad9c651d" | |
| 378 | + | ||
| 379 | + | [[package]] | |
| 380 | + | name = "markup5ever" | |
| 381 | + | version = "0.35.0" | |
| 382 | + | source = "registry+https://github.com/rust-lang/crates.io-index" | |
| 383 | + | checksum = "311fe69c934650f8f19652b3946075f0fc41ad8757dbb68f1ca14e7900ecc1c3" | |
| 384 | + | dependencies = [ | |
| 385 | + | "log", | |
| 386 | + | "tendril", | |
| 387 | + | "web_atoms", | |
| 388 | + | ] | |
| 389 | + | ||
| 390 | + | [[package]] | |
| 391 | + | name = "match_token" | |
| 392 | + | version = "0.35.0" | |
| 393 | + | source = "registry+https://github.com/rust-lang/crates.io-index" | |
| 394 | + | checksum = "ac84fd3f360fcc43dc5f5d186f02a94192761a080e8bc58621ad4d12296a58cf" | |
| 395 | + | dependencies = [ | |
| 396 | + | "proc-macro2", | |
| 397 | + | "quote", | |
| 398 | + | "syn", | |
| 399 | + | ] | |
| 400 | + | ||
| 401 | + | [[package]] | |
| 402 | + | name = "memchr" | |
| 403 | + | version = "2.8.0" | |
| 404 | + | source = "registry+https://github.com/rust-lang/crates.io-index" | |
| 405 | + | checksum = "f8ca58f447f06ed17d5fc4043ce1b10dd205e060fb3ce5b979b8ed8e59ff3f79" | |
| 406 | + | ||
| 407 | + | [[package]] | |
| 408 | + | name = "new_debug_unreachable" | |
| 409 | + | version = "1.0.6" | |
| 410 | + | source = "registry+https://github.com/rust-lang/crates.io-index" | |
| 411 | + | checksum = "650eef8c711430f1a879fdd01d4745a7deea475becfb90269c06775983bbf086" | |
| 412 | + | ||
| 413 | + | [[package]] | |
| 414 | + | name = "once_cell" | |
| 415 | + | version = "1.21.4" | |
| 416 | + | source = "registry+https://github.com/rust-lang/crates.io-index" | |
| 417 | + | checksum = "9f7c3e4beb33f85d45ae3e3a1792185706c8e16d043238c593331cc7cd313b50" | |
| 418 | + | ||
| 419 | + | [[package]] | |
| 420 | + | name = "parking_lot" | |
| 421 | + | version = "0.12.5" | |
| 422 | + | source = "registry+https://github.com/rust-lang/crates.io-index" | |
| 423 | + | checksum = "93857453250e3077bd71ff98b6a65ea6621a19bb0f559a85248955ac12c45a1a" | |
| 424 | + | dependencies = [ | |
| 425 | + | "lock_api", | |
| 426 | + | "parking_lot_core", | |
| 427 | + | ] | |
| 428 | + | ||
| 429 | + | [[package]] | |
| 430 | + | name = "parking_lot_core" | |
| 431 | + | version = "0.9.12" | |
| 432 | + | source = "registry+https://github.com/rust-lang/crates.io-index" | |
| 433 | + | checksum = "2621685985a2ebf1c516881c026032ac7deafcda1a2c9b7850dc81e3dfcb64c1" | |
| 434 | + | dependencies = [ | |
| 435 | + | "cfg-if", | |
| 436 | + | "libc", | |
| 437 | + | "redox_syscall", | |
| 438 | + | "smallvec", | |
| 439 | + | "windows-link", | |
| 440 | + | ] | |
| 441 | + | ||
| 442 | + | [[package]] | |
| 443 | + | name = "percent-encoding" | |
| 444 | + | version = "2.3.2" | |
| 445 | + | source = "registry+https://github.com/rust-lang/crates.io-index" | |
| 446 | + | checksum = "9b4f627cb1b25917193a259e49bdad08f671f8d9708acfd5fe0a8c1455d87220" | |
| 447 | + | ||
| 448 | + | [[package]] | |
| 449 | + | name = "phf" | |
| 450 | + | version = "0.11.3" | |
| 451 | + | source = "registry+https://github.com/rust-lang/crates.io-index" | |
| 452 | + | checksum = "1fd6780a80ae0c52cc120a26a1a42c1ae51b247a253e4e06113d23d2c2edd078" | |
| 453 | + | dependencies = [ | |
| 454 | + | "phf_macros", | |
| 455 | + | "phf_shared", | |
| 456 | + | ] | |
| 457 | + | ||
| 458 | + | [[package]] | |
| 459 | + | name = "phf_codegen" | |
| 460 | + | version = "0.11.3" | |
| 461 | + | source = "registry+https://github.com/rust-lang/crates.io-index" | |
| 462 | + | checksum = "aef8048c789fa5e851558d709946d6d79a8ff88c0440c587967f8e94bfb1216a" | |
| 463 | + | dependencies = [ | |
| 464 | + | "phf_generator", | |
| 465 | + | "phf_shared", | |
| 466 | + | ] | |
| 467 | + | ||
| 468 | + | [[package]] | |
| 469 | + | name = "phf_generator" | |
| 470 | + | version = "0.11.3" | |
| 471 | + | source = "registry+https://github.com/rust-lang/crates.io-index" | |
| 472 | + | checksum = "3c80231409c20246a13fddb31776fb942c38553c51e871f8cbd687a4cfb5843d" | |
| 473 | + | dependencies = [ | |
| 474 | + | "phf_shared", | |
| 475 | + | "rand", | |
| 476 | + | ] | |
| 477 | + | ||
| 478 | + | [[package]] | |
| 479 | + | name = "phf_macros" | |
| 480 | + | version = "0.11.3" | |
| 481 | + | source = "registry+https://github.com/rust-lang/crates.io-index" | |
| 482 | + | checksum = "f84ac04429c13a7ff43785d75ad27569f2951ce0ffd30a3321230db2fc727216" | |
| 483 | + | dependencies = [ | |
| 484 | + | "phf_generator", | |
| 485 | + | "phf_shared", | |
| 486 | + | "proc-macro2", | |
| 487 | + | "quote", | |
| 488 | + | "syn", | |
| 489 | + | ] | |
| 490 | + | ||
| 491 | + | [[package]] | |
| 492 | + | name = "phf_shared" | |
| 493 | + | version = "0.11.3" | |
| 494 | + | source = "registry+https://github.com/rust-lang/crates.io-index" | |
| 495 | + | checksum = "67eabc2ef2a60eb7faa00097bd1ffdb5bd28e62bf39990626a582201b7a754e5" | |
| 496 | + | dependencies = [ | |
| 497 | + | "siphasher", | |
| 498 | + | ] | |
| 499 | + | ||
| 500 | + | [[package]] |
Lines truncated
| @@ -0,0 +1,22 @@ | |||
| 1 | + | [package] | |
| 2 | + | name = "docengine" | |
| 3 | + | version = "0.3.0" | |
| 4 | + | edition = "2021" | |
| 5 | + | ||
| 6 | + | [features] | |
| 7 | + | default = [] | |
| 8 | + | doc-loader = ["dep:regex"] | |
| 9 | + | mentions = ["dep:regex-lite"] | |
| 10 | + | quotes = ["dep:regex-lite", "dep:uuid"] | |
| 11 | + | frontmatter = ["dep:toml"] | |
| 12 | + | full = ["doc-loader", "mentions", "quotes", "frontmatter"] | |
| 13 | + | ||
| 14 | + | [dependencies] | |
| 15 | + | pulldown-cmark = "0.12" | |
| 16 | + | ammonia = "4" | |
| 17 | + | serde = { version = "1", features = ["derive"] } | |
| 18 | + | ||
| 19 | + | regex = { version = "1", optional = true } | |
| 20 | + | regex-lite = { version = "0.1", optional = true } | |
| 21 | + | uuid = { version = "1", features = ["serde", "v4"], optional = true } | |
| 22 | + | toml = { version = "0.8", optional = true } |
| @@ -0,0 +1,132 @@ | |||
/// Blank out inline code spans and fenced code blocks.
///
/// A span opens at a run of one or more backticks and closes at the next run
/// of backticks of exactly the same length. An opening run with no matching
/// closer extends to the end of `input`.
///
/// Every byte of a span (delimiters and content, including newlines) is
/// replaced by one ASCII space, so the returned string has the same byte
/// length as `input` and any byte offset found in it is also valid in `input`.
/// Text outside code spans is copied through unchanged.
pub fn strip_code_spans(input: &str) -> String {
    let bytes = input.as_bytes();
    // Length of the backtick run starting at byte offset `from`.
    let count_ticks = |from: usize| bytes[from..].iter().take_while(|&&b| b == b'`').count();

    let mut out = String::with_capacity(input.len());
    // Everything before `copied_to` has already been written to `out`.
    let mut copied_to = 0;
    let mut pos = 0;

    while let Some(offset) = bytes[pos..].iter().position(|&b| b == b'`') {
        let span_start = pos + offset;
        let open_len = count_ticks(span_start);
        pos = span_start + open_len;

        // Find the first later backtick run with the same length as the opener.
        let span_end = loop {
            match bytes[pos..].iter().position(|&b| b == b'`') {
                // Unclosed span: it swallows the rest of the input.
                None => break bytes.len(),
                Some(gap) => {
                    let close_len = count_ticks(pos + gap);
                    pos += gap + close_len;
                    if close_len == open_len {
                        break pos;
                    }
                }
            }
        };

        // Backticks are ASCII, so `span_start` and `span_end` always fall on
        // UTF-8 character boundaries and the slices below cannot panic.
        out.push_str(&input[copied_to..span_start]);
        out.push_str(&" ".repeat(span_end - span_start));
        copied_to = span_end;
        pos = span_end;
    }

    out.push_str(&input[copied_to..]);
    out
}
| 37 | + | ||
/// Locate inline code spans and fenced code blocks in `input`.
///
/// Each returned pair is a half-open byte range `(start, end)` covering the
/// opening backtick run, the content, and the closing backtick run. A span
/// closes at the first later backtick run whose length equals the opening
/// run; if no such run exists the span extends to `input.len()`.
pub fn code_span_ranges(input: &str) -> Vec<(usize, usize)> {
    let bytes = input.as_bytes();
    let end_of_input = bytes.len();
    // Length of the backtick run starting at byte offset `from`.
    let backtick_run = |from: usize| bytes[from..].iter().take_while(|&&b| b == b'`').count();

    let mut spans = Vec::new();
    let mut cursor = 0;

    while let Some(offset) = bytes[cursor..].iter().position(|&b| b == b'`') {
        let open_at = cursor + offset;
        let fence = backtick_run(open_at);
        cursor = open_at + fence;

        let close_at = loop {
            match bytes[cursor..].iter().position(|&b| b == b'`') {
                // No further backticks: the span is unclosed.
                None => break end_of_input,
                Some(gap) => {
                    let run = backtick_run(cursor + gap);
                    cursor += gap + run;
                    if run == fence {
                        break cursor;
                    }
                }
            }
        };

        spans.push((open_at, close_at));
        cursor = close_at;
    }

    spans
}
| 79 | + | ||
#[cfg(test)]
mod tests {
    use super::{code_span_ranges, strip_code_spans};

    /// Inline code is removed while the surrounding words survive.
    #[test]
    fn strip_inline_code() {
        let stripped = strip_code_spans("hello `code` world");
        assert!(!stripped.contains("code"));
        assert!(stripped.contains("hello") && stripped.contains("world"));
    }

    /// A triple-backtick fence is removed together with its content.
    #[test]
    fn strip_fenced_code() {
        let stripped = strip_code_spans("text\n```\ncode block\n```\nmore");
        assert!(!stripped.contains("code block"));
        assert!(stripped.contains("text") && stripped.contains("more"));
    }

    /// The reported range covers the delimiters as well as the content.
    #[test]
    fn ranges_inline_code() {
        let input = "hello `code` world";
        let spans = code_span_ranges(input);
        assert_eq!(spans.len(), 1);
        let (start, end) = spans[0];
        assert_eq!(&input[start..end], "`code`");
    }

    /// A fenced block is reported as a single range bounded by its fences.
    #[test]
    fn ranges_fenced_code() {
        let input = "text\n```\ncode\n```\nmore";
        let spans = code_span_ranges(input);
        assert_eq!(spans.len(), 1);
        let (start, end) = spans[0];
        let span = &input[start..end];
        assert!(span.starts_with("```"));
        assert!(span.ends_with("```"));
    }

    /// An opening backtick without a closer extends to the end of the input.
    #[test]
    fn ranges_unclosed_backtick() {
        let input = "hello `unclosed";
        let spans = code_span_ranges(input);
        assert_eq!(spans.len(), 1);
        assert_eq!(spans[0], (6, input.len()));
    }

    /// Input without backticks yields no ranges and is returned unchanged.
    #[test]
    fn no_code_spans() {
        let plain = "no code here";
        assert!(code_span_ranges(plain).is_empty());
        assert_eq!(strip_code_spans(plain), plain);
    }
}
| @@ -0,0 +1,260 @@ | |||
| 1 | + | use std::collections::HashMap; | |
| 2 | + | use std::path::Path; | |
| 3 | + | use std::sync::LazyLock; | |
| 4 | + | ||
| 5 | + | use regex::Regex; | |
| 6 | + | ||
| 7 | + | static LINK_RE: LazyLock<Regex> = LazyLock::new(|| { | |
| 8 | + | Regex::new(r"\[([^\]]+)\]\(([^)]+)\)").expect("valid regex") | |
| 9 | + | }); | |
| 10 | + | ||
/// Configuration for the doc loader.
///
/// Derives `Clone` and `Debug` like the other public types in this module so
/// a configuration can be copied and logged.
#[derive(Clone, Debug)]
pub struct DocLoaderConfig {
    /// Sections as `(directory_name, display_name)` pairs in display order.
    ///
    /// `directory_name` is joined onto the loader's base path; `display_name`
    /// is stored on every page and index entry loaded from that directory.
    pub sections: Vec<(String, String)>,
    /// URL prefix for rewritten links (e.g., "/docs").
    pub link_prefix: String,
    /// Pattern that identifies unpublished links to strip (e.g., "unpublished/").
    ///
    /// A relative link whose URL contains this substring is replaced by its
    /// plain link text. `None` disables the check.
    pub unpublished_pattern: Option<String>,
}
| 20 | + | ||
/// One documentation page after rendering.
#[derive(Clone, Debug)]
pub struct DocPage {
    /// Title shown for the page.
    pub title: String,
    /// Key under which the page is stored and looked up.
    pub slug: String,
    /// Display name of the section the page belongs to.
    pub section: String,
    /// Rendered HTML body of the page.
    pub html_content: String,
}
| 29 | + | ||
/// A single row of the docs index page; rows are kept in display order.
#[derive(Clone, Debug)]
pub struct DocIndexEntry {
    /// Title shown for the linked page.
    pub title: String,
    /// Slug of the page this entry points at.
    pub slug: String,
    /// Display name of the section the page belongs to.
    pub section: String,
}
| 37 | + | ||
| 38 | + | /// In-memory store of rendered documentation pages, built once at startup. | |
| 39 | + | #[derive(Clone, Debug)] | |
| 40 | + | pub struct DocLoader { | |
| 41 | + | pages: HashMap<String, DocPage>, | |
| 42 | + | index: Vec<DocIndexEntry>, | |
| 43 | + | } | |
| 44 | + | ||
| 45 | + | impl DocLoader { | |
| 46 | + | /// Load all `.md` files from `base_path`, rendering them into HTML. | |
| 47 | + | /// | |
| 48 | + | /// Expects subdirectories matching the configured sections. | |
| 49 | + | pub fn load(base_path: &Path, config: &DocLoaderConfig) -> Self { | |
| 50 | + | let mut pages = HashMap::new(); | |
| 51 | + | let mut index = Vec::new(); | |
| 52 | + | ||
| 53 | + | for (dir_name, section_display) in &config.sections { | |
| 54 | + | let section_path = base_path.join(dir_name); | |
| 55 | + | if !section_path.is_dir() { | |
| 56 | + | continue; | |
| 57 | + | } | |
| 58 | + | ||
| 59 | + | let mut entries: Vec<_> = std::fs::read_dir(§ion_path) | |
| 60 | + | .into_iter() | |
| 61 | + | .flatten() | |
| 62 | + | .filter_map(|e| e.ok()) | |
| 63 | + | .filter(|e| { | |
| 64 | + | e.path() | |
| 65 | + | .extension() | |
| 66 | + | .map(|ext| ext == "md") | |
| 67 | + | .unwrap_or(false) | |
| 68 | + | }) | |
| 69 | + | .collect(); | |
| 70 | + | ||
| 71 | + | entries.sort_by_key(|e| e.file_name()); | |
| 72 | + | ||
| 73 | + | for entry in entries { | |
| 74 | + | let path = entry.path(); | |
| 75 | + | let slug = path | |
| 76 | + | .file_stem() | |
| 77 | + | .and_then(|s| s.to_str()) | |
| 78 | + | .unwrap_or_default() | |
| 79 | + | .to_string(); | |
| 80 | + | ||
| 81 | + | let raw_md = match std::fs::read_to_string(&path) { | |
| 82 | + | Ok(content) => content, | |
| 83 | + | Err(_) => continue, | |
| 84 | + | }; | |
| 85 | + | ||
| 86 | + | let title = | |
| 87 | + | crate::text::extract_title(&raw_md).unwrap_or_else(|| slug.clone()); | |
| 88 | + | let rewritten_md = rewrite_links( | |
| 89 | + | &raw_md, | |
| 90 | + | &config.link_prefix, | |
| 91 | + | config.unpublished_pattern.as_deref(), | |
| 92 | + | ); | |
| 93 | + | let md_without_title = crate::text::strip_first_heading(&rewritten_md); | |
| 94 | + | let html_content = crate::render_permissive(&md_without_title); | |
| 95 | + | ||
| 96 | + | let page = DocPage { | |
| 97 | + | title: title.clone(), | |
| 98 | + | slug: slug.clone(), | |
| 99 | + | section: section_display.clone(), | |
| 100 | + | html_content, | |
| 101 | + | }; | |
| 102 | + | ||
| 103 | + | index.push(DocIndexEntry { | |
| 104 | + | title: title.clone(), | |
| 105 | + | slug: slug.clone(), | |
| 106 | + | section: section_display.clone(), | |
| 107 | + | }); | |
| 108 | + | ||
| 109 | + | pages.insert(slug, page); | |
| 110 | + | } | |
| 111 | + | } | |
| 112 | + | ||
| 113 | + | DocLoader { pages, index } | |
| 114 | + | } | |
| 115 | + | ||
| 116 | + | /// Look up a rendered page by slug. | |
| 117 | + | pub fn get(&self, slug: &str) -> Option<&DocPage> { | |
| 118 | + | self.pages.get(slug) | |
| 119 | + | } | |
| 120 | + | ||
| 121 | + | /// Get the full ordered index. | |
| 122 | + | pub fn index(&self) -> &[DocIndexEntry] { | |
| 123 | + | &self.index | |
| 124 | + | } | |
| 125 | + | } | |
| 126 | + | ||
| 127 | + | /// Rewrite relative `.md` links to the configured prefix. | |
| 128 | + | fn rewrite_links(markdown: &str, link_prefix: &str, unpublished_pattern: Option<&str>) -> String { | |
| 129 | + | LINK_RE | |
| 130 | + | .replace_all(markdown, |caps: ®ex::Captures| { | |
| 131 | + | let text = &caps[1]; | |
| 132 | + | let url = &caps[2]; | |
| 133 | + | ||
| 134 | + | // Preserve absolute URLs, mailto, and internal routes. | |
| 135 | + | if url.starts_with("http://") | |
| 136 | + | || url.starts_with("https://") | |
| 137 | + | || url.starts_with("mailto:") | |
| 138 | + | || url.starts_with('/') | |
| 139 | + | { | |
| 140 | + | return caps[0].to_string(); | |
| 141 | + | } | |
| 142 | + | ||
| 143 | + | // Unpublished docs: strip link, keep text. | |
| 144 | + | if let Some(pattern) = unpublished_pattern { | |
| 145 | + | if url.contains(pattern) { | |
| 146 | + | return text.to_string(); | |
| 147 | + | } | |
| 148 | + | } | |
| 149 | + | ||
| 150 | + | // Only rewrite links containing .md | |
| 151 | + | if !url.contains(".md") { | |
| 152 | + | return caps[0].to_string(); | |
| 153 | + | } | |
| 154 | + | ||
| 155 | + | // Split off any #anchor. | |
| 156 | + | let (path_part, anchor): (&str, Option<&str>) = match url.split_once('#') { | |
| 157 | + | Some((p, a)) => (p, Some(a)), | |
| 158 | + | None => (url, None), | |
| 159 | + | }; | |
| 160 | + | ||
| 161 | + | // Extract slug from filename: ../support/faq.md -> faq | |
| 162 | + | let filename = path_part | |
| 163 | + | .rsplit('/') | |
| 164 | + | .next() | |
| 165 | + | .unwrap_or(path_part) | |
| 166 | + | .trim_end_matches(".md"); | |
| 167 | + | ||
| 168 | + | let mut new_url = format!("{link_prefix}/{filename}"); | |
| 169 | + | if let Some(anchor) = anchor { | |
| 170 | + | new_url.push('#'); | |
| 171 | + | new_url.push_str(anchor); | |
| 172 | + | } | |
| 173 | + | ||
| 174 | + | format!("[{text}]({new_url})") | |
| 175 | + | }) | |
| 176 | + | .to_string() | |
| 177 | + | } | |
| 178 | + | ||
#[cfg(test)]
mod tests {
    use super::*;

    /// Pattern most tests use to mark unpublished documents.
    const UNPUBLISHED: Option<&str> = Some("unpublished/");

    /// Runs `rewrite_links` and compares the output with `expected`.
    fn assert_rewrite(md: &str, prefix: &str, pattern: Option<&str>, expected: &str) {
        assert_eq!(rewrite_links(md, prefix, pattern), expected);
    }

    /// Asserts that `md` comes back unchanged.
    fn assert_untouched(md: &str, prefix: &str, pattern: Option<&str>) {
        assert_rewrite(md, prefix, pattern, md);
    }

    #[test]
    fn rewrite_same_section_link() {
        assert_rewrite(
            "See [SLA](./guarantees.md) for details.",
            "/docs",
            UNPUBLISHED,
            "See [SLA](/docs/guarantees) for details.",
        );
    }

    #[test]
    fn rewrite_cross_section_link() {
        assert_rewrite(
            "Check [FAQ](../support/faq.md) for more.",
            "/docs",
            UNPUBLISHED,
            "Check [FAQ](/docs/faq) for more.",
        );
    }

    #[test]
    fn rewrite_unpublished_link_becomes_plain_text() {
        assert_rewrite(
            "See [Content Moderation](../../unpublished/legal/moderation.md) for details.",
            "/docs",
            UNPUBLISHED,
            "See Content Moderation for details.",
        );
    }

    #[test]
    fn rewrite_preserves_absolute_urls() {
        assert_untouched("Visit [our site](https://example.com) today.", "/docs", UNPUBLISHED);
    }

    #[test]
    fn rewrite_preserves_mailto() {
        assert_untouched("Email [us](mailto:test@example.com)", "/docs", UNPUBLISHED);
    }

    #[test]
    fn rewrite_preserves_internal_routes() {
        assert_untouched("Go to [pricing](/pricing) page.", "/docs", UNPUBLISHED);
    }

    #[test]
    fn rewrite_link_with_anchor() {
        assert_rewrite(
            "See [section](./faq.md#billing).",
            "/docs",
            UNPUBLISHED,
            "See [section](/docs/faq#billing).",
        );
    }

    #[test]
    fn rewrite_public_cross_ref() {
        assert_rewrite(
            "See [Acceptable Use](../../public/legal/acceptable-use.md).",
            "/docs",
            UNPUBLISHED,
            "See [Acceptable Use](/docs/acceptable-use).",
        );
    }

    #[test]
    fn rewrite_custom_prefix() {
        assert_rewrite("See [FAQ](./faq.md) here.", "/help", None, "See [FAQ](/help/faq) here.");
    }

    #[test]
    fn rewrite_no_unpublished_pattern() {
        // Without the pattern, it just rewrites normally
        assert_rewrite(
            "See [doc](../../unpublished/foo.md).",
            "/docs",
            None,
            "See [doc](/docs/foo).",
        );
    }

    #[test]
    fn rewrite_non_md_link_preserved() {
        assert_untouched("See [image](./photo.png) here.", "/docs", None);
    }
}
| @@ -0,0 +1,130 @@ | |||
| 1 | + | use std::collections::HashMap; | |
| 2 | + | ||
| 3 | + | use serde::Deserialize; | |
| 4 | + | ||
/// Parsed TOML frontmatter from a markdown document.
///
/// Every named field is optional, so a frontmatter block may set any subset
/// of them (including none).
#[derive(Debug, Clone, Default, Deserialize)]
pub struct Frontmatter {
    /// Value of the `title` key, if present.
    pub title: Option<String>,
    /// Value of the `date` key, if present; kept as the raw string.
    pub date: Option<String>,
    /// Value of the `tags` key, if present.
    pub tags: Option<Vec<String>>,
    /// Value of the `section` key, if present.
    pub section: Option<String>,
    /// Value of the `draft` key, if present.
    pub draft: Option<bool>,
    /// Remaining keys not covered by the named fields above, collected via
    /// `#[serde(flatten)]`.
    #[serde(flatten)]
    pub extra: HashMap<String, toml::Value>,
}
| 16 | + | ||
| 17 | + | /// Parse TOML frontmatter delimited by `+++` from the beginning of a document. | |
| 18 | + | /// | |
| 19 | + | /// Returns the parsed frontmatter (if present) and the remaining markdown | |
| 20 | + | /// content. | |
| 21 | + | pub fn parse_frontmatter(input: &str) -> (Option<Frontmatter>, &str) { | |
| 22 | + | let trimmed = input.trim_start(); | |
| 23 | + | if !trimmed.starts_with("+++") { | |
| 24 | + | return (None, input); | |
| 25 | + | } | |
| 26 | + | ||
| 27 | + | // Find the closing +++ | |
| 28 | + | let after_opening = &trimmed[3..]; | |
| 29 | + | let after_opening = after_opening.strip_prefix('\n').unwrap_or(after_opening); | |
| 30 | + | ||
| 31 | + | if let Some(end_pos) = after_opening.find("\n+++") { | |
| 32 | + | let toml_content = &after_opening[..end_pos]; | |
| 33 | + | let rest_start = end_pos + 4; // skip \n+++ | |
| 34 | + | let rest = &after_opening[rest_start..]; | |
| 35 | + | let rest = rest.strip_prefix('\n').unwrap_or(rest); | |
| 36 | + | ||
| 37 | + | // Calculate the actual offset into the original input | |
| 38 | + | let rest_offset = input.len() - rest.len(); | |
| 39 | + | let rest_slice = &input[rest_offset..]; | |
| 40 | + | ||
| 41 | + | match toml::from_str::<Frontmatter>(toml_content) { | |
| 42 | + | Ok(fm) => (Some(fm), rest_slice), | |
| 43 | + | Err(_) => (None, input), | |
| 44 | + | } | |
| 45 | + | } else { | |
| 46 | + | (None, input) | |
| 47 | + | } | |
| 48 | + | } | |
| 49 | + | ||
#[cfg(test)]
mod tests {
    use super::*;

    /// Parses `input` and returns its frontmatter, panicking if none was found.
    fn frontmatter_of(input: &str) -> Frontmatter {
        parse_frontmatter(input).0.expect("frontmatter should be present")
    }

    /// Asserts that `input` yields no frontmatter and is handed back unchanged.
    fn assert_passthrough(input: &str) {
        let (fm, rest) = parse_frontmatter(input);
        assert!(fm.is_none());
        assert_eq!(rest, input);
    }

    #[test]
    fn parse_basic_frontmatter() {
        let (fm, rest) =
            parse_frontmatter("+++\ntitle = \"Hello\"\ndate = \"2026-01-01\"\n+++\n\n# Body");
        let fm = fm.expect("frontmatter should be present");
        assert_eq!(fm.title.as_deref(), Some("Hello"));
        assert_eq!(fm.date.as_deref(), Some("2026-01-01"));
        assert!(rest.contains("# Body"));
    }

    #[test]
    fn parse_with_tags() {
        let fm = frontmatter_of("+++\ntitle = \"Post\"\ntags = [\"rust\", \"web\"]\n+++\nContent");
        let tags = fm.tags.expect("tags should be present");
        assert_eq!(tags, ["rust", "web"]);
    }

    #[test]
    fn parse_with_draft() {
        let fm = frontmatter_of("+++\ndraft = true\n+++\nContent");
        assert_eq!(fm.draft, Some(true));
    }

    #[test]
    fn parse_with_extra_fields() {
        let fm = frontmatter_of("+++\ntitle = \"Test\"\ncustom_key = \"custom_value\"\n+++\nBody");
        assert_eq!(fm.title.as_deref(), Some("Test"));
        let custom = fm.extra.get("custom_key").and_then(|value| value.as_str());
        assert_eq!(custom, Some("custom_value"));
    }

    #[test]
    fn no_frontmatter() {
        assert_passthrough("# Just Markdown\n\nBody text");
    }

    #[test]
    fn unclosed_frontmatter() {
        assert_passthrough("+++\ntitle = \"Oops\"\nNo closing delimiter");
    }

    #[test]
    fn invalid_toml_returns_none() {
        assert_passthrough("+++\nnot valid toml {{{\n+++\nBody");
    }

    #[test]
    fn empty_frontmatter() {
        let (fm, rest) = parse_frontmatter("+++\n\n+++\nBody");
        let fm = fm.expect("frontmatter should be present");
        assert!(fm.title.is_none());
        assert!(rest.contains("Body"));
    }

    #[test]
    fn frontmatter_with_section() {
        let fm = frontmatter_of("+++\nsection = \"guide\"\n+++\nContent");
        assert_eq!(fm.section.as_deref(), Some("guide"));
    }
}
| @@ -0,0 +1,51 @@ | |||
| 1 | + | #[cfg(any(feature = "mentions", test))] | |
| 2 | + | mod code_spans; | |
| 3 | + | mod render; | |
| 4 | + | mod sanitize; | |
| 5 | + | mod text; | |
| 6 | + | mod toc; | |
| 7 | + | ||
| 8 | + | #[cfg(feature = "doc-loader")] | |
| 9 | + | mod doc_loader; | |
| 10 | + | #[cfg(feature = "frontmatter")] | |
| 11 | + | mod frontmatter; | |
| 12 | + | #[cfg(feature = "mentions")] | |
| 13 | + | mod mentions; | |
| 14 | + | #[cfg(feature = "quotes")] | |
| 15 | + | mod quotes; | |
| 16 | + | ||
| 17 | + | // Re-export core types | |
| 18 | + | pub use render::{RenderResult, Renderer}; | |
| 19 | + | pub use sanitize::SanitizePreset; | |
| 20 | + | pub use text::{extract_title, reading_time_minutes, strip_first_heading, word_count}; | |
| 21 | + | pub use toc::{TocEntry, extract_toc, render_toc_html}; | |
| 22 | + | ||
| 23 | + | // Re-export feature-gated types | |
| 24 | + | #[cfg(feature = "doc-loader")] | |
| 25 | + | pub use doc_loader::{DocIndexEntry, DocLoader, DocLoaderConfig, DocPage}; | |
| 26 | + | #[cfg(feature = "frontmatter")] | |
| 27 | + | pub use frontmatter::{Frontmatter, parse_frontmatter}; | |
| 28 | + | #[cfg(feature = "mentions")] | |
| 29 | + | pub use mentions::{extract_mentions, resolve_mentions}; | |
| 30 | + | #[cfg(feature = "quotes")] | |
| 31 | + | pub use quotes::{QuoteAuthor, post_process_quotes}; | |
| 32 | + | ||
| 33 | + | /// Render markdown with the permissive preset (GFM features, default ammonia). | |
| 34 | + | pub fn render_permissive(markdown: &str) -> String { | |
| 35 | + | Renderer::permissive().render(markdown) | |
| 36 | + | } | |
| 37 | + | ||
| 38 | + | /// Render markdown with the standard preset (GFM features, no images). | |
| 39 | + | pub fn render_standard(markdown: &str) -> String { | |
| 40 | + | Renderer::standard().render(markdown) | |
| 41 | + | } | |
| 42 | + | ||
| 43 | + | /// Render markdown with the strict preset (no images, no raw HTML, nofollow). | |
| 44 | + | pub fn render_strict(markdown: &str) -> String { | |
| 45 | + | Renderer::strict().render(markdown) | |
| 46 | + | } | |
| 47 | + | ||
| 48 | + | /// Sanitize HTML without markdown parsing. | |
| 49 | + | pub fn sanitize_html(html: &str) -> String { | |
| 50 | + | Renderer::sanitize_only().sanitize_html(html) | |
| 51 | + | } |
| @@ -0,0 +1,144 @@ | |||
| 1 | + | use std::collections::HashSet; | |
| 2 | + | ||
| 3 | + | use crate::code_spans::code_span_ranges; | |
| 4 | + | ||
| 5 | + | /// Extract unique `@username` mentions from raw markdown input. | |
| 6 | + | /// Skips mentions inside inline code (backtick-wrapped). | |
| 7 | + | pub fn extract_mentions(input: &str) -> Vec<String> { | |
| 8 | + | static MENTION_RE: std::sync::LazyLock<regex_lite::Regex> = | |
| 9 | + | std::sync::LazyLock::new(|| regex_lite::Regex::new(r"@([A-Za-z0-9_-]+)").unwrap()); | |
| 10 | + | ||
| 11 | + | let stripped = crate::code_spans::strip_code_spans(input); | |
| 12 | + | let mut seen = HashSet::new(); | |
| 13 | + | let mut result = Vec::new(); | |
| 14 | + | for caps in MENTION_RE.captures_iter(&stripped) { | |
| 15 | + | let username = caps[1].to_string(); | |
| 16 | + | if seen.insert(username.clone()) { | |
| 17 | + | result.push(username); | |
| 18 | + | } | |
| 19 | + | } | |
| 20 | + | result | |
| 21 | + | } | |
| 22 | + | ||
| 23 | + | /// Replace `@username` with markdown profile links for valid usernames. | |
| 24 | + | /// | |
| 25 | + | /// `url_template` uses `{username}` as placeholder. For example: | |
| 26 | + | /// `/p/my-community/u/{username}` becomes `/p/my-community/u/alice`. | |
| 27 | + | /// | |
| 28 | + | /// Unknown usernames are left as plain text. | |
| 29 | + | pub fn resolve_mentions( | |
| 30 | + | input: &str, | |
| 31 | + | valid_usernames: &HashSet<String>, | |
| 32 | + | url_template: &str, | |
| 33 | + | ) -> String { | |
| 34 | + | static MENTION_RE: std::sync::LazyLock<regex_lite::Regex> = | |
| 35 | + | std::sync::LazyLock::new(|| regex_lite::Regex::new(r"@([A-Za-z0-9_-]+)").unwrap()); | |
| 36 | + | ||
| 37 | + | let mut result = String::with_capacity(input.len()); | |
| 38 | + | let mut pos = 0; | |
| 39 | + | ||
| 40 | + | for (code_start, code_end) in code_span_ranges(input) { | |
| 41 | + | let before = &input[pos..code_start]; | |
| 42 | + | result.push_str(&replace_mentions(before, valid_usernames, url_template, &MENTION_RE)); | |
| 43 | + | result.push_str(&input[code_start..code_end]); | |
| 44 | + | pos = code_end; | |
| 45 | + | } | |
| 46 | + | let tail = &input[pos..]; | |
| 47 | + | result.push_str(&replace_mentions(tail, valid_usernames, url_template, &MENTION_RE)); | |
| 48 | + | ||
| 49 | + | result | |
| 50 | + | } | |
| 51 | + | ||
| 52 | + | fn replace_mentions( | |
| 53 | + | text: &str, | |
| 54 | + | valid_usernames: &HashSet<String>, | |
| 55 | + | url_template: &str, | |
| 56 | + | re: ®ex_lite::Regex, | |
| 57 | + | ) -> String { | |
| 58 | + | re.replace_all(text, |caps: ®ex_lite::Captures| { | |
| 59 | + | let username = &caps[1]; | |
| 60 | + | if valid_usernames.contains(username) { | |
| 61 | + | let url = url_template.replace("{username}", username); | |
| 62 | + | format!("[@{username}]({url})") | |
| 63 | + | } else { | |
| 64 | + | caps[0].to_string() | |
| 65 | + | } | |
| 66 | + | }) | |
| 67 | + | .to_string() | |
| 68 | + | } | |
| 69 | + | ||
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds the set of known usernames from string literals.
    fn known(names: &[&str]) -> HashSet<String> {
        names.iter().map(|name| (*name).to_owned()).collect()
    }

    #[test]
    fn extract_basic() {
        assert_eq!(extract_mentions("Hello @alice and @bob!"), ["alice", "bob"]);
    }

    #[test]
    fn extract_deduplicates() {
        assert_eq!(extract_mentions("@alice said @alice agrees"), ["alice"]);
    }

    #[test]
    fn extract_skips_inline_code() {
        assert_eq!(extract_mentions("Hello `@notreal` and @real"), ["real"]);
    }

    #[test]
    fn extract_skips_fenced_code() {
        assert_eq!(extract_mentions("text\n```\n@inside\n```\n@outside"), ["outside"]);
    }

    #[test]
    fn extract_empty() {
        assert!(extract_mentions("no mentions here").is_empty());
    }

    #[test]
    fn extract_with_hyphens_underscores() {
        assert_eq!(extract_mentions("@user-name @user_name"), ["user-name", "user_name"]);
    }

    #[test]
    fn resolve_valid_replaced() {
        let rendered =
            resolve_mentions("Hello @alice!", &known(&["alice"]), "/p/test-community/u/{username}");
        assert_eq!(rendered, "Hello [@alice](/p/test-community/u/alice)!");
    }

    #[test]
    fn resolve_unknown_left_alone() {
        let rendered = resolve_mentions("Hello @unknown!", &known(&[]), "/u/{username}");
        assert_eq!(rendered, "Hello @unknown!");
    }

    #[test]
    fn resolve_in_code_not_replaced() {
        let rendered = resolve_mentions("Use `@alice` in code", &known(&["alice"]), "/u/{username}");
        assert_eq!(rendered, "Use `@alice` in code");
    }

    #[test]
    fn resolve_mixed_valid_invalid() {
        let rendered =
            resolve_mentions("@alice and @unknown", &known(&["alice"]), "/p/slug/u/{username}");
        assert_eq!(rendered, "[@alice](/p/slug/u/alice) and @unknown");
    }

    #[test]
    fn resolve_custom_url_template() {
        let rendered = resolve_mentions("Hi @bob", &known(&["bob"]), "/users/{username}/profile");
        assert_eq!(rendered, "Hi [@bob](/users/bob/profile)");
    }
}
| @@ -0,0 +1,130 @@ | |||
| 1 | + | use std::collections::HashMap; | |
| 2 | + | ||
/// Quote author info for attribution rendering.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct QuoteAuthor {
    /// Handle shown after the `@` in the attribution.
    pub username: String,
    /// Human-readable name shown in the attribution.
    pub display_name: String,
    /// When `true`, the attribution says the original post was removed and
    /// the author's names are not rendered.
    pub is_removed: bool,
}
| 9 | + | ||
/// HTML-escape a string for safe interpolation into raw HTML.
///
/// Replaces `&`, `<`, `>`, `"` and `'` with their entity forms in a single
/// pass; every other character is copied through unchanged.
fn html_escape(s: &str) -> String {
    let mut escaped = String::with_capacity(s.len());
    for ch in s.chars() {
        match ch {
            '&' => escaped.push_str("&amp;"),
            '<' => escaped.push_str("&lt;"),
            '>' => escaped.push_str("&gt;"),
            '"' => escaped.push_str("&quot;"),
            '\'' => escaped.push_str("&#x27;"),
            other => escaped.push(other),
        }
    }
    escaped
}
| 18 | + | ||
| 19 | + | /// Post-process rendered HTML to replace `[quote:POST_ID:HASH]` markers with | |
| 20 | + | /// clickable author attribution. | |
| 21 | + | pub fn post_process_quotes( | |
| 22 | + | html: &str, | |
| 23 | + | quote_authors: &HashMap<uuid::Uuid, QuoteAuthor>, | |
| 24 | + | ) -> String { | |
| 25 | + | static QUOTE_RE: std::sync::LazyLock<regex_lite::Regex> = std::sync::LazyLock::new(|| { | |
| 26 | + | regex_lite::Regex::new(r"\[quote:([0-9a-f\-]{36}):([0-9a-f]{8})\]").unwrap() | |
| 27 | + | }); | |
| 28 | + | QUOTE_RE | |
| 29 | + | .replace_all(html, |caps: ®ex_lite::Captures| { | |
| 30 | + | let post_id_str = &caps[1]; | |
| 31 | + | let resolved = uuid::Uuid::parse_str(post_id_str) | |
| 32 | + | .ok() | |
| 33 | + | .and_then(|post_id| quote_authors.get(&post_id)); | |
| 34 | + | ||
| 35 | + | if let Some(author) = resolved { | |
| 36 | + | if author.is_removed { | |
| 37 | + | format!( | |
| 38 | + | "<cite class=\"quote-attribution\"><a href=\"#post-{}\">(original post removed)</a></cite>", | |
| 39 | + | post_id_str | |
| 40 | + | ) | |
| 41 | + | } else { | |
| 42 | + | format!( | |
| 43 | + | "<cite class=\"quote-attribution\"><a href=\"#post-{}\">— {} (@{})</a></cite>", | |
| 44 | + | post_id_str, | |
| 45 | + | html_escape(&author.display_name), | |
| 46 | + | html_escape(&author.username), | |
| 47 | + | ) | |
| 48 | + | } | |
| 49 | + | } else { | |
| 50 | + | caps[0].to_string() | |
| 51 | + | } | |
| 52 | + | }) | |
| 53 | + | .to_string() | |
| 54 | + | } | |
| 55 | + | ||
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a quote marker for a fresh post id together with an author map
    /// that contains exactly that post.
    fn marker_with_author(
        username: &str,
        display_name: &str,
        is_removed: bool,
    ) -> (String, HashMap<uuid::Uuid, QuoteAuthor>) {
        let post_id = uuid::Uuid::new_v4();
        let author = QuoteAuthor {
            username: username.to_string(),
            display_name: display_name.to_string(),
            is_removed,
        };
        (
            format!("[quote:{}:abcd1234]", post_id),
            HashMap::from([(post_id, author)]),
        )
    }

    #[test]
    fn replaces_quote_marker_with_attribution() {
        let (input, authors) = marker_with_author("alice", "Alice Smith", false);
        let result = post_process_quotes(&input, &authors);
        assert!(result.contains("Alice Smith"));
        assert!(result.contains("@alice"));
        assert!(result.contains("quote-attribution"));
    }

    #[test]
    fn removed_post_shows_removed_text() {
        let (input, authors) = marker_with_author("bob", "Bob", true);
        let result = post_process_quotes(&input, &authors);
        assert!(result.contains("original post removed"));
        assert!(!result.contains("Bob"));
    }

    #[test]
    fn unknown_post_id_left_unchanged() {
        let authors = HashMap::new();
        let input = "[quote:00000000-0000-0000-0000-000000000000:abcd1234]";
        let result = post_process_quotes(input, &authors);
        assert_eq!(result, input);
    }

    #[test]
    fn non_quote_text_unchanged() {
        let authors = HashMap::new();
        let input = "<p>Hello world</p>";
        let result = post_process_quotes(input, &authors);
        assert_eq!(result, input);
    }

    #[test]
    fn html_escapes_display_name() {
        let (input, authors) = marker_with_author("user", "A <B> & C", false);
        let result = post_process_quotes(&input, &authors);
        // The name must appear in escaped form only; the raw tag must not leak.
        assert!(result.contains("A &lt;B&gt; &amp; C"));
        assert!(!result.contains("<B>"));
    }
}
| @@ -0,0 +1,511 @@ | |||
| 1 | + | use pulldown_cmark::{CowStr, Event, Options, Parser, Tag, TagEnd, html}; | |
| 2 | + | ||
| 3 | + | use crate::sanitize::SanitizePreset; | |
| 4 | + | ||
/// Reports whether `url` carries a scheme outside the safe allowlist.
///
/// The allowlist is `http`, `https`, `mailto` and `ftp`, compared without
/// regard to ASCII case. A URL with no `:` at all, or whose first `:` comes
/// after a `/`, `#` or `?`, is treated as relative and therefore safe.
/// Surrounding whitespace is ignored.
fn has_dangerous_scheme(url: &str) -> bool {
    const SAFE_SCHEMES: [&str; 4] = ["http", "https", "mailto", "ftp"];

    // No colon means no scheme: a plain relative URL.
    let Some((prefix, _)) = url.trim().split_once(':') else {
        return false;
    };

    // A colon that only shows up inside the path, fragment or query does not
    // introduce a scheme.
    if prefix.contains(['/', '#', '?']) {
        return false;
    }

    !SAFE_SCHEMES
        .iter()
        .any(|safe| prefix.eq_ignore_ascii_case(safe))
}
| 24 | + | ||
/// Result of rendering markdown with metadata.
///
/// Produced by `Renderer::render_with_meta`.
#[derive(Debug, Clone)]
pub struct RenderResult {
    /// Sanitized HTML output of the render.
    pub html: String,
    /// Word count computed from the markdown input by `crate::text::word_count`.
    pub word_count: u32,
    /// Reading time derived from `word_count` by
    /// `crate::text::reading_time_minutes`.
    pub reading_time_minutes: u32,
}
| 32 | + | ||
/// Configurable markdown renderer with builder pattern.
///
/// Start from one of the preset constructors and adjust individual switches
/// with the `with_*` methods.
pub struct Renderer {
    /// Enables `Options::ENABLE_TABLES` in the parser.
    tables: bool,
    /// Enables `Options::ENABLE_STRIKETHROUGH` in the parser.
    strikethrough: bool,
    /// Enables `Options::ENABLE_FOOTNOTES` in the parser.
    footnotes: bool,
    /// Enables `Options::ENABLE_SMART_PUNCTUATION` in the parser.
    smart_punctuation: bool,
    /// Enables `Options::ENABLE_TASKLISTS` in the parser.
    tasklists: bool,
    /// Drops image start/end events; the alt text still passes through.
    strip_images: bool,
    /// Drops raw HTML events (block and inline) before rendering.
    strip_raw_html: bool,
    /// Replaces link destinations that have a dangerous scheme with `#`.
    dangerous_scheme_filter: bool,
    /// Sanitization preset applied to the rendered HTML.
    sanitize: SanitizePreset,
}
| 45 | + | ||
| 46 | + | impl Renderer { | |
| 47 | + | /// GFM features, default ammonia sanitization. Suitable for trusted content | |
| 48 | + | /// like docs and blog posts. | |
| 49 | + | pub fn permissive() -> Self { | |
| 50 | + | Self { | |
| 51 | + | tables: true, | |
| 52 | + | strikethrough: true, | |
| 53 | + | footnotes: true, | |
| 54 | + | smart_punctuation: true, | |
| 55 | + | tasklists: true, | |
| 56 | + | strip_images: false, | |
| 57 | + | strip_raw_html: false, | |
| 58 | + | dangerous_scheme_filter: false, | |
| 59 | + | sanitize: SanitizePreset::Permissive, | |
| 60 | + | } | |
| 61 | + | } | |
| 62 | + | ||
| 63 | + | /// GFM features, no images. Suitable for app text fields (descriptions, | |
| 64 | + | /// notes). | |
| 65 | + | pub fn standard() -> Self { | |
| 66 | + | Self { | |
| 67 | + | tables: true, | |
| 68 | + | strikethrough: true, | |
| 69 | + | footnotes: false, | |
| 70 | + | smart_punctuation: true, | |
| 71 | + | tasklists: true, | |
| 72 | + | strip_images: true, | |
| 73 | + | strip_raw_html: false, | |
| 74 | + | dangerous_scheme_filter: false, | |
| 75 | + | sanitize: SanitizePreset::Standard, | |
| 76 | + | } | |
| 77 | + | } | |
| 78 | + | ||
| 79 | + | /// No images, no raw HTML, dangerous scheme blocking, nofollow on links. | |
| 80 | + | /// Suitable for user-generated content (forum posts). | |
| 81 | + | pub fn strict() -> Self { | |
| 82 | + | Self { | |
| 83 | + | tables: false, | |
| 84 | + | strikethrough: false, | |
| 85 | + | footnotes: false, | |
| 86 | + | smart_punctuation: false, | |
| 87 | + | tasklists: false, | |
| 88 | + | strip_images: true, | |
| 89 | + | strip_raw_html: true, | |
| 90 | + | dangerous_scheme_filter: true, | |
| 91 | + | sanitize: SanitizePreset::Strict, | |
| 92 | + | } | |
| 93 | + | } | |
| 94 | + | ||
| 95 | + | /// No markdown parsing, just ammonia sanitization. Suitable for HTML from | |
| 96 | + | /// external sources (RSS feeds). | |
| 97 | + | pub fn sanitize_only() -> Self { | |
| 98 | + | Self { | |
| 99 | + | tables: false, | |
| 100 | + | strikethrough: false, | |
| 101 | + | footnotes: false, | |
| 102 | + | smart_punctuation: false, | |
| 103 | + | tasklists: false, | |
| 104 | + | strip_images: false, | |
| 105 | + | strip_raw_html: false, | |
| 106 | + | dangerous_scheme_filter: false, | |
| 107 | + | sanitize: SanitizePreset::Permissive, | |
| 108 | + | } | |
| 109 | + | } | |
| 110 | + | ||
| 111 | + | pub fn with_tables(mut self, enabled: bool) -> Self { | |
| 112 | + | self.tables = enabled; | |
| 113 | + | self | |
| 114 | + | } | |
| 115 | + | ||
| 116 | + | pub fn with_strikethrough(mut self, enabled: bool) -> Self { | |
| 117 | + | self.strikethrough = enabled; | |
| 118 | + | self | |
| 119 | + | } | |
| 120 | + | ||
| 121 | + | pub fn with_footnotes(mut self, enabled: bool) -> Self { | |
| 122 | + | self.footnotes = enabled; | |
| 123 | + | self | |
| 124 | + | } | |
| 125 | + | ||
| 126 | + | pub fn with_smart_punctuation(mut self, enabled: bool) -> Self { | |
| 127 | + | self.smart_punctuation = enabled; | |
| 128 | + | self | |
| 129 | + | } | |
| 130 | + | ||
| 131 | + | pub fn with_tasklists(mut self, enabled: bool) -> Self { | |
| 132 | + | self.tasklists = enabled; | |
| 133 | + | self | |
| 134 | + | } | |
| 135 | + | ||
| 136 | + | pub fn with_strip_images(mut self, enabled: bool) -> Self { | |
| 137 | + | self.strip_images = enabled; | |
| 138 | + | self | |
| 139 | + | } | |
| 140 | + | ||
| 141 | + | pub fn with_strip_raw_html(mut self, enabled: bool) -> Self { | |
| 142 | + | self.strip_raw_html = enabled; | |
| 143 | + | self | |
| 144 | + | } | |
| 145 | + | ||
| 146 | + | pub fn with_dangerous_scheme_filter(mut self, enabled: bool) -> Self { | |
| 147 | + | self.dangerous_scheme_filter = enabled; | |
| 148 | + | self | |
| 149 | + | } | |
| 150 | + | ||
| 151 | + | pub fn with_sanitize(mut self, preset: SanitizePreset) -> Self { | |
| 152 | + | self.sanitize = preset; | |
| 153 | + | self | |
| 154 | + | } | |
| 155 | + | ||
| 156 | + | fn build_options(&self) -> Options { | |
| 157 | + | let mut opts = Options::empty(); | |
| 158 | + | if self.tables { | |
| 159 | + | opts.insert(Options::ENABLE_TABLES); | |
| 160 | + | } | |
| 161 | + | if self.strikethrough { | |
| 162 | + | opts.insert(Options::ENABLE_STRIKETHROUGH); | |
| 163 | + | } | |
| 164 | + | if self.footnotes { | |
| 165 | + | opts.insert(Options::ENABLE_FOOTNOTES); | |
| 166 | + | } | |
| 167 | + | if self.smart_punctuation { | |
| 168 | + | opts.insert(Options::ENABLE_SMART_PUNCTUATION); | |
| 169 | + | } | |
| 170 | + | if self.tasklists { | |
| 171 | + | opts.insert(Options::ENABLE_TASKLISTS); | |
| 172 | + | } | |
| 173 | + | opts | |
| 174 | + | } | |
| 175 | + | ||
| 176 | + | /// Render markdown to sanitized HTML. | |
| 177 | + | pub fn render(&self, input: &str) -> String { | |
| 178 | + | if input.is_empty() { | |
| 179 | + | return String::new(); | |
| 180 | + | } | |
| 181 | + | let html_output = self.render_raw(input); | |
| 182 | + | self.sanitize.clean(&html_output) | |
| 183 | + | } | |
| 184 | + | ||
| 185 | + | /// Render markdown to sanitized HTML with metadata. | |
| 186 | + | pub fn render_with_meta(&self, input: &str) -> RenderResult { | |
| 187 | + | let html = self.render(input); | |
| 188 | + | let wc = crate::text::word_count(input); | |
| 189 | + | RenderResult { | |
| 190 | + | html, | |
| 191 | + | word_count: wc, | |
| 192 | + | reading_time_minutes: crate::text::reading_time_minutes(wc), | |
| 193 | + | } | |
| 194 | + | } | |
| 195 | + | ||
| 196 | + | /// Sanitize pre-rendered HTML without markdown parsing. | |
| 197 | + | pub fn sanitize_html(&self, html: &str) -> String { | |
| 198 | + | self.sanitize.clean(html) | |
| 199 | + | } | |
| 200 | + | ||
| 201 | + | fn render_raw(&self, input: &str) -> String { | |
| 202 | + | let options = self.build_options(); | |
| 203 | + | let parser = Parser::new_ext(input, options); | |
| 204 | + | ||
| 205 | + | let strip_images = self.strip_images; | |
| 206 | + | let strip_raw_html = self.strip_raw_html; | |
| 207 | + | let scheme_filter = self.dangerous_scheme_filter; | |
| 208 | + | ||
| 209 | + | let filtered = parser.filter_map(move |event| match event { | |
| 210 | + | // Strip raw HTML events | |
| 211 | + | Event::Html(_) | Event::InlineHtml(_) if strip_raw_html => None, | |
| 212 | + | // Neutralize dangerous schemes on links | |
| 213 | + | Event::Start(Tag::Link { | |
| 214 | + | link_type, | |
| 215 | + | dest_url, | |
| 216 | + | title, | |
| 217 | + | id, | |
| 218 | + | }) if scheme_filter && has_dangerous_scheme(&dest_url) => { | |
| 219 | + | Some(Event::Start(Tag::Link { | |
| 220 | + | link_type, | |
| 221 | + | dest_url: CowStr::Borrowed("#"), | |
| 222 | + | title, | |
| 223 | + | id, | |
| 224 | + | })) | |
| 225 | + | } | |
| 226 | + | // Strip images entirely (alt text passes through as plain text) | |
| 227 | + | Event::Start(Tag::Image { .. }) | Event::End(TagEnd::Image) if strip_images => None, | |
| 228 | + | other => Some(other), | |
| 229 | + | }); | |
| 230 | + | ||
| 231 | + | let mut output = String::new(); | |
| 232 | + | html::push_html(&mut output, filtered); | |
| 233 | + | output | |
| 234 | + | } | |
| 235 | + | } | |
| 236 | + | ||
| 237 | + | #[cfg(test)] | |
| 238 | + | mod tests { | |
| 239 | + | use super::*; | |
| 240 | + | ||
| 241 | + | // ===== has_dangerous_scheme ===== | |
| 242 | + | ||
/// Ordinary web, mail and ftp URLs must not be flagged.
#[test]
fn safe_schemes() {
    let harmless = [
        "https://example.com",
        "http://example.com",
        "mailto:user@example.com",
        "ftp://files.example.com",
    ];
    for url in harmless {
        assert!(!has_dangerous_scheme(url));
    }
}
| 250 | + | ||
/// Script-capable schemes must be flagged.
#[test]
fn dangerous_schemes() {
    let hostile = [
        "javascript:alert(1)",
        "data:text/html,<script>",
        "vbscript:msgbox",
    ];
    for url in hostile {
        assert!(has_dangerous_scheme(url));
    }
}
| 257 | + | ||
/// Scheme detection must ignore letter case.
#[test]
fn case_insensitive_schemes() {
    for url in ["JaVaScRiPt:alert(1)", "DATA:text/html,x"] {
        assert!(has_dangerous_scheme(url));
    }
}
| 263 | + | ||
/// Relative references, including one with a colon after a path separator,
/// must not be flagged.
#[test]
fn relative_urls_are_safe() {
    let relative = ["/about", "#heading", "page.html", "path/to:file"];
    for url in relative {
        assert!(!has_dangerous_scheme(url));
    }
}
| 271 | + | ||
| 272 | + | // ===== Permissive preset ===== | |
| 273 | + | ||
/// Headings and strong emphasis render under the permissive preset.
#[test]
fn permissive_basic_markdown() {
    let rendered = Renderer::permissive().render("# Hello\n\nThis is a **test**.");
    for fragment in ["<h1>Hello</h1>", "<strong>test</strong>"] {
        assert!(rendered.contains(fragment));
    }
}
| 281 | + | ||
/// Pipe tables render under the permissive preset.
#[test]
fn permissive_tables() {
    let rendered = Renderer::permissive().render("| A | B |\n|---|---|\n| 1 | 2 |");
    for fragment in ["<table>", "<td>"] {
        assert!(rendered.contains(fragment));
    }
}
| 289 | + | ||
/// The rendered output contains some form of double quote.
// NOTE(review): the plain `"` alternative is also satisfied by unconverted
// straight quotes, so this may pass without smart punctuation — confirm intent.
#[test]
fn permissive_smart_punctuation() {
    let rendered = Renderer::permissive().render("It's a \"test\"");
    let has_left_curly = rendered.contains('\u{201c}');
    let has_right_curly = rendered.contains('\u{201d}');
    let has_straight = rendered.contains("\"");
    assert!(has_left_curly || has_right_curly || has_straight);
}
| 298 | + | ||
/// Even the permissive preset must not let `<script>` through.
#[test]
fn permissive_strips_script() {
    let rendered = Renderer::permissive().render("Hello <script>alert('xss')</script> world");
    assert!(!rendered.contains("<script>"));
}
| 305 | + | ||
/// The permissive preset keeps markdown images as `<img>` elements.
#[test]
fn permissive_keeps_images() {
    let r = Renderer::permissive();
    // The input must be real markdown image syntax: an empty string renders
    // to an empty string and could never contain an `<img>` tag.
    let html = r.render("![alt text](https://example.com/image.png)");
    assert!(html.contains("<img"));
}
| 312 | + | ||
/// Empty input renders to an empty string.
#[test]
fn permissive_empty_input() {
    let rendered = Renderer::permissive().render("");
    assert_eq!(rendered, "");
}
| 317 | + | ||
| 318 | + | // ===== Standard preset ===== | |
| 319 | + | ||
/// The standard preset removes the `<img>` element but keeps its alt text.
#[test]
fn standard_strips_images() {
    let r = Renderer::standard();
    // The input must be a markdown image whose alt text is "alt text";
    // an empty input could never yield the text asserted below.
    let html = r.render("![alt text](https://example.com/image.png)");
    assert!(!html.contains("<img"));
    assert!(html.contains("alt text"));
}
| 327 | + | ||
/// Pipe tables still render under the standard preset.
#[test]
fn standard_keeps_tables() {
    let rendered = Renderer::standard().render("| A |\n|---|\n| 1 |");
    assert!(rendered.contains("<table>"));
}
| 334 | + | ||
| 335 | + | // ===== Strict preset ===== | |
| 336 | + | ||
/// Raw HTML blocks are removed entirely under the strict preset.
#[test]
fn strict_strips_raw_html() {
    let rendered = Renderer::strict().render("<script>alert('xss')</script>");
    for forbidden in ["<script>", "</script>"] {
        assert!(!rendered.contains(forbidden));
    }
}
| 344 | + | ||
/// Inline HTML tags are removed while the surrounding text is kept.
#[test]
fn strict_strips_inline_html() {
    let rendered = Renderer::strict().render("hello <b>bold</b> world");
    assert!(!rendered.contains("<b>"));
    for kept in ["hello", "world"] {
        assert!(rendered.contains(kept));
    }
}
| 353 | + | ||
/// The strict preset removes the `<img>` element but keeps its alt text.
#[test]
fn strict_strips_images() {
    let r = Renderer::strict();
    // The input must be a markdown image whose alt text is "alt text";
    // an empty input could never yield the text asserted below.
    let html = r.render("![alt text](https://example.com/image.png)");
    assert!(!html.contains("<img"));
    assert!(html.contains("alt text"));
}
| 361 | + | ||
/// A `javascript:` link keeps its text but its destination becomes `#`.
#[test]
fn strict_neutralizes_javascript_urls() {
    let rendered = Renderer::strict().render("[click me](javascript:alert(1))");
    let keeps_text = rendered.contains("click me");
    let leaks_scheme = rendered.contains("javascript:");
    let points_to_hash = rendered.contains(r##"href="#""##);
    assert!(keeps_text);
    assert!(!leaks_scheme);
    assert!(points_to_hash);
}
| 370 | + | ||
/// Mixed-case `javascript:` destinations are neutralized as well.
#[test]
fn strict_neutralizes_case_insensitive() {
    let rendered = Renderer::strict().render("[xss](JaVaScRiPt:alert(1))");
    for forbidden in ["javascript:", "JaVaScRiPt:"] {
        assert!(!rendered.contains(forbidden));
    }
}
| 378 | + | ||
/// `data:` destinations do not survive strict rendering.
#[test]
fn strict_neutralizes_data_urls() {
    let markdown = "[xss](data:text/html,<script>alert(1)</script>)";
    let rendered = Renderer::strict().render(markdown);
    assert!(!rendered.contains("data:text"));
}
| 385 | + | ||
/// `vbscript:` destinations do not survive strict rendering.
#[test]
fn strict_neutralizes_vbscript() {
    let rendered = Renderer::strict().render("[xss](vbscript:msgbox)");
    assert!(!rendered.contains("vbscript:"));
}
| 392 | + | ||
/// Safe absolute destinations are emitted unchanged.
#[test]
fn strict_preserves_safe_urls() {
    let renderer = Renderer::strict();
    let cases = [
        ("[link](https://example.com)", r#"href="https://example.com""#),
        (
            "[mail](mailto:user@example.com)",
            r#"href="mailto:user@example.com""#,
        ),
    ];
    for (markdown, expected_attr) in cases {
        assert!(renderer.render(markdown).contains(expected_attr));
    }
}
| 402 | + | ||
/// Relative paths and fragment links are emitted unchanged.
#[test]
fn strict_preserves_relative_urls() {
    let renderer = Renderer::strict();
    let cases = [
        ("[page](/about)", r#"href="/about""#),
        ("[section](#heading)", r##"href="#heading""##),
    ];
    for (markdown, expected_attr) in cases {
        assert!(renderer.render(markdown).contains(expected_attr));
    }
}
| 412 | + | ||
/// Links rendered by the strict preset carry `nofollow` and `noopener`.
#[test]
fn strict_links_have_nofollow() {
    let html = Renderer::strict().render("[example](https://example.com)");
    for token in ["nofollow", "noopener"] {
        assert!(result_has_rel(&html, token));
    }
}
| 420 | + | ||
/// Strong and regular emphasis render under the strict preset.
#[test]
fn strict_bold_italic() {
    let rendered = Renderer::strict().render("**bold** and *italic*");
    for fragment in ["<strong>bold</strong>", "<em>italic</em>"] {
        assert!(rendered.contains(fragment));
    }
}
| 428 | + | ||
/// Inline code spans render as `<code>` elements.
#[test]
fn strict_inline_code() {
    let rendered = Renderer::strict().render("use `foo()` here");
    assert!(rendered.contains("<code>foo()</code>"));
}
| 435 | + | ||
/// Fenced code blocks render as `<pre><code>` with their content intact.
#[test]
fn strict_code_block() {
    let rendered = Renderer::strict().render("```\nlet x = 1;\n```");
    for fragment in ["<pre><code>", "let x = 1;"] {
        assert!(rendered.contains(fragment));
    }
}
| 443 | + | ||
/// Block quotes render as `<blockquote>` with their text intact.
#[test]
fn strict_blockquote() {
    let rendered = Renderer::strict().render("> quoted text");
    for fragment in ["<blockquote>", "quoted text"] {
        assert!(rendered.contains(fragment));
    }
}
| 451 | + | ||
/// Bullet lists render as `<ul>` with `<li>` items.
#[test]
fn strict_unordered_list() {
    let rendered = Renderer::strict().render("- item one\n- item two");
    for fragment in ["<ul>", "<li>item one</li>"] {
        assert!(rendered.contains(fragment));
    }
}
| 459 | + | ||
/// Second-level headings render as `<h2>`.
#[test]
fn strict_heading() {
    let rendered = Renderer::strict().render("## Section Title");
    assert!(rendered.contains("<h2>Section Title</h2>"));
}
| 466 | + | ||
/// Plain text becomes a single paragraph followed by a newline.
#[test]
fn strict_plain_text() {
    let rendered = Renderer::strict().render("hello world");
    assert_eq!(rendered, "<p>hello world</p>\n");
}
| 472 | + | ||
/// Empty input renders to an empty string under the strict preset.
#[test]
fn strict_empty_input() {
    let rendered = Renderer::strict().render("");
    assert_eq!(rendered, "");
}
| 477 | + | ||
| 478 | + | // ===== Sanitize-only preset ===== | |
| 479 | + | ||
/// `sanitize_html` keeps safe markup and drops `<script>`.
#[test]
fn sanitize_only_cleans_html() {
    let cleaned = Renderer::sanitize_only().sanitize_html("<p>Hello</p><script>bad</script>");
    let keeps_paragraph = cleaned.contains("<p>Hello</p>");
    let keeps_script = cleaned.contains("<script>");
    assert!(keeps_paragraph);
    assert!(!keeps_script);
}
| 487 | + | ||
| 488 | + | // ===== Builder methods ===== | |
| 489 | + | ||
/// `with_strip_images(false)` overrides the strict preset's image stripping.
#[test]
fn builder_override() {
    let r = Renderer::strict().with_strip_images(false);
    // The input must be real markdown image syntax: an empty string renders
    // to an empty string and could never contain an `<img>` tag.
    let html = r.render("![alt text](https://example.com/image.png)");
    assert!(html.contains("<img"));
}
| 496 | + | ||
| 497 | + | // ===== render_with_meta ===== | |
| 498 | + | ||
| 499 | + | #[test] | |
| 500 | + | fn render_with_meta_includes_counts() { |
Lines truncated
| @@ -0,0 +1,79 @@ | |||
/// Selects how rendered HTML is cleaned by ammonia.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum SanitizePreset {
    /// Cleans with ammonia's default configuration.
    Permissive,
    /// Cleans with ammonia's default configuration (identical to
    /// [`SanitizePreset::Permissive`]).
    Standard,
    /// Default configuration plus `rel="noopener noreferrer nofollow"` on
    /// links.
    Strict,
    /// Restricts the allowed tags to `p`, `em`, `strong`, `code`, `br` and
    /// `pre`.
    Minimal,
}
| 13 | + | ||
| 14 | + | impl SanitizePreset { | |
| 15 | + | pub(crate) fn clean(&self, html: &str) -> String { | |
| 16 | + | match self { | |
| 17 | + | SanitizePreset::Permissive | SanitizePreset::Standard => ammonia::clean(html), | |
| 18 | + | SanitizePreset::Strict => ammonia::Builder::default() | |
| 19 | + | .link_rel(Some("noopener noreferrer nofollow")) | |
| 20 | + | .clean(html) | |
| 21 | + | .to_string(), | |
| 22 | + | SanitizePreset::Minimal => { | |
| 23 | + | let tags: std::collections::HashSet<&str> = | |
| 24 | + | ["p", "em", "strong", "code", "br", "pre"] | |
| 25 | + | .iter() | |
| 26 | + | .copied() | |
| 27 | + | .collect(); | |
| 28 | + | ammonia::Builder::default() | |
| 29 | + | .tags(tags) | |
| 30 | + | .clean(html) | |
| 31 | + | .to_string() | |
| 32 | + | } | |
| 33 | + | } | |
| 34 | + | } | |
| 35 | + | } | |
| 36 | + | ||
#[cfg(test)]
mod tests {
    use super::*;

    /// Safe inline markup survives the permissive preset.
    #[test]
    fn permissive_allows_safe_html() {
        let cleaned = SanitizePreset::Permissive.clean("<p>Hello <strong>world</strong></p>");
        assert!(cleaned.contains("<strong>world</strong>"));
    }

    /// `<script>` is removed even by the permissive preset.
    #[test]
    fn permissive_strips_script() {
        let cleaned = SanitizePreset::Permissive.clean("<p>Hello</p><script>alert(1)</script>");
        assert!(!cleaned.contains("<script>"));
    }

    /// The strict preset adds the link-hardening `rel` tokens.
    #[test]
    fn strict_adds_nofollow() {
        let cleaned = SanitizePreset::Strict.clean(r#"<a href="https://example.com">link</a>"#);
        for token in ["nofollow", "noopener"] {
            assert!(cleaned.contains(token));
        }
    }

    /// The minimal preset drops disallowed tags but keeps their text.
    #[test]
    fn minimal_strips_extra_tags() {
        let cleaned =
            SanitizePreset::Minimal.clean(r#"<p>Hello <a href="x">link</a> <em>italic</em></p>"#);
        let keeps_em = cleaned.contains("<em>italic</em>");
        let keeps_anchor_tag = cleaned.contains("<a");
        let keeps_anchor_text = cleaned.contains("link");
        assert!(keeps_em);
        assert!(!keeps_anchor_tag);
        assert!(keeps_anchor_text);
    }

    /// Code-related tags are on the minimal allow-list.
    #[test]
    fn minimal_keeps_code() {
        let cleaned = SanitizePreset::Minimal.clean("<pre><code>fn main()</code></pre>");
        for tag in ["<code>", "<pre>"] {
            assert!(cleaned.contains(tag));
        }
    }
}
| @@ -0,0 +1,131 @@ | |||
/// Count the whitespace-separated words in `text`.
///
/// The count saturates at `u32::MAX` instead of silently wrapping when the
/// input contains more words than a `u32` can represent.
pub fn word_count(text: &str) -> u32 {
    let words = text.split_whitespace().count();
    // After clamping, the narrowing cast can no longer truncate.
    words.min(u32::MAX as usize) as u32
}
| 5 | + | ||
/// Estimate the reading time, in whole minutes, for `word_count` words.
///
/// Uses a reading speed of 200 words per minute and rounds any partial
/// minute up; zero words take zero minutes.
pub fn reading_time_minutes(word_count: u32) -> u32 {
    const WORDS_PER_MINUTE: u32 = 200;
    let full_minutes = word_count / WORDS_PER_MINUTE;
    if word_count % WORDS_PER_MINUTE == 0 {
        full_minutes
    } else {
        full_minutes + 1
    }
}
| 12 | + | ||
/// Return the text of a leading `# Heading`, if the document starts with one.
///
/// Blank lines and `---` lines are skipped. The first remaining line decides
/// the result: if it begins with `# ` the trimmed heading text is returned,
/// otherwise `None`.
pub fn extract_title(markdown: &str) -> Option<String> {
    markdown
        .lines()
        .map(str::trim)
        // The first line that is neither blank nor a `---` rule.
        .find(|line| !line.is_empty() && *line != "---")
        .and_then(|line| line.strip_prefix("# "))
        .map(|title| title.trim().to_string())
}
| 26 | + | ||
/// Remove the first `# Heading` line so templates can render it separately.
///
/// Only the first line whose trimmed form starts with `# ` is removed; every
/// other line is kept. Lines inside fenced code blocks (opened by a line
/// starting with three backticks or three tildes) are never treated as
/// headings, so `# comment` lines in code samples are left alone.
///
/// The result is rebuilt by joining the remaining lines with `\n`.
pub fn strip_first_heading(markdown: &str) -> String {
    const FENCE_MARKERS: [&str; 2] = ["```", "~~~"];

    let mut removed = false;
    // Marker of the fence we are currently inside, if any.
    let mut open_fence: Option<&str> = None;

    markdown
        .lines()
        .filter(|line| {
            if removed {
                return true;
            }
            let trimmed = line.trim();
            if let Some(marker) = open_fence {
                // A line starting with the same marker closes the fence.
                if trimmed.starts_with(marker) {
                    open_fence = None;
                }
                return true;
            }
            let opening = FENCE_MARKERS
                .iter()
                .copied()
                .find(|marker| trimmed.starts_with(*marker));
            if let Some(marker) = opening {
                open_fence = Some(marker);
                return true;
            }
            if trimmed.starts_with("# ") {
                removed = true;
                return false;
            }
            true
        })
        .collect::<Vec<_>>()
        .join("\n")
}
| 45 | + | ||
#[cfg(test)]
mod tests {
    use super::*;

    /// Words are counted per whitespace-separated token.
    #[test]
    fn word_count_basic() {
        let cases = [("Hello world", 2), ("One two three four five", 5), ("", 0)];
        for (text, expected) in cases {
            assert_eq!(word_count(text), expected);
        }
    }

    /// Runs of mixed whitespace separate words without adding any.
    #[test]
    fn word_count_whitespace() {
        let cases = [("  spaced   out  ", 2), ("\ttabbed\nlines", 2)];
        for (text, expected) in cases {
            assert_eq!(word_count(text), expected);
        }
    }

    /// Exact multiples, a partial minute and zero words.
    #[test]
    fn reading_time_basic() {
        let cases = [(200, 1), (400, 2), (250, 2), (0, 0)];
        for (words, minutes) in cases {
            assert_eq!(reading_time_minutes(words), minutes);
        }
    }

    /// Any partial minute counts as a full one.
    #[test]
    fn reading_time_rounds_up() {
        let cases = [(1, 1), (201, 2)];
        for (words, minutes) in cases {
            assert_eq!(reading_time_minutes(words), minutes);
        }
    }

    /// A leading H1 is returned without its marker.
    #[test]
    fn extract_title_basic() {
        let title = extract_title("# Hello World\n\nBody");
        assert_eq!(title.as_deref(), Some("Hello World"));
    }

    /// Blank lines before the heading are ignored.
    #[test]
    fn extract_title_with_leading_blank_lines() {
        let title = extract_title("\n# Title\n");
        assert_eq!(title.as_deref(), Some("Title"));
    }

    /// Documents without a heading have no title.
    #[test]
    fn extract_title_none_when_missing() {
        assert!(extract_title("No heading here").is_none());
    }

    /// Second-level headings do not count as a title.
    #[test]
    fn extract_title_ignores_h2() {
        assert!(extract_title("## Not H1").is_none());
    }

    /// A `---` line before the heading is skipped.
    #[test]
    fn extract_title_skips_horizontal_rules() {
        let title = extract_title("---\n# After Rule");
        assert_eq!(title.as_deref(), Some("After Rule"));
    }

    /// The H1 goes away; body text and subheadings stay.
    #[test]
    fn strip_first_heading_removes_h1() {
        let remaining = strip_first_heading("# Title\n\nBody text\n## Subheading");
        assert!(!remaining.contains("# Title"));
        for kept in ["Body text", "## Subheading"] {
            assert!(remaining.contains(kept));
        }
    }

    /// A second H1 is left in place.
    #[test]
    fn strip_first_heading_only_removes_first() {
        let remaining = strip_first_heading("# First\n\n# Second");
        assert!(!remaining.contains("# First"));
        assert!(remaining.contains("# Second"));
    }

    /// Input without an H1 comes back unchanged.
    #[test]
    fn strip_first_heading_no_h1() {
        let source = "## Only H2\n\nBody";
        assert_eq!(strip_first_heading(source), source);
    }
}
| @@ -0,0 +1,180 @@ | |||
| 1 | + | use pulldown_cmark::{Event, Options, Parser, Tag, TagEnd}; | |
| 2 | + | ||
/// One heading captured for the table of contents.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct TocEntry {
    /// Heading level as a number.
    pub level: u8,
    /// Text collected from the heading.
    pub text: String,
    /// Anchor slug used as the link target for this heading.
    pub anchor: String,
}
| 10 | + | ||
| 11 | + | /// Extract a table of contents from markdown headings. | |
| 12 | + | pub fn extract_toc(markdown: &str) -> Vec<TocEntry> { | |
| 13 | + | let mut options = Options::empty(); | |
| 14 | + | options.insert(Options::ENABLE_TABLES); | |
| 15 | + | options.insert(Options::ENABLE_STRIKETHROUGH); | |
| 16 | + | ||
| 17 | + | let parser = Parser::new_ext(markdown, options); | |
| 18 | + | let mut entries = Vec::new(); | |
| 19 | + | let mut in_heading: Option<u8> = None; | |
| 20 | + | let mut heading_text = String::new(); | |
| 21 | + | ||
| 22 | + | for event in parser { | |
| 23 | + | match event { | |
| 24 | + | Event::Start(Tag::Heading { level, .. }) => { | |
| 25 | + | in_heading = Some(level as u8); | |
| 26 | + | heading_text.clear(); | |
| 27 | + | } | |
| 28 | + | Event::Text(text) if in_heading.is_some() => { | |
| 29 | + | heading_text.push_str(&text); | |
| 30 | + | } | |
| 31 | + | Event::Code(code) if in_heading.is_some() => { | |
| 32 | + | heading_text.push_str(&code); | |
| 33 | + | } | |
| 34 | + | Event::End(TagEnd::Heading(_)) => { | |
| 35 | + | if let Some(level) = in_heading.take() { | |
| 36 | + | let anchor = make_anchor(&heading_text); | |
| 37 | + | entries.push(TocEntry { | |
| 38 | + | level, | |
| 39 | + | text: heading_text.clone(), | |
| 40 | + | anchor, | |
| 41 | + | }); | |
| 42 | + | } | |
| 43 | + | } | |
| 44 | + | _ => {} | |
| 45 | + | } | |
| 46 | + | } | |
| 47 | + | entries | |
| 48 | + | } | |
| 49 | + | ||
| 50 | + | /// Render TOC entries as an HTML nested list. | |
| 51 | + | pub fn render_toc_html(entries: &[TocEntry]) -> String { | |
| 52 | + | if entries.is_empty() { | |
| 53 | + | return String::new(); | |
| 54 | + | } | |
| 55 | + | let mut html = String::from("<nav class=\"toc\"><ul>\n"); | |
| 56 | + | for entry in entries { | |
| 57 | + | html.push_str(&format!( | |
| 58 | + | "<li class=\"toc-h{}\"><a href=\"#{}\">{}</a></li>\n", | |
| 59 | + | entry.level, | |
| 60 | + | html_escape_attr(&entry.anchor), | |
| 61 | + | html_escape(&entry.text), | |
| 62 | + | )); | |
| 63 | + | } | |
| 64 | + | html.push_str("</ul></nav>"); | |
| 65 | + | html | |
| 66 | + | } | |
| 67 | + | ||
/// Build a GitHub-style anchor slug from heading text.
///
/// The text is lowercased, each space becomes a hyphen, and every character
/// that is neither alphanumeric nor a hyphen is dropped.
fn make_anchor(text: &str) -> String {
    let mut anchor = String::new();
    for c in text.to_lowercase().chars() {
        match c {
            ' ' | '-' => anchor.push('-'),
            c if c.is_alphanumeric() => anchor.push(c),
            _ => {}
        }
    }
    anchor
}
| 77 | + | ||
/// Escape `s` for use as HTML text content.
///
/// Replaces `&`, `<` and `>` with their character entities; every other
/// character is copied unchanged.
fn html_escape(s: &str) -> String {
    let mut escaped = String::with_capacity(s.len());
    for c in s.chars() {
        match c {
            '&' => escaped.push_str("&amp;"),
            '<' => escaped.push_str("&lt;"),
            '>' => escaped.push_str("&gt;"),
            other => escaped.push(other),
        }
    }
    escaped
}
| 83 | + | ||
/// Escape `s` for use inside a double-quoted HTML attribute value.
///
/// Replaces `&`, `"`, `<` and `>` with their character entities; every other
/// character is copied unchanged.
fn html_escape_attr(s: &str) -> String {
    let mut escaped = String::with_capacity(s.len());
    for c in s.chars() {
        match c {
            '&' => escaped.push_str("&amp;"),
            '"' => escaped.push_str("&quot;"),
            '<' => escaped.push_str("&lt;"),
            '>' => escaped.push_str("&gt;"),
            other => escaped.push(other),
        }
    }
    escaped
}
| 90 | + | ||
#[cfg(test)]
mod tests {
    use super::*;

    /// Headings are collected in order with level, text and anchor.
    #[test]
    fn extract_basic_headings() {
        let md = "# Title\n\n## Section One\n\nBody\n\n## Section Two\n\nMore";
        let toc = extract_toc(md);
        assert_eq!(toc.len(), 3);
        assert_eq!(toc[0].level, 1);
        assert_eq!(toc[0].text, "Title");
        assert_eq!(toc[0].anchor, "title");
        assert_eq!(toc[1].level, 2);
        assert_eq!(toc[1].text, "Section One");
        assert_eq!(toc[1].anchor, "section-one");
        assert_eq!(toc[2].level, 2);
        assert_eq!(toc[2].text, "Section Two");
    }

    /// Punctuation is dropped and spaces become hyphens.
    #[test]
    fn anchor_strips_special_chars() {
        assert_eq!(make_anchor("Hello, World!"), "hello-world");
        assert_eq!(make_anchor("C++ & Rust"), "c--rust");
        assert_eq!(make_anchor("Version 2.0"), "version-20");
    }

    /// Documents without headings produce no entries.
    #[test]
    fn extract_empty() {
        let toc = extract_toc("No headings here, just text.");
        assert!(toc.is_empty());
    }

    /// Each heading level is reported as its number.
    #[test]
    fn extract_nested_levels() {
        let md = "# H1\n## H2\n### H3\n#### H4";
        let toc = extract_toc(md);
        assert_eq!(toc.len(), 4);
        assert_eq!(toc[0].level, 1);
        assert_eq!(toc[1].level, 2);
        assert_eq!(toc[2].level, 3);
        assert_eq!(toc[3].level, 4);
    }

    /// Inline code inside a heading contributes its text.
    #[test]
    fn heading_with_inline_code() {
        let md = "## Using `render()` function";
        let toc = extract_toc(md);
        assert_eq!(toc.len(), 1);
        assert_eq!(toc[0].text, "Using render() function");
        assert_eq!(toc[0].anchor, "using-render-function");
    }

    /// The rendered list carries level classes and anchor links.
    #[test]
    fn render_toc_html_basic() {
        let entries = vec![
            TocEntry {
                level: 1,
                text: "Title".to_string(),
                anchor: "title".to_string(),
            },
            TocEntry {
                level: 2,
                text: "Section".to_string(),
                anchor: "section".to_string(),
            },
        ];
        let html = render_toc_html(&entries);
        assert!(html.contains("<nav class=\"toc\">"));
        assert!(html.contains("toc-h1"));
        assert!(html.contains("toc-h2"));
        assert!(html.contains(r##"href="#title""##));
        assert!(html.contains(r##"href="#section""##));
    }

    /// No entries means no markup at all.
    #[test]
    fn render_toc_empty() {
        assert_eq!(render_toc_html(&[]), "");
    }

    /// Entry text must appear entity-escaped, never as raw markup.
    #[test]
    fn toc_escapes_html_in_text() {
        let entries = vec![TocEntry {
            level: 2,
            text: "A & B <C>".to_string(),
            anchor: "a--b-c".to_string(),
        }];
        let html = render_toc_html(&entries);
        assert!(html.contains("A &amp; B &lt;C&gt;"));
        assert!(!html.contains("<C>"));
    }
}