max / docengine
14 files changed,
+1203 insertions,
-38 deletions
| @@ -92,6 +92,7 @@ dependencies = [ | |||
| 92 | 92 | "regex-lite", | |
| 93 | 93 | "serde", | |
| 94 | 94 | "toml", | |
| 95 | + | "tracing", | |
| 95 | 96 | "uuid", | |
| 96 | 97 | ] | |
| 97 | 98 | ||
| @@ -498,6 +499,12 @@ dependencies = [ | |||
| 498 | 499 | ] | |
| 499 | 500 | ||
| 500 | 501 | [[package]] | |
| 502 | + | name = "pin-project-lite" | |
| 503 | + | version = "0.2.17" | |
| 504 | + | source = "registry+https://github.com/rust-lang/crates.io-index" | |
| 505 | + | checksum = "a89322df9ebe1c1578d689c92318e070967d1042b512afbe49518723f4e6d5cd" | |
| 506 | + | ||
| 507 | + | [[package]] | |
| 501 | 508 | name = "potential_utf" | |
| 502 | 509 | version = "0.1.4" | |
| 503 | 510 | source = "registry+https://github.com/rust-lang/crates.io-index" | |
| @@ -822,6 +829,37 @@ source = "registry+https://github.com/rust-lang/crates.io-index" | |||
| 822 | 829 | checksum = "5d99f8c9a7727884afe522e9bd5edbfc91a3312b36a77b5fb8926e4c31a41801" | |
| 823 | 830 | ||
| 824 | 831 | [[package]] | |
| 832 | + | name = "tracing" | |
| 833 | + | version = "0.1.44" | |
| 834 | + | source = "registry+https://github.com/rust-lang/crates.io-index" | |
| 835 | + | checksum = "63e71662fa4b2a2c3a26f570f037eb95bb1f85397f3cd8076caed2f026a6d100" | |
| 836 | + | dependencies = [ | |
| 837 | + | "pin-project-lite", | |
| 838 | + | "tracing-attributes", | |
| 839 | + | "tracing-core", | |
| 840 | + | ] | |
| 841 | + | ||
| 842 | + | [[package]] | |
| 843 | + | name = "tracing-attributes" | |
| 844 | + | version = "0.1.31" | |
| 845 | + | source = "registry+https://github.com/rust-lang/crates.io-index" | |
| 846 | + | checksum = "7490cfa5ec963746568740651ac6781f701c9c5ea257c58e057f3ba8cf69e8da" | |
| 847 | + | dependencies = [ | |
| 848 | + | "proc-macro2", | |
| 849 | + | "quote", | |
| 850 | + | "syn", | |
| 851 | + | ] | |
| 852 | + | ||
| 853 | + | [[package]] | |
| 854 | + | name = "tracing-core" | |
| 855 | + | version = "0.1.36" | |
| 856 | + | source = "registry+https://github.com/rust-lang/crates.io-index" | |
| 857 | + | checksum = "db97caf9d906fbde555dd62fa95ddba9eecfd14cb388e4f491a66d74cd5fb79a" | |
| 858 | + | dependencies = [ | |
| 859 | + | "once_cell", | |
| 860 | + | ] | |
| 861 | + | ||
| 862 | + | [[package]] | |
| 825 | 863 | name = "unicase" | |
| 826 | 864 | version = "2.9.0" | |
| 827 | 865 | source = "registry+https://github.com/rust-lang/crates.io-index" |
| @@ -5,11 +5,13 @@ edition = "2021" | |||
| 5 | 5 | ||
| 6 | 6 | [features] | |
| 7 | 7 | default = [] | |
| 8 | - | doc-loader = ["dep:regex"] | |
| 8 | + | doc-loader = ["dep:regex", "dep:tracing"] | |
| 9 | + | directives = ["dep:regex-lite"] | |
| 9 | 10 | mentions = ["dep:regex-lite"] | |
| 10 | 11 | quotes = ["dep:regex-lite", "dep:uuid"] | |
| 11 | - | frontmatter = ["dep:toml"] | |
| 12 | - | full = ["doc-loader", "mentions", "quotes", "frontmatter"] | |
| 12 | + | frontmatter = ["dep:toml", "dep:tracing"] | |
| 13 | + | media-urls = ["dep:regex-lite"] | |
| 14 | + | full = ["doc-loader", "directives", "mentions", "quotes", "frontmatter", "media-urls"] | |
| 13 | 15 | ||
| 14 | 16 | [dependencies] | |
| 15 | 17 | pulldown-cmark = "0.12" | |
| @@ -20,3 +22,4 @@ regex = { version = "1", optional = true } | |||
| 20 | 22 | regex-lite = { version = "0.1", optional = true } | |
| 21 | 23 | uuid = { version = "1", features = ["serde", "v4"], optional = true } | |
| 22 | 24 | toml = { version = "0.8", optional = true } | |
| 25 | + | tracing = { version = "0.1", optional = true } |
| @@ -0,0 +1,119 @@ | |||
| 1 | + | # DocEngine | |
| 2 | + | ||
| 3 | + | Configurable markdown-to-HTML rendering library with sanitization presets. Built on pulldown-cmark (GFM) and ammonia. | |
| 4 | + | ||
| 5 | + | Used by MNW (site docs, blog posts, user-generated content), Multithreaded (forum posts), and the desktop apps (descriptions, notes). | |
| 6 | + | ||
| 7 | + | ## Presets | |
| 8 | + | ||
| 9 | + | Four rendering presets, each with different security/feature tradeoffs: | |
| 10 | + | ||
| 11 | + | | Preset | Use case | Tables | Images | Raw HTML | Dangerous scheme filter | Sanitization | | |
| 12 | + | |--------|----------|:------:|:------:|:--------:|:-----------------------:|--------------| | |
| 13 | + | | **Permissive** | Docs, blog posts (trusted) | Y | Y | Y | N | Default ammonia | | |
| 14 | + | | **Standard** | App text fields (descriptions) | Y | N | Y | N | Default ammonia | | |
| 15 | + | | **Strict** | User-generated content (forums) | N | N | N | Y | nofollow on links | | |
| 16 | + | | **Sanitize-only** | External HTML (RSS feeds) | -- | -- | -- | -- | Default ammonia, no markdown parsing | | |
| 17 | + | ||
| 18 | + | ```rust | |
| 19 | + | use docengine::{render_permissive, render_standard, render_strict, sanitize_html}; | |
| 20 | + | ||
| 21 | + | // Convenience functions | |
| 22 | + | let html = render_permissive("# Hello\n\n**Bold** text"); | |
| 23 | + | let html = render_standard("A description with [link](https://example.com)"); | |
| 24 | + | let html = render_strict("User post with @mentions and `code`"); | |
| 25 | + | let html = sanitize_html("<p>Pre-rendered</p><script>stripped</script>"); | |
| 26 | + | ||
| 27 | + | // Builder pattern for custom configurations | |
| 28 | + | use docengine::{Renderer, SanitizePreset}; | |
| 29 | + | ||
| 30 | + | let html = Renderer::permissive() | |
| 31 | + | .with_strip_images(true) // override: strip images even in permissive | |
| 32 | + | .with_footnotes(false) | |
| 33 | + | .render("# Custom config"); | |
| 34 | + | ||
| 35 | + | // Render with metadata (word count, reading time) | |
| 36 | + | let result = Renderer::standard().render_with_meta("Some article text..."); | |
| 37 | + | println!("{} words, ~{} min read", result.word_count, result.reading_time_minutes); | |
| 38 | + | ``` | |
| 39 | + | ||
| 40 | + | ## Feature Flags | |
| 41 | + | ||
| 42 | + | All optional features are off by default. Enable what you need: | |
| 43 | + | ||
| 44 | + | | Flag | Dependencies | Provides | | |
| 45 | + | |------|-------------|----------| | |
| 46 | + | `doc-loader` | regex, tracing | `DocLoader` -- load a directory of `.md` files into an in-memory page store | |
| 47 | + | | `directives` | regex-lite | `post_process_directives` -- `[!NOTE]`/`[!TIP]`/`[!TABS]` blockquote alerts and code tabs | | |
| 48 | + | `frontmatter` | toml, tracing | `parse_frontmatter` -- extract TOML frontmatter delimited by `+++` | |
| 49 | + | | `mentions` | regex-lite | `extract_mentions`, `resolve_mentions` -- `@username` parsing and linking | | |
| 50 | + | | `quotes` | regex-lite, uuid | `post_process_quotes` -- replace `[quote:POST_ID:HASH]` markers with author attribution | | |
| 51 | + | | `media-urls` | regex-lite | `rewrite_media_paths`, `img_to_video` -- CDN path rewriting and video tag conversion | | |
| 52 | + | | `full` | all of the above | Enable everything | | |
| 53 | + | ||
| 54 | + | ```toml | |
| 55 | + | # In Cargo.toml | |
| 56 | + | docengine = { path = "../Shared/docengine" } # Core only | |
| 57 | + | docengine = { path = "../Shared/docengine", features = ["full"] } # Everything | |
| 58 | + | ``` | |
| 59 | + | ||
| 60 | + | ## Core API | |
| 61 | + | ||
| 62 | + | ### Types | |
| 63 | + | ||
| 64 | + | - **`Renderer`** -- configurable markdown renderer with builder pattern | |
| 65 | + | - **`RenderResult`** -- rendered HTML plus `word_count` and `reading_time_minutes` | |
| 66 | + | - **`SanitizePreset`** -- `Permissive`, `Standard`, `Strict`, `Minimal` | |
| 67 | + | - **`TocEntry`** -- heading level, text, and anchor for table of contents | |
| 68 | + | ||
| 69 | + | ### Functions | |
| 70 | + | ||
| 71 | + | | Function | Description | | |
| 72 | + | |----------|-------------| | |
| 73 | + | | `render_permissive(md)` | Render with full GFM features | | |
| 74 | + | | `render_standard(md)` | Render without images | | |
| 75 | + | | `render_strict(md)` | Render with all restrictions (UGC-safe) | | |
| 76 | + | | `sanitize_html(html)` | Clean pre-rendered HTML without markdown parsing | | |
| 77 | + | | `word_count(text)` | Count words in raw text | | |
| 78 | + | | `reading_time_minutes(wc)` | Estimate reading time (200 wpm) | | |
| 79 | + | | `extract_title(md)` | Pull the first `# Heading` from markdown | | |
| 80 | + | | `strip_first_heading(md)` | Remove the first `# Heading` (for template-rendered titles) | | |
| 81 | + | | `extract_toc(md)` | Build a `Vec<TocEntry>` from all headings | | |
| 82 | + | | `render_toc_html(entries)` | Render TOC entries as a `<nav class="toc">` HTML list | | |
| 83 | + | ||
| 84 | + | ### Feature-gated | |
| 85 | + | ||
| 86 | + | | Function / Type | Feature | Description | | |
| 87 | + | |-----------------|---------|-------------| | |
| 88 | + | | `DocLoader::load(path, config)` | `doc-loader` | Load `.md` files from disk, render to HTML, build searchable index | | |
| 89 | + | | `DocPage`, `DocIndexEntry` | `doc-loader` | Page and index entry types | | |
| 90 | + | | `post_process_directives(html)` | `directives` | Convert `[!NOTE]`/`[!TIP]`/etc. blockquotes to alert divs, `[!TABS]` to tabbed code blocks | | |
| 91 | + | | `parse_frontmatter(input)` | `frontmatter` | Parse `+++`-delimited TOML frontmatter | | |
| 92 | + | | `Frontmatter` | `frontmatter` | Struct with `title`, `date`, `tags`, `section`, `draft`, `extra` | | |
| 93 | + | | `extract_mentions(md)` | `mentions` | Find unique `@username` mentions (skips code blocks) | | |
| 94 | + | | `resolve_mentions(md, valid, template)` | `mentions` | Replace `@user` with `[@user](/path/to/user)` for known usernames | | |
| 95 | + | | `post_process_quotes(html, authors)` | `quotes` | Replace `[quote:UUID:HASH]` with clickable attribution | | |
| 96 | + | | `rewrite_media_paths(md, base, user)` | `media-urls` | Rewrite relative image paths to absolute CDN URLs | | |
| 97 | + | | `img_to_video(html)` | `media-urls` | Convert `<img>` tags pointing to video files into `<video>` elements | | |
| 98 | + | ||
| 99 | + | ## Consumers | |
| 100 | + | ||
| 101 | + | | Project | Features used | Preset | | |
| 102 | + | |---------|--------------|--------| | |
| 103 | + | | MNW | `doc-loader`, `directives`, `frontmatter`, `media-urls` | Permissive (docs/blog), Standard (descriptions) | | |
| 104 | + | | Multithreaded | `mentions`, `quotes` | Strict (forum posts) | | |
| 105 | + | | GoingsOn | core only | Standard (notes, descriptions) | | |
| 106 | + | | Balanced Breakfast | core only | Sanitize-only (RSS feed content) | | |
| 107 | + | ||
| 108 | + | ## Security | |
| 109 | + | ||
| 110 | + | All presets sanitize output through ammonia. The strict preset additionally: | |
| 111 | + | - Strips all raw HTML and images at the parser level (before ammonia) | |
| 112 | + | - Replaces `javascript:`, `data:`, `vbscript:` URLs with `#` | |
| 113 | + | - Adds `rel="noopener noreferrer nofollow"` to all links | |
| 114 | + | ||
| 115 | + | Zero unsafe code. | |
| 116 | + | ||
| 117 | + | ## License | |
| 118 | + | ||
| 119 | + | PolyForm Noncommercial 1.0.0 |
| @@ -0,0 +1,80 @@ | |||
| 1 | + | # DocEngine Architecture | |
| 2 | + | ||
| 3 | + | ## Overview | |
| 4 | + | ||
| 5 | + | DocEngine is a markdown rendering library that wraps pulldown-cmark (parsing) and ammonia (sanitization) behind a preset system. Each preset configures which markdown features are enabled and how aggressively the output is sanitized. | |
| 6 | + | ||
| 7 | + | ## Module Map | |
| 8 | + | ||
| 9 | + | ``` | |
| 10 | + | src/ | |
| 11 | + | lib.rs Crate root, re-exports, convenience functions | |
| 12 | + | render.rs Renderer struct (builder pattern, 4 presets, render/render_with_meta) | |
| 13 | + | sanitize.rs SanitizePreset enum (Permissive, Standard, Strict, Minimal) | |
| 14 | + | text.rs Text utilities (word_count, reading_time, extract_title, strip_first_heading) | |
| 15 | + | toc.rs Table of contents extraction and HTML rendering | |
| 16 | + | escape.rs HTML entity escaping for safe string interpolation | |
| 17 | + | code_spans.rs Code span/block byte range detection (used by mentions to skip code) | |
| 18 | + | directives.rs [directives] Alert/tabs blockquote post-processing | |
| 19 | + | doc_loader.rs [doc-loader] Load .md files from disk into in-memory page store | |
| 20 | + | frontmatter.rs [frontmatter] Parse `+++`-delimited TOML frontmatter | |
| 21 | + | media_urls.rs [media-urls] CDN path rewriting for images, img-to-video conversion | |
| 22 | + | mentions.rs [mentions] @username extraction and resolution | |
| 23 | + | quotes.rs [quotes] [quote:UUID:HASH] post-processing for forum attribution | |
| 24 | + | ``` | |
| 25 | + | ||
| 26 | + | ## Design Decisions | |
| 27 | + | ||
| 28 | + | ### Presets over configuration | |
| 29 | + | ||
| 30 | + | Rather than exposing every pulldown-cmark option, DocEngine provides named presets that bundle markdown features with sanitization levels. This prevents misconfiguration -- you can't accidentally enable raw HTML without appropriate sanitization. | |
| 31 | + | ||
| 32 | + | Custom configurations are still possible via the builder pattern (`Renderer::permissive().with_strip_images(true)`). | |
| 33 | + | ||
| 34 | + | ### Two-phase rendering | |
| 35 | + | ||
| 36 | + | Rendering happens in two phases: | |
| 37 | + | 1. **pulldown-cmark** parses markdown to HTML events, with optional filtering (strip images, strip raw HTML, neutralize dangerous URL schemes) | |
| 38 | + | 2. **ammonia** sanitizes the resulting HTML string | |
| 39 | + | ||
| 40 | + | This means even the permissive preset strips `<script>` tags -- ammonia always runs. | |
| 41 | + | ||
| 42 | + | Post-processing steps (directives, mentions, quotes, media URLs) are applied after sanitization by consumers, not built into the render pipeline. | |
| 43 | + | ||
| 44 | + | ### Feature-gated modules | |
| 45 | + | ||
| 46 | + | DocEngine has zero required dependencies beyond pulldown-cmark, ammonia, and serde. Consumers that only need rendering don't pull in regex, toml, or uuid. The `full` feature enables everything. | |
| 47 | + | ||
| 48 | + | The `regex` vs `regex-lite` split is intentional -- doc-loader's link rewriting needs the full regex engine while simpler patterns in directives, mentions, quotes, and media-urls use the lighter variant. | |
| 49 | + | ||
| 50 | + | ### DocLoader loads once at startup | |
| 51 | + | ||
| 52 | + | `DocLoader::load()` reads all `.md` files from disk, renders them to HTML, and stores them in a `HashMap<String, DocPage>`. This happens once at application boot (MNW calls it during startup). Pages are served from memory with no disk I/O on request. | |
| 53 | + | ||
| 54 | + | Link rewriting converts relative `.md` references to the configured URL prefix (e.g., `./faq.md` becomes `/docs/faq`). Links to unpublished docs are stripped to plain text. | |
| 55 | + | ||
| 56 | + | ### Mention resolution skips code | |
| 57 | + | ||
| 58 | + | `extract_mentions` and `resolve_mentions` detect inline code (backticks) and fenced code blocks, skipping any @mentions inside them. This prevents false positives from code examples. | |
| 59 | + | ||
| 60 | + | ### Directive post-processing | |
| 61 | + | ||
| 62 | + | Directives (`[!NOTE]`, `[!TIP]`, `[!TABS]`, etc.) are implemented as HTML post-processing rather than markdown parsing extensions. This keeps the core render pipeline simple and makes directives composable with any preset. | |
| 63 | + | ||
| 64 | + | ## Consumers | |
| 65 | + | ||
| 66 | + | | Consumer | Features | How it's used | | |
| 67 | + | |----------|----------|---------------| | |
| 68 | + | | MNW | doc-loader, directives, frontmatter, media-urls | Site docs loaded at boot, blog posts with frontmatter, user descriptions (standard), item markdown (standard), CDN image rewriting | | |
| 69 | + | | Multithreaded | mentions, quotes | Forum posts (strict), @username linking, quote attribution | | |
| 70 | + | | GoingsOn | core | Task/event descriptions (standard) | | |
| 71 | + | | Balanced Breakfast | core | RSS feed content (sanitize_only) | | |
| 72 | + | | audiofiles | core | Sample descriptions (standard) | | |
| 73 | + | ||
| 74 | + | ## Key Paths | |
| 75 | + | ||
| 76 | + | - `src/render.rs` -- the core rendering logic | |
| 77 | + | - `src/sanitize.rs` -- ammonia preset configurations | |
| 78 | + | - `src/directives.rs` -- alert and code tab processing | |
| 79 | + | - `src/doc_loader.rs` -- document loading and link rewriting | |
| 80 | + | - `src/media_urls.rs` -- CDN path rewriting |
| @@ -0,0 +1,114 @@ | |||
| 1 | + | # DocEngine — Code Review | |
| 2 | + | ||
| 3 | + | **Date:** 2026-04-12 | |
| 4 | + | **Version:** 0.3.0 | |
| 5 | + | **Reviewer:** Claude (Opus 4.6) | |
| 6 | + | **Scope:** Full codebase review — all Rust source, Cargo.toml, README, docs | |
| 7 | + | ||
| 8 | + | ## Summary | |
| 9 | + | ||
| 10 | + | DocEngine is a markdown-to-HTML rendering library (~2,550 source LOC across 13 modules) built on pulldown-cmark and ammonia. Preset-based configuration system bundles markdown features with sanitization levels. 6 cargo feature gates keep the dependency tree minimal. Used by 5 consumers across the ecosystem (MNW, Multithreaded, GO, BB, AF). 141 tests, 0 clippy warnings, 0 unsafe code. | |
| 11 | + | ||
| 12 | + | **Overall: A** — clean, well-tested, security-conscious. No bugs found. Findings are documentation gaps and minor observations. | |
| 13 | + | ||
| 14 | + | --- | |
| 15 | + | ||
| 16 | + | ## Findings | |
| 17 | + | ||
| 18 | + | ### [MEDIUM] README and architecture.md missing `directives` and `media-urls` features | |
| 19 | + | ||
| 20 | + | The README feature flag table (lines 42-51) lists `doc-loader`, `frontmatter`, `mentions`, and `quotes` but omits `directives` and `media-urls`. These features are defined in Cargo.toml, included in `full`, and used by MNW. The `full` description says "all of the above" but the unlisted features make this misleading. | |
| 21 | + | ||
| 22 | + | Similarly, `architecture.md` module map (lines 9-21) does not include `directives.rs`, `media_urls.rs`, or `escape.rs`. The consumers table also doesn't mention `directives` or `media-urls` for MNW. | |
| 23 | + | ||
| 24 | + | ### [MEDIUM] Missing `docs/todo.md` and `docs/audit_review.md` | |
| 25 | + | ||
| 26 | + | Per cross-cutting conventions, each project should have `todo.md` and `audit_review.md` in `docs/`. Only `architecture.md` exists. | |
| 27 | + | ||
| 28 | + | ### [LOW] `Permissive` and `Standard` sanitize presets are identical | |
| 29 | + | ||
| 30 | + | In `sanitize.rs:17`, both `Permissive` and `Standard` map to `ammonia::clean(html)`. The doc comment for `Standard` says "Same as Permissive." The distinction is intentional — they differ in the `Renderer`'s markdown settings (Standard strips images, Permissive doesn't) — but the sanitize preset enum having two identical variants with a comment that says "Same" could confuse contributors. A comment clarifying "Same sanitization; markdown-level differences are configured in the Renderer" would help. | |
| 31 | + | ||
| 32 | + | ### [LOW] `strip_html_tags` in doc_loader.rs does not decode HTML entities | |
| 33 | + | ||
| 34 | + | The search index generator (`strip_html_tags`, lines 162-182) strips tags but leaves HTML entities (`&amp;`, `&lt;`, etc.) intact. Searching for "A & B" won't match content rendered as "A &amp; B". Low impact since doc search is client-side and search terms are unlikely to contain entities, but worth noting. | |
| 35 | + | ||
| 36 | + | ### [LOW] `render.rs` at 511 lines | |
| 37 | + | ||
| 38 | + | Technically exceeds the 500-line guideline, but 268 lines are tests. The logic is ~235 lines. Within the spirit of the rule. If the test suite grows further, consider moving tests to a submodule. | |
| 39 | + | ||
| 40 | + | ### [INFO] `rewrite_links` regex is naive about nested brackets | |
| 41 | + | ||
| 42 | + | The regex `\[([^\]]+)\]\(([^)]+)\)` in doc_loader.rs cannot handle nested brackets in link text or parentheses in URLs. Low risk since doc files are authored by the project owner, but edge cases like `[text](url_(with_parens))` would be malformed. | |
| 43 | + | ||
| 44 | + | ### [INFO] `extract_title` silently fails on frontmatter-prefixed documents | |
| 45 | + | ||
| 46 | + | If called on raw markdown that starts with `+++` TOML frontmatter, `extract_title` returns `None` because the `+++` line is neither empty nor `---`. This is the correct behavior (frontmatter should be stripped first via `parse_frontmatter`), but the interaction is documented nowhere. | |
| 47 | + | ||
| 48 | + | ### [INFO] `html_escape` uses sequential string replacements | |
| 49 | + | ||
| 50 | + | Five sequential `.replace()` calls, each allocating a new String. A single-pass approach would be more efficient, but this function is only called in template contexts (TOC, quotes, video tags), not in the hot render path. Negligible impact. | |
| 51 | + | ||
| 52 | + | --- | |
| 53 | + | ||
| 54 | + | ## Strengths | |
| 55 | + | ||
| 56 | + | - **Preset system is the right abstraction.** Bundles markdown features with matching sanitization levels, preventing dangerous misconfigurations (e.g., raw HTML without sanitization). Builder pattern still allows per-instance overrides. | |
| 57 | + | - **Feature gate design.** Zero unnecessary dependencies for core-only consumers. Smart split of `regex` (doc-loader, complex patterns) vs `regex-lite` (directives/mentions/quotes/media-urls, simple patterns). | |
| 58 | + | - **Security-conscious.** All paths go through ammonia sanitization. Dangerous URL schemes detected case-insensitively. Path traversal blocked in media URL rewriting. HTML escaping on all user-supplied strings interpolated into HTML. Zero unsafe code. | |
| 59 | + | - **Test quality.** 141 tests at ~1.09:1 test-to-logic ratio. Tests cover happy paths, edge cases, security scenarios. All co-located with implementation. | |
| 60 | + | - **Clean module boundaries.** Each module does one thing. No circular dependencies. Feature gates cleanly gate whole modules. | |
| 61 | + | - **Directive system is extensible.** Any `[!UPPERCASE]` blockquote becomes an alert div. Code tabs auto-detect language labels. Both are implemented as HTML post-processing, keeping the core render pipeline simple. | |
| 62 | + | ||
| 63 | + | ## Security Checklist | |
| 64 | + | ||
| 65 | + | | Check | Status | | |
| 66 | + | |-------|--------| | |
| 67 | + | | XSS via raw HTML | Pass — ammonia sanitization on all presets | | |
| 68 | + | | XSS via markdown | Pass — strict preset strips raw HTML at parser level + sanitizes | | |
| 69 | + | | javascript:/data:/vbscript: URLs | Pass — detected case-insensitively, neutralized to `#` | | |
| 70 | + | | Path traversal in media URLs | Pass — `..` paths rejected | | |
| 71 | + | | User string injection in HTML | Pass — `html_escape()` applied in quotes, TOC, media tags | | |
| 72 | + | | Unsafe code | Pass — zero `unsafe` blocks | | |
| 73 | + | ||
| 74 | + | ## Metrics | |
| 75 | + | ||
| 76 | + | | Metric | Value | | |
| 77 | + | |--------|-------| | |
| 78 | + | | Source LOC (logic) | ~1,310 | | |
| 79 | + | | Source LOC (tests) | ~1,235 | | |
| 80 | + | | Source LOC (total) | ~2,550 | | |
| 81 | + | | Source files | 13 | | |
| 82 | + | | Test count | 141 | | |
| 83 | + | | Tests/KLOC (logic) | ~108 | | |
| 84 | + | | Clippy warnings | 0 | | |
| 85 | + | | Unsafe blocks | 0 | | |
| 86 | + | | Cargo features | 6 (+full) | | |
| 87 | + | | Direct dependencies | 7 (3 always, 4 optional) | | |
| 88 | + | | Consumers | 5 (MNW, Multithreaded, GO, BB, AF) | | |
| 89 | + | | Audit advisories | 0 (1 allowed warning) | | |
| 90 | + | ||
| 91 | + | ## Module Heatmap | |
| 92 | + | ||
| 93 | + | | Module | Code | Test | Security | Docs | | |
| 94 | + | |--------|:----:|:----:|:--------:|:----:| | |
| 95 | + | | render.rs | A | A | A | A | | |
| 96 | + | | directives.rs | A | A | A- | B (not in README/arch) | | |
| 97 | + | | doc_loader.rs | A | A- | A | A- | | |
| 98 | + | | media_urls.rs | A | A | A | B (not in README/arch) | | |
| 99 | + | | toc.rs | A | A- | A | A | | |
| 100 | + | | mentions.rs | A | A | A | A | | |
| 101 | + | | frontmatter.rs | A | A | A | A | | |
| 102 | + | | code_spans.rs | A | A- | A | A- | | |
| 103 | + | | sanitize.rs | A- | A- | A | B+ (confusing "Same" comment) | | |
| 104 | + | | text.rs | A | A | A | A | | |
| 105 | + | | escape.rs | A | A- | A | A | | |
| 106 | + | | quotes.rs | A | A- | A | A | | |
| 107 | + | | lib.rs | A | — | A | A | | |
| 108 | + | ||
| 109 | + | ## Action Items | |
| 110 | + | ||
| 111 | + | 1. ~~**[MEDIUM]** Update README feature flag table to include `directives` and `media-urls`~~ — Done. Feature table, feature-gated API table, and consumers table all updated. | |
| 112 | + | 2. ~~**[MEDIUM]** Update `architecture.md` module map and consumers table~~ — Done. Added directives.rs, media_urls.rs, escape.rs to module map. Updated consumers, key paths, and added directive design decision. | |
| 113 | + | 3. ~~**[LOW]** Clarify `Standard` sanitize preset doc comment~~ — Done. Explains the difference is at the Renderer level. | |
| 114 | + | 4. ~~**[LOW]** Consider HTML entity decoding in `strip_html_tags` for search index accuracy~~ — Done. Decodes `&amp;`, `&lt;`, `&gt;`, `&quot;`, and the apostrophe entities (e.g. `&#39;`, `&#x27;`) after tag stripping. |
| @@ -0,0 +1,443 @@ | |||
| 1 | + | //! Post-process rendered HTML to convert blockquote-based directives into | |
| 2 | + | //! styled elements. | |
| 3 | + | //! | |
| 4 | + | //! **Alerts:** `> [!NOTE]`, `> [!TIP]`, `> [!WARNING]`, `> [!CAUTION]`, | |
| 5 | + | //! `> [!IMPORTANT]`, and any custom `> [!TYPE]` marker become styled | |
| 6 | + | //! `<div class="alert alert-{type}">` callout elements. | |
| 7 | + | //! | |
| 8 | + | //! **Code tabs:** `> [!TABS]` followed by fenced code blocks become a tabbed | |
| 9 | + | //! interface with language-labelled tabs. | |
| 10 | + | ||
| 11 | + | use std::sync::LazyLock; | |
| 12 | + | ||
/// Matches the opening of an alert blockquote: `<blockquote>`, optional
/// whitespace, `<p>`, then a `[!TYPE]` marker.
///
/// Capture group 1 is `TYPE`. It must start with an uppercase ASCII letter and
/// may continue with uppercase letters, digits, underscores, or hyphens.
/// An optional `<br>` / `<br/>` / `<br />` directly after the marker and any
/// whitespace that follows are consumed as part of the match, so replacing the
/// whole match also removes the marker line from the paragraph.
static ALERT_RE: LazyLock<regex_lite::Regex> = LazyLock::new(|| {
    regex_lite::Regex::new(
        r"<blockquote>\s*<p>\[!([A-Z][A-Z0-9_-]*)\](?:<br\s*/?>)?\s*",
    )
    // The pattern is a fixed literal, so failing to compile it is a programming error.
    .expect("valid alert regex")
});
| 21 | + | ||
| 22 | + | /// Process all directives: code tabs first, then alerts. | |
| 23 | + | pub fn post_process_directives(html: &str) -> String { | |
| 24 | + | let with_tabs = process_tabs(html); | |
| 25 | + | process_alerts(&with_tabs) | |
| 26 | + | } | |
| 27 | + | ||
| 28 | + | /// Replace alert blockquotes with styled `<div class="alert ...">` elements. | |
| 29 | + | fn process_alerts(html: &str) -> String { | |
| 30 | + | // First pass: replace opening markers. | |
| 31 | + | let opened = ALERT_RE.replace_all(html, |caps: ®ex_lite::Captures| { | |
| 32 | + | let kind = &caps[1]; | |
| 33 | + | // Skip TABS — already handled by process_tabs. | |
| 34 | + | if kind == "TABS" { | |
| 35 | + | return caps[0].to_string(); | |
| 36 | + | } | |
| 37 | + | let label = title_case(kind); | |
| 38 | + | format!( | |
| 39 | + | "<div class=\"alert alert-{kind}\"><p class=\"alert-title\">{label}</p><p>", | |
| 40 | + | kind = kind.to_ascii_lowercase(), | |
| 41 | + | label = label, | |
| 42 | + | ) | |
| 43 | + | }); | |
| 44 | + | ||
| 45 | + | // Second pass: close any opened alerts. | |
| 46 | + | let alert_count = ALERT_RE | |
| 47 | + | .captures_iter(html) | |
| 48 | + | .filter(|c| &c[1] != "TABS") | |
| 49 | + | .count(); | |
| 50 | + | if alert_count == 0 { | |
| 51 | + | return opened.into_owned(); | |
| 52 | + | } | |
| 53 | + | ||
| 54 | + | let mut result = String::with_capacity(opened.len()); | |
| 55 | + | let mut remaining = opened.as_ref(); | |
| 56 | + | let mut replaced = 0; | |
| 57 | + | ||
| 58 | + | while replaced < alert_count { | |
| 59 | + | if let Some(pos) = remaining.find("</blockquote>") { | |
| 60 | + | result.push_str(&remaining[..pos]); | |
| 61 | + | result.push_str("</div>"); | |
| 62 | + | remaining = &remaining[(pos + "</blockquote>".len())..]; | |
| 63 | + | replaced += 1; | |
| 64 | + | } else { | |
| 65 | + | break; | |
| 66 | + | } | |
| 67 | + | } | |
| 68 | + | result.push_str(remaining); | |
| 69 | + | result | |
| 70 | + | } | |
| 71 | + | ||
| 72 | + | /// Process `[!TABS]` blockquotes into tabbed code-block interfaces. | |
| 73 | + | fn process_tabs(html: &str) -> String { | |
| 74 | + | if !html.contains("[!TABS]") { | |
| 75 | + | return html.to_string(); | |
| 76 | + | } | |
| 77 | + | ||
| 78 | + | let mut result = String::with_capacity(html.len()); | |
| 79 | + | let mut remaining = html; | |
| 80 | + | ||
| 81 | + | while let Some(bq_pos) = remaining.find("<blockquote>") { | |
| 82 | + | let after_bq_start = bq_pos + "<blockquote>".len(); | |
| 83 | + | ||
| 84 | + | // Find the closing </blockquote> for this blockquote. | |
| 85 | + | let close_pos = match remaining[bq_pos..].find("</blockquote>") { | |
| 86 | + | Some(p) => bq_pos + p, | |
| 87 | + | None => break, | |
| 88 | + | }; | |
| 89 | + | ||
| 90 | + | let inner = &remaining[after_bq_start..close_pos]; | |
| 91 | + | ||
| 92 | + | // Check if the first <p> in the blockquote contains [!TABS]. | |
| 93 | + | let is_tabs = { | |
| 94 | + | let trimmed = inner.trim_start(); | |
| 95 | + | trimmed.starts_with("<p>") && { | |
| 96 | + | let first_p_end = trimmed.find("</p>").unwrap_or(trimmed.len()); | |
| 97 | + | trimmed[..first_p_end].contains("[!TABS]") | |
| 98 | + | } | |
| 99 | + | }; | |
| 100 | + | ||
| 101 | + | if !is_tabs { | |
| 102 | + | // Not a TABS blockquote — copy through the opening tag and continue. | |
| 103 | + | result.push_str(&remaining[..after_bq_start]); | |
| 104 | + | remaining = &remaining[after_bq_start..]; | |
| 105 | + | continue; | |
| 106 | + | } | |
| 107 | + | ||
| 108 | + | // Copy everything before this blockquote. | |
| 109 | + | result.push_str(&remaining[..bq_pos]); | |
| 110 | + | ||
| 111 | + | // Extract code blocks from the inner HTML. | |
| 112 | + | let tabs = extract_code_blocks(inner); | |
| 113 | + | ||
| 114 | + | if tabs.is_empty() { | |
| 115 | + | // No code blocks found — wrap content in a plain div. | |
| 116 | + | result.push_str("<div class=\"code-tabs\">"); | |
| 117 | + | result.push_str(inner); | |
| 118 | + | result.push_str("</div>"); | |
| 119 | + | } else { | |
| 120 | + | result.push_str(&build_tabs_html(&tabs)); | |
| 121 | + | } | |
| 122 | + | ||
| 123 | + | remaining = &remaining[close_pos + "</blockquote>".len()..]; | |
| 124 | + | } | |
| 125 | + | ||
| 126 | + | result.push_str(remaining); | |
| 127 | + | result | |
| 128 | + | } | |
| 129 | + | ||
/// Collect every `<pre><code …>…</code></pre>` element found in `html`.
///
/// Each entry is `(language, full_html_block)`. The language is the text
/// following `class="language-` up to the next `"` inside the block, or
/// `"code"` when the block carries no such class. Scanning stops at the first
/// opening that has no `</code></pre>` after it.
fn extract_code_blocks(html: &str) -> Vec<(String, String)> {
    const START: &str = "<pre><code";
    const END: &str = "</code></pre>";
    const LANG_ATTR: &str = "class=\"language-";

    let mut found = Vec::new();
    let mut cursor = 0;

    loop {
        let Some(rel_start) = html[cursor..].find(START) else {
            break;
        };
        let start = cursor + rel_start;

        let Some(rel_end) = html[start..].find(END) else {
            break;
        };
        let stop = start + rel_end + END.len();
        let block = &html[start..stop];

        // Language identifier from the class attribute, defaulting to "code".
        let lang = block
            .split_once(LANG_ATTR)
            .map_or("code", |(_, tail)| tail.split('"').next().unwrap_or("code"));

        found.push((lang.to_string(), block.to_string()));
        cursor = stop;
    }

    found
}
| 161 | + | ||
| 162 | + | /// Build tabbed HTML from extracted code blocks. | |
| 163 | + | fn build_tabs_html(tabs: &[(String, String)]) -> String { | |
| 164 | + | let mut html = String::from("<div class=\"code-tabs\">\n<div class=\"code-tabs-bar\">"); | |
| 165 | + | ||
| 166 | + | for (i, (lang, _)) in tabs.iter().enumerate() { | |
| 167 | + | let active = if i == 0 { " active" } else { "" }; | |
| 168 | + | let label = code_language_label(lang); | |
| 169 | + | html.push_str(&format!( | |
| 170 | + | "<button class=\"code-tab{active}\" data-tab-index=\"{i}\">{label}</button>" | |
| 171 | + | )); | |
| 172 | + | } | |
| 173 | + | ||
| 174 | + | html.push_str("</div>\n"); | |
| 175 | + | ||
| 176 | + | for (i, (_, block)) in tabs.iter().enumerate() { | |
| 177 | + | let active = if i == 0 { " active" } else { "" }; | |
| 178 | + | html.push_str(&format!( | |
| 179 | + | "<div class=\"code-tab-panel{active}\" data-tab-index=\"{i}\">{block}</div>\n" | |
| 180 | + | )); | |
| 181 | + | } | |
| 182 | + | ||
| 183 | + | html.push_str("</div>"); | |
| 184 | + | html | |
| 185 | + | } | |
| 186 | + | ||
| 187 | + | /// Human-readable label for a code language identifier. | |
| 188 | + | fn code_language_label(lang: &str) -> String { | |
| 189 | + | match lang { | |
| 190 | + | "js" | "javascript" => "JavaScript".into(), | |
| 191 | + | "ts" | "typescript" => "TypeScript".into(), | |
| 192 | + | "sh" | "bash" | "zsh" | "shell" => "Shell".into(), | |
| 193 | + | "json" => "JSON".into(), | |
| 194 | + | "html" => "HTML".into(), | |
| 195 | + | "css" => "CSS".into(), | |
| 196 | + | "sql" => "SQL".into(), | |
| 197 | + | "toml" => "TOML".into(), | |
| 198 | + | "yaml" | "yml" => "YAML".into(), | |
| 199 | + | "xml" => "XML".into(), | |
| 200 | + | other => title_case(other), | |
| 201 | + | } | |
| 202 | + | } | |
| 203 | + | ||
/// Capitalise the first character of `s` and lowercase the rest.
///
/// Both halves use Unicode-aware case mapping, so non-ASCII input is handled
/// consistently (`"ÉTÉ"` becomes `"Été"`). Returns an empty string for empty
/// input.
fn title_case(s: &str) -> String {
    let mut chars = s.chars();
    let Some(first) = chars.next() else {
        return String::new();
    };

    // Case mapping can change the byte length; `s.len()` is only a hint.
    let mut out = String::with_capacity(s.len());
    out.extend(first.to_uppercase());
    out.extend(chars.flat_map(char::to_lowercase));
    out
}
| 215 | + | ||
#[cfg(test)]
mod tests {
    use super::*;

    /// Post-process `html`, then require every `present` fragment to occur in
    /// the output and every `absent` fragment to be missing from it.
    fn check(html: &str, present: &[&str], absent: &[&str]) {
        let rendered = post_process_directives(html);
        for wanted in present {
            assert!(
                rendered.contains(wanted),
                "expected output to contain {wanted:?}"
            );
        }
        for unwanted in absent {
            assert!(
                !rendered.contains(unwanted),
                "expected output not to contain {unwanted:?}"
            );
        }
    }

    // ===== Alert directives =====

    #[test]
    fn note_alert() {
        check(
            "<blockquote>\n<p>[!NOTE]<br>\nThis is a note.</p>\n</blockquote>",
            &[
                "alert alert-note",
                "<p class=\"alert-title\">Note</p>",
                "This is a note.",
            ],
            &["<blockquote>"],
        );
    }

    #[test]
    fn tip_alert() {
        check(
            "<blockquote>\n<p>[!TIP]<br>\nHelpful tip here.</p>\n</blockquote>",
            &["alert alert-tip", "<p class=\"alert-title\">Tip</p>"],
            &[],
        );
    }

    #[test]
    fn important_alert() {
        check(
            "<blockquote>\n<p>[!IMPORTANT]<br>\nDo this.</p>\n</blockquote>",
            &[
                "alert alert-important",
                "<p class=\"alert-title\">Important</p>",
            ],
            &[],
        );
    }

    #[test]
    fn warning_alert() {
        check(
            "<blockquote>\n<p>[!WARNING]<br>\nBe careful.</p>\n</blockquote>",
            &["alert alert-warning", "<p class=\"alert-title\">Warning</p>"],
            &[],
        );
    }

    #[test]
    fn caution_alert() {
        // Uses the self-closing `<br/>` form of the marker line break.
        check(
            "<blockquote>\n<p>[!CAUTION]<br/>\nDanger zone.</p>\n</blockquote>",
            &["alert alert-caution", "<p class=\"alert-title\">Caution</p>"],
            &[],
        );
    }

    #[test]
    fn multi_paragraph_alert() {
        check(
            "<blockquote>\n<p>[!NOTE]<br>\nFirst paragraph.</p>\n<p>Second paragraph.</p>\n</blockquote>",
            &[
                "alert alert-note",
                "First paragraph.",
                "Second paragraph.",
                "</div>",
            ],
            &["</blockquote>"],
        );
    }

    #[test]
    fn regular_blockquote_unchanged() {
        let plain_quote = "<blockquote>\n<p>Just a normal quote.</p>\n</blockquote>";
        assert_eq!(post_process_directives(plain_quote), plain_quote);
    }

    #[test]
    fn mixed_alerts_and_blockquotes() {
        let html = concat!(
            "<blockquote>\n<p>[!WARNING]<br>\nWatch out!</p>\n</blockquote>\n",
            "<blockquote>\n<p>Normal quote.</p>\n</blockquote>"
        );
        // The trailing plain blockquote must survive untouched.
        check(
            html,
            &[
                "alert alert-warning",
                "Watch out!",
                "<blockquote>",
                "Normal quote.",
            ],
            &[],
        );
    }

    // ===== Custom alert types =====

    #[test]
    fn custom_example_alert() {
        check(
            "<blockquote>\n<p>[!EXAMPLE]<br>\nHere is an example.</p>\n</blockquote>",
            &[
                "alert alert-example",
                "<p class=\"alert-title\">Example</p>",
                "Here is an example.",
            ],
            &["<blockquote>"],
        );
    }

    #[test]
    fn custom_definition_alert() {
        check(
            "<blockquote>\n<p>[!DEFINITION]<br>\nA term and its meaning.</p>\n</blockquote>",
            &[
                "alert alert-definition",
                "<p class=\"alert-title\">Definition</p>",
            ],
            &[],
        );
    }

    #[test]
    fn custom_alert_with_hyphen() {
        check(
            "<blockquote>\n<p>[!SEE-ALSO]<br>\nRelated topics.</p>\n</blockquote>",
            &["alert alert-see-also", "<p class=\"alert-title\">See-also</p>"],
            &[],
        );
    }

    // ===== Code tabs =====

    #[test]
    fn tabs_two_languages() {
        let html = concat!(
            "<blockquote>\n<p>[!TABS]</p>\n",
            "<pre><code class=\"language-rust\">fn main() {}\n</code></pre>\n",
            "<pre><code class=\"language-python\">def main(): pass\n</code></pre>\n",
            "</blockquote>"
        );
        check(
            html,
            &[
                "code-tabs",
                "code-tabs-bar",
                "Rust",
                "Python",
                "fn main() {}",
                "def main(): pass",
                // First tab is active.
                "code-tab active",
                "code-tab-panel active",
            ],
            &["<blockquote>"],
        );
    }

    #[test]
    fn tabs_three_languages() {
        let html = concat!(
            "<blockquote>\n<p>[!TABS]</p>\n",
            "<pre><code class=\"language-bash\">curl https://api.example.com\n</code></pre>\n",
            "<pre><code class=\"language-js\">fetch('https://api.example.com')\n</code></pre>\n",
            "<pre><code class=\"language-python\">requests.get('https://api.example.com')\n</code></pre>\n",
            "</blockquote>"
        );
        check(
            html,
            &[
                "Shell",      // bash → Shell
                "JavaScript", // js → JavaScript
                "Python",
                "data-tab-index=\"0\"",
                "data-tab-index=\"1\"",
                "data-tab-index=\"2\"",
            ],
            &[],
        );
    }

    #[test]
    fn tabs_no_language_specified() {
        let html = concat!(
            "<blockquote>\n<p>[!TABS]</p>\n",
            "<pre><code>some code\n</code></pre>\n",
            "<pre><code class=\"language-rust\">let x = 1;\n</code></pre>\n",
            "</blockquote>"
        );
        // "Code" is the fallback label for a block without a language class.
        check(html, &["Code", "Rust"], &[]);
    }

    #[test]
    fn tabs_with_br_marker() {
        let html = concat!(
            "<blockquote>\n<p>[!TABS]<br>\n</p>\n",
            "<pre><code class=\"language-toml\">[package]\n</code></pre>\n",
            "<pre><code class=\"language-json\">{}\n</code></pre>\n",
            "</blockquote>"
        );
        check(html, &["TOML", "JSON"], &[]);
    }

    #[test]
    fn tabs_mixed_with_alert_and_blockquote() {
        let html = concat!(
            "<blockquote>\n<p>[!NOTE]<br>\nA note.</p>\n</blockquote>\n",
            "<blockquote>\n<p>[!TABS]</p>\n",
            "<pre><code class=\"language-rust\">let x = 1;\n</code></pre>\n",
            "</blockquote>\n",
            "<blockquote>\n<p>Normal quote.</p>\n</blockquote>"
        );
        check(
            html,
            &[
                // Alert processed.
                "alert alert-note",
                // Tabs processed.
                "code-tabs",
                "Rust",
                // Normal blockquote unchanged.
                "<blockquote>",
                "Normal quote.",
            ],
            &[],
        );
    }

    #[test]
    fn tabs_no_code_blocks() {
        let html = concat!(
            "<blockquote>\n<p>[!TABS]</p>\n",
            "<p>Just text, no code.</p>\n",
            "</blockquote>"
        );
        check(html, &["code-tabs", "Just text, no code."], &["<blockquote>"]);
    }

    // ===== Language label mapping =====

    #[test]
    fn language_labels() {
        let expectations = [
            ("js", "JavaScript"),
            ("typescript", "TypeScript"),
            ("bash", "Shell"),
            ("json", "JSON"),
            ("html", "HTML"),
            ("css", "CSS"),
            ("sql", "SQL"),
            ("toml", "TOML"),
            ("yaml", "YAML"),
            ("xml", "XML"),
            ("rust", "Rust"),
            ("python", "Python"),
            ("go", "Go"),
        ];
        for (lang, label) in expectations {
            assert_eq!(code_language_label(lang), label, "label for {lang:?}");
        }
    }
}
| @@ -35,6 +35,15 @@ pub struct DocIndexEntry { | |||
| 35 | 35 | pub section: String, | |
| 36 | 36 | } | |
| 37 | 37 | ||
/// Entry in the full-text search index, serialised to JSON for client-side search.
#[derive(Clone, Debug, serde::Serialize)]
pub struct DocSearchEntry {
    /// Slug of the page this entry describes.
    pub slug: String,
    /// Page title.
    pub title: String,
    /// Section the page belongs to.
    pub section: String,
    /// Plain-text body of the page: the rendered HTML with tags stripped.
    pub body_text: String,
}
| 46 | + | ||
| 38 | 47 | /// In-memory store of rendered documentation pages, built once at startup. | |
| 39 | 48 | #[derive(Clone, Debug)] | |
| 40 | 49 | pub struct DocLoader { | |
| @@ -56,9 +65,15 @@ impl DocLoader { | |||
| 56 | 65 | continue; | |
| 57 | 66 | } | |
| 58 | 67 | ||
| 59 | - | let mut entries: Vec<_> = std::fs::read_dir(§ion_path) | |
| 60 | - | .into_iter() | |
| 61 | - | .flatten() | |
| 68 | + | let read_dir = match std::fs::read_dir(§ion_path) { | |
| 69 | + | Ok(rd) => rd, | |
| 70 | + | Err(e) => { | |
| 71 | + | tracing::warn!(path = %section_path.display(), error = %e, "Failed to read docs section directory"); | |
| 72 | + | continue; | |
| 73 | + | } | |
| 74 | + | }; | |
| 75 | + | ||
| 76 | + | let mut entries: Vec<_> = read_dir | |
| 62 | 77 | .filter_map(|e| e.ok()) | |
| 63 | 78 | .filter(|e| { | |
| 64 | 79 | e.path() | |
| @@ -92,21 +107,24 @@ impl DocLoader { | |||
| 92 | 107 | ); | |
| 93 | 108 | let md_without_title = crate::text::strip_first_heading(&rewritten_md); | |
| 94 | 109 | let html_content = crate::render_permissive(&md_without_title); | |
| 110 | + | #[cfg(feature = "directives")] | |
| 111 | + | let html_content = crate::directives::post_process_directives(&html_content); | |
| 95 | 112 | ||
| 96 | 113 | let page = DocPage { | |
| 97 | - | title: title.clone(), | |
| 98 | - | slug: slug.clone(), | |
| 114 | + | title, | |
| 115 | + | slug, | |
| 99 | 116 | section: section_display.clone(), | |
| 100 | 117 | html_content, | |
| 101 | 118 | }; | |
| 102 | 119 | ||
| 103 | 120 | index.push(DocIndexEntry { | |
| 104 | - | title: title.clone(), | |
| 105 | - | slug: slug.clone(), | |
| 106 | - | section: section_display.clone(), | |
| 121 | + | title: page.title.clone(), | |
| 122 | + | slug: page.slug.clone(), | |
| 123 | + | section: page.section.clone(), | |
| 107 | 124 | }); | |
| 108 | 125 | ||
| 109 | - | pages.insert(slug, page); | |
| 126 | + | let slug_key = page.slug.clone(); | |
| 127 | + | pages.insert(slug_key, page); | |
| 110 | 128 | } | |
| 111 | 129 | } | |
| 112 | 130 | ||
| @@ -122,6 +140,53 @@ impl DocLoader { | |||
| 122 | 140 | pub fn index(&self) -> &[DocIndexEntry] { | |
| 123 | 141 | &self.index | |
| 124 | 142 | } | |
| 143 | + | ||
| 144 | + | /// Build a search index with HTML stripped to plain text. | |
| 145 | + | pub fn search_index(&self) -> Vec<DocSearchEntry> { | |
| 146 | + | self.index | |
| 147 | + | .iter() | |
| 148 | + | .filter_map(|entry| { | |
| 149 | + | let page = self.pages.get(&entry.slug)?; | |
| 150 | + | Some(DocSearchEntry { | |
| 151 | + | slug: entry.slug.clone(), | |
| 152 | + | title: entry.title.clone(), | |
| 153 | + | section: entry.section.clone(), | |
| 154 | + | body_text: strip_html_tags(&page.html_content), | |
| 155 | + | }) | |
| 156 | + | }) | |
| 157 | + | .collect() | |
| 158 | + | } | |
| 159 | + | } | |
| 160 | + | ||
/// Strip HTML tags from a string, returning plain text.
///
/// Everything between `<` and `>` is dropped and replaced by a single space so
/// adjacent words stay separated; runs of whitespace are then collapsed and
/// the result is trimmed. Finally the common entities `&lt;`, `&gt;`,
/// `&quot;`, `&#x27;`, `&#39;` and `&amp;` are decoded so search indexes match
/// plain-text queries.
fn strip_html_tags(html: &str) -> String {
    let mut text = String::with_capacity(html.len());
    let mut in_tag = false;
    for ch in html.chars() {
        match ch {
            '<' => in_tag = true,
            '>' => {
                in_tag = false;
                // Add a space after closing tags to separate words.
                if !text.ends_with(' ') {
                    text.push(' ');
                }
            }
            _ if !in_tag => text.push(ch),
            _ => {}
        }
    }

    // Collapse runs of whitespace (also trims both ends).
    let collapsed = text.split_whitespace().collect::<Vec<_>>().join(" ");

    // `&amp;` must be decoded last: decoding it first would turn an escaped
    // entity such as `&amp;lt;` into `&lt;` and then wrongly into `<`.
    collapsed
        .replace("&lt;", "<")
        .replace("&gt;", ">")
        .replace("&quot;", "\"")
        .replace("&#x27;", "'")
        .replace("&#39;", "'")
        .replace("&amp;", "&")
}
| 126 | 191 | ||
| 127 | 192 | /// Rewrite relative `.md` links to the configured prefix. | |
| @@ -257,4 +322,33 @@ mod tests { | |||
| 257 | 322 | let result = rewrite_links(md, "/docs", None); | |
| 258 | 323 | assert_eq!(result, md); | |
| 259 | 324 | } | |
| 325 | + | ||
#[test]
fn strip_html_tags_removes_tags() {
    // Block and inline tags disappear; only their text content survives.
    let stripped = strip_html_tags("<p>Hello <strong>world</strong></p>");
    assert_eq!(stripped, "Hello world");
}
| 331 | + | ||
#[test]
fn strip_html_tags_empty_input() {
    // Empty input yields empty output.
    let stripped = strip_html_tags("");
    assert_eq!(stripped, "");
}
| 336 | + | ||
#[test]
fn strip_html_tags_decodes_entities() {
    // Ampersand entity.
    let html = "<p>Price: $10 &amp; free</p>";
    assert_eq!(strip_html_tags(html), "Price: $10 & free");

    // Angle-bracket entities must decode to text, not be treated as tags.
    let html2 = "<p>a &lt; b &gt; c</p>";
    assert_eq!(strip_html_tags(html2), "a < b > c");

    // Double- and single-quote entities.
    let html3 = "<p>&quot;hello&quot; &amp; &#x27;world&#x27;</p>";
    assert_eq!(strip_html_tags(html3), "\"hello\" & 'world'");
}
| 348 | + | ||
#[test]
fn strip_html_tags_nested_tags() {
    // Nesting depth is irrelevant: every tag is removed.
    let stripped = strip_html_tags("<div><p>A <em>nested <strong>deep</strong></em> tag</p></div>");
    assert_eq!(stripped, "A nested deep tag");
}
| 260 | 354 | } |
| @@ -0,0 +1,33 @@ | |||
/// HTML-escape a string for safe interpolation into element content or attributes.
///
/// Escapes all five HTML-significant characters: `& < > " '`, producing
/// `&amp;`, `&lt;`, `&gt;`, `&quot;` and `&#x27;` respectively. Every other
/// character is copied through unchanged.
pub(crate) fn html_escape(s: &str) -> String {
    // Single pass: avoids one intermediate `String` per replaced character
    // class and cannot re-escape the `&` of an entity it has just written.
    let mut escaped = String::with_capacity(s.len());
    for ch in s.chars() {
        match ch {
            '&' => escaped.push_str("&amp;"),
            '<' => escaped.push_str("&lt;"),
            '>' => escaped.push_str("&gt;"),
            '"' => escaped.push_str("&quot;"),
            '\'' => escaped.push_str("&#x27;"),
            other => escaped.push(other),
        }
    }
    escaped
}
| 11 | + | ||
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn escapes_all_five_chars() {
        // One occurrence of each significant character, in a fixed order.
        assert_eq!(
            html_escape("A & B < C > D \" E ' F"),
            "A &amp; B &lt; C &gt; D &quot; E &#x27; F"
        );
    }

    #[test]
    fn no_change_for_safe_string() {
        assert_eq!(html_escape("hello world"), "hello world");
    }

    #[test]
    fn empty_string() {
        assert_eq!(html_escape(""), "");
    }
}
| @@ -40,7 +40,10 @@ pub fn parse_frontmatter(input: &str) -> (Option<Frontmatter>, &str) { | |||
| 40 | 40 | ||
| 41 | 41 | match toml::from_str::<Frontmatter>(toml_content) { | |
| 42 | 42 | Ok(fm) => (Some(fm), rest_slice), | |
| 43 | - | Err(_) => (None, input), | |
| 43 | + | Err(e) => { | |
| 44 | + | tracing::warn!(error = %e, "Failed to parse TOML frontmatter"); | |
| 45 | + | (None, input) | |
| 46 | + | } | |
| 44 | 47 | } | |
| 45 | 48 | } else { | |
| 46 | 49 | (None, input) |
| @@ -1,10 +1,24 @@ | |||
| 1 | + | //! Configurable markdown-to-HTML rendering with sanitization presets. | |
| 2 | + | //! | |
| 3 | + | //! Provides four rendering presets for different trust levels: | |
| 4 | + | //! - **Permissive** -- full GFM (tables, footnotes, images, raw HTML). For trusted content. | |
| 5 | + | //! - **Standard** -- GFM without images. For app text fields. | |
| 6 | + | //! - **Strict** -- no images, no raw HTML, dangerous scheme filtering, nofollow. For UGC. | |
| 7 | + | //! - **Sanitize-only** -- ammonia cleaning without markdown parsing. For external HTML. | |
| 8 | + | //! | |
//! Optional features add document loading, TOML frontmatter, @mention resolution,
//! quote attribution post-processing, blockquote directives (alerts and code tabs),
//! and media URL rewriting.
| 11 | + | ||
| 1 | 12 | #[cfg(any(feature = "mentions", test))] | |
| 2 | 13 | mod code_spans; | |
| 14 | + | mod escape; | |
| 3 | 15 | mod render; | |
| 4 | 16 | mod sanitize; | |
| 5 | 17 | mod text; | |
| 6 | 18 | mod toc; | |
| 7 | 19 | ||
| 20 | + | #[cfg(feature = "directives")] | |
| 21 | + | mod directives; | |
| 8 | 22 | #[cfg(feature = "doc-loader")] | |
| 9 | 23 | mod doc_loader; | |
| 10 | 24 | #[cfg(feature = "frontmatter")] | |
| @@ -13,6 +27,8 @@ mod frontmatter; | |||
| 13 | 27 | mod mentions; | |
| 14 | 28 | #[cfg(feature = "quotes")] | |
| 15 | 29 | mod quotes; | |
| 30 | + | #[cfg(feature = "media-urls")] | |
| 31 | + | mod media_urls; | |
| 16 | 32 | ||
| 17 | 33 | // Re-export core types | |
| 18 | 34 | pub use render::{RenderResult, Renderer}; | |
| @@ -21,14 +37,18 @@ pub use text::{extract_title, reading_time_minutes, strip_first_heading, word_co | |||
| 21 | 37 | pub use toc::{TocEntry, extract_toc, render_toc_html}; | |
| 22 | 38 | ||
| 23 | 39 | // Re-export feature-gated types | |
| 40 | + | #[cfg(feature = "directives")] | |
| 41 | + | pub use directives::post_process_directives; | |
| 24 | 42 | #[cfg(feature = "doc-loader")] | |
| 25 | - | pub use doc_loader::{DocIndexEntry, DocLoader, DocLoaderConfig, DocPage}; | |
| 43 | + | pub use doc_loader::{DocIndexEntry, DocLoader, DocLoaderConfig, DocPage, DocSearchEntry}; | |
| 26 | 44 | #[cfg(feature = "frontmatter")] | |
| 27 | 45 | pub use frontmatter::{Frontmatter, parse_frontmatter}; | |
| 28 | 46 | #[cfg(feature = "mentions")] | |
| 29 | 47 | pub use mentions::{extract_mentions, resolve_mentions}; | |
| 30 | 48 | #[cfg(feature = "quotes")] | |
| 31 | 49 | pub use quotes::{QuoteAuthor, post_process_quotes}; | |
| 50 | + | #[cfg(feature = "media-urls")] | |
| 51 | + | pub use media_urls::{img_to_video, rewrite_media_paths}; | |
| 32 | 52 | ||
| 33 | 53 | /// Render markdown with the permissive preset (GFM features, default ammonia). | |
| 34 | 54 | pub fn render_permissive(markdown: &str) -> String { |
| @@ -0,0 +1,235 @@ | |||
| 1 | + | //! Pre-process and post-process markdown/HTML for media file references. | |
| 2 | + | //! | |
| 3 | + | //! Two-stage pipeline: | |
//! 1. **Pre-process markdown** — rewrite `![alt](folder/file.png)` to
//!    `![alt]({cdn_base}/{user_id}/media/folder/file.png)`.
| 6 | + | //! 2. **Post-process HTML** — convert `<img src="...file.mp4">` to | |
| 7 | + | //! `<video controls src="..."></video>`. | |
| 8 | + | ||
| 9 | + | use std::sync::LazyLock; | |
| 10 | + | ||
/// Matches markdown image syntax: `![alt](path)`
/// Captures: group 1 = alt text, group 2 = URL path
static MD_IMAGE_RE: LazyLock<regex_lite::Regex> = LazyLock::new(|| {
    regex_lite::Regex::new(r"!\[([^\]]*)\]\(([^)]+)\)").expect("valid markdown image regex")
});

/// Matches `<img` tags with a src pointing to a video extension.
/// Captures: group 1 = attributes before `src`, group 2 = the `src` value
/// (ending in `.mp4`, `.webm` or `.mov`), group 3 = attributes after `src`
static IMG_VIDEO_RE: LazyLock<regex_lite::Regex> = LazyLock::new(|| {
    regex_lite::Regex::new(
        r#"<img\s+([^>]*?)src="([^"]*\.(?:mp4|webm|mov))"([^>]*?)\s*/?>"#,
    )
    .expect("valid img video regex")
});

/// Matches `alt="..."` in an img tag's attributes.
/// Captures: group 1 = the alt text between the quotes
static ALT_RE: LazyLock<regex_lite::Regex> = LazyLock::new(|| {
    regex_lite::Regex::new(r#"alt="([^"]*)""#).expect("valid alt regex")
});
| 29 | + | ||
| 30 | + | /// Rewrite relative image paths in markdown to absolute CDN URLs. | |
| 31 | + | /// | |
| 32 | + | /// Skips: | |
| 33 | + | /// - Absolute URLs (`http://`, `https://`, `data:`) | |
| 34 | + | /// - Absolute paths starting with `/` | |
| 35 | + | /// - Paths containing `..` (path traversal) | |
| 36 | + | /// | |
| 37 | + | /// Rewrites relative paths to: `{cdn_base}/{user_id}/media/{path}` | |
| 38 | + | pub fn rewrite_media_paths(markdown: &str, cdn_base: &str, user_id: &str) -> String { | |
| 39 | + | let cdn_base = cdn_base.trim_end_matches('/'); | |
| 40 | + | ||
| 41 | + | MD_IMAGE_RE | |
| 42 | + | .replace_all(markdown, |caps: ®ex_lite::Captures| { | |
| 43 | + | let alt = &caps[1]; | |
| 44 | + | let path = &caps[2]; | |
| 45 | + | ||
| 46 | + | // Skip absolute URLs and data URIs | |
| 47 | + | if path.starts_with("http://") | |
| 48 | + | || path.starts_with("https://") | |
| 49 | + | || path.starts_with("data:") | |
| 50 | + | || path.starts_with('/') | |
| 51 | + | { | |
| 52 | + | return caps[0].to_string(); | |
| 53 | + | } | |
| 54 | + | ||
| 55 | + | // Reject path traversal | |
| 56 | + | if path.contains("..") { | |
| 57 | + | return caps[0].to_string(); | |
| 58 | + | } | |
| 59 | + | ||
| 60 | + | format!("", alt, cdn_base, user_id, path) | |
| 61 | + | }) | |
| 62 | + | .into_owned() | |
| 63 | + | } | |
| 64 | + | ||
| 65 | + | /// Convert `<img>` tags with video extensions (.mp4, .webm, .mov) to `<video>` elements. | |
| 66 | + | /// | |
| 67 | + | /// Preserves alt text as fallback content inside the `<video>` tag. | |
| 68 | + | pub fn img_to_video(html: &str) -> String { | |
| 69 | + | IMG_VIDEO_RE | |
| 70 | + | .replace_all(html, |caps: ®ex_lite::Captures| { | |
| 71 | + | let before_src = &caps[1]; | |
| 72 | + | let src = &caps[2]; | |
| 73 | + | let after_src = &caps[3]; | |
| 74 | + | ||
| 75 | + | // Extract alt text if present | |
| 76 | + | let attrs = format!("{}{}", before_src, after_src); | |
| 77 | + | let alt = ALT_RE | |
| 78 | + | .captures(&attrs) | |
| 79 | + | .map(|c| c[1].to_string()) | |
| 80 | + | .unwrap_or_default(); | |
| 81 | + | ||
| 82 | + | if alt.is_empty() { | |
| 83 | + | format!(r#"<video controls src="{}">Your browser does not support video.</video>"#, src) | |
| 84 | + | } else { | |
| 85 | + | format!( | |
| 86 | + | r#"<video controls src="{}">{}</video>"#, | |
| 87 | + | src, | |
| 88 | + | crate::escape::html_escape(&alt) | |
| 89 | + | ) | |
| 90 | + | } | |
| 91 | + | }) | |
| 92 | + | .into_owned() | |
| 93 | + | } | |
| 94 | + | ||
#[cfg(test)]
mod tests {
    use super::*;

    // ===== rewrite_media_paths =====

    #[test]
    fn relative_path_rewritten() {
        let md = "![Photo](folder/photo.png)";
        let result = rewrite_media_paths(md, "https://cdn.makenot.work", "user-123");
        assert_eq!(
            result,
            "![Photo](https://cdn.makenot.work/user-123/media/folder/photo.png)"
        );
    }

    #[test]
    fn absolute_url_unchanged() {
        let md = "![Photo](https://example.com/photo.png)";
        let result = rewrite_media_paths(md, "https://cdn.makenot.work", "user-123");
        assert_eq!(result, md);
    }

    #[test]
    fn http_url_unchanged() {
        let md = "![Photo](http://example.com/photo.png)";
        let result = rewrite_media_paths(md, "https://cdn.makenot.work", "user-123");
        assert_eq!(result, md);
    }

    #[test]
    fn data_uri_unchanged() {
        let md = "![Pixel](data:image/png;base64,iVBORw0KGgo=)";
        let result = rewrite_media_paths(md, "https://cdn.makenot.work", "user-123");
        assert_eq!(result, md);
    }

    #[test]
    fn absolute_path_unchanged() {
        let md = "![Logo](/static/logo.png)";
        let result = rewrite_media_paths(md, "https://cdn.makenot.work", "user-123");
        assert_eq!(result, md);
    }

    #[test]
    fn path_traversal_unchanged() {
        let md = "![Secret](../../etc/passwd)";
        let result = rewrite_media_paths(md, "https://cdn.makenot.work", "user-123");
        assert_eq!(result, md);
    }

    #[test]
    fn root_folder_file() {
        // A bare file name with no folder component.
        let md = "![Photo](photo.png)";
        let result = rewrite_media_paths(md, "https://cdn.makenot.work", "user-123");
        assert_eq!(
            result,
            "![Photo](https://cdn.makenot.work/user-123/media/photo.png)"
        );
    }

    #[test]
    fn cdn_base_trailing_slash_stripped() {
        let md = "![Photo](folder/photo.png)";
        let result = rewrite_media_paths(md, "https://cdn.makenot.work/", "user-123");
        assert_eq!(
            result,
            "![Photo](https://cdn.makenot.work/user-123/media/folder/photo.png)"
        );
    }

    // ===== img_to_video =====

    #[test]
    fn video_extension_to_video_tag() {
        let html = r#"<img src="https://cdn.makenot.work/u/media/demo.mp4" alt="Demo">"#;
        let result = img_to_video(html);
        assert!(result.contains("<video controls"));
        assert!(result.contains(r#"src="https://cdn.makenot.work/u/media/demo.mp4""#));
        assert!(result.contains("Demo"));
        assert!(result.contains("</video>"));
        assert!(!result.contains("<img"));
    }

    #[test]
    fn non_video_image_unchanged() {
        let html = r#"<img src="https://cdn.makenot.work/u/media/photo.png" alt="Photo">"#;
        let result = img_to_video(html);
        assert_eq!(result, html);
    }

    #[test]
    fn webm_converted() {
        let html = r#"<img src="clip.webm">"#;
        let result = img_to_video(html);
        assert!(result.contains("<video controls"));
        assert!(result.contains("</video>"));
    }

    #[test]
    fn mov_converted() {
        let html = r#"<img src="clip.mov" alt="Clip">"#;
        let result = img_to_video(html);
        assert!(result.contains("<video controls"));
        assert!(result.contains("Clip"));
    }

    // ===== Combined / edge cases =====

    #[test]
    fn mixed_content() {
        let md = "Text before\n\n![Image](folder/img.png)\n\nMore text\n\n![Video](folder/vid.mp4)\n\n![External](https://example.com/pic.jpg)";
        let rewritten = rewrite_media_paths(md, "https://cdn.makenot.work", "u1");
        assert!(rewritten.contains("https://cdn.makenot.work/u1/media/folder/img.png"));
        assert!(rewritten.contains("https://cdn.makenot.work/u1/media/folder/vid.mp4"));
        assert!(rewritten.contains("https://example.com/pic.jpg"));
    }

    #[test]
    fn empty_alt_text() {
        let md = "![](folder/photo.png)";
        let result = rewrite_media_paths(md, "https://cdn.makenot.work", "u1");
        assert_eq!(
            result,
            "![](https://cdn.makenot.work/u1/media/folder/photo.png)"
        );
    }

    #[test]
    fn video_tag_no_alt() {
        let html = r#"<img src="demo.mp4">"#;
        let result = img_to_video(html);
        assert!(result.contains("Your browser does not support video."));
    }

    #[test]
    fn multiple_images_in_html() {
        let html = r#"<img src="a.png" alt="A"><img src="b.mp4" alt="B"><img src="c.webm">"#;
        let result = img_to_video(html);
        // a.png stays as img
        assert!(result.contains(r#"<img src="a.png""#));
        // b.mp4 becomes video
        assert!(result.contains(r#"<video controls src="b.mp4">B</video>"#));
        // c.webm becomes video
        assert!(result.contains(r#"<video controls src="c.webm">"#));
    }
}
| @@ -1,5 +1,7 @@ | |||
| 1 | 1 | use std::collections::HashMap; | |
| 2 | 2 | ||
| 3 | + | use crate::escape::html_escape; | |
| 4 | + | ||
| 3 | 5 | /// Quote author info for attribution rendering. | |
| 4 | 6 | pub struct QuoteAuthor { | |
| 5 | 7 | pub username: String, | |
| @@ -7,15 +9,6 @@ pub struct QuoteAuthor { | |||
| 7 | 9 | pub is_removed: bool, | |
| 8 | 10 | } | |
| 9 | 11 | ||
| 10 | - | /// HTML-escape a string for safe interpolation into raw HTML. | |
| 11 | - | fn html_escape(s: &str) -> String { | |
| 12 | - | s.replace('&', "&amp;") | |
| 13 | - | .replace('<', "&lt;") | |
| 14 | - | .replace('>', "&gt;") | |
| 15 | - | .replace('"', "&quot;") | |
| 16 | - | .replace('\'', "'") | |
| 17 | - | } | |
| 18 | - | ||
| 19 | 12 | /// Post-process rendered HTML to replace `[quote:POST_ID:HASH]` markers with | |
| 20 | 13 | /// clickable author attribution. | |
| 21 | 14 | pub fn post_process_quotes( |
| @@ -3,7 +3,8 @@ | |||
| 3 | 3 | pub enum SanitizePreset { | |
| 4 | 4 | /// Default ammonia settings. Allows most safe HTML. | |
| 5 | 5 | Permissive, | |
| 6 | - | /// Default ammonia settings. Same as Permissive. | |
| 6 | + | /// Default ammonia settings (same sanitization as Permissive; the difference | |
| 7 | + | /// is at the Renderer level — Standard strips images, Permissive doesn't). | |
| 7 | 8 | Standard, | |
| 8 | 9 | /// Adds `rel="noopener noreferrer nofollow"` to all links. | |
| 9 | 10 | Strict, |
| @@ -1,5 +1,7 @@ | |||
| 1 | 1 | use pulldown_cmark::{Event, Options, Parser, Tag, TagEnd}; | |
| 2 | 2 | ||
| 3 | + | use crate::escape::html_escape; | |
| 4 | + | ||
| 3 | 5 | /// A single entry in a table of contents. | |
| 4 | 6 | #[derive(Debug, Clone, PartialEq, Eq)] | |
| 5 | 7 | pub struct TocEntry { | |
| @@ -57,7 +59,7 @@ pub fn render_toc_html(entries: &[TocEntry]) -> String { | |||
| 57 | 59 | html.push_str(&format!( | |
| 58 | 60 | "<li class=\"toc-h{}\"><a href=\"#{}\">{}</a></li>\n", | |
| 59 | 61 | entry.level, | |
| 60 | - | html_escape_attr(&entry.anchor), | |
| 62 | + | html_escape(&entry.anchor), | |
| 61 | 63 | html_escape(&entry.text), | |
| 62 | 64 | )); | |
| 63 | 65 | } | |
| @@ -75,19 +77,6 @@ fn make_anchor(text: &str) -> String { | |||
| 75 | 77 | .collect() | |
| 76 | 78 | } | |
| 77 | 79 | ||
| 78 | - | fn html_escape(s: &str) -> String { | |
| 79 | - | s.replace('&', "&amp;") | |
| 80 | - | .replace('<', "&lt;") | |
| 81 | - | .replace('>', "&gt;") | |
| 82 | - | } | |
| 83 | - | ||
| 84 | - | fn html_escape_attr(s: &str) -> String { | |
| 85 | - | s.replace('&', "&amp;") | |
| 86 | - | .replace('"', "&quot;") | |
| 87 | - | .replace('<', "&lt;") | |
| 88 | - | .replace('>', "&gt;") | |
| 89 | - | } | |
| 90 | - | ||
| 91 | 80 | #[cfg(test)] | |
| 92 | 81 | mod tests { | |
| 93 | 82 | use super::*; |