use pulldown_cmark::{CowStr, Event, Options, Parser, Tag, TagEnd, html}; use crate::sanitize::SanitizePreset; /// Returns true if the URL uses a scheme not in the safe allowlist. /// /// Safe schemes: `http`, `https`, `mailto`, `ftp`. Relative URLs (no scheme) are safe. fn has_dangerous_scheme(url: &str) -> bool { let trimmed = url.trim(); if let Some(colon_pos) = trimmed.find(':') { let before_colon = &trimmed[..colon_pos]; if before_colon.contains('/') || before_colon.contains('#') || before_colon.contains('?') { return false; } let scheme = before_colon.to_ascii_lowercase(); !matches!(scheme.as_str(), "http" | "https" | "mailto" | "ftp") } else { false } } /// Result of rendering markdown with metadata. #[derive(Debug, Clone)] pub struct RenderResult { pub html: String, pub word_count: u32, pub reading_time_minutes: u32, } /// Configurable markdown renderer with builder pattern. pub struct Renderer { tables: bool, strikethrough: bool, footnotes: bool, smart_punctuation: bool, tasklists: bool, strip_images: bool, strip_raw_html: bool, dangerous_scheme_filter: bool, sanitize: SanitizePreset, } impl Renderer { /// GFM features, default ammonia sanitization. Suitable for trusted content /// like docs and blog posts. pub fn permissive() -> Self { Self { tables: true, strikethrough: true, footnotes: true, smart_punctuation: true, tasklists: true, strip_images: false, strip_raw_html: false, dangerous_scheme_filter: false, sanitize: SanitizePreset::Permissive, } } /// GFM features, no images. Suitable for app text fields (descriptions, /// notes). pub fn standard() -> Self { Self { tables: true, strikethrough: true, footnotes: false, smart_punctuation: true, tasklists: true, strip_images: true, strip_raw_html: false, dangerous_scheme_filter: false, sanitize: SanitizePreset::Standard, } } /// No images, no raw HTML, dangerous scheme blocking, nofollow on links. /// Suitable for user-generated content (forum posts). 
pub fn strict() -> Self {
    Self {
        // Every event filter is on.
        strip_images: true,
        strip_raw_html: true,
        dangerous_scheme_filter: true,
        sanitize: SanitizePreset::Strict,
        // Every parser extension is off.
        tables: false,
        strikethrough: false,
        footnotes: false,
        smart_punctuation: false,
        tasklists: false,
    }
}

/// Preset with every parser extension and event filter switched off and
/// ammonia sanitization only. Suitable for HTML from external sources (RSS
/// feeds); presumably meant to be paired with `sanitize_html`.
pub fn sanitize_only() -> Self {
    Self {
        strip_images: false,
        strip_raw_html: false,
        dangerous_scheme_filter: false,
        sanitize: SanitizePreset::Permissive,
        tables: false,
        strikethrough: false,
        footnotes: false,
        smart_punctuation: false,
        tasklists: false,
    }
}

/// Sets whether the tables extension is enabled.
pub fn with_tables(self, enabled: bool) -> Self {
    Self {
        tables: enabled,
        ..self
    }
}

/// Sets whether the strikethrough extension is enabled.
pub fn with_strikethrough(self, enabled: bool) -> Self {
    Self {
        strikethrough: enabled,
        ..self
    }
}

/// Sets whether the footnotes extension is enabled.
pub fn with_footnotes(self, enabled: bool) -> Self {
    Self {
        footnotes: enabled,
        ..self
    }
}

/// Sets whether smart punctuation is enabled.
pub fn with_smart_punctuation(self, enabled: bool) -> Self {
    Self {
        smart_punctuation: enabled,
        ..self
    }
}

/// Sets whether the task list extension is enabled.
pub fn with_tasklists(self, enabled: bool) -> Self {
    Self {
        tasklists: enabled,
        ..self
    }
}

/// Sets whether image tags are dropped from the output.
pub fn with_strip_images(self, enabled: bool) -> Self {
    Self {
        strip_images: enabled,
        ..self
    }
}

/// Sets whether raw HTML events are dropped from the output.
pub fn with_strip_raw_html(self, enabled: bool) -> Self {
    Self {
        strip_raw_html: enabled,
        ..self
    }
}

/// Sets whether link destinations with a non-allowlisted scheme are
/// neutralized.
pub fn with_dangerous_scheme_filter(self, enabled: bool) -> Self {
    Self {
        dangerous_scheme_filter: enabled,
        ..self
    }
}

/// Replaces the sanitization preset.
pub fn with_sanitize(self, preset: SanitizePreset) -> Self {
    Self {
        sanitize: preset,
        ..self
    }
}

/// Translates the enabled extension flags into parser `Options`.
fn build_options(&self) -> Options {
    let toggles = [
        (self.tables, Options::ENABLE_TABLES),
        (self.strikethrough, Options::ENABLE_STRIKETHROUGH),
        (self.footnotes, Options::ENABLE_FOOTNOTES),
        (self.smart_punctuation, Options::ENABLE_SMART_PUNCTUATION),
        (self.tasklists, Options::ENABLE_TASKLISTS),
    ];

    let mut opts = Options::empty();
    for (enabled, flag) in toggles {
        if enabled {
            opts.insert(flag);
        }
    }
    opts
}

/// Render markdown to sanitized HTML.
pub fn render(&self, input: &str) -> String { if input.is_empty() { return String::new(); } let html_output = self.render_raw(input); self.sanitize.clean(&html_output) } /// Render markdown to sanitized HTML with metadata. pub fn render_with_meta(&self, input: &str) -> RenderResult { let html = self.render(input); let wc = crate::text::word_count(input); RenderResult { html, word_count: wc, reading_time_minutes: crate::text::reading_time_minutes(wc), } } /// Sanitize pre-rendered HTML without markdown parsing. pub fn sanitize_html(&self, html: &str) -> String { self.sanitize.clean(html) } fn render_raw(&self, input: &str) -> String { let options = self.build_options(); let parser = Parser::new_ext(input, options); let strip_images = self.strip_images; let strip_raw_html = self.strip_raw_html; let scheme_filter = self.dangerous_scheme_filter; let filtered = parser.filter_map(move |event| match event { // Strip raw HTML events Event::Html(_) | Event::InlineHtml(_) if strip_raw_html => None, // Neutralize dangerous schemes on links Event::Start(Tag::Link { link_type, dest_url, title, id, }) if scheme_filter && has_dangerous_scheme(&dest_url) => { Some(Event::Start(Tag::Link { link_type, dest_url: CowStr::Borrowed("#"), title, id, })) } // Strip images entirely (alt text passes through as plain text) Event::Start(Tag::Image { .. 
}) | Event::End(TagEnd::Image) if strip_images => None, other => Some(other), }); let mut output = String::new(); html::push_html(&mut output, filtered); output } } #[cfg(test)] mod tests { use super::*; // ===== has_dangerous_scheme ===== #[test] fn safe_schemes() { assert!(!has_dangerous_scheme("https://example.com")); assert!(!has_dangerous_scheme("http://example.com")); assert!(!has_dangerous_scheme("mailto:user@example.com")); assert!(!has_dangerous_scheme("ftp://files.example.com")); } #[test] fn dangerous_schemes() { assert!(has_dangerous_scheme("javascript:alert(1)")); assert!(has_dangerous_scheme("data:text/html, world"); assert!(!html.contains(""); assert!(!html.contains("")); } #[test] fn strict_strips_inline_html() { let r = Renderer::strict(); let html = r.render("hello bold world"); assert!(!html.contains("")); assert!(html.contains("hello")); assert!(html.contains("world")); } #[test] fn strict_strips_images() { let r = Renderer::strict(); let html = r.render("![alt text](https://example.com/img.png)"); assert!(!html.contains("alert(1))"); assert!(!html.contains("data:text")); } #[test] fn strict_neutralizes_vbscript() { let r = Renderer::strict(); let html = r.render("[xss](vbscript:msgbox)"); assert!(!html.contains("vbscript:")); } #[test] fn strict_preserves_safe_urls() { let r = Renderer::strict(); let html = r.render("[link](https://example.com)"); assert!(html.contains(r#"href="https://example.com""#)); let html = r.render("[mail](mailto:user@example.com)"); assert!(html.contains(r#"href="mailto:user@example.com""#)); } #[test] fn strict_preserves_relative_urls() { let r = Renderer::strict(); let html = r.render("[page](/about)"); assert!(html.contains(r#"href="/about""#)); let html = r.render("[section](#heading)"); assert!(html.contains(r##"href="#heading""##)); } #[test] fn strict_links_have_nofollow() { let r = Renderer::strict(); let html = r.render("[example](https://example.com)"); assert!(result_has_rel(&html, "nofollow")); 
assert!(result_has_rel(&html, "noopener")); } #[test] fn strict_bold_italic() { let r = Renderer::strict(); let html = r.render("**bold** and *italic*"); assert!(html.contains("bold")); assert!(html.contains("italic")); } #[test] fn strict_inline_code() { let r = Renderer::strict(); let html = r.render("use `foo()` here"); assert!(html.contains("foo()")); } #[test] fn strict_code_block() { let r = Renderer::strict(); let html = r.render("```\nlet x = 1;\n```"); assert!(html.contains("
"));
        assert!(html.contains("let x = 1;"));
    }

    #[test]
    fn strict_blockquote() {
        let r = Renderer::strict();
        let html = r.render("> quoted text");
        assert!(html.contains("
")); assert!(html.contains("quoted text")); } #[test] fn strict_unordered_list() { let r = Renderer::strict(); let html = r.render("- item one\n- item two"); assert!(html.contains("
    ")); assert!(html.contains("
  • item one
  • ")); } #[test] fn strict_heading() { let r = Renderer::strict(); let html = r.render("## Section Title"); assert!(html.contains("

    Section Title

    ")); } #[test] fn strict_plain_text() { let r = Renderer::strict(); assert_eq!(r.render("hello world"), "

    hello world

    \n"); } #[test] fn strict_empty_input() { assert_eq!(Renderer::strict().render(""), ""); } // ===== Sanitize-only preset ===== #[test] fn sanitize_only_cleans_html() { let r = Renderer::sanitize_only(); let html = r.sanitize_html("

    Hello

    "); assert!(html.contains("

    Hello

    ")); assert!(!html.contains("