//! Custom Pages sanitization (Phase 1 foundation). //! //! Creators author raw HTML and CSS for their profile and project pages. This //! module turns that input into safe, closed-system page content: no scripting, //! no off-platform references, and CSS that cannot escape the user canvas to //! touch platform chrome. See `plans/custom-pages.md` for the full design. //! //! Three layers, one gate: //! - [`url_filter`] — the single rule that every URL (HTML attribute or CSS //! `url()`) must resolve to MNW itself. //! - [`html_sanitizer`] — an `ammonia` allowlist (structure, text, media; no //! script/embed/form/inline-style). //! - [`css_sanitizer`] — a `lightningcss` pass that scopes all selectors to the //! canvas, filters at-rules, validates `url()`, and strips system-slot hiding. //! //! Sanitization is **render-time**, not write-time: the editor stores the //! creator's *raw* HTML/CSS, and `sanitize_page` runs on every render of the //! public page (on the cookieless, `default-src 'none'` host). The save path //! runs the sanitizer only to *count* what would be stripped, for the editor's //! blocked-references panel — it does not persist sanitized output. So the XSS //! boundary is the render call, not the database: never inline stored //! `custom_html`/`custom_css` anywhere without running them through this module //! first. mod css_sanitizer; mod html_sanitizer; mod url_filter; pub use css_sanitizer::{sanitize_css, sanitize_item_css}; pub use html_sanitizer::sanitize_html; pub use url_filter::UrlPolicy; /// Why a single reference was stripped. Surfaced in the editor's /// blocked-references panel -- the primary teaching surface for the /// closed-system rule. #[derive(Debug, Clone, PartialEq, Eq)] pub enum RejectionKind { /// URL resolved to an off-platform host. ExternalUrl, /// URL carried a non-https scheme (`data:`, `javascript:`, `mailto:`, ...). DisallowedScheme, /// URL could not be parsed. MalformedUrl, /// A CSS at-rule outside the allowlist (`@import`, `@namespace`, ...). BlockedAtRule, /// A dangerous CSS function (`expression()`). BlockedFunction, /// A property that would hide a non-removable system slot (`.mnw-*`). HidingProperty, /// A fast infinite animation (strobe guard). AnimationBudget, /// Stylesheet exceeded a complexity cap (DoS guard). ComplexityLimit, /// CSS that could not be parsed at all. MalformedCss, } /// One stripped reference, with enough context for the editor to point at it. #[derive(Debug, Clone, PartialEq, Eq)] pub struct Rejection { pub kind: RejectionKind, /// Human-readable origin, e.g. `"img src"`, `"css url()"`, `"@import"`. pub location: String, /// The value as the creator wrote it. pub original_value: String, /// One-line explanation shown to the creator. pub reason: String, } /// Maximum style rules in a sanitized sheet (quadratic-matching DoS guard). /// Far above any reasonable page. pub(crate) const MAX_RULES: usize = 5000; /// Maximum selectors across a sanitized sheet. pub(crate) const MAX_SELECTORS: usize = 10000; /// Sanitize a full custom page (HTML + CSS together). /// /// `owner_scope` is the id woven into the canvas selector /// `.user-canvas#uc-{owner_scope}` that all CSS is confined to -- pass the /// owner's UUID. Returns sanitized HTML, sanitized CSS, and every reference the /// sanitizer stripped (deduplicated only by being appended in order). pub fn sanitize_page( html: &str, css: &str, owner_scope: &str, policy: &UrlPolicy, ) -> (String, String, Vec) { let (clean_html, mut rejections) = sanitize_html(html, policy); let (clean_css, css_rejections) = sanitize_css(css, owner_scope, policy); rejections.extend(css_rejections); (clean_html, clean_css, rejections) } #[cfg(test)] mod tests { use super::*; fn policy() -> UrlPolicy { UrlPolicy::new( "https://u.makenot.work/alice/proj", ["makenot.work".to_string(), "u.makenot.work".to_string(), "cdn.makenot.work".to_string()], ) .unwrap() } #[test] fn page_sanitizes_both_and_collects_rejections() { let (html, css, rej) = sanitize_page( "

hi

", "body { color: red } .x { background: url(https://evil.com/y) }", "11111111-1111-1111-1111-111111111111", &policy(), ); assert!(html.contains("hi")); assert!(!html.contains("evil")); // CSS is scoped to the canvas. assert!(css.contains(".user-canvas#uc-11111111-1111-1111-1111-111111111111")); // body got neutralized into the canvas; off-platform url stripped. assert!(!css.contains("evil.com")); // At least the two external refs were recorded. assert!(rej.len() >= 2, "expected rejections, got {rej:?}"); } }