Skip to main content

max / makenotwork

4.8 KB · 124 lines History Blame Raw
1 //! Custom Pages sanitization (Phase 1 foundation).
2 //!
3 //! Creators author raw HTML and CSS for their profile and project pages. This
4 //! module turns that input into safe, closed-system page content: no scripting,
5 //! no off-platform references, and CSS that cannot escape the user canvas to
6 //! touch platform chrome. See `plans/custom-pages.md` for the full design.
7 //!
8 //! Three layers, one gate:
9 //! - [`url_filter`] — the single rule that every URL (HTML attribute or CSS
10 //! `url()`) must resolve to MNW itself.
11 //! - [`html_sanitizer`] — an `ammonia` allowlist (structure, text, media; no
12 //! script/embed/form/inline-style).
13 //! - [`css_sanitizer`] — a `lightningcss` pass that scopes all selectors to the
14 //! canvas, filters at-rules, validates `url()`, and strips system-slot hiding.
15 //!
16 //! Sanitization is **render-time**, not write-time: the editor stores the
17 //! creator's *raw* HTML/CSS, and `sanitize_page` runs on every render of the
18 //! public page (on the cookieless, `default-src 'none'` host). The save path
19 //! runs the sanitizer only to *count* what would be stripped, for the editor's
20 //! blocked-references panel — it does not persist sanitized output. So the XSS
21 //! boundary is the render call, not the database: never inline stored
22 //! `custom_html`/`custom_css` anywhere without running them through this module
23 //! first.
24
25 mod css_sanitizer;
26 mod html_sanitizer;
27 mod url_filter;
28
29 pub use css_sanitizer::{sanitize_css, sanitize_item_css};
30 pub use html_sanitizer::sanitize_html;
31 pub use url_filter::UrlPolicy;
32
/// The category of a single stripped reference.
///
/// Surfaced in the editor's blocked-references panel, which is the primary
/// teaching surface for the closed-system rule.
///
/// The enum carries no data, so it is `Copy` and `Hash`: a kind can be passed
/// by value and used as a map/set key (for example to count rejections per
/// category) without cloning.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
pub enum RejectionKind {
    /// The URL resolved to an off-platform host.
    ExternalUrl,
    /// The URL used a non-https scheme (`data:`, `javascript:`, `mailto:`, ...).
    DisallowedScheme,
    /// The URL could not be parsed.
    MalformedUrl,
    /// A CSS at-rule outside the allowlist (`@import`, `@namespace`, ...).
    BlockedAtRule,
    /// A dangerous CSS function (`expression()`).
    BlockedFunction,
    /// A property that would hide a non-removable system slot (`.mnw-*`).
    HidingProperty,
    /// A fast infinite animation (strobe guard).
    AnimationBudget,
    /// The stylesheet exceeded a complexity cap (DoS guard).
    ComplexityLimit,
    /// The CSS could not be parsed at all.
    MalformedCss,
}
57
58 /// One stripped reference, with enough context for the editor to point at it.
59 #[derive(Debug, Clone, PartialEq, Eq)]
60 pub struct Rejection {
61 pub kind: RejectionKind,
62 /// Human-readable origin, e.g. `"img src"`, `"css url()"`, `"@import"`.
63 pub location: String,
64 /// The value as the creator wrote it.
65 pub original_value: String,
66 /// One-line explanation shown to the creator.
67 pub reason: String,
68 }
69
/// Cap on the number of style rules in a sanitized sheet.
///
/// Acts as a guard against quadratic selector-matching DoS; the limit sits far
/// above what any reasonable page needs.
pub(crate) const MAX_RULES: usize = 5_000;

/// Cap on the total number of selectors across a sanitized sheet.
// NOTE(review): both caps are only declared here; enforcement presumably lives
// in `css_sanitizer` — confirm.
pub(crate) const MAX_SELECTORS: usize = 10_000;
75
76 /// Sanitize a full custom page (HTML + CSS together).
77 ///
78 /// `owner_scope` is the id woven into the canvas selector
79 /// `.user-canvas#uc-{owner_scope}` that all CSS is confined to -- pass the
80 /// owner's UUID. Returns sanitized HTML, sanitized CSS, and every reference the
81 /// sanitizer stripped (deduplicated only by being appended in order).
82 pub fn sanitize_page(
83 html: &str,
84 css: &str,
85 owner_scope: &str,
86 policy: &UrlPolicy,
87 ) -> (String, String, Vec<Rejection>) {
88 let (clean_html, mut rejections) = sanitize_html(html, policy);
89 let (clean_css, css_rejections) = sanitize_css(css, owner_scope, policy);
90 rejections.extend(css_rejections);
91 (clean_html, clean_css, rejections)
92 }
93
#[cfg(test)]
mod tests {
    use super::*;

    /// URL policy shared by the tests: a project page on `u.makenot.work`
    /// with the three platform hosts passed as the host list.
    fn platform_policy() -> UrlPolicy {
        let hosts = ["makenot.work", "u.makenot.work", "cdn.makenot.work"].map(String::from);
        UrlPolicy::new("https://u.makenot.work/alice/proj", hosts).unwrap()
    }

    #[test]
    fn page_sanitizes_both_and_collects_rejections() {
        let raw_html = "<p>hi</p><script>evil()</script><img src=\"https://evil.com/x\">";
        let raw_css = "body { color: red } .x { background: url(https://evil.com/y) }";
        let url_policy = platform_policy();

        let (html, css, rej) = sanitize_page(
            raw_html,
            raw_css,
            "11111111-1111-1111-1111-111111111111",
            &url_policy,
        );

        // Benign text survives; neither the script body nor the off-platform
        // image URL does.
        assert!(html.contains("hi"));
        assert!(!html.contains("evil"));

        // The stylesheet is confined to the owner's canvas selector.
        assert!(css.contains(".user-canvas#uc-11111111-1111-1111-1111-111111111111"));

        // The off-platform `url()` is gone from the output CSS.
        assert!(!css.contains("evil.com"));

        // Both external references (the HTML one and the CSS one) must have
        // been recorded, possibly alongside other rejections.
        assert!(rej.len() >= 2, "expected rejections, got {rej:?}");
    }
}
124