Skip to main content

max / makenotwork

7.9 KB · 236 lines History Blame Raw
1 //! Pre-process and post-process markdown/HTML for media file references.
2 //!
3 //! Two-stage pipeline:
4 //! 1. **Pre-process markdown**: rewrite `![alt](folder/file.png)` to
5 //! `![alt](https://cdn.makenot.work/{user_id}/media/folder/file.png)`.
6 //! 2. **Post-process HTML**: convert `<img src="...file.mp4">` to
7 //! `<video controls src="..."></video>`.
8
9 use std::sync::LazyLock;
10
11 /// Matches markdown image syntax: `![alt text](url)`
12 /// Captures: group 1 = alt text, group 2 = URL path
13 static MD_IMAGE_RE: LazyLock<regex_lite::Regex> = LazyLock::new(|| {
14 regex_lite::Regex::new(r"!\[([^\]]*)\]\(([^)]+)\)").expect("valid markdown image regex")
15 });
16
17 /// Matches `<img` tags with a src pointing to a video extension.
18 static IMG_VIDEO_RE: LazyLock<regex_lite::Regex> = LazyLock::new(|| {
19 regex_lite::Regex::new(
20 r#"<img\s+([^>]*?)src="([^"]*\.(?:mp4|webm|mov))"([^>]*?)\s*/?>"#,
21 )
22 .expect("valid img video regex")
23 });
24
25 /// Matches `alt="..."` in an img tag's attributes.
26 static ALT_RE: LazyLock<regex_lite::Regex> = LazyLock::new(|| {
27 regex_lite::Regex::new(r#"alt="([^"]*)""#).expect("valid alt regex")
28 });
29
30 /// Rewrite relative image paths in markdown to absolute CDN URLs.
31 ///
32 /// Skips:
33 /// - Absolute URLs (`http://`, `https://`, `data:`)
34 /// - Absolute paths starting with `/`
35 /// - Paths containing `..` (path traversal)
36 ///
37 /// Rewrites relative paths to: `{cdn_base}/{user_id}/media/{path}`
38 pub fn rewrite_media_paths(markdown: &str, cdn_base: &str, user_id: &str) -> String {
39 let cdn_base = cdn_base.trim_end_matches('/');
40
41 MD_IMAGE_RE
42 .replace_all(markdown, |caps: &regex_lite::Captures| {
43 let alt = &caps[1];
44 let path = &caps[2];
45
46 // Skip absolute URLs and data URIs
47 if path.starts_with("http://")
48 || path.starts_with("https://")
49 || path.starts_with("data:")
50 || path.starts_with('/')
51 {
52 return caps[0].to_string();
53 }
54
55 // Reject path traversal
56 if path.contains("..") {
57 return caps[0].to_string();
58 }
59
60 format!("![{}]({}/{}/media/{})", alt, cdn_base, user_id, path)
61 })
62 .into_owned()
63 }
64
65 /// Convert `<img>` tags with video extensions (.mp4, .webm, .mov) to `<video>` elements.
66 ///
67 /// Preserves alt text as fallback content inside the `<video>` tag.
68 pub fn img_to_video(html: &str) -> String {
69 IMG_VIDEO_RE
70 .replace_all(html, |caps: &regex_lite::Captures| {
71 let before_src = &caps[1];
72 let src = &caps[2];
73 let after_src = &caps[3];
74
75 // Extract alt text if present
76 let attrs = format!("{}{}", before_src, after_src);
77 let alt = ALT_RE
78 .captures(&attrs)
79 .map(|c| c[1].to_string())
80 .unwrap_or_default();
81
82 if alt.is_empty() {
83 format!(r#"<video controls src="{}">Your browser does not support video.</video>"#, src)
84 } else {
85 format!(
86 r#"<video controls src="{}">{}</video>"#,
87 src,
88 crate::escape::html_escape(&alt)
89 )
90 }
91 })
92 .into_owned()
93 }
94
#[cfg(test)]
mod tests {
    use super::*;

    /// CDN origin shared by the path-rewriting tests.
    const CDN: &str = "https://cdn.makenot.work";

    /// Runs `rewrite_media_paths` against the shared CDN origin.
    fn rewrite(md: &str, user_id: &str) -> String {
        rewrite_media_paths(md, CDN, user_id)
    }

    /// Asserts that `md` comes back from `rewrite_media_paths` untouched.
    fn assert_untouched(md: &str) {
        assert_eq!(rewrite(md, "user-123"), md);
    }

    // --- rewrite_media_paths: destinations that get rewritten ---

    #[test]
    fn relative_path_rewritten() {
        let expected = "![Screenshot](https://cdn.makenot.work/user-123/media/screenshots/demo.png)";
        assert_eq!(rewrite("![Screenshot](screenshots/demo.png)", "user-123"), expected);
    }

    #[test]
    fn root_folder_file() {
        let expected = "![Photo](https://cdn.makenot.work/user-123/media/photo.jpg)";
        assert_eq!(rewrite("![Photo](photo.jpg)", "user-123"), expected);
    }

    #[test]
    fn cdn_base_trailing_slash_stripped() {
        let out = rewrite_media_paths("![Img](img.png)", "https://cdn.makenot.work/", "user-123");
        assert_eq!(out, "![Img](https://cdn.makenot.work/user-123/media/img.png)");
    }

    #[test]
    fn empty_alt_text() {
        let expected = "![](https://cdn.makenot.work/u1/media/photo.jpg)";
        assert_eq!(rewrite("![](photo.jpg)", "u1"), expected);
    }

    #[test]
    fn mixed_content() {
        let md = "Text before\n\n![Img](folder/img.png)\n\nMore text\n\n![Vid](folder/vid.mp4)\n\n![External](https://example.com/pic.jpg)";
        let rewritten = rewrite(md, "u1");
        let expected_urls = [
            "https://cdn.makenot.work/u1/media/folder/img.png",
            "https://cdn.makenot.work/u1/media/folder/vid.mp4",
            "https://example.com/pic.jpg",
        ];
        for url in expected_urls {
            assert!(rewritten.contains(url));
        }
    }

    // --- rewrite_media_paths: destinations that are left alone ---

    #[test]
    fn absolute_url_unchanged() {
        assert_untouched("![Logo](https://example.com/logo.png)");
    }

    #[test]
    fn http_url_unchanged() {
        assert_untouched("![Logo](http://example.com/logo.png)");
    }

    #[test]
    fn data_uri_unchanged() {
        assert_untouched("![Pixel](data:image/png;base64,abc)");
    }

    #[test]
    fn absolute_path_unchanged() {
        assert_untouched("![Doc](/static/images/docs/setup.png)");
    }

    #[test]
    fn path_traversal_unchanged() {
        assert_untouched("![Hack](../../../etc/passwd)");
    }

    // --- img_to_video ---

    #[test]
    fn video_extension_to_video_tag() {
        let out = img_to_video(r#"<img src="https://cdn.makenot.work/u/media/demo.mp4" alt="Demo">"#);
        let expected_fragments = [
            "<video controls",
            r#"src="https://cdn.makenot.work/u/media/demo.mp4""#,
            "Demo",
            "</video>",
        ];
        for fragment in expected_fragments {
            assert!(out.contains(fragment));
        }
        assert!(!out.contains("<img"));
    }

    #[test]
    fn non_video_image_unchanged() {
        let html = r#"<img src="https://cdn.makenot.work/u/media/photo.png" alt="Photo">"#;
        assert_eq!(img_to_video(html), html);
    }

    #[test]
    fn webm_converted() {
        let out = img_to_video(r#"<img src="clip.webm">"#);
        assert!(out.contains("<video controls"));
        assert!(out.contains("</video>"));
    }

    #[test]
    fn mov_converted() {
        let out = img_to_video(r#"<img src="clip.mov" alt="Clip">"#);
        assert!(out.contains("<video controls"));
        assert!(out.contains("Clip"));
    }

    #[test]
    fn video_tag_no_alt() {
        let out = img_to_video(r#"<img src="demo.mp4">"#);
        assert!(out.contains("Your browser does not support video."));
    }

    #[test]
    fn multiple_images_in_html() {
        let html = r#"<img src="a.png" alt="A"><img src="b.mp4" alt="B"><img src="c.webm">"#;
        let out = img_to_video(html);
        // The PNG is not a video and must remain an <img>.
        assert!(out.contains(r#"<img src="a.png""#));
        // The MP4 becomes a <video> with its alt text as fallback content.
        assert!(out.contains(r#"<video controls src="b.mp4">B</video>"#));
        // The WebM becomes a <video> as well.
        assert!(out.contains(r#"<video controls src="c.webm">"#));
    }
}
236