//! Pre-process and post-process markdown/HTML for media file references. //! //! Two-stage pipeline: //! 1. **Pre-process markdown**: rewrite `![alt](folder/file.png)` to //! `![alt](https://cdn.makenot.work/{user_id}/media/folder/file.png)`. //! 2. **Post-process HTML**: convert `` to //! ``. use std::sync::LazyLock; /// Matches markdown image syntax: `![alt text](url)` /// Captures: group 1 = alt text, group 2 = URL path static MD_IMAGE_RE: LazyLock = LazyLock::new(|| { regex_lite::Regex::new(r"!\[([^\]]*)\]\(([^)]+)\)").expect("valid markdown image regex") }); /// Matches ` = LazyLock::new(|| { regex_lite::Regex::new( r#"]*?)src="([^"]*\.(?:mp4|webm|mov))"([^>]*?)\s*/?>"#, ) .expect("valid img video regex") }); /// Matches `alt="..."` in an img tag's attributes. static ALT_RE: LazyLock = LazyLock::new(|| { regex_lite::Regex::new(r#"alt="([^"]*)""#).expect("valid alt regex") }); /// Rewrite relative image paths in markdown to absolute CDN URLs. /// /// Skips: /// - Absolute URLs (`http://`, `https://`, `data:`) /// - Absolute paths starting with `/` /// - Paths containing `..` (path traversal) /// /// Rewrites relative paths to: `{cdn_base}/{user_id}/media/{path}` pub fn rewrite_media_paths(markdown: &str, cdn_base: &str, user_id: &str) -> String { let cdn_base = cdn_base.trim_end_matches('/'); MD_IMAGE_RE .replace_all(markdown, |caps: ®ex_lite::Captures| { let alt = &caps[1]; let path = &caps[2]; // Skip absolute URLs and data URIs if path.starts_with("http://") || path.starts_with("https://") || path.starts_with("data:") || path.starts_with('/') { return caps[0].to_string(); } // Reject path traversal if path.contains("..") { return caps[0].to_string(); } format!("![{}]({}/{}/media/{})", alt, cdn_base, user_id, path) }) .into_owned() } /// Convert `` tags with video extensions (.mp4, .webm, .mov) to `