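//! Pre-process and post-process markdown/HTML for media file references.
//!
//! Two-stage pipeline:
//! 1. **Pre-process markdown** — rewrite `![alt](path)` to
//!    `![alt]({cdn_base}/{user_id}/media/{path})`.
//! 2. **Post-process HTML** — convert `<img>` tags whose `src` ends in a
//!    video extension (`.mp4`, `.webm`, `.mov`) to `<video>` elements.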
use std::sync::LazyLock;
/// Matches markdown image syntax: ``
/// Captures: group 1 = alt text, group 2 = URL path
static MD_IMAGE_RE: LazyLock = LazyLock::new(|| {
regex_lite::Regex::new(r"!\[([^\]]*)\]\(([^)]+)\)").expect("valid markdown image regex")
});
/// Matches `
= LazyLock::new(|| {
regex_lite::Regex::new(
r#"
]*?)src="([^"]*\.(?:mp4|webm|mov))"([^>]*?)\s*/?>"#,
)
.expect("valid img video regex")
});
/// Matches `alt="..."` in an img tag's attributes.
static ALT_RE: LazyLock = LazyLock::new(|| {
regex_lite::Regex::new(r#"alt="([^"]*)""#).expect("valid alt regex")
});
/// Rewrite relative image paths in markdown to absolute CDN URLs.
///
/// Skips:
/// - Absolute URLs (`http://`, `https://`, `data:`)
/// - Absolute paths starting with `/`
/// - Paths containing `..` (path traversal)
///
/// Rewrites relative paths to: `{cdn_base}/{user_id}/media/{path}`
pub fn rewrite_media_paths(markdown: &str, cdn_base: &str, user_id: &str) -> String {
let cdn_base = cdn_base.trim_end_matches('/');
MD_IMAGE_RE
.replace_all(markdown, |caps: ®ex_lite::Captures| {
let alt = &caps[1];
let path = &caps[2];
// Skip absolute URLs and data URIs
if path.starts_with("http://")
|| path.starts_with("https://")
|| path.starts_with("data:")
|| path.starts_with('/')
{
return caps[0].to_string();
}
// Reject path traversal
if path.contains("..") {
return caps[0].to_string();
}
format!("", alt, cdn_base, user_id, path)
})
.into_owned()
}
/// Convert `
` tags with video extensions (.mp4, .webm, .mov) to `