Skip to main content

max / docengine

15.5 KB · 444 lines History Blame Raw
//! Post-process rendered HTML to convert blockquote-based directives into
//! styled elements.
//!
//! **Alerts:** `> [!NOTE]`, `> [!TIP]`, `> [!WARNING]`, `> [!CAUTION]`,
//! `> [!IMPORTANT]`, and any custom `> [!TYPE]` marker become styled
//! `<div class="alert alert-{type}">` callout elements.
//!
//! **Code tabs:** `> [!TABS]` followed by fenced code blocks becomes a tabbed
//! interface with language-labelled tabs.
10
11 use std::sync::LazyLock;
12
13 /// Matches any `[!TYPE]` alert marker inside a blockquote paragraph.
14 /// Accepts any uppercase word (letters, digits, hyphens, underscores).
15 static ALERT_RE: LazyLock<regex_lite::Regex> = LazyLock::new(|| {
16 regex_lite::Regex::new(
17 r"<blockquote>\s*<p>\[!([A-Z][A-Z0-9_-]*)\](?:<br\s*/?>)?\s*",
18 )
19 .expect("valid alert regex")
20 });
21
22 /// Process all directives: code tabs first, then alerts.
23 pub fn post_process_directives(html: &str) -> String {
24 let with_tabs = process_tabs(html);
25 process_alerts(&with_tabs)
26 }
27
28 /// Replace alert blockquotes with styled `<div class="alert ...">` elements.
29 fn process_alerts(html: &str) -> String {
30 // First pass: replace opening markers.
31 let opened = ALERT_RE.replace_all(html, |caps: &regex_lite::Captures| {
32 let kind = &caps[1];
33 // Skip TABS — already handled by process_tabs.
34 if kind == "TABS" {
35 return caps[0].to_string();
36 }
37 let label = title_case(kind);
38 format!(
39 "<div class=\"alert alert-{kind}\"><p class=\"alert-title\">{label}</p><p>",
40 kind = kind.to_ascii_lowercase(),
41 label = label,
42 )
43 });
44
45 // Second pass: close any opened alerts.
46 let alert_count = ALERT_RE
47 .captures_iter(html)
48 .filter(|c| &c[1] != "TABS")
49 .count();
50 if alert_count == 0 {
51 return opened.into_owned();
52 }
53
54 let mut result = String::with_capacity(opened.len());
55 let mut remaining = opened.as_ref();
56 let mut replaced = 0;
57
58 while replaced < alert_count {
59 if let Some(pos) = remaining.find("</blockquote>") {
60 result.push_str(&remaining[..pos]);
61 result.push_str("</div>");
62 remaining = &remaining[(pos + "</blockquote>".len())..];
63 replaced += 1;
64 } else {
65 break;
66 }
67 }
68 result.push_str(remaining);
69 result
70 }
71
72 /// Process `[!TABS]` blockquotes into tabbed code-block interfaces.
73 fn process_tabs(html: &str) -> String {
74 if !html.contains("[!TABS]") {
75 return html.to_string();
76 }
77
78 let mut result = String::with_capacity(html.len());
79 let mut remaining = html;
80
81 while let Some(bq_pos) = remaining.find("<blockquote>") {
82 let after_bq_start = bq_pos + "<blockquote>".len();
83
84 // Find the closing </blockquote> for this blockquote.
85 let close_pos = match remaining[bq_pos..].find("</blockquote>") {
86 Some(p) => bq_pos + p,
87 None => break,
88 };
89
90 let inner = &remaining[after_bq_start..close_pos];
91
92 // Check if the first <p> in the blockquote contains [!TABS].
93 let is_tabs = {
94 let trimmed = inner.trim_start();
95 trimmed.starts_with("<p>") && {
96 let first_p_end = trimmed.find("</p>").unwrap_or(trimmed.len());
97 trimmed[..first_p_end].contains("[!TABS]")
98 }
99 };
100
101 if !is_tabs {
102 // Not a TABS blockquote — copy through the opening tag and continue.
103 result.push_str(&remaining[..after_bq_start]);
104 remaining = &remaining[after_bq_start..];
105 continue;
106 }
107
108 // Copy everything before this blockquote.
109 result.push_str(&remaining[..bq_pos]);
110
111 // Extract code blocks from the inner HTML.
112 let tabs = extract_code_blocks(inner);
113
114 if tabs.is_empty() {
115 // No code blocks found — wrap content in a plain div.
116 result.push_str("<div class=\"code-tabs\">");
117 result.push_str(inner);
118 result.push_str("</div>");
119 } else {
120 result.push_str(&build_tabs_html(&tabs));
121 }
122
123 remaining = &remaining[close_pos + "</blockquote>".len()..];
124 }
125
126 result.push_str(remaining);
127 result
128 }
129
/// Extract `(language, full_html_block)` pairs from HTML containing
/// `<pre><code>` elements.
///
/// Each returned block spans from `<pre><code` through the matching
/// `</code></pre>`. Scanning stops at the first block that is never closed.
///
/// The language is read from a `class="language-X"` attribute on the opening
/// `<code>` tag only, so code *content* that happens to contain that text
/// cannot be mistaken for a language. The name ends at the closing quote or
/// at whitespace (additional classes are ignored). A missing or empty name
/// falls back to `"code"`.
fn extract_code_blocks(html: &str) -> Vec<(String, String)> {
    const PRE_TAG: &str = "<pre>";
    const START_MARKER: &str = "<pre><code";
    const END_MARKER: &str = "</code></pre>";
    const LANG_ATTR: &str = "class=\"language-";

    let mut blocks = Vec::new();
    let mut search_from = 0;

    while let Some(pre_pos) = html[search_from..].find(START_MARKER) {
        let abs_pos = search_from + pre_pos;

        let Some(end_offset) = html[abs_pos..].find(END_MARKER) else {
            break;
        };
        let end_pos = abs_pos + end_offset + END_MARKER.len();
        let full_block = &html[abs_pos..end_pos];

        // Everything between `<pre>` and the first `>` is the opening
        // `<code ...` tag; attributes are looked up there and nowhere else.
        let after_pre = &full_block[PRE_TAG.len()..];
        let code_tag = after_pre.split('>').next().unwrap_or(after_pre);

        let lang = code_tag
            .find(LANG_ATTR)
            .map(|at| &code_tag[at + LANG_ATTR.len()..])
            .and_then(|rest| rest.split(|c: char| c == '"' || c.is_whitespace()).next())
            .filter(|name| !name.is_empty())
            .unwrap_or("code");

        blocks.push((lang.to_string(), full_block.to_string()));
        search_from = end_pos;
    }

    blocks
}
161
162 /// Build tabbed HTML from extracted code blocks.
163 fn build_tabs_html(tabs: &[(String, String)]) -> String {
164 let mut html = String::from("<div class=\"code-tabs\">\n<div class=\"code-tabs-bar\">");
165
166 for (i, (lang, _)) in tabs.iter().enumerate() {
167 let active = if i == 0 { " active" } else { "" };
168 let label = code_language_label(lang);
169 html.push_str(&format!(
170 "<button class=\"code-tab{active}\" data-tab-index=\"{i}\">{label}</button>"
171 ));
172 }
173
174 html.push_str("</div>\n");
175
176 for (i, (_, block)) in tabs.iter().enumerate() {
177 let active = if i == 0 { " active" } else { "" };
178 html.push_str(&format!(
179 "<div class=\"code-tab-panel{active}\" data-tab-index=\"{i}\">{block}</div>\n"
180 ));
181 }
182
183 html.push_str("</div>");
184 html
185 }
186
187 /// Human-readable label for a code language identifier.
188 fn code_language_label(lang: &str) -> String {
189 match lang {
190 "js" | "javascript" => "JavaScript".into(),
191 "ts" | "typescript" => "TypeScript".into(),
192 "sh" | "bash" | "zsh" | "shell" => "Shell".into(),
193 "json" => "JSON".into(),
194 "html" => "HTML".into(),
195 "css" => "CSS".into(),
196 "sql" => "SQL".into(),
197 "toml" => "TOML".into(),
198 "yaml" | "yml" => "YAML".into(),
199 "xml" => "XML".into(),
200 other => title_case(other),
201 }
202 }
203
/// Upper-case the first character of `s` and ASCII-lower-case the rest.
///
/// The first character uses full Unicode upper-casing (which may expand to
/// several characters); the remaining characters are only lower-cased if they
/// are ASCII. An empty input yields an empty string.
fn title_case(s: &str) -> String {
    let Some(first) = s.chars().next() else {
        return String::new();
    };
    let rest = &s[first.len_utf8()..];

    first
        .to_uppercase()
        .chain(rest.chars().map(|c| c.to_ascii_lowercase()))
        .collect()
}
215
// Unit tests for the directive post-processor. Most tests feed a small HTML
// fragment through `post_process_directives` and check for substrings in the
// output; `language_labels` calls `code_language_label` directly.
#[cfg(test)]
mod tests {
    use super::*;

    // ===== Alert directives =====

    // `[!NOTE]` followed by `<br>`: expects the `alert-note` class, a "Note"
    // title paragraph, the body text, and no remaining `<blockquote>` opener.
    #[test]
    fn note_alert() {
        let html = "<blockquote>\n<p>[!NOTE]<br>\nThis is a note.</p>\n</blockquote>";
        let result = post_process_directives(html);
        assert!(result.contains("alert alert-note"));
        assert!(result.contains("<p class=\"alert-title\">Note</p>"));
        assert!(result.contains("This is a note."));
        assert!(!result.contains("<blockquote>"));
    }

    // `[!TIP]` maps to the `alert-tip` class and a "Tip" title.
    #[test]
    fn tip_alert() {
        let html = "<blockquote>\n<p>[!TIP]<br>\nHelpful tip here.</p>\n</blockquote>";
        let result = post_process_directives(html);
        assert!(result.contains("alert alert-tip"));
        assert!(result.contains("<p class=\"alert-title\">Tip</p>"));
    }

    // `[!IMPORTANT]` maps to the `alert-important` class and an "Important" title.
    #[test]
    fn important_alert() {
        let html = "<blockquote>\n<p>[!IMPORTANT]<br>\nDo this.</p>\n</blockquote>";
        let result = post_process_directives(html);
        assert!(result.contains("alert alert-important"));
        assert!(result.contains("<p class=\"alert-title\">Important</p>"));
    }

    // `[!WARNING]` maps to the `alert-warning` class and a "Warning" title.
    #[test]
    fn warning_alert() {
        let html = "<blockquote>\n<p>[!WARNING]<br>\nBe careful.</p>\n</blockquote>";
        let result = post_process_directives(html);
        assert!(result.contains("alert alert-warning"));
        assert!(result.contains("<p class=\"alert-title\">Warning</p>"));
    }

    // Same as the other alert tests, but the marker is followed by the
    // self-closing `<br/>` form rather than `<br>`.
    #[test]
    fn caution_alert() {
        let html = "<blockquote>\n<p>[!CAUTION]<br/>\nDanger zone.</p>\n</blockquote>";
        let result = post_process_directives(html);
        assert!(result.contains("alert alert-caution"));
        assert!(result.contains("<p class=\"alert-title\">Caution</p>"));
    }

    // An alert with two paragraphs: both bodies survive, a `</div>` is
    // emitted, and the closing `</blockquote>` is gone.
    #[test]
    fn multi_paragraph_alert() {
        let html = "<blockquote>\n<p>[!NOTE]<br>\nFirst paragraph.</p>\n<p>Second paragraph.</p>\n</blockquote>";
        let result = post_process_directives(html);
        assert!(result.contains("alert alert-note"));
        assert!(result.contains("First paragraph."));
        assert!(result.contains("Second paragraph."));
        assert!(result.contains("</div>"));
        assert!(!result.contains("</blockquote>"));
    }

    // A blockquote without any `[!TYPE]` marker must be returned unchanged.
    #[test]
    fn regular_blockquote_unchanged() {
        let html = "<blockquote>\n<p>Just a normal quote.</p>\n</blockquote>";
        let result = post_process_directives(html);
        assert_eq!(result, html);
    }

    // An alert followed by an ordinary blockquote. Note the alert comes
    // first in this input; the reverse order is not covered here.
    #[test]
    fn mixed_alerts_and_blockquotes() {
        let html = concat!(
            "<blockquote>\n<p>[!WARNING]<br>\nWatch out!</p>\n</blockquote>\n",
            "<blockquote>\n<p>Normal quote.</p>\n</blockquote>"
        );
        let result = post_process_directives(html);
        assert!(result.contains("alert alert-warning"));
        assert!(result.contains("Watch out!"));
        // The normal blockquote remains unchanged.
        assert!(result.contains("<blockquote>"));
        assert!(result.contains("Normal quote."));
    }

    // ===== Custom alert types =====

    // A marker outside the five named types still becomes an alert, with
    // the class and title derived from the marker text.
    #[test]
    fn custom_example_alert() {
        let html = "<blockquote>\n<p>[!EXAMPLE]<br>\nHere is an example.</p>\n</blockquote>";
        let result = post_process_directives(html);
        assert!(result.contains("alert alert-example"));
        assert!(result.contains("<p class=\"alert-title\">Example</p>"));
        assert!(result.contains("Here is an example."));
        assert!(!result.contains("<blockquote>"));
    }

    // Another custom marker: `[!DEFINITION]` → `alert-definition` / "Definition".
    #[test]
    fn custom_definition_alert() {
        let html = "<blockquote>\n<p>[!DEFINITION]<br>\nA term and its meaning.</p>\n</blockquote>";
        let result = post_process_directives(html);
        assert!(result.contains("alert alert-definition"));
        assert!(result.contains("<p class=\"alert-title\">Definition</p>"));
    }

    // Hyphens are allowed in the marker. The title keeps the hyphen and only
    // the very first character is upper-cased, hence "See-also".
    #[test]
    fn custom_alert_with_hyphen() {
        let html =
            "<blockquote>\n<p>[!SEE-ALSO]<br>\nRelated topics.</p>\n</blockquote>";
        let result = post_process_directives(html);
        assert!(result.contains("alert alert-see-also"));
        assert!(result.contains("<p class=\"alert-title\">See-also</p>"));
    }

    // ===== Code tabs =====

    // Two labelled code blocks under `[!TABS]`: expects the tab container and
    // bar, both language labels, both code bodies, and no `<blockquote>`.
    #[test]
    fn tabs_two_languages() {
        let html = concat!(
            "<blockquote>\n<p>[!TABS]</p>\n",
            "<pre><code class=\"language-rust\">fn main() {}\n</code></pre>\n",
            "<pre><code class=\"language-python\">def main(): pass\n</code></pre>\n",
            "</blockquote>"
        );
        let result = post_process_directives(html);
        assert!(result.contains("code-tabs"));
        assert!(result.contains("code-tabs-bar"));
        assert!(result.contains("Rust"));
        assert!(result.contains("Python"));
        assert!(result.contains("fn main() {}"));
        assert!(result.contains("def main(): pass"));
        assert!(!result.contains("<blockquote>"));
        // First tab is active.
        assert!(result.contains("code-tab active"));
        assert!(result.contains("code-tab-panel active"));
    }

    // Three code blocks: checks the friendly labels for `bash` and `js` and
    // that tab indices 0, 1 and 2 all appear.
    #[test]
    fn tabs_three_languages() {
        let html = concat!(
            "<blockquote>\n<p>[!TABS]</p>\n",
            "<pre><code class=\"language-bash\">curl https://api.example.com\n</code></pre>\n",
            "<pre><code class=\"language-js\">fetch('https://api.example.com')\n</code></pre>\n",
            "<pre><code class=\"language-python\">requests.get('https://api.example.com')\n</code></pre>\n",
            "</blockquote>"
        );
        let result = post_process_directives(html);
        assert!(result.contains("Shell")); // bash → Shell
        assert!(result.contains("JavaScript")); // js → JavaScript
        assert!(result.contains("Python"));
        assert!(result.contains("data-tab-index=\"0\""));
        assert!(result.contains("data-tab-index=\"1\""));
        assert!(result.contains("data-tab-index=\"2\""));
    }

    // A code block with no `language-*` class gets the generic "Code" label.
    #[test]
    fn tabs_no_language_specified() {
        let html = concat!(
            "<blockquote>\n<p>[!TABS]</p>\n",
            "<pre><code>some code\n</code></pre>\n",
            "<pre><code class=\"language-rust\">let x = 1;\n</code></pre>\n",
            "</blockquote>"
        );
        let result = post_process_directives(html);
        assert!(result.contains("Code")); // fallback label
        assert!(result.contains("Rust"));
    }

    // The `[!TABS]` marker paragraph may carry a trailing `<br>`; the
    // blockquote is still recognised as a tab group.
    #[test]
    fn tabs_with_br_marker() {
        let html = concat!(
            "<blockquote>\n<p>[!TABS]<br>\n</p>\n",
            "<pre><code class=\"language-toml\">[package]\n</code></pre>\n",
            "<pre><code class=\"language-json\">{}\n</code></pre>\n",
            "</blockquote>"
        );
        let result = post_process_directives(html);
        assert!(result.contains("TOML"));
        assert!(result.contains("JSON"));
    }

    // An alert, a tab group, and an ordinary blockquote in one document, in
    // that order: each is handled by its own pass and the plain one survives.
    #[test]
    fn tabs_mixed_with_alert_and_blockquote() {
        let html = concat!(
            "<blockquote>\n<p>[!NOTE]<br>\nA note.</p>\n</blockquote>\n",
            "<blockquote>\n<p>[!TABS]</p>\n",
            "<pre><code class=\"language-rust\">let x = 1;\n</code></pre>\n",
            "</blockquote>\n",
            "<blockquote>\n<p>Normal quote.</p>\n</blockquote>"
        );
        let result = post_process_directives(html);
        // Alert processed.
        assert!(result.contains("alert alert-note"));
        // Tabs processed.
        assert!(result.contains("code-tabs"));
        assert!(result.contains("Rust"));
        // Normal blockquote unchanged.
        assert!(result.contains("<blockquote>"));
        assert!(result.contains("Normal quote."));
    }

    // `[!TABS]` with no code blocks: the content is kept inside a
    // `code-tabs` wrapper and the blockquote itself is removed.
    #[test]
    fn tabs_no_code_blocks() {
        let html = concat!(
            "<blockquote>\n<p>[!TABS]</p>\n",
            "<p>Just text, no code.</p>\n",
            "</blockquote>"
        );
        let result = post_process_directives(html);
        assert!(result.contains("code-tabs"));
        assert!(result.contains("Just text, no code."));
        assert!(!result.contains("<blockquote>"));
    }

    // ===== Language label mapping =====

    // Pins the hand-picked labels plus the title-cased fallback for
    // identifiers without a dedicated entry (`rust`, `python`, `go`).
    #[test]
    fn language_labels() {
        assert_eq!(code_language_label("js"), "JavaScript");
        assert_eq!(code_language_label("typescript"), "TypeScript");
        assert_eq!(code_language_label("bash"), "Shell");
        assert_eq!(code_language_label("json"), "JSON");
        assert_eq!(code_language_label("html"), "HTML");
        assert_eq!(code_language_label("css"), "CSS");
        assert_eq!(code_language_label("sql"), "SQL");
        assert_eq!(code_language_label("toml"), "TOML");
        assert_eq!(code_language_label("yaml"), "YAML");
        assert_eq!(code_language_label("xml"), "XML");
        assert_eq!(code_language_label("rust"), "Rust");
        assert_eq!(code_language_label("python"), "Python");
        assert_eq!(code_language_label("go"), "Go");
    }
}
444