Skip to main content

max / docengine

3.8 KB · 133 lines History Blame Raw
/// Blank out inline code spans and fenced code blocks.
///
/// A span opens at a run of one or more backticks and closes at the next run
/// of backticks of exactly the same length. If no matching run follows, the
/// span extends to the end of `input`.
///
/// Every character that belongs to a span (the delimiters, the content and
/// any newlines inside it) is replaced by a single ASCII space. The returned
/// string therefore has exactly as many `char`s as `input`, and text outside
/// code spans keeps its character offset.
///
/// Note: a multi-byte character inside a span is replaced by one one-byte
/// space, so *byte* offsets after such a span can differ from the input.
pub fn strip_code_spans(input: &str) -> String {
    let mut out = String::with_capacity(input.len());
    let mut chars = input.chars().peekable();

    while let Some(ch) = chars.next() {
        if ch != '`' {
            out.push(ch);
            continue;
        }

        // `consumed` tracks every character swallowed by this span so the
        // replacement is exactly as long as the text it replaces.
        let mut consumed: usize = 1;

        // Measure the opening delimiter run.
        let mut open_len: usize = 1;
        while chars.peek() == Some(&'`') {
            chars.next();
            open_len += 1;
            consumed += 1;
        }

        // Scan forward for a backtick run of the same length. Runs of a
        // different length are ordinary span content.
        while let Some(c) = chars.next() {
            consumed += 1;
            if c != '`' {
                continue;
            }
            let mut close_len: usize = 1;
            while chars.peek() == Some(&'`') {
                chars.next();
                close_len += 1;
                consumed += 1;
            }
            if close_len == open_len {
                break;
            }
        }

        out.extend(std::iter::repeat(' ').take(consumed));
    }

    out
}
37
/// Locate inline code spans and fenced code blocks in `input`.
///
/// Each returned pair is a half-open byte range `(start, end)` that covers the
/// opening backtick run, the content, and the closing backtick run. A span is
/// closed by the next backtick run whose length equals the opening run; when
/// no such run exists the range extends to `input.len()`.
///
/// Scanning is done on raw bytes. The backtick is ASCII, so it never matches
/// part of a multi-byte UTF-8 sequence and every reported offset falls on a
/// character boundary.
pub fn code_span_ranges(input: &str) -> Vec<(usize, usize)> {
    /// Length of the run of consecutive backticks starting at `from`.
    fn backtick_run(bytes: &[u8], from: usize) -> usize {
        bytes[from..].iter().take_while(|&&b| b == b'`').count()
    }

    let bytes = input.as_bytes();
    let end = bytes.len();
    let mut spans = Vec::new();
    let mut pos = 0;

    while pos < end {
        if bytes[pos] != b'`' {
            pos += 1;
            continue;
        }

        let open_at = pos;
        let open_len = backtick_run(bytes, pos);
        pos += open_len;

        // Assume the span is unterminated until a matching run is found.
        let mut close_at = end;
        while pos < end {
            if bytes[pos] != b'`' {
                pos += 1;
                continue;
            }
            let run = backtick_run(bytes, pos);
            pos += run;
            if run == open_len {
                close_at = pos;
                break;
            }
        }

        spans.push((open_at, close_at));
    }

    spans
}
79
#[cfg(test)]
mod tests {
    use super::*;

    /// Inline code content disappears while the surrounding prose is kept.
    #[test]
    fn strip_inline_code() {
        let stripped = strip_code_spans("hello `code` world");
        assert!(stripped.contains("hello"));
        assert!(stripped.contains("world"));
        assert!(!stripped.contains("code"));
    }

    /// A triple-backtick fence is blanked together with its body.
    #[test]
    fn strip_fenced_code() {
        let stripped = strip_code_spans("text\n```\ncode block\n```\nmore");
        assert!(stripped.contains("text"));
        assert!(stripped.contains("more"));
        assert!(!stripped.contains("code block"));
    }

    /// The reported range covers the delimiters as well as the content.
    #[test]
    fn ranges_inline_code() {
        let text = "hello `code` world";
        let spans = code_span_ranges(text);
        assert_eq!(spans.len(), 1);
        let (from, to) = spans[0];
        assert_eq!(&text[from..to], "`code`");
    }

    /// A fenced block is reported as one range that starts and ends on a fence.
    #[test]
    fn ranges_fenced_code() {
        let text = "text\n```\ncode\n```\nmore";
        let spans = code_span_ranges(text);
        assert_eq!(spans.len(), 1);
        let (from, to) = spans[0];
        let span = &text[from..to];
        assert!(span.starts_with("```"));
        assert!(span.ends_with("```"));
    }

    /// An unterminated backtick produces a range running to the end of input.
    #[test]
    fn ranges_unclosed_backtick() {
        let text = "hello `unclosed";
        let spans = code_span_ranges(text);
        assert_eq!(spans.len(), 1);
        assert_eq!(spans[0], (6, text.len()));
    }

    /// Input without backticks yields no ranges and is returned unchanged.
    #[test]
    fn no_code_spans() {
        let plain = "no code here";
        assert!(code_span_ranges(plain).is_empty());
        assert_eq!(strip_code_spans(plain), plain);
    }
}
133