Skip to main content

max / makenotwork

6.0 KB · 189 lines History Blame Raw
/// Blank out backtick-delimited code so that its contents no longer appear in the text.
///
/// A span opens with a run of one or more backticks and closes at the next run of
/// backticks with exactly the same length; fenced blocks are simply spans opened by a
/// longer run. A span that is never closed extends to the end of `input`. Characters
/// outside spans are copied through unchanged.
///
/// Each span is replaced by `2 * opening_ticks + consumed` spaces, where `consumed`
/// counts the characters read after the opening run (the first backtick of every
/// candidate closing run is counted, the remaining backticks of that run are not).
/// This is not necessarily the span's character count: "`a`" (three characters)
/// becomes four spaces.
// NOTE(review): if any caller relies on offsets in the result lining up with `input`,
// confirm that the padding formula above is what is intended.
#[cfg_attr(not(any(feature = "mentions", test)), allow(dead_code))]
pub fn strip_code_spans(input: &str) -> String {
    let mut stripped = String::with_capacity(input.len());
    let mut rest = input.chars().peekable();

    while let Some(current) = rest.next() {
        // Ordinary text is passed straight through.
        if current != '`' {
            stripped.push(current);
            continue;
        }

        // Measure the opening run; `current` was its first backtick.
        let mut opening = 1;
        while rest.next_if_eq(&'`').is_some() {
            opening += 1;
        }

        // Swallow characters until a backtick run of the same length shows up,
        // or the input is exhausted.
        let mut consumed = 0;
        while let Some(inner) = rest.next() {
            consumed += 1;
            if inner != '`' {
                continue;
            }
            let mut closing = 1;
            while rest.next_if_eq(&'`').is_some() {
                closing += 1;
            }
            if closing == opening {
                break;
            }
        }

        let padding = opening * 2 + consumed;
        stripped.extend((0..padding).map(|_| ' '));
    }

    stripped
}
38
/// Locate every backtick-delimited code span in `input`.
///
/// Returns half-open `(start, end)` byte ranges in order of appearance. A span starts
/// at a run of one or more backticks and ends just past the next backtick run of exactly
/// the same length, so a triple-backtick fence is matched the same way as inline code.
/// Backtick runs of a different length inside a span are skipped over. When no matching
/// closing run exists, the span is reported as running to `input.len()`.
pub fn code_span_ranges(input: &str) -> Vec<(usize, usize)> {
    let raw = input.as_bytes();
    let end = raw.len();
    // Length of the run of consecutive backticks beginning at byte offset `from`.
    let tick_run = |from: usize| raw[from..].iter().take_while(|&&b| b == b'`').count();

    let mut spans = Vec::new();
    let mut pos = 0;

    while pos < end {
        if raw[pos] != b'`' {
            pos += 1;
            continue;
        }

        let span_start = pos;
        let opening = tick_run(pos);
        pos += opening;

        // Until a matching closing run is found, the span is treated as unterminated.
        let mut span_end = end;
        while pos < end {
            if raw[pos] != b'`' {
                pos += 1;
                continue;
            }
            let closing = tick_run(pos);
            pos += closing;
            if closing == opening {
                span_end = pos;
                break;
            }
        }

        spans.push((span_start, span_end));
    }

    spans
}
80
#[cfg(test)]
mod tests {
    use super::*;

    /// Count the space characters in `text`.
    fn space_count(text: &str) -> usize {
        text.chars().filter(|c| *c == ' ').count()
    }

    /// Borrow the part of `source` covered by a `(start, end)` byte range.
    fn covered(source: &str, span: (usize, usize)) -> &str {
        &source[span.0..span.1]
    }

    #[test]
    fn strip_inline_code() {
        let stripped = strip_code_spans("hello `code` world");
        // The code body disappears while the surrounding words survive.
        assert!(!stripped.contains("code"));
        assert!(stripped.contains("hello"));
        assert!(stripped.contains("world"));
    }

    #[test]
    fn strip_fenced_code() {
        let stripped = strip_code_spans("text\n```\ncode block\n```\nmore");
        assert!(!stripped.contains("code block"));
        assert!(stripped.contains("text"));
        assert!(stripped.contains("more"));
    }

    #[test]
    fn ranges_inline_code() {
        let source = "hello `code` world";
        let spans = code_span_ranges(source);
        assert_eq!(spans.len(), 1);
        assert_eq!(covered(source, spans[0]), "`code`");
    }

    #[test]
    fn ranges_fenced_code() {
        let source = "text\n```\ncode\n```\nmore";
        let spans = code_span_ranges(source);
        assert_eq!(spans.len(), 1);
        let fenced = covered(source, spans[0]);
        assert!(fenced.starts_with("```"));
        assert!(fenced.ends_with("```"));
    }

    #[test]
    fn ranges_unclosed_backtick() {
        let source = "hello `unclosed";
        let spans = code_span_ranges(source);
        assert_eq!(spans.len(), 1);
        assert_eq!(spans[0], (6, source.len()));
    }

    #[test]
    fn no_code_spans() {
        let source = "no code here";
        assert!(code_span_ranges(source).is_empty());
        assert_eq!(strip_code_spans(source), "no code here");
    }

    #[test]
    fn strip_triple_backtick_exact_space_count() {
        // Input ```ab```: three opening ticks, then three characters are counted
        // (a, b, and the first closing tick), so the padding is 3*2 + 3 = 9.
        // Pins `*` against `+` (3+2=5) and `+ skipped` against `- skipped`.
        let stripped = strip_code_spans("```ab```");
        let spaces = space_count(&stripped);
        assert_eq!(spaces, 9, "expected 3*2 + 3 = 9 spaces, got {:?}", stripped);
    }

    #[test]
    fn strip_single_backtick_exact_space_count() {
        // Input `a`: one opening tick, two counted characters, padding 1*2 + 2 = 4.
        // Pins `tick_count * 2` against `tick_count + 2` (4 vs 3).
        let stripped = strip_code_spans("`a`");
        let spaces = space_count(&stripped);
        assert_eq!(spaces, 4, "expected 1*2 + 2 = 4 spaces, got {:?}", stripped);
    }

    #[test]
    fn double_backticks_require_double_close() {
        // In ``a`b`` the lone ` in the middle must NOT terminate the double-tick span.
        let source = "``a`b``";
        let spans = code_span_ranges(source);
        assert_eq!(spans.len(), 1, "the inner single ` must not close");
        assert_eq!(covered(source, spans[0]), "``a`b``");
    }

    #[test]
    fn mismatched_tick_counts_dont_close_span() {
        // One opening tick followed later by a run of three: the run lengths differ,
        // so the span stays open through end of input. Pins `close_count == tick_count`.
        let source = "`code```";
        let spans = code_span_ranges(source);
        assert_eq!(spans.len(), 1);
        assert_eq!(spans[0], (0, source.len()));
    }

    #[test]
    fn multiple_disjoint_spans_get_separate_ranges() {
        let source = "a `one` b `two` c";
        let spans = code_span_ranges(source);
        assert_eq!(spans.len(), 2);
        assert_eq!(covered(source, spans[0]), "`one`");
        assert_eq!(covered(source, spans[1]), "`two`");
    }

    #[test]
    fn unclosed_span_range_ends_at_input_len() {
        // Pins the branch that reports an unterminated span as ending at the input length.
        let source = "abc `unclosed";
        let spans = code_span_ranges(source);
        assert_eq!(spans.len(), 1);
        assert_eq!(spans[0], (4, source.len()));
    }
}
189