Skip to main content

max / pter

3.1 KB · 95 lines History Blame Raw
1 use proptest::prelude::*;
2
3 // Strategy: generate arbitrary HTML-like strings
4 fn html_fragment() -> impl Strategy<Value = String> {
5 let tags = prop::sample::select(vec![
6 "p", "div", "span", "strong", "em", "a", "h1", "h2", "h3",
7 "ul", "ol", "li", "blockquote", "pre", "code", "br", "hr",
8 "img", "table", "tr", "td", "th", "b", "i", "del", "sup", "sub",
9 ]);
10
11 let text = "[a-zA-Z0-9 .,!?]{0,100}";
12
13 prop::collection::vec(
14 prop_oneof![
15 // Plain text
16 text.prop_map(|s| s),
17 // Opening + closing tag with text
18 (tags.clone(), text).prop_map(|(tag, content)| {
19 format!("<{tag}>{content}</{tag}>")
20 }),
21 // Self-closing tag
22 tags.clone().prop_map(|tag| format!("<{tag}/>")),
23 // Nested tags
24 (tags.clone(), tags.clone(), text).prop_map(|(outer, inner, content)| {
25 format!("<{outer}><{inner}>{content}</{inner}></{outer}>")
26 }),
27 ],
28 1..10,
29 )
30 .prop_map(|parts| parts.join(""))
31 }
32
33 proptest! {
34 #[test]
35 fn never_panics(html in html_fragment()) {
36 let _ = pter::convert(&html);
37 }
38
39 #[test]
40 fn never_panics_on_arbitrary_bytes(s in "\\PC{0,500}") {
41 let _ = pter::convert(&s);
42 }
43
44 #[test]
45 fn output_contains_no_html_tags(html in html_fragment()) {
46 let md = pter::convert(&html);
47 // Output should never contain raw HTML tags
48 // (except inside code blocks, which we skip checking)
49 let without_code_blocks: String = md
50 .split("```")
51 .enumerate()
52 .filter(|(i, _)| i % 2 == 0) // only outside code blocks
53 .map(|(_, s)| s)
54 .collect();
55
56 // No <script>, <style>, <div>, etc. should leak through
57 assert!(!without_code_blocks.contains("<script"), "leaked <script> in: {md}");
58 assert!(!without_code_blocks.contains("<style"), "leaked <style> in: {md}");
59 assert!(!without_code_blocks.contains("<head"), "leaked <head> in: {md}");
60 }
61
62 #[test]
63 fn output_is_valid_utf8(html in html_fragment()) {
64 let md = pter::convert(&html);
65 // String type guarantees UTF-8, but verify no replacement chars snuck in
66 // from bad entity decoding
67 assert!(!md.contains('\u{FFFD}'), "replacement char in: {md}");
68 }
69
70 #[test]
71 fn no_excessive_blank_lines(html in html_fragment()) {
72 let md = pter::convert(&html);
73 assert!(!md.contains("\n\n\n"), "triple newline in output: {md}");
74 }
75
76 #[test]
77 fn no_trailing_whitespace_on_lines(html in html_fragment()) {
78 let md = pter::convert(&html);
79 for (i, line) in md.lines().enumerate() {
80 assert!(
81 line == line.trim_end(),
82 "trailing whitespace on line {i}: '{line}'"
83 );
84 }
85 }
86
87 #[test]
88 fn empty_input_returns_empty(s in "\\s{0,20}") {
89 let html = format!("<html><body>{s}</body></html>");
90 let md = pter::convert(&html);
91 // Whitespace-only input should produce empty or whitespace-only output
92 assert!(md.trim().is_empty() || !s.trim().is_empty());
93 }
94 }
95