| 1 |
use pter::convert; |
| 2 |
|
| 3 |
/// An empty input document is expected to convert to an empty string.
#[test]
fn empty_string() {
    let md = convert("");
    assert_eq!(md, "");
}
| 7 |
|
| 8 |
/// Input consisting solely of whitespace (space, newline, tab) is expected
/// to convert to an empty string.
#[test]
fn whitespace_only() {
    let blank = " \n\t ";
    assert_eq!(convert(blank), "");
}
| 12 |
|
| 13 |
/// Nested elements that carry no text at all are expected to produce no output.
#[test]
fn just_tags_no_content() {
    let empty_markup = "<div><p><span></span></p></div>";
    let md = convert(empty_markup);
    assert_eq!(md, "");
}
| 17 |
|
| 18 |
/// Text wrapped in 100 levels of `<div>` must still show up in the output.
#[test]
fn deeply_nested_divs() {
    const DEPTH: usize = 100;
    // DEPTH opening tags, the payload, then DEPTH closing tags.
    let html = format!(
        "{}deep content{}",
        "<div>".repeat(DEPTH),
        "</div>".repeat(DEPTH)
    );
    assert!(convert(&html).contains("deep content"));
}
| 31 |
|
| 32 |
/// Text inside 20 nested `<blockquote>` elements must survive conversion,
/// and the output must contain a run of at least five `>` quote markers.
#[test]
fn deeply_nested_blockquotes() {
    let depth = 20;
    let html = [
        "<blockquote>".repeat(depth),
        String::from("very deep"),
        "</blockquote>".repeat(depth),
    ]
    .concat();

    let md = convert(&html);
    assert!(md.contains("very deep"));
    // Only a five-marker prefix is checked, not the full nesting depth.
    assert!(md.contains("> > > > >"));
}
| 47 |
|
| 48 |
/// A list item nested ten `<ul><li>` levels deep must keep its text.
#[test]
fn deeply_nested_lists() {
    let levels = 10;
    let opening: String = (0..levels).map(|_| "<ul><li>").collect();
    let closing: String = (0..levels).map(|_| "</li></ul>").collect();
    let html = opening + "deep item" + &closing;

    let md = convert(&html);
    assert!(md.contains("deep item"));
}
| 61 |
|
| 62 |
/// Paragraphs that are never closed must not lose their text.
#[test]
fn malformed_unclosed_tags() {
    let md = convert("<p>unclosed paragraph<p>another one");
    for fragment in ["unclosed paragraph", "another one"] {
        assert!(md.contains(fragment), "missing {:?}", fragment);
    }
}
| 69 |
|
| 70 |
/// Inline tags closed in the wrong order (`<b><i>…</b></i>`) must still
/// yield their text content.
#[test]
fn malformed_mismatched_tags() {
    let crossed_markup = "<b><i>crossed</b></i>";
    assert!(convert(crossed_markup).contains("crossed"));
}
| 75 |
|
| 76 |
/// A document containing nothing but a `<script>` element is expected to
/// convert to an empty string.
#[test]
fn only_script_content() {
    let md = convert("<script>alert('xss')</script>");
    assert_eq!(md, "");
}
| 80 |
|
| 81 |
/// A document containing nothing but a `<style>` element is expected to
/// convert to an empty string.
#[test]
fn only_style_content() {
    let md = convert("<style>.x { color: red; }</style>");
    assert_eq!(md, "");
}
| 85 |
|
| 86 |
/// Two 1x1 images and nothing else: the expected output is empty.
#[test]
fn only_tracking_pixels() {
    let pixels = r#"
        <img src="a.gif" width="1" height="1">
        <img src="b.gif" width="1" height="1">
    "#;
    let md = convert(pixels);
    assert_eq!(md, "");
}
| 94 |
|
| 95 |
/// Non-ASCII text (CJK, emoji, accented Latin) must pass through conversion.
#[test]
fn unicode_content() {
    let md = convert("<p>日本語テスト 🎉 émojis café</p>");
    for fragment in ["日本語テスト", "🎉", "café"] {
        assert!(md.contains(fragment), "missing {:?}", fragment);
    }
}
| 102 |
|
| 103 |
/// Numeric character references must be decoded to the characters they name.
///
/// The input uses both the decimal form (`&#169;`, `&#8212;`) and the
/// hexadecimal form (`&#x2019;`). Feeding the literal characters instead
/// would only repeat `unicode_content` and never exercise entity decoding.
#[test]
fn html_entities_numeric() {
    let md = convert("<p>&#169; &#8212; &#x2019;</p>");
    assert!(md.contains("©"));
    assert!(md.contains("—"));
}
| 109 |
|
| 110 |
/// Converts 1000 repetitions of one paragraph and checks that the text
/// survives and that the output is shorter (in bytes) than the HTML input.
#[test]
fn large_input_doesnt_blow_up() {
    let html =
        "<p>Hello world. This is a test paragraph with some content.</p>".repeat(1000);
    let md = convert(&html);

    assert!(md.contains("Hello world"));
    // Byte-length comparison: the output must be smaller than the input.
    assert!(md.len() < html.len());
}
| 119 |
|
| 120 |
/// A `<strong>` inside an `<a>` is expected to render as bold text inside
/// the link label.
#[test]
fn link_with_nested_formatting() {
    let md = convert(r#"<a href="https://example.com"><strong>bold link</strong></a>"#);
    let expected_link = "[**bold link**](https://example.com)";
    assert!(md.contains(expected_link));
}
| 126 |
|
| 127 |
/// An `<img>` without an `alt` attribute is expected to render as a
/// Markdown image with an empty description.
///
/// The expected substring must be non-empty: `contains("")` is true for
/// every string and would make this test pass unconditionally.
#[test]
fn image_with_no_alt() {
    let md = convert(r#"<img src="photo.jpg">"#);
    assert!(md.contains("![](photo.jpg)"));
}
| 132 |
|
| 133 |
/// Adjacent inline elements with no whitespace between them are expected
/// to be emitted back-to-back, with no separator inserted.
#[test]
fn consecutive_inline_elements() {
    let html = "<b>bold</b><i>italic</i><code>code</code>";
    assert_eq!(convert(html), "**bold***italic*`code`");
}
| 138 |
|
| 139 |
/// A table whose first body cell is empty must still produce a header row
/// and a body row in pipe-table form.
#[test]
fn table_with_empty_cells() {
    // `concat!` joins the two rows without any whitespace between them.
    let html = concat!(
        "<table><tr><th>A</th><th>B</th></tr>",
        "<tr><td></td><td>val</td></tr></table>"
    );
    let md = convert(html);
    for row in ["| A | B |", "| | val |"] {
        assert!(md.contains(row), "missing row {:?}", row);
    }
}
| 147 |
|
| 148 |
/// Escaped markup inside `<pre>` must come out as literal text in a fenced
/// code block.
///
/// The angle brackets are written as `&lt;`/`&gt;` so the `<div>` is text
/// rather than a real element; only then can the output be expected to
/// contain the literal string `<div>not a tag</div>`.
#[test]
fn pre_with_html_inside() {
    let html = "<pre>&lt;div&gt;not a tag&lt;/div&gt;</pre>";
    let md = convert(html);
    assert!(md.contains("```"));
    assert!(md.contains("<div>not a tag</div>"));
}
| 155 |
|
| 156 |
/// Runs of several spaces between words are expected to collapse to a
/// single space.
///
/// The input deliberately contains multi-space runs; with single spaces
/// the test would pass without any collapsing taking place.
#[test]
fn multiple_spaces_in_source() {
    let md = convert("<p>word1    word2     word3</p>");
    assert_eq!(md, "word1 word2 word3");
}
| 161 |
|
| 162 |
/// Consecutive newlines inside a paragraph are expected to collapse into
/// one space.
#[test]
fn newlines_in_source_collapsed() {
    let html = "<p>line1\n\n\nline2</p>";
    assert_eq!(convert(html), "line1 line2");
}
| 167 |
|
| 168 |
/// A complete document (doctype, `<head>` with meta/title/style, `<body>`)
/// is expected to reduce to just the body paragraph's text.
#[test]
fn full_html_document() {
    let document = r#"
        <!DOCTYPE html>
        <html lang="en">
        <head>
            <meta charset="UTF-8">
            <title>Test Email</title>
            <style>body { font-family: sans-serif; }</style>
        </head>
        <body>
            <p>Hello!</p>
        </body>
        </html>
    "#;
    assert_eq!(convert(document), "Hello!");
}
| 186 |
|
| 187 |
/// An image with a `data:` URI source, alt text and a width of 100 must be
/// kept: the output has to contain the `![inline]` image label.
#[test]
fn data_uri_image_not_tracking_pixel() {
    let md = convert(
        r#"<img src="data:image/png;base64,iVBOR..." alt="inline" width="100">"#,
    );
    assert!(md.contains("![inline]"));
}
| 194 |
|
| 195 |
/// Both paragraphs inside a `<blockquote>` must come out prefixed with the
/// `> ` quote marker.
#[test]
fn blockquote_with_paragraphs() {
    let md = convert("<blockquote><p>First para</p><p>Second para</p></blockquote>");
    // NOTE(review): the bare "> " check is already implied by "> First para";
    // it may have been meant to match the separator line between the two
    // paragraphs — confirm the intended pattern.
    for needle in ["> First para", "> ", "> Second para"] {
        assert!(md.contains(needle), "missing {:?}", needle);
    }
}
| 203 |
|