| 1 |
use pter::convert; |
| 2 |
|
| 3 |
/// A minimal HTML email: the heading, plain paragraph text, bold text and a
/// `<br>` line break are all expected in the converted output.
#[test]
fn simple_email() {
    let input = r#"
        <html>
        <head><title>Email</title></head>
        <body>
            <h1>Meeting Tomorrow</h1>
            <p>Hi Max,</p>
            <p>Just confirming our meeting tomorrow at <strong>2pm</strong>.</p>
            <p>Best,<br>Alice</p>
        </body>
        </html>
    "#;

    let markdown = convert(input);

    // `<h1>` is expected as "# ", `<strong>` as "**…**", `<br>` as a newline.
    for fragment in ["# Meeting Tomorrow", "Hi Max,", "**2pm**", "Best,\nAlice"] {
        assert!(
            markdown.contains(fragment),
            "expected {fragment:?} in converted output {markdown:?}"
        );
    }
}
| 23 |
|
| 24 |
/// Anchors: a link with distinct text is expected as `[text](url)`; a link
/// whose text is identical to its `href` is expected as the bare URL.
#[test]
fn email_with_links() {
    let input = r#"
        <body>
            <p>Please review the <a href="https://example.com/doc">document</a>.</p>
            <p>Direct link: <a href="https://example.com">https://example.com</a></p>
        </body>
    "#;

    let markdown = convert(input);
    let has = |needle: &str| markdown.contains(needle);

    assert!(has("[document](https://example.com/doc)"));

    // Link text equals the target, so no `[…](…)` wrapper is expected here.
    assert!(has("Direct link: https://example.com"));
}
| 38 |
|
| 39 |
/// Tracking pixels (a 1x1 remote image and an inline `data:` GIF) must be
/// dropped from the output, while a genuine content image is kept.
#[test]
fn email_with_tracking_pixels() {
    let html = r#"
        <body>
            <p>Content here</p>
            <img src="https://tracker.example.com/open.gif" width="1" height="1" alt="">
            <img src="data:image/gif;base64,R0lGODlhAQABAIAAAAAAAP///yH5BAEAAAAALAAAAAABAAEAAAIBRAA7" alt="">
            <img src="real-image.jpg" alt="A real photo" width="600">
        </body>
    "#;

    let md = convert(html);
    assert!(md.contains("Content here"));
    assert!(!md.contains("tracker"));
    assert!(!md.contains("data:image"));
    // An empty needle matches every string, so assert on the image markup
    // itself. NOTE(review): assumes standard Markdown image syntax, mirroring
    // the `[text](url)` link form asserted in `email_with_links` — confirm
    // against the converter's actual image output.
    assert!(md.contains("![A real photo](real-image.jpg)"));
}
| 56 |
|
| 57 |
/// A reply followed by a quoted message: the `<blockquote>` paragraph is
/// expected with a `> ` prefix, the reply text unquoted.
#[test]
fn email_with_quoted_reply() {
    let source = r#"
        <body>
            <p>Thanks, that works for me.</p>
            <blockquote>
                <p>Can we meet at 3pm instead?</p>
            </blockquote>
        </body>
    "#;

    let output = convert(source);
    let reply_kept = output.contains("Thanks, that works for me.");
    let quote_prefixed = output.contains("> Can we meet at 3pm instead?");

    assert!(reply_kept);
    assert!(quote_prefixed);
}
| 72 |
|
| 73 |
/// A body separated from its signature by `<hr>`: the rule is expected as
/// `---`, with text on both sides of it retained.
#[test]
fn email_with_signature_line() {
    let input = r#"
        <body>
            <p>See you then.</p>
            <hr>
            <p>Alice Smith</p>
            <p>Engineering Lead</p>
        </body>
    "#;

    let markdown = convert(input);

    for fragment in ["See you then.", "---", "Alice Smith"] {
        assert!(
            markdown.contains(fragment),
            "expected {fragment:?} in converted output {markdown:?}"
        );
    }
}
| 89 |
|
| 90 |
/// Three levels of nested `<blockquote>`: each level is expected to add one
/// more `> ` prefix to the quoted line.
#[test]
fn deeply_nested_blockquotes() {
    let input = r#"
        <body>
            <p>Got it.</p>
            <blockquote>
                <p>Sounds good.</p>
                <blockquote>
                    <p>Can we reschedule?</p>
                    <blockquote>
                        <p>Original message here.</p>
                    </blockquote>
                </blockquote>
            </blockquote>
        </body>
    "#;

    let markdown = convert(input);
    assert!(markdown.contains("Got it."));

    // (nesting depth, quoted text); the needle is "> " repeated `depth` times
    // followed by the text.
    let quoted = [
        (1, "Sounds good."),
        (2, "Can we reschedule?"),
        (3, "Original message here."),
    ];
    for (depth, text) in quoted {
        let needle = format!("{}{}", "> ".repeat(depth), text);
        assert!(
            markdown.contains(needle.as_str()),
            "expected {needle:?} in converted output {markdown:?}"
        );
    }
}
| 113 |
|
| 114 |
/// An ordered list with an unordered list nested in its first item: the
/// numbering and the nested bullet are both expected in the output.
#[test]
fn complex_list_structure() {
    let input = r#"
        <body>
            <p>Action items:</p>
            <ol>
                <li>Review the PR
                    <ul>
                        <li>Check tests</li>
                        <li>Check docs</li>
                    </ul>
                </li>
                <li>Deploy to staging</li>
            </ol>
        </body>
    "#;

    let markdown = convert(input);

    let expected = [
        "Action items:",
        "1. Review the PR",
        // Leading space: the nested bullet must not start at column zero.
        " - Check tests",
        "2. Deploy to staging",
    ];
    for fragment in expected {
        assert!(
            markdown.contains(fragment),
            "expected {fragment:?} in converted output {markdown:?}"
        );
    }
}
| 137 |
|
| 138 |
/// A `<pre><code>` block is expected as a fenced code block whose inner
/// whitespace (here the indented `println!` line) is preserved verbatim.
#[test]
fn pre_block_preserves_formatting() {
    // The lines inside `<pre>` are deliberately NOT aligned with the
    // surrounding markup: every leading space there is part of the fixture.
    let html = r#"
        <body>
            <p>Here's the code:</p>
            <pre><code>fn main() {
    println!("hello");
}</code></pre>
        </body>
    "#;

    let md = convert(html);
    assert!(md.contains("Here's the code:"));
    assert!(md.contains("```\nfn main()"));
    // The fixture indents `println!` by four spaces; a formatting-preserving
    // conversion must keep exactly that indent.
    assert!(md.contains("    println!"));
}
| 154 |
|
| 155 |
/// Elements styled `display: none` or `visibility: hidden` must not leak
/// their text into the output; surrounding visible text must remain.
#[test]
fn hidden_content_stripped() {
    let input = r#"
        <body>
            <p>Visible content</p>
            <div style="display: none;">
                <p>This should not appear</p>
            </div>
            <span style="visibility: hidden;">Also hidden</span>
            <p>More visible</p>
        </body>
    "#;

    let markdown = convert(input);

    for shown in ["Visible content", "More visible"] {
        assert!(
            markdown.contains(shown),
            "expected {shown:?} in converted output {markdown:?}"
        );
    }
    for suppressed in ["should not appear", "Also hidden"] {
        assert!(
            !markdown.contains(suppressed),
            "unexpected {suppressed:?} in converted output {markdown:?}"
        );
    }
}
| 174 |
|
| 175 |
/// `<style>` and `<script>` elements (in both `<head>` and `<body>`) must
/// contribute nothing: the whole output is expected to be the one paragraph.
#[test]
fn script_and_style_fully_removed() {
    let input = r#"
        <html>
        <head>
            <style>body { color: red; }</style>
            <script>alert('xss');</script>
        </head>
        <body>
            <p>Safe content</p>
            <script>document.write('injected')</script>
        </body>
        </html>
    "#;

    // Exact equality, not `contains`: no stray whitespace or markup allowed.
    let expected = "Safe content";
    assert_eq!(convert(input), expected);
}
| 193 |
|
| 194 |
/// A newsletter built from nested layout tables: the inner content (heading,
/// paragraphs, list) is expected as ordinary Markdown, with no table pipes
/// and no trace of the trailing tracking pixel.
#[test]
fn newsletter_table_layout() {
    let input = r#"
        <html>
        <body>
            <table width="100%" cellpadding="0" cellspacing="0" role="presentation">
                <tr>
                    <td align="center">
                        <table width="600" cellpadding="0" cellspacing="0">
                            <tr>
                                <td>
                                    <h2>Weekly Digest</h2>
                                    <p>Here are your updates for this week.</p>
                                    <ul>
                                        <li>New release v2.0</li>
                                        <li>Bug fixes</li>
                                    </ul>
                                    <p>Thanks for reading!</p>
                                </td>
                            </tr>
                        </table>
                    </td>
                </tr>
            </table>
            <img src="https://track.example.com/open.gif" width="1" height="1">
        </body>
        </html>
    "#;

    let markdown = convert(input);

    let content = [
        "## Weekly Digest",
        "Here are your updates for this week.",
        "- New release v2.0",
        "- Bug fixes",
        "Thanks for reading!",
    ];
    for fragment in content {
        assert!(
            markdown.contains(fragment),
            "expected {fragment:?} in converted output {markdown:?}"
        );
    }

    // "track.example.com": the 1x1 pixel must be gone.
    // "| ": layout tables must not be rendered as Markdown table rows.
    for unwanted in ["track.example.com", "| "] {
        assert!(
            !markdown.contains(unwanted),
            "unexpected {unwanted:?} in converted output {markdown:?}"
        );
    }
}
| 234 |
|
| 235 |
/// A genuine data table (with `<thead>`/`<tbody>`) is expected as a Markdown
/// pipe table: header row, `---` separator row, then one row per `<tr>`.
#[test]
fn data_table_preserved() {
    let input = r#"
        <body>
            <p>Order summary:</p>
            <table>
                <thead><tr><th>Item</th><th>Qty</th><th>Price</th></tr></thead>
                <tbody>
                    <tr><td>Widget</td><td>3</td><td>$15.00</td></tr>
                    <tr><td>Gadget</td><td>1</td><td>$29.99</td></tr>
                </tbody>
            </table>
        </body>
    "#;

    let markdown = convert(input);
    assert!(markdown.contains("Order summary:"));

    let table_rows = [
        "| Item | Qty | Price |",
        "| --- | --- | --- |",
        "| Widget | 3 | $15.00 |",
        "| Gadget | 1 | $29.99 |",
    ];
    for row in table_rows {
        assert!(
            markdown.contains(row),
            "expected table row {row:?} in converted output {markdown:?}"
        );
    }
}
| 257 |
|
| 258 |
/// Spacer divs, a hidden 1x1 image and a zero-height overflow-hidden div sit
/// between two real paragraphs; only the paragraphs are expected to survive.
#[test]
fn spacer_and_tracking_stripped() {
    let input = r#"
        <body>
            <p>Real content</p>
            <div style="font-size: 0; line-height: 0;"> </div>
            <img src="pixel.gif" width="1" height="1" style="display:none">
            <div style="height:0;overflow:hidden">invisible</div>
            <p>More content</p>
        </body>
    "#;

    let markdown = convert(input);

    for kept in ["Real content", "More content"] {
        assert!(
            markdown.contains(kept),
            "expected {kept:?} in converted output {markdown:?}"
        );
    }
    for dropped in ["invisible", "pixel.gif"] {
        assert!(
            !markdown.contains(dropped),
            "unexpected {dropped:?} in converted output {markdown:?}"
        );
    }
}
| 276 |
|
| 277 |
|
| 278 |
|
| 279 |
/// Gmail-style reply: a `gmail_quote` wrapper holding an attribution line and
/// a `<blockquote class="gmail_quote">`. The reply text must be kept and the
/// quoted message must appear in a `> ` quote.
#[test]
fn gmail_reply_chain() {
    // The sender address in the attribution is written with `&lt;`/`&gt;`:
    // a raw `<alice@example.com>` would be tokenized by an HTML parser as a
    // start tag instead of text.
    let html = r#"
        <body>
            <div dir="ltr">
                <p>Thanks, that works for me.</p>
            </div>
            <div class="gmail_quote">
                <div class="gmail_attr">On Mon, Jan 5, 2026 at 3:00 PM Alice &lt;alice@example.com&gt; wrote:</div>
                <blockquote class="gmail_quote">
                    <div dir="ltr">
                        <p>Can we meet at 3pm instead of 2pm?</p>
                    </div>
                </blockquote>
            </div>
        </body>
    "#;

    let md = convert(html);
    assert!(md.contains("Thanks, that works for me."));

    // Only the presence of a quote marker is checked, not its exact layout.
    assert!(md.contains("> "));
    assert!(md.contains("3pm instead of 2pm"));
}
| 303 |
|
| 304 |
/// Apple Mail-style reply: plain `<div>` text followed by a
/// `<blockquote type="cite">`. Both texts must survive and a quote marker
/// must be present.
#[test]
fn apple_mail_reply() {
    let input = r#"
        <body>
            <div>Sounds good, see you then.</div>
            <div>
                <br>
                <blockquote type="cite">
                    <div>Hey, are we still on for lunch?</div>
                </blockquote>
            </div>
        </body>
    "#;

    let markdown = convert(input);

    for fragment in ["Sounds good, see you then.", "> ", "still on for lunch"] {
        assert!(
            markdown.contains(fragment),
            "expected {fragment:?} in converted output {markdown:?}"
        );
    }
}
| 323 |
|
| 324 |
/// Outlook-style reply: no blockquote, just an `<hr>` followed by a
/// From/Sent/To/Subject header block and the original message.
#[test]
fn outlook_reply_with_separator() {
    let input = r#"
        <body>
            <div>
                <p>I'll handle it.</p>
            </div>
            <hr>
            <div>
                <p>From: Alice Smith<br>
                Sent: Monday, January 5, 2026<br>
                To: Bob Jones<br>
                Subject: Action needed</p>
            </div>
            <div>
                <p>Can you take a look at the report?</p>
            </div>
        </body>
    "#;

    let markdown = convert(input);

    let expected = [
        "I'll handle it.",
        // The `<hr>` separator.
        "---",
        "From: Alice Smith",
        "take a look at the report",
    ];
    for fragment in expected {
        assert!(
            markdown.contains(fragment),
            "expected {fragment:?} in converted output {markdown:?}"
        );
    }
}
| 350 |
|
| 351 |
/// A two-level Gmail reply chain (a `gmail_quote` nested inside another
/// one's blockquote): text from every level must survive and at least one
/// quote marker must be present.
#[test]
fn nested_gmail_reply_chain() {
    let input = r#"
        <body>
            <div dir="ltr"><p>Got it, thanks!</p></div>
            <div class="gmail_quote">
                On Tue, Jan 6, Bob wrote:
                <blockquote class="gmail_quote">
                    <div dir="ltr"><p>Here's the update.</p></div>
                    <div class="gmail_quote">
                        On Mon, Jan 5, Alice wrote:
                        <blockquote class="gmail_quote">
                            <div dir="ltr"><p>What's the status?</p></div>
                        </blockquote>
                    </div>
                </blockquote>
            </div>
        </body>
    "#;

    let markdown = convert(input);
    let has = |needle: &str| markdown.contains(needle);

    assert!(has("Got it, thanks!"));

    // Only the presence of quoting is checked, not the exact nesting depth.
    assert!(has("> "));
    assert!(has("Here's the update."));
    assert!(has("What's the status?"));
}
| 378 |
|
| 379 |
/// A forwarded message: the "Forwarded message" banner text and the
/// forwarded body must both be kept alongside the sender's own note.
#[test]
fn forwarded_message() {
    let input = r#"
        <body>
            <div><p>FYI, see below.</p></div>
            <div class="gmail_quote">
                ---------- Forwarded message ----------
                <blockquote>
                    <p>From: Alice</p>
                    <p>The deadline has been moved to Friday.</p>
                </blockquote>
            </div>
        </body>
    "#;

    let markdown = convert(input);

    for fragment in ["FYI, see below.", "Forwarded message", "deadline has been moved"] {
        assert!(
            markdown.contains(fragment),
            "expected {fragment:?} in converted output {markdown:?}"
        );
    }
}
| 399 |
|
| 400 |
/// ProtonMail-style reply: a `<blockquote class="protonmail_quote"
/// type="cite">` directly under `<body>`. Reply and quoted text must survive
/// and a quote marker must be present.
#[test]
fn protonmail_reply() {
    let input = r#"
        <body>
            <div>Will do, thanks.</div>
            <blockquote class="protonmail_quote" type="cite">
                <div>Please send me the files by EOD.</div>
            </blockquote>
        </body>
    "#;

    let markdown = convert(input);

    let reply_kept = markdown.contains("Will do, thanks.");
    let has_quote_marker = markdown.contains("> ");
    let quoted_kept = markdown.contains("send me the files");

    assert!(reply_kept);
    assert!(has_quote_marker);
    assert!(quoted_kept);
}
| 416 |
|
| 417 |
/// The "On … wrote:" attribution sitting as bare text inside a `gmail_quote`
/// wrapper must not be discarded along with the wrapper.
#[test]
fn attribution_preserved_above_quote() {
    let input = r#"
        <body>
            <p>Agreed.</p>
            <div class="gmail_quote">
                On Wed, Jan 7, 2026 at 10:00 AM Carol wrote:
                <blockquote>
                    <p>Let's go with option B.</p>
                </blockquote>
            </div>
        </body>
    "#;

    let markdown = convert(input);

    let expected = [
        "Agreed.",
        // The attribution line itself.
        "Carol wrote:",
        "option B",
    ];
    for fragment in expected {
        assert!(
            markdown.contains(fragment),
            "expected {fragment:?} in converted output {markdown:?}"
        );
    }
}
| 437 |
|