Skip to main content

max / pter

11.3 KB · 437 lines History Blame Raw
1 use pter::convert;
2
/// A small, well-formed HTML email: heading, paragraphs, bold text and a `<br>`.
///
/// Checks that the heading, the greeting, the bold time and the line break inside
/// the sign-off all show up in the Markdown returned by `convert`.
#[test]
fn simple_email() {
    let input = r#"
        <html>
        <head><title>Email</title></head>
        <body>
            <h1>Meeting Tomorrow</h1>
            <p>Hi Max,</p>
            <p>Just confirming our meeting tomorrow at <strong>2pm</strong>.</p>
            <p>Best,<br>Alice</p>
        </body>
        </html>
    "#;

    let rendered = convert(input);

    // The last fragment pins the `<br>` to a single newline between "Best," and "Alice".
    for expected in ["# Meeting Tomorrow", "Hi Max,", "**2pm**", "Best,\nAlice"] {
        assert!(
            rendered.contains(expected),
            "missing {:?} in output:\n{}",
            expected,
            rendered
        );
    }
}
23
/// Anchors with distinct link text versus anchors whose text is the URL itself.
///
/// The first anchor must become a Markdown link; the second, whose text equals its
/// `href`, must appear as the bare URL directly after "Direct link: ".
#[test]
fn email_with_links() {
    let input = r#"
        <body>
            <p>Please review the <a href="https://example.com/doc">document</a>.</p>
            <p>Direct link: <a href="https://example.com">https://example.com</a></p>
        </body>
    "#;

    let rendered = convert(input);

    let labelled_link = "[document](https://example.com/doc)";
    assert!(
        rendered.contains(labelled_link),
        "missing {:?} in output:\n{}",
        labelled_link,
        rendered
    );

    // Link text matches URL — no markdown link syntax
    let bare_url = "Direct link: https://example.com";
    assert!(
        rendered.contains(bare_url),
        "missing {:?} in output:\n{}",
        bare_url,
        rendered
    );
}
38
/// Images that look like tracking pixels versus a real content image.
///
/// The input holds a 1x1 remote image, a `data:` URI image and a regular image with
/// alt text. Only the regular image may appear in the output.
#[test]
fn email_with_tracking_pixels() {
    let input = r#"
        <body>
            <p>Content here</p>
            <img src="https://tracker.example.com/open.gif" width="1" height="1" alt="">
            <img src="data:image/gif;base64,R0lGODlhAQABAIAAAAAAAP///yH5BAEAAAAALAAAAAABAAEAAAIBRAA7" alt="">
            <img src="real-image.jpg" alt="A real photo" width="600">
        </body>
    "#;

    let rendered = convert(input);

    for expected in ["Content here", "![A real photo](real-image.jpg)"] {
        assert!(
            rendered.contains(expected),
            "missing {:?} in output:\n{}",
            expected,
            rendered
        );
    }

    // Neither the tracker host nor the inline data URI may leak through.
    for banned in ["tracker", "data:image"] {
        assert!(
            !rendered.contains(banned),
            "unexpected {:?} in output:\n{}",
            banned,
            rendered
        );
    }
}
56
/// A reply followed by a single-level `<blockquote>`.
///
/// The reply text must be present and the quoted paragraph must carry a `> ` prefix.
#[test]
fn email_with_quoted_reply() {
    let input = r#"
        <body>
            <p>Thanks, that works for me.</p>
            <blockquote>
                <p>Can we meet at 3pm instead?</p>
            </blockquote>
        </body>
    "#;

    let rendered = convert(input);

    for expected in ["Thanks, that works for me.", "> Can we meet at 3pm instead?"] {
        assert!(
            rendered.contains(expected),
            "missing {:?} in output:\n{}",
            expected,
            rendered
        );
    }
}
72
/// A message whose signature is separated from the body by an `<hr>`.
///
/// Checks for the body text, a `---` rule and the first signature line.
#[test]
fn email_with_signature_line() {
    let input = r#"
        <body>
            <p>See you then.</p>
            <hr>
            <p>Alice Smith</p>
            <p>Engineering Lead</p>
        </body>
    "#;

    let rendered = convert(input);

    for expected in ["See you then.", "---", "Alice Smith"] {
        assert!(
            rendered.contains(expected),
            "missing {:?} in output:\n{}",
            expected,
            rendered
        );
    }
}
89
/// Three `<blockquote>` elements nested inside each other.
///
/// Each nesting level must add one more `> ` marker in front of its paragraph.
#[test]
fn deeply_nested_blockquotes() {
    let input = r#"
        <body>
            <p>Got it.</p>
            <blockquote>
                <p>Sounds good.</p>
                <blockquote>
                    <p>Can we reschedule?</p>
                    <blockquote>
                        <p>Original message here.</p>
                    </blockquote>
                </blockquote>
            </blockquote>
        </body>
    "#;

    let rendered = convert(input);

    // Ordered from the unquoted reply down to the innermost quote.
    let expected_fragments = [
        "Got it.",
        "> Sounds good.",
        "> > Can we reschedule?",
        "> > > Original message here.",
    ];
    for expected in expected_fragments {
        assert!(
            rendered.contains(expected),
            "missing {:?} in output:\n{}",
            expected,
            rendered
        );
    }
}
113
/// An ordered list whose first item contains a nested unordered list.
///
/// Checks the numbering of the outer items and that the nested bullet is preceded
/// by whitespace.
#[test]
fn complex_list_structure() {
    let input = r#"
        <body>
            <p>Action items:</p>
            <ol>
                <li>Review the PR
                    <ul>
                        <li>Check tests</li>
                        <li>Check docs</li>
                    </ul>
                </li>
                <li>Deploy to staging</li>
            </ol>
        </body>
    "#;

    let rendered = convert(input);

    let expected_fragments = [
        "Action items:",
        "1. Review the PR",
        // The leading space requires the nested bullet not to start at column zero.
        " - Check tests",
        "2. Deploy to staging",
    ];
    for expected in expected_fragments {
        assert!(
            rendered.contains(expected),
            "missing {:?} in output:\n{}",
            expected,
            rendered
        );
    }
}
137
/// A `<pre><code>` block containing a short, indented Rust snippet.
///
/// Checks that the snippet opens a fenced code block and that the whitespace in
/// front of `println!` is still present in the output.
#[test]
fn pre_block_preserves_formatting() {
    // The lines inside `<pre>` are deliberately not aligned with the surrounding
    // HTML: their leading whitespace is part of what the assertions below check.
    let input = r#"
        <body>
            <p>Here's the code:</p>
            <pre><code>fn main() {
    println!("hello");
}</code></pre>
        </body>
    "#;

    let rendered = convert(input);

    for expected in ["Here's the code:", "```\nfn main()", " println!"] {
        assert!(
            rendered.contains(expected),
            "missing {:?} in output:\n{}",
            expected,
            rendered
        );
    }
}
154
/// Elements hidden through inline CSS (`display: none`, `visibility: hidden`).
///
/// Text inside the hidden elements must be absent; the visible paragraphs around
/// them must remain.
#[test]
fn hidden_content_stripped() {
    let input = r#"
        <body>
            <p>Visible content</p>
            <div style="display: none;">
                <p>This should not appear</p>
            </div>
            <span style="visibility: hidden;">Also hidden</span>
            <p>More visible</p>
        </body>
    "#;

    let rendered = convert(input);

    for expected in ["Visible content", "More visible"] {
        assert!(
            rendered.contains(expected),
            "missing {:?} in output:\n{}",
            expected,
            rendered
        );
    }

    for banned in ["should not appear", "Also hidden"] {
        assert!(
            !rendered.contains(banned),
            "unexpected {:?} in output:\n{}",
            banned,
            rendered
        );
    }
}
174
/// `<style>` and `<script>` elements in both `<head>` and `<body>`.
///
/// This is an exact-match test: the whole output must equal the single visible
/// paragraph, so no script/style text and no surrounding whitespace may remain.
#[test]
fn script_and_style_fully_removed() {
    let input = r#"
        <html>
        <head>
            <style>body { color: red; }</style>
            <script>alert('xss');</script>
        </head>
        <body>
            <p>Safe content</p>
            <script>document.write('injected')</script>
        </body>
        </html>
    "#;

    assert_eq!(convert(input), "Safe content");
}
193
/// Typical email newsletter wrapped in layout tables.
///
/// Two nested `<table>` elements are used purely for layout, with a tracking image
/// after them. The content inside must be converted as normal block content, and
/// neither the tracker URL nor Markdown table syntax may appear.
#[test]
fn newsletter_table_layout() {
    let input = r#"
        <html>
        <body>
            <table width="100%" cellpadding="0" cellspacing="0" role="presentation">
                <tr>
                    <td align="center">
                        <table width="600" cellpadding="0" cellspacing="0">
                            <tr>
                                <td>
                                    <h2>Weekly Digest</h2>
                                    <p>Here are your updates for this week.</p>
                                    <ul>
                                        <li>New release v2.0</li>
                                        <li>Bug fixes</li>
                                    </ul>
                                    <p>Thanks for reading!</p>
                                </td>
                            </tr>
                        </table>
                    </td>
                </tr>
            </table>
            <img src="https://track.example.com/open.gif" width="1" height="1">
        </body>
        </html>
    "#;

    let rendered = convert(input);

    let expected_fragments = [
        "## Weekly Digest",
        "Here are your updates for this week.",
        "- New release v2.0",
        "- Bug fixes",
        "Thanks for reading!",
    ];
    for expected in expected_fragments {
        assert!(
            rendered.contains(expected),
            "missing {:?} in output:\n{}",
            expected,
            rendered
        );
    }

    assert!(
        !rendered.contains("track.example.com"),
        "tracking image leaked into output:\n{}",
        rendered
    );
    // No table markup in output
    assert!(
        !rendered.contains("| "),
        "layout table rendered as a Markdown table:\n{}",
        rendered
    );
}
234
/// A real data table with `<thead>` and `<tbody>`.
///
/// Unlike layout tables, this one must come out as a Markdown pipe table: header
/// row, `---` separator row and one row per `<tr>` in the body.
#[test]
fn data_table_preserved() {
    let input = r#"
        <body>
            <p>Order summary:</p>
            <table>
                <thead><tr><th>Item</th><th>Qty</th><th>Price</th></tr></thead>
                <tbody>
                    <tr><td>Widget</td><td>3</td><td>$15.00</td></tr>
                    <tr><td>Gadget</td><td>1</td><td>$29.99</td></tr>
                </tbody>
            </table>
        </body>
    "#;

    let rendered = convert(input);

    let expected_fragments = [
        "Order summary:",
        "| Item | Qty | Price |",
        "| --- | --- | --- |",
        "| Widget | 3 | $15.00 |",
        "| Gadget | 1 | $29.99 |",
    ];
    for expected in expected_fragments {
        assert!(
            rendered.contains(expected),
            "missing {:?} in output:\n{}",
            expected,
            rendered
        );
    }
}
257
/// Spacer elements and a hidden tracking image between two real paragraphs.
///
/// The input contains a zero-font-size spacer div, a 1x1 `display:none` image and a
/// zero-height `overflow:hidden` div. The two paragraphs must remain; the text of
/// the zero-height div and the image source must not appear.
#[test]
fn spacer_and_tracking_stripped() {
    let input = r#"
        <body>
            <p>Real content</p>
            <div style="font-size: 0; line-height: 0;">&nbsp;</div>
            <img src="pixel.gif" width="1" height="1" style="display:none">
            <div style="height:0;overflow:hidden">invisible</div>
            <p>More content</p>
        </body>
    "#;

    let rendered = convert(input);

    for expected in ["Real content", "More content"] {
        assert!(
            rendered.contains(expected),
            "missing {:?} in output:\n{}",
            expected,
            rendered
        );
    }

    for banned in ["invisible", "pixel.gif"] {
        assert!(
            !rendered.contains(banned),
            "unexpected {:?} in output:\n{}",
            banned,
            rendered
        );
    }
}
276
277 // -- Reply chain tests --
278
/// Gmail-style reply: new text in a `dir="ltr"` div, followed by a `div.gmail_quote`
/// holding an attribution line and a `blockquote.gmail_quote` with the earlier message.
///
/// Checks that the reply text is present and that the earlier message is rendered on
/// a line carrying a `> ` quote marker.
#[test]
fn gmail_reply_chain() {
    let html = r#"
        <body>
            <div dir="ltr">
                <p>Thanks, that works for me.</p>
            </div>
            <div class="gmail_quote">
                <div class="gmail_attr">On Mon, Jan 5, 2026 at 3:00 PM Alice &lt;alice@example.com&gt; wrote:</div>
                <blockquote class="gmail_quote">
                    <div dir="ltr">
                        <p>Can we meet at 3pm instead of 2pm?</p>
                    </div>
                </blockquote>
            </div>
        </body>
    "#;

    let md = convert(html);
    assert!(
        md.contains("Thanks, that works for me."),
        "reply text missing from output:\n{}",
        md
    );
    assert!(
        md.contains("3pm instead of 2pm"),
        "quoted text missing from output:\n{}",
        md
    );

    // The gmail_quote div should be rendered as a quote block.
    //
    // A bare `md.contains("> ")` cannot establish that here: the attribution line
    // contains `&gt; wrote:`, which yields the sequence `> ` as soon as the entity is
    // decoded, so that check could pass with no quote marker in the output at all.
    // Instead, require the quoted sentence itself to sit on a `> `-prefixed line.
    let quoted_line_present = md.lines().any(|line| {
        line.trim_start().starts_with("> ") && line.contains("3pm instead of 2pm")
    });
    assert!(
        quoted_line_present,
        "quoted message is not on a `> ` line in output:\n{}",
        md
    );
}
303
/// Apple Mail-style reply: plain `<div>` text followed by `<blockquote type="cite">`.
///
/// Checks for the reply text, the presence of a `> ` quote marker and the quoted text.
#[test]
fn apple_mail_reply() {
    let input = r#"
        <body>
            <div>Sounds good, see you then.</div>
            <div>
                <br>
                <blockquote type="cite">
                    <div>Hey, are we still on for lunch?</div>
                </blockquote>
            </div>
        </body>
    "#;

    let rendered = convert(input);

    for expected in ["Sounds good, see you then.", "> ", "still on for lunch"] {
        assert!(
            rendered.contains(expected),
            "missing {:?} in output:\n{}",
            expected,
            rendered
        );
    }
}
323
/// Outlook-style reply: no `<blockquote>`, just an `<hr>` and a From/Sent/To/Subject
/// header block in front of the earlier message.
///
/// Checks that the reply, a `---` rule, the first header line and the earlier message
/// all appear in the output.
#[test]
fn outlook_reply_with_separator() {
    let input = r#"
        <body>
            <div>
                <p>I'll handle it.</p>
            </div>
            <hr>
            <div>
                <p>From: Alice Smith<br>
                Sent: Monday, January 5, 2026<br>
                To: Bob Jones<br>
                Subject: Action needed</p>
            </div>
            <div>
                <p>Can you take a look at the report?</p>
            </div>
        </body>
    "#;

    let rendered = convert(input);

    let expected_fragments = [
        "I'll handle it.",
        "---", // hr separator
        "From: Alice Smith",
        "take a look at the report",
    ];
    for expected in expected_fragments {
        assert!(
            rendered.contains(expected),
            "missing {:?} in output:\n{}",
            expected,
            rendered
        );
    }
}
350
/// Gmail-style reply chain two levels deep: a `blockquote.gmail_quote` that itself
/// contains another `div.gmail_quote` > `blockquote.gmail_quote`.
///
/// Checks that all three messages are present and that the two earlier messages are
/// quoted at increasing depth.
#[test]
fn nested_gmail_reply_chain() {
    let html = r#"
        <body>
            <div dir="ltr"><p>Got it, thanks!</p></div>
            <div class="gmail_quote">
                On Tue, Jan 6, Bob wrote:
                <blockquote class="gmail_quote">
                    <div dir="ltr"><p>Here's the update.</p></div>
                    <div class="gmail_quote">
                        On Mon, Jan 5, Alice wrote:
                        <blockquote class="gmail_quote">
                            <div dir="ltr"><p>What's the status?</p></div>
                        </blockquote>
                    </div>
                </blockquote>
            </div>
        </body>
    "#;

    let md = convert(html);
    for expected in ["Got it, thanks!", "Here's the update.", "What's the status?"] {
        assert!(
            md.contains(expected),
            "missing {:?} in output:\n{}",
            expected,
            md
        );
    }

    // Should have nested quoting.
    //
    // Checking only for "> " would also pass if both earlier messages were flattened
    // into a single quote level, so each message is checked against the marker for
    // its own depth. Prefix matching is used so that a converter adding further
    // levels still passes.
    let on_line_with_prefix = |prefix: &str, text: &str| {
        md.lines()
            .any(|line| line.trim_start().starts_with(prefix) && line.contains(text))
    };
    assert!(
        on_line_with_prefix("> ", "Here's the update."),
        "first-level quote is not on a `> ` line in output:\n{}",
        md
    );
    assert!(
        on_line_with_prefix("> > ", "What's the status?"),
        "second-level quote is not on a `> > ` line in output:\n{}",
        md
    );
}
378
/// A forwarded message: a "Forwarded message" banner inside `div.gmail_quote`,
/// followed by a `<blockquote>` with the forwarded content.
///
/// Checks only that the note, the banner text and the forwarded body are present.
#[test]
fn forwarded_message() {
    let input = r#"
        <body>
            <div><p>FYI, see below.</p></div>
            <div class="gmail_quote">
                ---------- Forwarded message ----------
                <blockquote>
                    <p>From: Alice</p>
                    <p>The deadline has been moved to Friday.</p>
                </blockquote>
            </div>
        </body>
    "#;

    let rendered = convert(input);

    for expected in ["FYI, see below.", "Forwarded message", "deadline has been moved"] {
        assert!(
            rendered.contains(expected),
            "missing {:?} in output:\n{}",
            expected,
            rendered
        );
    }
}
399
/// ProtonMail-style reply: `<blockquote class="protonmail_quote" type="cite">`.
///
/// Checks for the reply text, the presence of a `> ` quote marker and the quoted text.
#[test]
fn protonmail_reply() {
    let input = r#"
        <body>
            <div>Will do, thanks.</div>
            <blockquote class="protonmail_quote" type="cite">
                <div>Please send me the files by EOD.</div>
            </blockquote>
        </body>
    "#;

    let rendered = convert(input);

    for expected in ["Will do, thanks.", "> ", "send me the files"] {
        assert!(
            rendered.contains(expected),
            "missing {:?} in output:\n{}",
            expected,
            rendered
        );
    }
}
416
/// An "On ... wrote:" attribution given as bare text inside `div.gmail_quote`,
/// directly in front of the `<blockquote>`.
///
/// Checks that the attribution text is kept in the output along with the reply and
/// the quoted message.
#[test]
fn attribution_preserved_above_quote() {
    let input = r#"
        <body>
            <p>Agreed.</p>
            <div class="gmail_quote">
                On Wed, Jan 7, 2026 at 10:00 AM Carol wrote:
                <blockquote>
                    <p>Let's go with option B.</p>
                </blockquote>
            </div>
        </body>
    "#;

    let rendered = convert(input);

    let expected_fragments = [
        "Agreed.",
        "Carol wrote:", // Attribution should appear
        "option B",
    ];
    for expected in expected_fragments {
        assert!(
            rendered.contains(expected),
            "missing {:?} in output:\n{}",
            expected,
            rendered
        );
    }
}
437