| 11 |
11 |
|
|
| 12 |
12 |
|
/// Shorthand for an `async_imap` session running over a TLS stream that wraps a `TcpStream`.
type ImapSession = async_imap::Session<tokio_native_tls::TlsStream<TcpStream>>;
|
| 13 |
13 |
|
|
|
14 |
+ |
/// Largest message, in bytes, that sync will download: 25 MiB.
/// A message whose reported size is strictly greater than this is skipped.
const MAX_EMAIL_SIZE: u32 = 25 * (1 << 20);
|
|
16 |
+ |
|
| 14 |
17 |
|
/// Raw attachment data extracted during IMAP RFC822 parse.
|
| 15 |
18 |
|
#[derive(Debug, Clone)]
|
| 16 |
19 |
|
pub struct AttachmentPart {
|
| 23 |
26 |
|
pub struct ParsedEmail {
|
| 24 |
27 |
|
pub message_id: Option<String>,
|
| 25 |
28 |
|
pub in_reply_to: Option<String>,
|
|
29 |
+ |
/// First entry from the References header (thread root).
|
|
30 |
+ |
pub references_root: Option<String>,
|
| 26 |
31 |
|
pub imap_uid: u32,
|
| 27 |
32 |
|
pub source_folder: String,
|
| 28 |
33 |
|
pub from: String,
|
| 187 |
192 |
|
.collect::<Vec<_>>()
|
| 188 |
193 |
|
.join(",");
|
| 189 |
194 |
|
|
| 190 |
|
- |
// Fetch UID, FLAGS, and RFC822 body
|
|
195 |
+ |
// Pre-filter oversized emails
|
|
196 |
+ |
let mut size_stream = session
|
|
197 |
+ |
.fetch(&sequence_set, "(UID RFC822.SIZE)")
|
|
198 |
+ |
.await
|
|
199 |
+ |
.map_err(|e| format!("Size fetch error: {}", e))?;
|
|
200 |
+ |
|
|
201 |
+ |
let mut safe_seqs: Vec<u32> = Vec::new();
|
|
202 |
+ |
let mut skipped_large = 0usize;
|
|
203 |
+ |
// Build a set of UIDs that are safe to fetch
|
|
204 |
+ |
while let Some(result) = size_stream.next().await {
|
|
205 |
+ |
if let Ok(msg) = result {
|
|
206 |
+ |
let over_limit = msg.size.map_or(false, |s| s > MAX_EMAIL_SIZE);
|
|
207 |
+ |
if over_limit {
|
|
208 |
+ |
skipped_large += 1;
|
|
209 |
+ |
tracing::warn!(uid = ?msg.uid, size = ?msg.size, folder = %folder, "Skipping oversized email");
|
|
210 |
+ |
continue;
|
|
211 |
+ |
}
|
|
212 |
+ |
// Note: despite its name, `safe_seqs` collects message UIDs, not sequence numbers.
|
|
213 |
+ |
// Re-collect the UIDs we want, then re-fetch by UID
|
|
214 |
+ |
if let Some(uid) = msg.uid {
|
|
215 |
+ |
safe_seqs.push(uid);
|
|
216 |
+ |
}
|
|
217 |
+ |
}
|
|
218 |
+ |
}
|
|
219 |
+ |
drop(size_stream);
|
|
220 |
+ |
|
|
221 |
+ |
if skipped_large > 0 {
|
|
222 |
+ |
debug.push(format!("skipped_large: {}", skipped_large));
|
|
223 |
+ |
}
|
|
224 |
+ |
|
|
225 |
+ |
if safe_seqs.is_empty() {
|
|
226 |
+ |
session.logout().await.ok();
|
|
227 |
+ |
return Ok((Vec::new(), debug.join(", ")));
|
|
228 |
+ |
}
|
|
229 |
+ |
|
|
230 |
+ |
let safe_uid_set = safe_seqs.iter().map(|n| n.to_string()).collect::<Vec<_>>().join(",");
|
|
231 |
+ |
|
|
232 |
+ |
// Fetch full bodies only for safe-sized emails
|
| 191 |
233 |
|
let mut messages = session
|
| 192 |
|
- |
.fetch(&sequence_set, "(UID FLAGS RFC822)")
|
|
234 |
+ |
.uid_fetch(&safe_uid_set, "(UID FLAGS RFC822)")
|
| 193 |
235 |
|
.await
|
| 194 |
236 |
|
.map_err(|e| format!("Fetch error: {}", e))?;
|
| 195 |
237 |
|
|
| 237 |
279 |
|
.find(|h| h.get_key().to_lowercase() == "in-reply-to")
|
| 238 |
280 |
|
.map(|h| h.get_value());
|
| 239 |
281 |
|
|
|
282 |
+ |
let references_root = extract_references_root(&parsed.headers);
|
|
283 |
+ |
|
| 240 |
284 |
|
let from = parsed
|
| 241 |
285 |
|
.headers
|
| 242 |
286 |
|
.iter()
|
| 275 |
319 |
|
emails.push(ParsedEmail {
|
| 276 |
320 |
|
message_id,
|
| 277 |
321 |
|
in_reply_to,
|
|
322 |
+ |
references_root,
|
| 278 |
323 |
|
imap_uid: uid,
|
| 279 |
324 |
|
source_folder: folder_name.clone(),
|
| 280 |
325 |
|
from,
|
| 389 |
434 |
|
});
|
| 390 |
435 |
|
}
|
| 391 |
436 |
|
|
|
437 |
+ |
// Pre-filter oversized emails by fetching sizes first
|
| 392 |
438 |
|
let uid_set = uids.iter().map(|n| n.to_string()).collect::<Vec<_>>().join(",");
|
|
439 |
+ |
let mut size_stream = session
|
|
440 |
+ |
.uid_fetch(&uid_set, "(UID RFC822.SIZE)")
|
|
441 |
+ |
.await
|
|
442 |
+ |
.map_err(|e| format!("UID size fetch error: {}", e))?;
|
|
443 |
+ |
|
|
444 |
+ |
let mut safe_uids: Vec<u32> = Vec::new();
|
|
445 |
+ |
let mut skipped_large = 0usize;
|
|
446 |
+ |
while let Some(result) = size_stream.next().await {
|
|
447 |
+ |
if let Ok(msg) = result {
|
|
448 |
+ |
if let Some(uid) = msg.uid {
|
|
449 |
+ |
if let Some(size) = msg.size {
|
|
450 |
+ |
if size > MAX_EMAIL_SIZE {
|
|
451 |
+ |
skipped_large += 1;
|
|
452 |
+ |
tracing::warn!(uid, size, folder = %folder, "Skipping oversized email ({} bytes)", size);
|
|
453 |
+ |
continue;
|
|
454 |
+ |
}
|
|
455 |
+ |
}
|
|
456 |
+ |
safe_uids.push(uid);
|
|
457 |
+ |
}
|
|
458 |
+ |
}
|
|
459 |
+ |
}
|
|
460 |
+ |
drop(size_stream);
|
|
461 |
+ |
|
|
462 |
+ |
if skipped_large > 0 {
|
|
463 |
+ |
debug.push(format!("skipped_large: {}", skipped_large));
|
|
464 |
+ |
}
|
|
465 |
+ |
|
|
466 |
+ |
if safe_uids.is_empty() {
|
|
467 |
+ |
session.logout().await.ok();
|
|
468 |
+ |
return Ok(FolderFetchResult {
|
|
469 |
+ |
emails: Vec::new(),
|
|
470 |
+ |
uid_validity: server_uid_validity,
|
|
471 |
+ |
max_uid_fetched: uids.iter().copied().max(),
|
|
472 |
+ |
debug_info: debug.join(", "),
|
|
473 |
+ |
});
|
|
474 |
+ |
}
|
|
475 |
+ |
|
|
476 |
+ |
let safe_uid_set = safe_uids.iter().map(|n| n.to_string()).collect::<Vec<_>>().join(",");
|
| 393 |
477 |
|
|
| 394 |
478 |
|
let mut messages = session
|
| 395 |
|
- |
.uid_fetch(&uid_set, "(UID FLAGS RFC822)")
|
|
479 |
+ |
.uid_fetch(&safe_uid_set, "(UID FLAGS RFC822)")
|
| 396 |
480 |
|
.await
|
| 397 |
481 |
|
.map_err(|e| format!("UID fetch error: {}", e))?;
|
| 398 |
482 |
|
|
| 401 |
485 |
|
let mut msg_count = 0;
|
| 402 |
486 |
|
let mut body_count = 0;
|
| 403 |
487 |
|
let mut parse_errors = 0;
|
| 404 |
|
- |
let mut max_uid: Option<u32> = None;
|
|
488 |
+ |
// Seed max_uid from all UIDs (including skipped large ones) so they aren't re-fetched
|
|
489 |
+ |
let mut max_uid: Option<u32> = uids.iter().copied().max();
|
| 405 |
490 |
|
|
| 406 |
491 |
|
while let Some(result) = messages.next().await {
|
| 407 |
492 |
|
msg_count += 1;
|
| 434 |
519 |
|
let in_reply_to = parsed.headers.iter()
|
| 435 |
520 |
|
.find(|h| h.get_key().to_lowercase() == "in-reply-to")
|
| 436 |
521 |
|
.map(|h| h.get_value());
|
|
522 |
+ |
let references_root = extract_references_root(&parsed.headers);
|
| 437 |
523 |
|
let from = parsed.headers.iter()
|
| 438 |
524 |
|
.find(|h| h.get_key().to_lowercase() == "from")
|
| 439 |
525 |
|
.map(|h| h.get_value()).unwrap_or_default();
|
| 457 |
543 |
|
emails.push(ParsedEmail {
|
| 458 |
544 |
|
message_id,
|
| 459 |
545 |
|
in_reply_to,
|
|
546 |
+ |
references_root,
|
| 460 |
547 |
|
imap_uid: uid,
|
| 461 |
548 |
|
source_folder: folder_name.clone(),
|
| 462 |
549 |
|
from,
|
| 609 |
696 |
|
|
| 610 |
697 |
|
Self::collect_body_parts(mail, &mut plain_text, &mut html_body);
|
| 611 |
698 |
|
|
| 612 |
|
- |
// Build final result - prefer plain text, fall back to stripped HTML
|
|
699 |
+ |
// Build final result - prefer plain text, fall back to pter markdown conversion
|
| 613 |
700 |
|
let body_text = if let Some(ref plain) = plain_text {
|
| 614 |
701 |
|
// If we have plain text but it looks like it contains HTML tags,
|
| 615 |
|
- |
// we should strip them (some emails have incorrect content-types)
|
|
702 |
+ |
// we should convert them (some emails have incorrect content-types)
|
| 616 |
703 |
|
if plain.contains("<html") || plain.contains("<body") || plain.contains("<div") {
|
| 617 |
|
- |
Self::strip_html(plain)
|
|
704 |
+ |
pter::convert(plain)
|
| 618 |
705 |
|
} else {
|
| 619 |
706 |
|
plain.clone()
|
| 620 |
707 |
|
}
|
| 621 |
708 |
|
} else if let Some(ref html) = html_body {
|
| 622 |
|
- |
Self::strip_html(html)
|
|
709 |
+ |
pter::convert(html)
|
| 623 |
710 |
|
} else {
|
| 624 |
711 |
|
// Fallback to whatever body is available
|
| 625 |
712 |
|
let body = mail.get_body().unwrap_or_default();
|
| 626 |
713 |
|
if body.contains("<html") || body.contains("<body") || body.contains("<div") {
|
| 627 |
|
- |
Self::strip_html(&body)
|
|
714 |
+ |
pter::convert(&body)
|
| 628 |
715 |
|
} else {
|
| 629 |
716 |
|
body
|
| 630 |
717 |
|
}
|
| 660 |
747 |
|
}
|
| 661 |
748 |
|
}
|
| 662 |
749 |
|
|
| 663 |
|
- |
/// Convert HTML email to clean, readable plain text.
|
| 664 |
|
- |
///
|
| 665 |
|
- |
/// This function:
|
| 666 |
|
- |
/// - Removes script, style, and head content entirely
|
| 667 |
|
- |
/// - Converts links to `text [url]` format for readability
|
| 668 |
|
- |
/// - Converts lists to bullet points
|
| 669 |
|
- |
/// - Adds proper line breaks for block elements
|
| 670 |
|
- |
/// - Decodes HTML entities
|
| 671 |
|
- |
/// - Cleans up excessive whitespace
|
| 672 |
|
- |
fn strip_html(html: &str) -> String {
|
| 673 |
|
- |
let mut text = html.to_string();
|
| 674 |
|
- |
|
| 675 |
|
- |
// Remove content we never want to display
|
| 676 |
|
- |
// Script tags and their content
|
| 677 |
|
- |
while let Some(start) = text.to_lowercase().find("<script") {
|
| 678 |
|
- |
if let Some(end) = text.to_lowercase()[start..].find("</script>") {
|
| 679 |
|
- |
text = format!("{}{}", &text[..start], &text[start + end + 9..]);
|
| 680 |
|
- |
} else {
|
| 681 |
|
- |
// Unclosed script tag — remove everything from here to end
|
| 682 |
|
- |
text.truncate(start);
|
| 683 |
|
- |
break;
|
| 684 |
|
- |
}
|
| 685 |
|
- |
}
|
| 686 |
|
- |
|
| 687 |
|
- |
// Style tags and their content
|
| 688 |
|
- |
while let Some(start) = text.to_lowercase().find("<style") {
|
| 689 |
|
- |
if let Some(end) = text.to_lowercase()[start..].find("</style>") {
|
| 690 |
|
- |
text = format!("{}{}", &text[..start], &text[start + end + 8..]);
|
| 691 |
|
- |
} else {
|
| 692 |
|
- |
text.truncate(start);
|
| 693 |
|
- |
break;
|
| 694 |
|
- |
}
|
| 695 |
|
- |
}
|
| 696 |
|
- |
|
| 697 |
|
- |
// Head section
|
| 698 |
|
- |
if let Some(start) = text.to_lowercase().find("<head") {
|
| 699 |
|
- |
if let Some(end) = text.to_lowercase()[start..].find("</head>") {
|
| 700 |
|
- |
text = format!("{}{}", &text[..start], &text[start + end + 7..]);
|
| 701 |
|
- |
}
|
| 702 |
|
- |
}
|
| 703 |
|
- |
|
| 704 |
|
- |
// Extract links before stripping tags: <a href="url">text</a> -> text [url]
|
| 705 |
|
- |
let mut result = String::new();
|
| 706 |
|
- |
let mut remaining = text.as_str();
|
| 707 |
|
- |
|
| 708 |
|
- |
while let Some(a_start) = remaining.to_lowercase().find("<a ") {
|
| 709 |
|
- |
// Add text before the <a> tag
|
| 710 |
|
- |
result.push_str(&remaining[..a_start]);
|
| 711 |
|
- |
|
| 712 |
|
- |
let after_a = &remaining[a_start..];
|
| 713 |
|
- |
|
| 714 |
|
- |
// Find href attribute
|
| 715 |
|
- |
let href = Self::extract_href(after_a);
|
| 716 |
|
- |
|
| 717 |
|
- |
// Find the closing >
|
| 718 |
|
- |
if let Some(tag_end) = after_a.find('>') {
|
| 719 |
|
- |
let after_tag = &after_a[tag_end + 1..];
|
| 720 |
|
- |
|
| 721 |
|
- |
// Find </a>
|
| 722 |
|
- |
if let Some(close) = after_tag.to_lowercase().find("</a>") {
|
| 723 |
|
- |
let link_text = &after_tag[..close];
|
| 724 |
|
- |
let link_text_clean = Self::strip_tags_simple(link_text);
|
| 725 |
|
- |
|
| 726 |
|
- |
// Format as "text [url]" if we have both, otherwise just text
|
| 727 |
|
- |
if let Some(url) = href {
|
| 728 |
|
- |
let url_trimmed = url.trim();
|
| 729 |
|
- |
// Only add URL if it's meaningful and different from text
|
| 730 |
|
- |
if !url_trimmed.is_empty()
|
| 731 |
|
- |
&& !url_trimmed.starts_with('#')
|
| 732 |
|
- |
&& !url_trimmed.starts_with("javascript:")
|
| 733 |
|
- |
&& url_trimmed != link_text_clean.trim()
|
| 734 |
|
- |
{
|
| 735 |
|
- |
result.push_str(&format!("{} [{}]", link_text_clean.trim(), url_trimmed));
|
| 736 |
|
- |
} else {
|
| 737 |
|
- |
result.push_str(link_text_clean.trim());
|
| 738 |
|
- |
}
|
| 739 |
|
- |
} else {
|
| 740 |
|
- |
result.push_str(link_text_clean.trim());
|
| 741 |
|
- |
}
|
|
750 |
+ |
// strip_html, extract_href, strip_tags_simple, decode_html_entities
|
|
751 |
+ |
// removed — replaced by pter::convert().
|
| 742 |
752 |
|
|
| 743 |
|
- |
remaining = &after_tag[close + 4..];
|
| 744 |
|
- |
} else {
|
| 745 |
|
- |
remaining = after_tag;
|
| 746 |
|
- |
}
|
| 747 |
|
- |
} else {
|
| 748 |
|
- |
remaining = &after_a[3..];
|
| 749 |
|
- |
}
|
| 750 |
|
- |
}
|
| 751 |
|
- |
result.push_str(remaining);
|
| 752 |
|
- |
text = result;
|
| 753 |
|
- |
|
| 754 |
|
- |
// Convert block elements to line breaks
|
| 755 |
|
- |
let block_tags = [
|
| 756 |
|
- |
"</p>", "</div>", "</tr>", "</li>", "</h1>", "</h2>", "</h3>",
|
| 757 |
|
- |
"</h4>", "</h5>", "</h6>", "</blockquote>", "</pre>",
|
| 758 |
|
- |
];
|
| 759 |
|
- |
for tag in block_tags {
|
| 760 |
|
- |
text = text.replace(tag, &format!("{}\n", tag));
|
| 761 |
|
- |
let upper = tag.to_uppercase();
|
| 762 |
|
- |
text = text.replace(&upper, &format!("{}\n", upper));
|
| 763 |
|
- |
}
|
| 764 |
|
- |
|
| 765 |
|
- |
// Convert <br> to newlines
|
| 766 |
|
- |
for br in ["<br>", "<br/>", "<br />", "<BR>", "<BR/>", "<BR />"] {
|
| 767 |
|
- |
text = text.replace(br, "\n");
|
| 768 |
|
- |
}
|
| 769 |
|
- |
|
| 770 |
|
- |
// Convert list items to bullet points
|
| 771 |
|
- |
for li in ["<li>", "<LI>"] {
|
| 772 |
|
- |
text = text.replace(li, "\n• ");
|
| 773 |
|
- |
}
|
| 774 |
|
- |
|
| 775 |
|
- |
// Convert horizontal rules to separator
|
| 776 |
|
- |
for hr in ["<hr>", "<hr/>", "<hr />", "<HR>", "<HR/>", "<HR />"] {
|
| 777 |
|
- |
text = text.replace(hr, "\n---\n");
|
| 778 |
|
- |
}
|
| 779 |
|
- |
|
| 780 |
|
- |
// Strip remaining HTML tags
|
| 781 |
|
- |
text = Self::strip_tags_simple(&text);
|
| 782 |
|
- |
|
| 783 |
|
- |
// Decode HTML entities
|
| 784 |
|
- |
text = Self::decode_html_entities(&text);
|
| 785 |
|
- |
|
| 786 |
|
- |
// Clean up whitespace
|
| 787 |
|
- |
// Collapse multiple spaces to single space
|
| 788 |
|
- |
let mut prev_space = false;
|
| 789 |
|
- |
let mut cleaned = String::new();
|
| 790 |
|
- |
for c in text.chars() {
|
| 791 |
|
- |
if c == ' ' || c == '\t' {
|
| 792 |
|
- |
if !prev_space {
|
| 793 |
|
- |
cleaned.push(' ');
|
| 794 |
|
- |
prev_space = true;
|
| 795 |
|
- |
}
|
| 796 |
|
- |
} else {
|
| 797 |
|
- |
cleaned.push(c);
|
| 798 |
|
- |
prev_space = false;
|
| 799 |
|
- |
}
|
| 800 |
|
- |
}
|
| 801 |
|
- |
|
| 802 |
|
- |
// Trim lines and collapse multiple blank lines
|
| 803 |
|
- |
let lines: Vec<&str> = cleaned.lines().map(|l| l.trim()).collect();
|
| 804 |
|
- |
let mut final_lines: Vec<&str> = Vec::new();
|
| 805 |
|
- |
let mut prev_blank = false;
|
| 806 |
|
- |
|
| 807 |
|
- |
for line in lines {
|
| 808 |
|
- |
if line.is_empty() {
|
| 809 |
|
- |
if !prev_blank && !final_lines.is_empty() {
|
| 810 |
|
- |
final_lines.push("");
|
| 811 |
|
- |
prev_blank = true;
|
| 812 |
|
- |
}
|
| 813 |
|
- |
} else {
|
| 814 |
|
- |
final_lines.push(line);
|
| 815 |
|
- |
prev_blank = false;
|
| 816 |
|
- |
}
|
| 817 |
|
- |
}
|
| 818 |
|
- |
|
| 819 |
|
- |
// Remove leading/trailing blank lines
|
| 820 |
|
- |
while final_lines.first() == Some(&"") {
|
| 821 |
|
- |
final_lines.remove(0);
|
| 822 |
|
- |
}
|
| 823 |
|
- |
while final_lines.last() == Some(&"") {
|
| 824 |
|
- |
final_lines.pop();
|
| 825 |
|
- |
}
|
| 826 |
|
- |
|
| 827 |
|
- |
final_lines.join("\n")
|
| 828 |
|
- |
}
|
| 829 |
|
- |
|
| 830 |
|
- |
/// Extract href attribute value from an <a> tag
|
| 831 |
|
- |
fn extract_href(tag: &str) -> Option<String> {
|
| 832 |
|
- |
// Find href="..." or href='...'
|
| 833 |
|
- |
let lower = tag.to_lowercase();
|
| 834 |
|
- |
let href_pos = lower.find("href=")?;
|
| 835 |
|
- |
let after_href = &tag[href_pos + 5..];
|
| 836 |
|
- |
|
| 837 |
|
- |
let quote_char = after_href.chars().next()?;
|
| 838 |
|
- |
if quote_char != '"' && quote_char != '\'' {
|
| 839 |
|
- |
return None;
|
| 840 |
|
- |
}
|
| 841 |
|
- |
|
| 842 |
|
- |
let url_start = 1;
|
| 843 |
|
- |
let url_end = after_href[url_start..].find(quote_char)?;
|
| 844 |
|
- |
Some(after_href[url_start..url_start + url_end].to_string())
|
| 845 |
|
- |
}
|
| 846 |
|
- |
|
| 847 |
|
- |
/// Simple tag stripping (removes all < > content)
|
| 848 |
|
- |
fn strip_tags_simple(html: &str) -> String {
|
| 849 |
|
- |
let mut result = String::new();
|
| 850 |
|
- |
let mut in_tag = false;
|
| 851 |
|
- |
|
| 852 |
|
- |
for c in html.chars() {
|
| 853 |
|
- |
match c {
|
| 854 |
|
- |
'<' => in_tag = true,
|
| 855 |
|
- |
'>' => in_tag = false,
|
| 856 |
|
- |
_ if !in_tag => result.push(c),
|
| 857 |
|
- |
_ => {}
|
| 858 |
|
- |
}
|
| 859 |
|
- |
}
|
| 860 |
|
- |
result
|
| 861 |
|
- |
}
|
| 862 |
|
- |
|
| 863 |
|
- |
/// Decode common HTML entities
|
| 864 |
|
- |
fn decode_html_entities(text: &str) -> String {
|
| 865 |
|
- |
text.replace(" ", " ")
|
| 866 |
|
- |
.replace(" ", " ")
|
| 867 |
|
- |
.replace(" ", " ")
|
| 868 |
|
- |
.replace(" ", " ")
|
| 869 |
|
- |
.replace("&", "&")
|
| 870 |
|
- |
.replace("<", "<")
|
| 871 |
|
- |
.replace(">", ">")
|
| 872 |
|
- |
.replace(""", "\"")
|
| 873 |
|
- |
.replace(""", "\"")
|
| 874 |
|
- |
.replace("'", "'")
|
| 875 |
|
- |
.replace("'", "'")
|
| 876 |
|
- |
.replace("'", "'")
|
| 877 |
|
- |
.replace("‘", "'")
|
| 878 |
|
- |
.replace("’", "'")
|
| 879 |
|
- |
.replace("“", "\"")
|
| 880 |
|
- |
.replace("”", "\"")
|
| 881 |
|
- |
.replace("–", "–")
|
| 882 |
|
- |
.replace("—", "—")
|
| 883 |
|
- |
.replace("…", "...")
|
| 884 |
|
- |
.replace("•", "•")
|
| 885 |
|
- |
.replace("·", "·")
|
| 886 |
|
- |
.replace("©", "©")
|
| 887 |
|
- |
.replace("®", "®")
|
| 888 |
|
- |
.replace("™", "™")
|
| 889 |
|
- |
.replace("€", "€")
|
| 890 |
|
- |
.replace("£", "£")
|
| 891 |
|
- |
.replace("¥", "¥")
|
| 892 |
|
- |
.replace("¢", "¢")
|
| 893 |
|
- |
.replace("°", "°")
|
| 894 |
|
- |
.replace("±", "±")
|
| 895 |
|
- |
.replace("×", "×")
|
| 896 |
|
- |
.replace("÷", "÷")
|
| 897 |
|
- |
.replace("½", "½")
|
| 898 |
|
- |
.replace("¼", "¼")
|
| 899 |
|
- |
.replace("¾", "¾")
|
| 900 |
|
- |
// Numeric entities (common ones)
|
| 901 |
|
- |
.replace(" ", " ")
|
| 902 |
|
- |
.replace("–", "–")
|
| 903 |
|
- |
.replace("—", "—")
|
| 904 |
|
- |
.replace("‘", "'")
|
| 905 |
|
- |
.replace("’", "'")
|
| 906 |
|
- |
.replace("“", "\"")
|
| 907 |
|
- |
.replace("”", "\"")
|
| 908 |
|
- |
.replace("…", "...")
|
| 909 |
|
- |
}
|
| 910 |
753 |
|
}
|
| 911 |
754 |
|
|
| 912 |
755 |
|
/// Extracts the first message-ID from the References header (the thread root).
///
/// The header name is matched case-insensitively. The header value is scanned for the
/// first well-formed `<...>` span, so IDs separated by whitespace, by commas, or by
/// nothing at all (`<a@x><b@y>`) all yield just the first ID, angle brackets included.
///
/// Returns `None` when there is no References header or it contains no bracketed ID.
fn extract_references_root(headers: &[mailparse::MailHeader]) -> Option<String> {
    let value = headers
        .iter()
        .find(|h| h.get_key().eq_ignore_ascii_case("references"))?
        .get_value();
    first_bracketed_id(&value).map(str::to_owned)
}

/// Returns the first `<...>` span in `value` that has no whitespace or nested `<` inside it.
fn first_bracketed_id(value: &str) -> Option<&str> {
    let mut rest = value;
    while let Some(open) = rest.find('<') {
        let candidate = &rest[open..];
        // Position (within `candidate`) of the first delimiter after the opening '<'.
        // No delimiter at all means no bracket is ever closed, so give up.
        let stop = 1 + candidate[1..]
            .find(|c: char| c == '<' || c == '>' || c.is_whitespace())?;
        if candidate[stop..].starts_with('>') {
            return Some(&candidate[..=stop]);
        }
        // Broken candidate (whitespace or another '<' before any '>'): resume the
        // search at the delimiter so a following '<' is still considered.
        rest = &candidate[stop..];
    }
    None
}

/// Recursively extract attachment parts from a MIME tree.
///
/// Walks the MIME structure and collects non-text leaf parts as attachments.
/// Skips text/plain and text/html (those are body parts), and parts with empty bodies.
|
| 916 |
772 |
|
fn extract_attachment_parts(mail: &mailparse::ParsedMail) -> Vec<AttachmentPart> {
|
| 970 |
826 |
|
mod tests {
|
| 971 |
827 |
|
use super::ImapClient;
|
| 972 |
828 |
|
|
| 973 |
|
- |
// --- strip_tags_simple ---
|
| 974 |
|
- |
|
| 975 |
|
- |
#[test]
|
| 976 |
|
- |
fn strip_tags_simple_basic() {
|
| 977 |
|
- |
assert_eq!(ImapClient::strip_tags_simple("<b>bold</b>"), "bold");
|
| 978 |
|
- |
}
|
| 979 |
|
- |
|
| 980 |
|
- |
#[test]
|
| 981 |
|
- |
fn strip_tags_simple_nested() {
|
| 982 |
|
- |
assert_eq!(
|
| 983 |
|
- |
ImapClient::strip_tags_simple("<div><p>hello <b>world</b></p></div>"),
|
| 984 |
|
- |
"hello world"
|
| 985 |
|
- |
);
|
| 986 |
|
- |
}
|
| 987 |
|
- |
|
| 988 |
|
- |
#[test]
|
| 989 |
|
- |
fn strip_tags_simple_empty() {
|
| 990 |
|
- |
assert_eq!(ImapClient::strip_tags_simple(""), "");
|
| 991 |
|
- |
}
|
| 992 |
|
- |
|
| 993 |
|
- |
#[test]
|
| 994 |
|
- |
fn strip_tags_simple_no_tags() {
|
| 995 |
|
- |
assert_eq!(ImapClient::strip_tags_simple("plain text"), "plain text");
|
| 996 |
|
- |
}
|
| 997 |
|
- |
|
| 998 |
|
- |
#[test]
|
| 999 |
|
- |
fn strip_tags_simple_self_closing() {
|
| 1000 |
|
- |
assert_eq!(ImapClient::strip_tags_simple("a<br/>b"), "ab");
|
| 1001 |
|
- |
}
|
| 1002 |
|
- |
|
| 1003 |
|
- |
#[test]
|
| 1004 |
|
- |
fn strip_tags_simple_attributes() {
|
| 1005 |
|
- |
assert_eq!(
|
| 1006 |
|
- |
ImapClient::strip_tags_simple(r#"<a href="url">link</a>"#),
|
| 1007 |
|
- |
"link"
|
| 1008 |
|
- |
);
|
| 1009 |
|
- |
}
|
| 1010 |
|
- |
|
| 1011 |
|
- |
// --- extract_href ---
|
| 1012 |
|
- |
|
| 1013 |
|
- |
#[test]
|
| 1014 |
|
- |
fn extract_href_double_quotes() {
|
| 1015 |
|
- |
let tag = r#"<a href="https://example.com">text</a>"#;
|
| 1016 |
|
- |
assert_eq!(
|
| 1017 |
|
- |
ImapClient::extract_href(tag),
|