Skip to main content

max / goingson

17.4 KB · 592 lines History Blame Raw
1 //! Native vCard 3.0/4.0 parser for contact import.
2 //!
3 //! Parses the subset of vCard properties that GO uses. The format is line-based:
4 //! each property is `NAME;PARAMS:VALUE`, with line folding (continuation lines
5 //! starting with space/tab).
6
7 use serde::Serialize;
8
9 /// A parsed email from a vCard.
10 #[derive(Debug, Clone, Serialize)]
11 #[serde(rename_all = "camelCase")]
12 pub struct ParsedEmail {
13 pub address: String,
14 pub label: String,
15 pub is_primary: bool,
16 }
17
18 /// A parsed phone number from a vCard.
19 #[derive(Debug, Clone, Serialize)]
20 #[serde(rename_all = "camelCase")]
21 pub struct ParsedPhone {
22 pub number: String,
23 pub label: String,
24 pub is_primary: bool,
25 }
26
27 /// A parsed social handle from a vCard.
28 #[derive(Debug, Clone, Serialize)]
29 #[serde(rename_all = "camelCase")]
30 pub struct ParsedSocial {
31 pub platform: String,
32 pub handle: String,
33 pub url: Option<String>,
34 }
35
36 /// A parsed custom field from a vCard.
37 #[derive(Debug, Clone, Serialize)]
38 #[serde(rename_all = "camelCase")]
39 pub struct ParsedCustomField {
40 pub label: String,
41 pub value: String,
42 pub url: Option<String>,
43 }
44
45 /// A fully parsed vCard contact.
46 #[derive(Debug, Clone, Serialize)]
47 #[serde(rename_all = "camelCase")]
48 pub struct ParsedVCard {
49 pub display_name: String,
50 pub nickname: Option<String>,
51 pub company: Option<String>,
52 pub title: Option<String>,
53 pub notes: Option<String>,
54 pub birthday: Option<String>,
55 pub timezone: Option<String>,
56 pub tags: Vec<String>,
57 pub emails: Vec<ParsedEmail>,
58 pub phones: Vec<ParsedPhone>,
59 pub social_handles: Vec<ParsedSocial>,
60 pub custom_fields: Vec<ParsedCustomField>,
61 }
62
63 /// Parse a .vcf file content into a list of contacts.
64 pub fn parse_vcf(content: &str) -> Result<Vec<ParsedVCard>, String> {
65 let unfolded = unfold_lines(content);
66 let mut contacts = Vec::new();
67 let mut in_card = false;
68 let mut lines: Vec<&str> = Vec::new();
69
70 for line in unfolded.lines() {
71 let trimmed = line.trim();
72 if trimmed.eq_ignore_ascii_case("BEGIN:VCARD") {
73 in_card = true;
74 lines.clear();
75 } else if trimmed.eq_ignore_ascii_case("END:VCARD") {
76 if in_card {
77 if let Some(card) = parse_single_vcard(&lines) {
78 contacts.push(card);
79 }
80 }
81 in_card = false;
82 } else if in_card {
83 lines.push(line);
84 }
85 }
86
87 Ok(contacts)
88 }
89
/// Join physical lines back into logical property lines.
///
/// Two continuation mechanisms are handled:
///
/// * RFC 6350 §3.2 folding: a line starting with a space or tab continues the
///   previous line; exactly one leading fold character is removed.
/// * Quoted-printable soft line breaks: when the property header (the part
///   before the first `:`) mentions `QUOTED-PRINTABLE` and a physical line ends
///   with `=`, the `=` is dropped and the next physical line is appended
///   verbatim. Without this, the continuation would be mistaken for a separate
///   property line and its text lost.
///
/// Logical lines in the result are separated by a single `\n`; both `\n` and
/// `\r\n` are accepted as input line endings.
fn unfold_lines(content: &str) -> String {
    let mut result = String::with_capacity(content.len());
    // Whether the logical line currently being assembled is quoted-printable.
    let mut logical_is_qp = false;
    // Whether the previous physical line ended in a quoted-printable soft break.
    let mut soft_break = false;

    for line in content.lines() {
        if soft_break {
            // Drop the trailing `=` of the previous physical line; the
            // continuation is literal content, including leading whitespace.
            result.pop();
            result.push_str(line);
        } else if line.starts_with(' ') || line.starts_with('\t') {
            // The fold character is one ASCII byte, so slicing at 1 is safe.
            result.push_str(&line[1..]);
        } else {
            if !result.is_empty() {
                result.push('\n');
            }
            result.push_str(line);
            let header = line.split(':').next().unwrap_or(line);
            logical_is_qp = header.to_ascii_uppercase().contains("QUOTED-PRINTABLE");
        }
        soft_break = logical_is_qp && line.ends_with('=');
    }
    result
}
107
108 /// Parse a single vCard from its property lines.
109 fn parse_single_vcard(lines: &[&str]) -> Option<ParsedVCard> {
110 let mut display_name = String::new();
111 let mut nickname = None;
112 let mut company = None;
113 let mut title = None;
114 let mut notes = None;
115 let mut birthday = None;
116 let mut timezone = None;
117 let mut tags = Vec::new();
118 let mut emails = Vec::new();
119 let mut phones = Vec::new();
120 let mut social_handles = Vec::new();
121 let mut custom_fields = Vec::new();
122
123 // Fallback name components from N property
124 let mut family_name = String::new();
125 let mut given_name = String::new();
126
127 for line in lines {
128 let (prop_name, params, value) = parse_property_line(line);
129 let prop_upper = prop_name.to_uppercase();
130
131 match prop_upper.as_str() {
132 "FN" => {
133 display_name = decode_value(&value, &params);
134 }
135 "N" => {
136 // N:Family;Given;Middle;Prefix;Suffix
137 let parts: Vec<&str> = value.split(';').collect();
138 if let Some(f) = parts.first() {
139 family_name = decode_value(f, &params);
140 }
141 if let Some(g) = parts.get(1) {
142 given_name = decode_value(g, &params);
143 }
144 }
145 "NICKNAME" => {
146 let v = decode_value(&value, &params);
147 if !v.is_empty() {
148 nickname = Some(v);
149 }
150 }
151 "ORG" => {
152 // ORG:Company;Division
153 let v = decode_value(&value, &params);
154 let org = v.split(';').next().unwrap_or("").trim().to_string();
155 if !org.is_empty() {
156 company = Some(org);
157 }
158 }
159 "TITLE" => {
160 let v = decode_value(&value, &params);
161 if !v.is_empty() {
162 title = Some(v);
163 }
164 }
165 "NOTE" => {
166 let v = decode_value(&value, &params);
167 if !v.is_empty() {
168 notes = Some(v);
169 }
170 }
171 "BDAY" => {
172 let v = value.trim();
173 // Normalize YYYYMMDD to YYYY-MM-DD
174 let normalized = if v.len() == 8 && v.chars().all(|c| c.is_ascii_digit()) {
175 format!("{}-{}-{}", &v[0..4], &v[4..6], &v[6..8])
176 } else {
177 v.to_string()
178 };
179 if normalized.len() >= 10 {
180 birthday = Some(normalized[..10].to_string());
181 }
182 }
183 "TZ" => {
184 let v = value.trim().to_string();
185 if !v.is_empty() {
186 timezone = Some(v);
187 }
188 }
189 "CATEGORIES" => {
190 for cat in value.split(',') {
191 let cat = decode_value(cat.trim(), &params);
192 if !cat.is_empty() {
193 tags.push(cat);
194 }
195 }
196 }
197 "EMAIL" => {
198 let address = decode_value(&value, &params);
199 if !address.is_empty() {
200 let label = extract_type_param(&params);
201 let is_primary = params_contain(&params, "PREF")
202 || params_contain_key_value(&params, "TYPE", "PREF");
203 emails.push(ParsedEmail {
204 address,
205 label,
206 is_primary,
207 });
208 }
209 }
210 "TEL" => {
211 let number = decode_value(&value, &params);
212 if !number.is_empty() {
213 let label = extract_type_param(&params);
214 let is_primary = params_contain(&params, "PREF")
215 || params_contain_key_value(&params, "TYPE", "PREF");
216 phones.push(ParsedPhone {
217 number,
218 label,
219 is_primary,
220 });
221 }
222 }
223 "URL" => {
224 let url = decode_value(&value, &params);
225 if !url.is_empty() {
226 custom_fields.push(ParsedCustomField {
227 label: "Website".to_string(),
228 value: url.clone(),
229 url: Some(url),
230 });
231 }
232 }
233 s if s.starts_with("X-SOCIALPROFILE") || s == "X-SOCIALPROFILE" => {
234 let url_val = decode_value(&value, &params);
235 let platform = extract_type_param(&params);
236 // Try to extract handle from URL
237 let handle = url_val
238 .rsplit('/')
239 .find(|s| !s.is_empty())
240 .unwrap_or(&url_val)
241 .to_string();
242 if !handle.is_empty() {
243 social_handles.push(ParsedSocial {
244 platform,
245 handle,
246 url: if url_val.starts_with("http") {
247 Some(url_val)
248 } else {
249 None
250 },
251 });
252 }
253 }
254 _ => {}
255 }
256 }
257
258 // Use FN, fall back to N components
259 if display_name.is_empty() {
260 display_name = format!("{} {}", given_name, family_name).trim().to_string();
261 }
262
263 // Skip contacts with no name at all
264 if display_name.is_empty() {
265 return None;
266 }
267
268 Some(ParsedVCard {
269 display_name,
270 nickname,
271 company,
272 title,
273 notes,
274 birthday,
275 timezone,
276 tags,
277 emails,
278 phones,
279 social_handles,
280 custom_fields,
281 })
282 }
283
/// Parse a property line into `(name, params, value)`.
///
/// Format: `[group.]NAME;PARAM1=val1;PARAM2=val2:VALUE`
///
/// * The value starts after the first `:` that is not inside double quotes, so
///   values may contain colons (e.g. URLs) and so may quoted parameter values.
/// * Parameters are separated by `;` outside double quotes; the surrounding
///   quotes of a quoted parameter value are removed.
/// * An optional group prefix (`item1.EMAIL`) is dropped from the name so
///   grouped properties are recognised like ungrouped ones.
/// * A line without a colon is returned as a header with an empty value.
///
/// Name and parameters are returned in their original letter case.
fn parse_property_line(line: &str) -> (String, Vec<String>, String) {
    let mut segments: Vec<&str> = Vec::new();
    let mut segment_start = 0;
    let mut header_end = line.len();
    let mut value = "";
    let mut in_quotes = false;

    for (i, ch) in line.char_indices() {
        match ch {
            '"' => in_quotes = !in_quotes,
            ';' if !in_quotes => {
                segments.push(&line[segment_start..i]);
                segment_start = i + 1;
            }
            ':' if !in_quotes => {
                header_end = i;
                value = &line[i + 1..];
                break;
            }
            _ => {}
        }
    }
    segments.push(&line[segment_start..header_end]);

    let mut segments = segments.into_iter();
    let raw_name = segments.next().unwrap_or("");
    // Property names cannot contain `.`, so anything before the last dot is
    // the group.
    let name = raw_name.rsplit('.').next().unwrap_or(raw_name).to_string();
    let params: Vec<String> = segments.map(|p| p.replace('"', "")).collect();

    (name, params, value.to_string())
}
314
315 /// Decode a value, handling quoted-printable encoding if indicated by params.
316 fn decode_value(value: &str, params: &[String]) -> String {
317 let is_qp = params.iter().any(|p| {
318 let upper = p.to_uppercase();
319 upper == "ENCODING=QUOTED-PRINTABLE" || upper == "QUOTED-PRINTABLE"
320 });
321
322 if is_qp {
323 decode_quoted_printable(value)
324 } else {
325 // Handle vCard escaped characters
326 value
327 .replace("\\n", "\n")
328 .replace("\\N", "\n")
329 .replace("\\,", ",")
330 .replace("\\;", ";")
331 .replace("\\\\", "\\")
332 }
333 }
334
/// Decode quoted-printable text into a string.
///
/// `=XX` (two hex digits, either case) becomes the byte `0xXX`; `=` followed by
/// `\r\n` or `\n` is a soft line break and is removed. Any other `=` is copied
/// through unchanged. The resulting bytes are interpreted as UTF-8, with
/// invalid sequences replaced by U+FFFD.
fn decode_quoted_printable(input: &str) -> String {
    // Value of a single ASCII hex digit, if `b` is one.
    let nibble = |b: u8| char::from(b).to_digit(16).map(|d| d as u8);

    let raw = input.as_bytes();
    let mut out = Vec::new();
    let mut pos = 0;
    while let Some(&byte) = raw.get(pos) {
        if byte == b'=' {
            match &raw[pos + 1..] {
                [b'\r', b'\n', ..] => {
                    pos += 3;
                    continue;
                }
                [b'\n', ..] => {
                    pos += 2;
                    continue;
                }
                [hi, lo, ..] => {
                    if let (Some(h), Some(l)) = (nibble(*hi), nibble(*lo)) {
                        out.push(h << 4 | l);
                        pos += 3;
                        continue;
                    }
                }
                _ => {}
            }
        }
        // Ordinary byte, or an `=` that does not start a valid sequence.
        out.push(byte);
        pos += 1;
    }
    String::from_utf8_lossy(&out).into_owned()
}
368
/// Numeric value (0–15) of an ASCII hexadecimal digit, upper or lower case;
/// `None` for any other byte.
fn hex_val(b: u8) -> Option<u8> {
    // `to_digit(16)` accepts exactly 0-9, a-f and A-F; the result is below 16,
    // so narrowing to `u8` cannot truncate.
    let digit = char::from(b).to_digit(16)?;
    Some(digit as u8)
}
377
/// Extract a label from a property's type parameters (e.g. `WORK`, `HOME`,
/// `CELL`).
///
/// Parameters are scanned in order. Every `TYPE=` parameter is considered, so
/// both `TYPE=INTERNET,WORK` and the repeated form `type=INTERNET;type=WORK`
/// yield `WORK`. Bare vCard 2.1 style parameters (`WORK`, `HOME`, `CELL`,
/// `FAX`, `PAGER`, `MAIN`, `OTHER`) are recognised as well. The values `PREF`,
/// `VOICE` and `INTERNET` are not useful as labels and are only returned (the
/// first one seen) when nothing better is present.
///
/// Matching is ASCII case-insensitive; the label keeps its original letter
/// case. Returns an empty string when there is no type information.
fn extract_type_param(params: &[String]) -> String {
    const NOISE: [&str; 3] = ["PREF", "VOICE", "INTERNET"];
    const BARE_TYPES: [&str; 7] = ["WORK", "HOME", "CELL", "FAX", "PAGER", "MAIN", "OTHER"];

    let mut fallback: Option<&str> = None;
    for p in params {
        // `get` keeps the slice below on a character boundary for any input.
        let is_type = p
            .get(..5)
            .map_or(false, |key| key.eq_ignore_ascii_case("TYPE="));
        if is_type {
            for v in p[5..].split(',') {
                if NOISE.iter().any(|n| v.eq_ignore_ascii_case(n)) {
                    fallback = fallback.or(Some(v));
                } else if !v.is_empty() {
                    return v.to_string();
                }
            }
        } else if BARE_TYPES.iter().any(|t| p.eq_ignore_ascii_case(t)) {
            return p.clone();
        }
    }
    fallback.unwrap_or("").to_string()
}
404
/// Report whether any parameter is exactly `target`, compared ASCII
/// case-insensitively. `KEY=VALUE` parameters only match when `target` is the
/// whole `KEY=VALUE` text.
fn params_contain(params: &[String], target: &str) -> bool {
    for param in params {
        if param.eq_ignore_ascii_case(target) {
            return true;
        }
    }
    false
}
411
/// Report whether any parameter has the form `KEY=v1,v2,…` with `key` as its
/// key and `target` as one of its comma-separated values.
///
/// Key and values are compared case-insensitively by uppercasing both sides.
/// The uppercased prefix is removed with `strip_prefix`, so the value list
/// starts at the right place even when uppercasing changes a string's byte
/// length.
fn params_contain_key_value(params: &[String], key: &str, target: &str) -> bool {
    // Uppercase the needle once instead of once per parameter.
    let prefix = format!("{}=", key).to_uppercase();
    let target = target.to_uppercase();
    params.iter().any(|p| {
        p.to_uppercase()
            .strip_prefix(prefix.as_str())
            .map_or(false, |values| values.split(',').any(|v| v == target))
    })
}
421
#[cfg(test)]
mod tests {
    use super::*;

    /// Run the parser on `vcf`, panicking if it reports an error.
    fn parse(vcf: &str) -> Vec<ParsedVCard> {
        parse_vcf(vcf).unwrap()
    }

    /// A complete card: name, one email, one phone, organisation and title.
    #[test]
    fn test_parse_simple_vcard() {
        let parsed = parse(concat!(
            "BEGIN:VCARD\r\n",
            "VERSION:3.0\r\n",
            "FN:Jane Smith\r\n",
            "N:Smith;Jane;;;\r\n",
            "EMAIL;TYPE=WORK:jane@example.com\r\n",
            "TEL;TYPE=CELL:+1-555-0100\r\n",
            "ORG:Acme Corp\r\n",
            "TITLE:Engineer\r\n",
            "END:VCARD\r\n",
        ));
        assert_eq!(parsed.len(), 1);

        let jane = &parsed[0];
        assert_eq!(jane.display_name, "Jane Smith");
        assert_eq!(jane.company.as_deref(), Some("Acme Corp"));
        assert_eq!(jane.title.as_deref(), Some("Engineer"));
        assert_eq!(jane.emails.len(), 1);
        assert_eq!(jane.emails[0].address, "jane@example.com");
        assert_eq!(jane.emails[0].label, "WORK");
        assert_eq!(jane.phones.len(), 1);
        assert_eq!(jane.phones[0].number, "+1-555-0100");
        assert_eq!(jane.phones[0].label, "CELL");
    }

    /// Consecutive cards in one file are returned in file order.
    #[test]
    fn test_parse_multiple_vcards() {
        let parsed = parse(concat!(
            "BEGIN:VCARD\r\n",
            "VERSION:3.0\r\n",
            "FN:Alice\r\n",
            "END:VCARD\r\n",
            "BEGIN:VCARD\r\n",
            "VERSION:3.0\r\n",
            "FN:Bob\r\n",
            "END:VCARD\r\n",
        ));
        assert_eq!(parsed.len(), 2);
        assert_eq!(parsed[0].display_name, "Alice");
        assert_eq!(parsed[1].display_name, "Bob");
    }

    /// Without FN the display name is built as "given family" from N.
    #[test]
    fn test_fallback_to_n_property() {
        let parsed = parse(concat!(
            "BEGIN:VCARD\r\n",
            "VERSION:3.0\r\n",
            "N:Doe;John;;;\r\n",
            "END:VCARD\r\n",
        ));
        assert_eq!(parsed.len(), 1);
        assert_eq!(parsed[0].display_name, "John Doe");
    }

    /// Both the compact and the dashed BDAY forms yield YYYY-MM-DD.
    #[test]
    fn test_birthday_formats() {
        let compact = parse(concat!(
            "BEGIN:VCARD\r\n",
            "VERSION:3.0\r\n",
            "FN:Test\r\n",
            "BDAY:19900115\r\n",
            "END:VCARD\r\n",
        ));
        assert_eq!(compact[0].birthday.as_deref(), Some("1990-01-15"));

        let dashed = parse(concat!(
            "BEGIN:VCARD\r\n",
            "VERSION:4.0\r\n",
            "FN:Test2\r\n",
            "BDAY:1990-01-15\r\n",
            "END:VCARD\r\n",
        ));
        assert_eq!(dashed[0].birthday.as_deref(), Some("1990-01-15"));
    }

    /// Only the email whose TYPE list contains PREF is marked primary.
    #[test]
    fn test_pref_email() {
        let parsed = parse(concat!(
            "BEGIN:VCARD\r\n",
            "VERSION:3.0\r\n",
            "FN:Test\r\n",
            "EMAIL;TYPE=WORK:work@example.com\r\n",
            "EMAIL;TYPE=HOME,PREF:home@example.com\r\n",
            "END:VCARD\r\n",
        ));
        let emails = &parsed[0].emails;
        assert!(!emails[0].is_primary);
        assert!(emails[1].is_primary);
    }

    /// CATEGORIES is split on commas into tags.
    #[test]
    fn test_categories() {
        let parsed = parse(concat!(
            "BEGIN:VCARD\r\n",
            "VERSION:3.0\r\n",
            "FN:Test\r\n",
            "CATEGORIES:Friend,Coworker\r\n",
            "END:VCARD\r\n",
        ));
        assert_eq!(parsed[0].tags, vec!["Friend", "Coworker"]);
    }

    /// A folded line is re-joined: the single leading space of the
    /// continuation is the fold marker and is removed, while the trailing
    /// space after "continues" is content and is kept.
    #[test]
    fn test_line_folding() {
        let parsed = parse(concat!(
            "BEGIN:VCARD\r\n",
            "VERSION:3.0\r\n",
            "FN:Test\r\n",
            "NOTE:This is a long note that continues \r\n",
            " on the next line\r\n",
            "END:VCARD\r\n",
        ));
        assert_eq!(
            parsed[0].notes.as_deref(),
            Some("This is a long note that continues on the next line")
        );
    }

    /// URL properties surface as a "Website" custom field.
    #[test]
    fn test_url_as_custom_field() {
        let parsed = parse(concat!(
            "BEGIN:VCARD\r\n",
            "VERSION:3.0\r\n",
            "FN:Test\r\n",
            "URL:https://example.com\r\n",
            "END:VCARD\r\n",
        ));
        let fields = &parsed[0].custom_fields;
        assert_eq!(fields.len(), 1);
        assert_eq!(fields[0].label, "Website");
        assert_eq!(fields[0].value, "https://example.com");
    }

    /// A card with neither FN nor N is dropped.
    #[test]
    fn test_skip_empty_name() {
        let parsed = parse(concat!(
            "BEGIN:VCARD\r\n",
            "VERSION:3.0\r\n",
            "EMAIL:orphan@example.com\r\n",
            "END:VCARD\r\n",
        ));
        assert_eq!(parsed.len(), 0);
    }

    /// Backslash escapes for newline and comma are resolved in NOTE.
    #[test]
    fn test_escaped_characters() {
        let parsed = parse(concat!(
            "BEGIN:VCARD\r\n",
            "VERSION:3.0\r\n",
            "FN:Test\r\n",
            "NOTE:Line 1\\nLine 2\\, with comma\r\n",
            "END:VCARD\r\n",
        ));
        assert_eq!(
            parsed[0].notes.as_deref(),
            Some("Line 1\nLine 2, with comma")
        );
    }
}
592