]) -> String {
if headers.is_empty() {
return String::new();
}
let col_count = headers.len();
let mut out = String::new();
// Header row
out.push('|');
for h in headers {
out.push(' ');
out.push_str(h);
out.push_str(" |");
}
out.push('\n');
// Separator row
out.push('|');
for _ in 0..col_count {
out.push_str(" --- |");
}
out.push('\n');
// Data rows
for row in rows {
out.push('|');
for i in 0..col_count {
out.push(' ');
if let Some(cell) = row.get(i) {
out.push_str(cell);
}
out.push_str(" |");
}
out.push('\n');
}
// Remove trailing newline (caller handles spacing)
out.trim_end().to_string()
}
// Unit tests for the table helpers brought into scope via `use super::*`:
// `is_data_table`, `extract_table_data`, and `render_markdown_table`.
//
// Several comments below cite `Lnn` positions. These look like line numbers
// in the parent module that mutation testing targeted; they may have drifted
// since they were written — TODO confirm before relying on them.
//
// NOTE(review): many HTML fixture literals in this module are empty or carry
// no tags as seen here; the markup appears to have been stripped somewhere
// upstream — verify the fixtures against the original file.
#[cfg(test)]
mod tests {
use super::*;
use scraper::{Html, Selector};
/// Parses `html` as a complete HTML document using `Html::parse_document`.
fn parse_table(html: &str) -> Html {
Html::parse_document(html)
}
/// Returns the first element in `doc` matching the `table` selector.
///
/// Panics (via `unwrap`) if the selector fails to parse or if the document
/// contains no matching element.
fn select_table(doc: &Html) -> ElementRef<'_> {
let sel = Selector::parse("table").unwrap();
doc.select(&sel).next().unwrap()
}
// -- Basic is_data_table classification --
#[test]
fn single_cell_is_layout() {
// Per the test name: a single-cell table is expected to be layout, not data.
let doc = parse_table("");
assert!(!is_data_table(select_table(&doc)));
}
#[test]
fn table_with_th_is_data() {
// Per the test name: the presence of th cells is expected to mark a data table.
let doc = parse_table(
"",
);
assert!(is_data_table(select_table(&doc)));
}
#[test]
fn table_with_caption_is_data() {
// Per the test name: a caption is expected to mark a data table.
let doc = parse_table(
"",
);
assert!(is_data_table(select_table(&doc)));
}
#[test]
fn role_presentation_is_layout() {
// Per the test name: role="presentation" is expected to mark a layout table.
let doc = parse_table(
r#""#,
);
assert!(!is_data_table(select_table(&doc)));
}
#[test]
fn role_grid_is_data() {
// Per the test name: role="grid" is expected to mark a data table.
let doc =
parse_table(r#""#);
assert!(is_data_table(select_table(&doc)));
}
#[test]
fn multi_row_multi_cell_is_data() {
// Two rows, each with two text cells (Alice/Engineer, Bob/Designer),
// are expected to be classified as data.
let doc = parse_table(
"\
| Alice | Engineer |
\
| Bob | Designer |
\
",
);
assert!(is_data_table(select_table(&doc)));
}
#[test]
fn spacer_cells_not_substantive() {
let doc = parse_table(
"",
);
// Only one substantive cell per row
assert!(!is_data_table(select_table(&doc)));
}
// -- render_markdown_table --
#[test]
fn render_simple_table() {
// Two headers and two full rows: expects a header line, a `---` separator
// line, one line per row, and no trailing newline.
let headers = vec!["Name".into(), "Age".into()];
let rows = vec![
vec!["Alice".into(), "30".into()],
vec!["Bob".into(), "25".into()],
];
let md = render_markdown_table(&headers, &rows);
assert_eq!(
md,
"| Name | Age |\n| --- | --- |\n| Alice | 30 |\n| Bob | 25 |"
);
}
#[test]
fn render_empty_headers() {
// With no headers the renderer returns an empty string.
let md = render_markdown_table(&[], &[]);
assert_eq!(md, "");
}
// -- extract_table_data --
#[test]
fn extract_with_thead() {
// Per the test name: headers come from a thead section; expects headers
// A/B and a single data row 1/2.
let doc = parse_table(
"",
);
let (h, r) = extract_table_data(select_table(&doc));
assert_eq!(h, vec!["A", "B"]);
assert_eq!(r, vec![vec!["1".to_string(), "2".to_string()]]);
}
#[test]
fn extract_promotes_first_row() {
// Per the test name: the first row is promoted to headers (Name/Val),
// leaving one data row (X/Y).
let doc = parse_table(
"",
);
let (h, r) = extract_table_data(select_table(&doc));
assert_eq!(h, vec!["Name", "Val"]);
assert_eq!(r, vec![vec!["X".to_string(), "Y".to_string()]]);
}
// -- Boundary tests for is_data_table role handling --
#[test]
fn role_none_is_layout() {
// role="none" → explicit layout signal. Catches L22 `||` mutation
// (presentation OR none); without the ||, "none" wouldn't short-circuit.
let doc = parse_table(
r#""#,
);
// Even when the table otherwise carries a data-table signal, the explicit
// role="none" should win. (The original comment named an element here that
// is missing from this view — presumably th; TODO confirm.)
assert!(!is_data_table(select_table(&doc)));
}
#[test]
fn role_table_is_data() {
// role="table" → data. Catches L22 == "grid" mutating to != (which would
// make grid not match) AND covers the parallel `|| role == "table"` arm.
let doc =
parse_table(r#""#);
assert!(is_data_table(select_table(&doc)));
}
#[test]
fn role_unknown_falls_through_to_structural() {
// Unknown role → no early decision; structural rules apply.
// Single-cell single-row layout table → not data.
let doc =
parse_table(r#""#);
assert!(!is_data_table(select_table(&doc)));
}
#[test]
fn role_presentation_overrides_structure() {
// role="presentation" → layout, even with multiple substantive rows.
// Catches L22 == "presentation" mutating to != (which would skip this check).
let doc = parse_table(
r#"| Alice | Engineer | \
| Bob | Designer | "#,
);
assert!(!is_data_table(select_table(&doc)));
}
// -- Boundary tests for has_substantive_text > 1 --
#[test]
fn single_char_cells_not_substantive() {
// Two rows of single-char cells → not substantive → not a data table.
// Catches L66 `>` mutating to `>=`: with >=, single chars become substantive
// and these two rows would qualify as a data table.
let doc = parse_table(
"",
);
assert!(!is_data_table(select_table(&doc)));
}
#[test]
fn two_char_cells_are_substantive() {
// Counterpart to the single-char case: per the test name, two-character
// cells are expected to count as substantive, making the table data.
let doc = parse_table(
"",
);
assert!(is_data_table(select_table(&doc)));
}
// -- Boundary tests for extract_table_data tbody handling --
#[test]
fn extract_with_tbody_no_thead() {
// Catches L87 `== "tbody"` mutating to != (which would skip tbody).
let doc = parse_table(
"",
);
let (h, r) = extract_table_data(select_table(&doc));
// First tbody row promoted to headers; second row is data.
assert_eq!(h, vec!["Name", "Val"]);
assert_eq!(r, vec![vec!["X".to_string(), "Y".to_string()]]);
}
// -- Boundary tests for the headers-vs-th-row decision (L104 &&) --
#[test]
fn thead_present_blocks_later_th_row_promotion() {
// Headers already set by thead. A later th-row should NOT overwrite them.
// Catches L104 `&&` mutating to `||`: with ||, has_th_cells alone would
// re-promote, clobbering the thead headers.
let doc = parse_table(
"",
);
let (h, r) = extract_table_data(select_table(&doc));
assert_eq!(h, vec!["A", "B"], "thead headers must not be overwritten");
// Both the th-row and the td-row become data rows.
assert_eq!(r.len(), 2);
}
#[test]
fn no_thead_th_row_promotes_to_headers() {
// No thead, but a tr full of th cells → that tr's cells become headers.
// Catches `has_th_cells -> bool` always-false mutation (which would
// make this row become a data row instead).
let doc = parse_table(
"",
);
let (h, r) = extract_table_data(select_table(&doc));
assert_eq!(h, vec!["X", "Y"]);
assert_eq!(r, vec![vec!["1".to_string(), "2".to_string()]]);
}
#[test]
fn all_td_rows_promote_first_to_headers() {
// No th anywhere → has_th_cells is false for every row → first row promoted
// by the `if headers.is_empty() && !rows.is_empty()` fallback.
// Catches `has_th_cells -> bool` always-true mutation (which would promote
// every row as headers, leaving rows empty after the first).
let doc = parse_table(
"",
);
let (h, r) = extract_table_data(select_table(&doc));
assert_eq!(h, vec!["Name", "Val"]);
assert_eq!(r.len(), 2);
}
// -- Boundary test for has_th_cells (L139 == "th") --
#[test]
fn td_only_row_is_not_a_header_row() {
// A tr with only td cells should NOT promote to headers when other
// rows exist. Catches L139 `== "th"` mutating to `!=` (which would
// match td cells and incorrectly treat every td row as a header row).
let doc = parse_table(
"| data-1 | data-2 | \
| data-3 | data-4 | \
| data-5 | data-6 | ",
);
let (h, r) = extract_table_data(select_table(&doc));
// First row is promoted (via the fallback at the end), leaving exactly two data rows.
assert_eq!(h, vec!["data-1", "data-2"]);
assert_eq!(r.len(), 2, "remaining rows should be data, not headers");
}
}
|