use std::collections::HashMap;
use std::path::Path;
use std::sync::LazyLock;

use regex::Regex;

/// Matches inline markdown links of the form `[text](url)`.
///
/// Capture group 1 is the link text (one or more characters other than `]`);
/// capture group 2 is the link target (one or more characters other than `)`).
/// Compiled lazily on first use.
static LINK_RE: LazyLock<Regex> = LazyLock::new(|| {
    Regex::new(r"\[([^\]]+)\]\(([^)]+)\)").expect("valid regex")
});

/// Configuration for the doc loader.
///
/// Passed by reference to [`DocLoader::load`], which reads every field while
/// walking the section directories.
pub struct DocLoaderConfig {
    /// Sections as `(directory_name, display_name)` pairs in display order.
    ///
    /// Each `directory_name` is joined onto the base path given to the loader;
    /// a section whose directory does not exist is skipped.
    pub sections: Vec<(String, String)>,
    /// URL prefix for rewritten links (e.g., "/docs").
    ///
    /// Rewritten links are built as `{link_prefix}/{slug}`, so the separator
    /// is added for you.
    pub link_prefix: String,
    /// Pattern that identifies unpublished links to strip (e.g., "unpublished/").
    ///
    /// Matched as a plain substring of the link URL; a matching link is
    /// replaced by its link text.
    pub unpublished_pattern: Option<String>,
    /// Path to directory containing UI example `.html` fragments.
    /// If set, `[!UI] name` directives are resolved by loading `{examples_path}/{name}.html`.
    pub examples_path: Option<std::path::PathBuf>,
    /// Optional pre-processor applied to raw markdown before link rewriting.
    /// On `Err`, the page is skipped with a warning. Use to wire
    /// [`crate::Assumptions::substitute`] or a similar transform.
    pub pre_process: Option<Box<dyn Fn(&str) -> Result<String, String> + Send + Sync>>,
}

/// A rendered documentation page.
#[derive(Clone, Debug)]
pub struct DocPage {
    /// Page title: the value extracted from the markdown by
    /// `crate::text::extract_title`, or the slug when none is found.
    pub title: String,
    /// File stem of the source `.md` file; also the lookup key for the page.
    pub slug: String,
    /// Display name of the section the page was loaded from.
    pub section: String,
    /// Rendered HTML body of the page.
    pub html_content: String,
}

/// Ordered entry for the docs index page.
///
/// Carries the metadata of a [`DocPage`] without its rendered body.
#[derive(Clone, Debug)]
pub struct DocIndexEntry {
    /// Title of the page this entry refers to.
    pub title: String,
    /// Slug of the page this entry refers to.
    pub slug: String,
    /// Display name of the section the page belongs to.
    pub section: String,
}

/// Entry in the full-text search index, serialised to JSON for client-side search.
#[derive(Clone, Debug, serde::Serialize)]
pub struct DocSearchEntry {
    /// Slug of the page.
    pub slug: String,
    /// Title of the page.
    pub title: String,
    /// Display name of the section the page belongs to.
    pub section: String,
    /// The page's rendered HTML with tags stripped, as plain text.
    pub body_text: String,
}

/// In-memory store of rendered documentation pages, built once at startup.
#[derive(Clone, Debug)]
pub struct DocLoader {
    /// Rendered pages keyed by slug.
    pages: HashMap<String, DocPage>,
    /// Page metadata in load order: sections in configured order, files
    /// sorted by file name within each section.
    index: Vec<DocIndexEntry>,
}

impl DocLoader {
    /// Load all `.md` files from `base_path`, rendering them into HTML.
    ///
    /// Expects subdirectories matching the configured sections. Sections are
    /// visited in configuration order and, within a section, files are
    /// processed in file-name order; that combined order is the order of
    /// [`DocLoader::index`].
    ///
    /// Loading is best-effort and never fails as a whole. Each of the
    /// following skips the affected item (with a warning unless noted):
    /// - a section directory that does not exist (skipped silently);
    /// - a section directory that cannot be listed;
    /// - a file whose name cannot be turned into a slug (not valid UTF-8);
    /// - a file that cannot be read as UTF-8 text;
    /// - a page rejected by `config.pre_process`.
    ///
    /// Slugs are file stems and share one namespace across all sections. When
    /// two files produce the same slug, the later page replaces the earlier
    /// one, the earlier index entry is dropped so the index never points at a
    /// page with mismatched metadata, and a warning is logged.
    pub fn load(base_path: &Path, config: &DocLoaderConfig) -> Self {
        let mut pages: HashMap<String, DocPage> = HashMap::new();
        let mut index: Vec<DocIndexEntry> = Vec::new();

        for (dir_name, section_display) in &config.sections {
            let section_path = base_path.join(dir_name);
            if !section_path.is_dir() {
                continue;
            }

            let read_dir = match std::fs::read_dir(&section_path) {
                Ok(rd) => rd,
                Err(e) => {
                    tracing::warn!(path = %section_path.display(), error = %e, "Failed to read docs section directory");
                    continue;
                }
            };

            let mut entries: Vec<_> = read_dir
                .filter_map(|e| e.ok())
                .filter(|e| e.path().extension().is_some_and(|ext| ext == "md"))
                .collect();

            // Directory iteration order is platform-dependent; sort for a
            // stable index order.
            entries.sort_by_key(|e| e.file_name());

            for entry in entries {
                let path = entry.path();

                // An empty slug would be unreachable by URL and would collide
                // with every other such file, so skip instead of defaulting.
                let Some(slug) = path
                    .file_stem()
                    .and_then(|s| s.to_str())
                    .filter(|s| !s.is_empty())
                    .map(str::to_owned)
                else {
                    tracing::warn!(
                        path = %path.display(),
                        "Doc file name is not usable as a slug; skipping page"
                    );
                    continue;
                };

                let raw_md = match std::fs::read_to_string(&path) {
                    Ok(content) => content,
                    Err(e) => {
                        tracing::warn!(
                            path = %path.display(),
                            error = %e,
                            "Failed to read doc page; skipping page"
                        );
                        continue;
                    }
                };

                let raw_md = match &config.pre_process {
                    Some(pp) => match pp(&raw_md) {
                        Ok(md) => md,
                        Err(e) => {
                            tracing::warn!(
                                path = %path.display(),
                                error = %e,
                                "pre_process failed; skipping page"
                            );
                            continue;
                        }
                    },
                    None => raw_md,
                };

                // The title is taken from the pre-processed markdown, before
                // links are rewritten and the first heading is stripped.
                let title =
                    crate::text::extract_title(&raw_md).unwrap_or_else(|| slug.clone());
                let rewritten_md = rewrite_links(
                    &raw_md,
                    &config.link_prefix,
                    config.unpublished_pattern.as_deref(),
                );
                let md_without_title = crate::text::strip_first_heading(&rewritten_md);
                let html_content = crate::render_permissive(&md_without_title);
                #[cfg(feature = "directives")]
                let html_content = crate::directives::post_process_directives(&html_content);
                let html_content = resolve_ui_examples(&html_content, config.examples_path.as_deref());

                let page = DocPage {
                    title,
                    slug,
                    section: section_display.clone(),
                    html_content,
                };

                let index_entry = DocIndexEntry {
                    title: page.title.clone(),
                    slug: page.slug.clone(),
                    section: page.section.clone(),
                };

                if let Some(replaced) = pages.insert(page.slug.clone(), page) {
                    tracing::warn!(
                        slug = %replaced.slug,
                        replaced_section = %replaced.section,
                        path = %path.display(),
                        "Duplicate doc slug; later page replaces the earlier one"
                    );
                    // Drop the stale entry so the index stays consistent with `pages`.
                    index.retain(|e| e.slug != replaced.slug);
                }
                index.push(index_entry);
            }
        }

        DocLoader { pages, index }
    }

    /// Look up a rendered page by slug.
    ///
    /// Returns `None` when no page with that slug was loaded.
    pub fn get(&self, slug: &str) -> Option<&DocPage> {
        self.pages.get(slug)
    }

    /// Get the full ordered index.
    ///
    /// Entries appear in load order: sections in configured order, pages
    /// sorted by file name within each section.
    pub fn index(&self) -> &[DocIndexEntry] {
        &self.index
    }

    /// Build a search index with HTML stripped to plain text.
    ///
    /// Produces one entry per index entry, in index order. The plain text is
    /// recomputed from the rendered HTML on every call.
    pub fn search_index(&self) -> Vec<DocSearchEntry> {
        self.index
            .iter()
            .filter_map(|entry| {
                let page = self.pages.get(&entry.slug)?;
                Some(DocSearchEntry {
                    slug: entry.slug.clone(),
                    title: entry.title.clone(),
                    section: entry.section.clone(),
                    body_text: strip_html_tags(&page.html_content),
                })
            })
            .collect()
    }
}

/// Expand UI example placeholders in rendered HTML.
///
/// Every `<div class="doc-ui-frame" data-ui="name"></div>` placeholder is
/// swapped for a `doc-ui-frame` div wrapping the contents of
/// `{examples_path}/{name}.html`.
///
/// When `examples_path` is `None`, or the example file cannot be read, the
/// placeholder becomes a `doc-ui-missing` div carrying a fallback message; the
/// unreadable-file case additionally logs a warning.
fn resolve_ui_examples(html: &str, examples_path: Option<&Path>) -> String {
    static UI_PLACEHOLDER: LazyLock<Regex> = LazyLock::new(|| {
        Regex::new(r#"<div class="doc-ui-frame" data-ui="([a-z0-9_-]+)"></div>"#)
            .expect("valid UI placeholder regex")
    });

    // Cheap substring probe: most pages carry no placeholder at all.
    if !html.contains("doc-ui-frame") {
        return html.to_owned();
    }

    // Markup shared by both "nothing to inline" outcomes.
    let missing_markup = |example: &str| {
        format!("<div class=\"doc-ui-frame doc-ui-missing\">[UI example: {example}]</div>")
    };

    UI_PLACEHOLDER
        .replace_all(html, |caps: &regex::Captures| {
            let name = &caps[1];

            let Some(dir) = examples_path else {
                return missing_markup(name);
            };

            let fragment_path = dir.join(format!("{name}.html"));
            if let Ok(content) = std::fs::read_to_string(&fragment_path) {
                return format!("<div class=\"doc-ui-frame\">{content}</div>");
            }

            tracing::warn!(example = name, "UI example file not found");
            missing_markup(name)
        })
        .into_owned()
}

/// Strip HTML tags from a string, returning plain text.
///
/// Everything between a `<` and the next `>` is dropped, and each `>` is
/// treated as a word boundary so text from adjacent elements does not run
/// together. Runs of whitespace are then collapsed to single spaces (leading
/// and trailing whitespace is removed).
///
/// Finally the common entities `&lt;`, `&gt;`, `&quot;`, `&#x27;`, `&#39;` and
/// `&amp;` are decoded so search indexes match plain-text queries. Each entity
/// is decoded exactly once: `&amp;` is handled last so that an escaped entity
/// such as `&amp;lt;` yields the literal text `&lt;` instead of being decoded
/// a second time into `<`.
fn strip_html_tags(html: &str) -> String {
    let mut out = String::with_capacity(html.len());
    let mut in_tag = false;
    for ch in html.chars() {
        match ch {
            '<' => in_tag = true,
            '>' => {
                in_tag = false;
                // Add a space after closing tags to separate words.
                if !out.ends_with(' ') {
                    out.push(' ');
                }
            }
            _ if !in_tag => out.push(ch),
            _ => {}
        }
    }
    // Collapse runs of whitespace.
    let collapsed = out.split_whitespace().collect::<Vec<_>>().join(" ");
    // Decode common HTML entities for search index accuracy. `&amp;` must be
    // last: decoding it first would manufacture new entities (`&amp;lt;` ->
    // `&lt;`) that the later replacements would then decode again.
    collapsed
        .replace("&lt;", "<")
        .replace("&gt;", ">")
        .replace("&quot;", "\"")
        .replace("&#x27;", "'")
        .replace("&#39;", "'")
        .replace("&amp;", "&")
}

/// Rewrite relative `.md` links to the configured prefix.
///
/// Every inline markdown link `[text](url)` is handled as follows, in order:
///
/// 1. URLs starting with `http://`, `https://`, `mailto:` or `/` are left
///    untouched.
/// 2. If `unpublished_pattern` is set and occurs anywhere in the URL, the link
///    is replaced by its bare text.
/// 3. If the URL's path (the part before any `#anchor`) names a file ending in
///    `.md`, the link target becomes `{link_prefix}/{stem}`, with the anchor
///    re-appended. Directory components are discarded:
///    `../support/faq.md#billing` becomes `{link_prefix}/faq#billing`.
/// 4. Anything else is left untouched. This includes targets that merely
///    contain `.md` somewhere (e.g. `notes.mdx`, or `.md` appearing only in
///    the anchor).
///
/// Only the single trailing `.md` extension is removed from the file name.
fn rewrite_links(markdown: &str, link_prefix: &str, unpublished_pattern: Option<&str>) -> String {
    LINK_RE
        .replace_all(markdown, |caps: &regex::Captures| {
            let text = &caps[1];
            let url = &caps[2];

            // Preserve absolute URLs, mailto, and internal routes.
            if url.starts_with("http://")
                || url.starts_with("https://")
                || url.starts_with("mailto:")
                || url.starts_with('/')
            {
                return caps[0].to_string();
            }

            // Unpublished docs: strip link, keep text.
            if unpublished_pattern.is_some_and(|pattern| url.contains(pattern)) {
                return text.to_string();
            }

            // Split off any #anchor so the extension check only sees the path.
            let (path_part, anchor): (&str, Option<&str>) = match url.split_once('#') {
                Some((path, fragment)) => (path, Some(fragment)),
                None => (url, None),
            };

            // Extract slug from filename: ../support/faq.md -> faq.
            // A substring test for ".md" is not enough: it would also rewrite
            // targets such as `notes.mdx`, so require the real extension.
            let file_name = path_part.rsplit('/').next().unwrap_or(path_part);
            let Some(slug) = file_name
                .strip_suffix(".md")
                .filter(|stem| !stem.is_empty())
            else {
                return caps[0].to_string();
            };

            let mut new_url = format!("{link_prefix}/{slug}");
            if let Some(anchor) = anchor {
                new_url.push('#');
                new_url.push_str(anchor);
            }

            format!("[{text}]({new_url})")
        })
        .into_owned()
}

// Unit tests: pure-function tests for `rewrite_links` and `strip_html_tags`,
// then tempdir-backed tests for `DocLoader` and `resolve_ui_examples`.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn rewrite_same_section_link() {
        let md = "See [SLA](./guarantees.md) for details.";
        let result = rewrite_links(md, "/docs", Some("unpublished/"));
        assert_eq!(result, "See [SLA](/docs/guarantees) for details.");
    }

    #[test]
    fn rewrite_cross_section_link() {
        let md = "Check [FAQ](../support/faq.md) for more.";
        let result = rewrite_links(md, "/docs", Some("unpublished/"));
        assert_eq!(result, "Check [FAQ](/docs/faq) for more.");
    }

    #[test]
    fn rewrite_unpublished_link_becomes_plain_text() {
        let md = "See [Content Moderation](../../unpublished/legal/moderation.md) for details.";
        let result = rewrite_links(md, "/docs", Some("unpublished/"));
        assert_eq!(result, "See Content Moderation for details.");
    }

    #[test]
    fn rewrite_preserves_absolute_urls() {
        let md = "Visit [our site](https://example.com) today.";
        let result = rewrite_links(md, "/docs", Some("unpublished/"));
        assert_eq!(result, md);
    }

    #[test]
    fn rewrite_preserves_plain_http_urls() {
        // Distinct from https — catches a `||` → `&&` mutation on the
        // `url.starts_with("http://")` arm of the scheme check in
        // `rewrite_links`. Without this case, the only protocol tested is
        // https, leaving the http arm uncovered.
        let md = "Visit [legacy](http://example.com) today.";
        let result = rewrite_links(md, "/docs", Some("unpublished/"));
        assert_eq!(result, md);
    }

    #[test]
    fn rewrite_preserves_external_md_links() {
        // Absolute URLs that happen to end in .md must NOT be rewritten.
        // This catches a `||` → `&&` mutation in the scheme check of
        // `rewrite_links`: under the mutant, the early-return short-circuit
        // fails (since one URL can't both start with "http://" AND
        // "https://"), so the URL falls through to the .md-rewrite path and
        // gets incorrectly mangled.
        let md_http = "See [external](http://example.com/foo.md).";
        assert_eq!(
            rewrite_links(md_http, "/docs", Some("unpublished/")),
            md_http,
            "http:// + .md must be preserved"
        );
        let md_https = "See [external](https://example.com/foo.md).";
        assert_eq!(
            rewrite_links(md_https, "/docs", Some("unpublished/")),
            md_https,
            "https:// + .md must be preserved"
        );
        let md_mailto = "Email [us](mailto:a@b.md).";
        assert_eq!(
            rewrite_links(md_mailto, "/docs", Some("unpublished/")),
            md_mailto,
            "mailto: + .md must be preserved"
        );
    }

    #[test]
    fn rewrite_preserves_mailto() {
        let md = "Email [us](mailto:test@example.com)";
        let result = rewrite_links(md, "/docs", Some("unpublished/"));
        assert_eq!(result, md);
    }

    #[test]
    fn rewrite_preserves_internal_routes() {
        let md = "Go to [pricing](/pricing) page.";
        let result = rewrite_links(md, "/docs", Some("unpublished/"));
        assert_eq!(result, md);
    }

    #[test]
    fn rewrite_link_with_anchor() {
        let md = "See [section](./faq.md#billing).";
        let result = rewrite_links(md, "/docs", Some("unpublished/"));
        assert_eq!(result, "See [section](/docs/faq#billing).");
    }

    #[test]
    fn rewrite_public_cross_ref() {
        let md = "See [Acceptable Use](../../public/legal/acceptable-use.md).";
        let result = rewrite_links(md, "/docs", Some("unpublished/"));
        assert_eq!(result, "See [Acceptable Use](/docs/acceptable-use).");
    }

    #[test]
    fn rewrite_custom_prefix() {
        let md = "See [FAQ](./faq.md) here.";
        let result = rewrite_links(md, "/help", None);
        assert_eq!(result, "See [FAQ](/help/faq) here.");
    }

    #[test]
    fn rewrite_no_unpublished_pattern() {
        let md = "See [doc](../../unpublished/foo.md).";
        let result = rewrite_links(md, "/docs", None);
        // Without the pattern, it just rewrites normally
        assert_eq!(result, "See [doc](/docs/foo).");
    }

    #[test]
    fn rewrite_non_md_link_preserved() {
        let md = "See [image](./photo.png) here.";
        let result = rewrite_links(md, "/docs", None);
        assert_eq!(result, md);
    }

    #[test]
    fn strip_html_tags_removes_tags() {
        let html = "<p>Hello <strong>world</strong></p>";
        assert_eq!(strip_html_tags(html), "Hello world");
    }

    #[test]
    fn strip_html_tags_empty_input() {
        assert_eq!(strip_html_tags(""), "");
    }

    #[test]
    fn strip_html_tags_decodes_entities() {
        let html = "<p>Price: $10 &amp; free</p>";
        assert_eq!(strip_html_tags(html), "Price: $10 & free");

        let html2 = "<p>a &lt; b &gt; c</p>";
        assert_eq!(strip_html_tags(html2), "a < b > c");

        let html3 = "<p>&quot;hello&quot; &amp; &#x27;world&#39;</p>";
        assert_eq!(strip_html_tags(html3), "\"hello\" & 'world'");
    }

    #[test]
    fn strip_html_tags_nested_tags() {
        let html = "<div><p>A <em>nested <strong>deep</strong></em> tag</p></div>";
        assert_eq!(strip_html_tags(html), "A nested deep tag");
    }

    // ── DocLoader::load / get / index / search_index (tempdir fixtures) ──

    /// Build the shared test configuration: two sections (`guide`, `support`),
    /// a `/docs` link prefix, the `unpublished/` pattern, and no examples path
    /// or pre-processor. The `base` argument is accepted but not used.
    fn config_for(base: &Path) -> DocLoaderConfig {
        // Sections listed in display order: "guide" first, then "support".
        let _ = base; // base path lives at the call site; config doesn't need it.
        DocLoaderConfig {
            sections: vec![
                ("guide".to_string(), "Guide".to_string()),
                ("support".to_string(), "Support".to_string()),
            ],
            link_prefix: "/docs".to_string(),
            unpublished_pattern: Some("unpublished/".to_string()),
            examples_path: None,
            pre_process: None,
        }
    }

    #[test]
    fn pre_process_hook_transforms_markdown_before_render() {
        let tmp = tempfile::tempdir().unwrap();
        let base = tmp.path();
        let p = base.join("guide");
        std::fs::create_dir_all(&p).unwrap();
        std::fs::write(p.join("a.md"), "# Hi\n\nValue: TOKEN").unwrap();

        let mut config = config_for(base);
        config.pre_process = Some(Box::new(|md: &str| {
            Ok(md.replace("TOKEN", "42"))
        }));
        let loader = DocLoader::load(base, &config);
        let page = loader.get("a").expect("page loaded");
        assert!(page.html_content.contains("42"), "got: {}", page.html_content);
        assert!(!page.html_content.contains("TOKEN"), "got: {}", page.html_content);
    }

    #[test]
    fn pre_process_hook_error_skips_page() {
        let tmp = tempfile::tempdir().unwrap();
        let base = tmp.path();
        let p = base.join("guide");
        std::fs::create_dir_all(&p).unwrap();
        std::fs::write(p.join("good.md"), "# Good").unwrap();
        std::fs::write(p.join("bad.md"), "# Bad\n\n{{ missing }}").unwrap();

        let mut config = config_for(base);
        config.pre_process = Some(Box::new(|md: &str| {
            if md.contains("{{") {
                Err("unresolved placeholder".into())
            } else {
                Ok(md.to_string())
            }
        }));
        let loader = DocLoader::load(base, &config);
        assert!(loader.get("good").is_some(), "good page should load");
        assert!(loader.get("bad").is_none(), "bad page should be skipped");
    }

    /// Write `content` to `base/rel`, creating parent directories as needed.
    /// Panics on any I/O error.
    fn write(base: &Path, rel: &str, content: &str) {
        let p = base.join(rel);
        std::fs::create_dir_all(p.parent().unwrap()).unwrap();
        std::fs::write(p, content).unwrap();
    }

    #[test]
    fn load_indexes_pages_across_sections_in_display_order() {
        let tmp = tempfile::tempdir().unwrap();
        let base = tmp.path();
        // Guide section: two pages, with the file order intentionally reversed
        // from desired sort order to confirm `entries.sort_by_key(file_name)`.
        write(base, "guide/zzz-last.md", "# Z Page\n\nzzz body");
        write(base, "guide/aaa-first.md", "# A Page\n\naaa body");
        // Support section: one page.
        write(base, "support/faq.md", "# FAQ\n\nfaq body");

        let loader = DocLoader::load(base, &config_for(base));
        let idx = loader.index();
        assert_eq!(idx.len(), 3, "expected 3 indexed pages, got: {idx:?}");

        // Sections appear in config order; entries within a section in
        // sort_by_key(file_name) order.
        assert_eq!(idx[0].slug, "aaa-first");
        assert_eq!(idx[0].section, "Guide");
        assert_eq!(idx[1].slug, "zzz-last");
        assert_eq!(idx[1].section, "Guide");
        assert_eq!(idx[2].slug, "faq");
        assert_eq!(idx[2].section, "Support");
    }

    #[test]
    fn load_extracts_title_from_first_heading_and_strips_it_from_body() {
        let tmp = tempfile::tempdir().unwrap();
        let base = tmp.path();
        write(base, "guide/welcome.md", "# Welcome Title\n\nBody paragraph here.");

        let loader = DocLoader::load(base, &config_for(base));
        let page = loader.get("welcome").expect("page should be indexed");
        assert_eq!(page.title, "Welcome Title");
        // The H1 itself must be stripped from html_content — only body remains.
        assert!(!page.html_content.contains("Welcome Title"),
            "title leaked into body: {}", page.html_content);
        assert!(page.html_content.contains("Body paragraph here"));
    }

    #[test]
    fn load_falls_back_to_slug_when_no_title_heading() {
        let tmp = tempfile::tempdir().unwrap();
        let base = tmp.path();
        write(base, "guide/no-title.md", "Body without any heading.");

        let loader = DocLoader::load(base, &config_for(base));
        let page = loader.get("no-title").unwrap();
        assert_eq!(page.title, "no-title");
    }

    #[test]
    fn load_skips_non_markdown_files() {
        let tmp = tempfile::tempdir().unwrap();
        let base = tmp.path();
        write(base, "guide/keep.md", "# Keep\n\nbody");
        write(base, "guide/ignore.txt", "should not be indexed");
        write(base, "guide/also-ignore.json", "{}");

        let loader = DocLoader::load(base, &config_for(base));
        assert_eq!(loader.index().len(), 1);
        assert_eq!(loader.index()[0].slug, "keep");
    }

    #[test]
    fn load_skips_missing_section_directories() {
        let tmp = tempfile::tempdir().unwrap();
        let base = tmp.path();
        // Only `guide` exists; `support` is missing entirely.
        write(base, "guide/page.md", "# Page\n\nbody");

        let loader = DocLoader::load(base, &config_for(base));
        // Should index the one page that exists, not panic on the missing dir.
        assert_eq!(loader.index().len(), 1);
        assert_eq!(loader.index()[0].slug, "page");
    }

    #[test]
    fn load_rewrites_relative_md_links() {
        let tmp = tempfile::tempdir().unwrap();
        let base = tmp.path();
        write(
            base,
            "guide/main.md",
            "# Main\n\nSee [FAQ](../support/faq.md) for help.",
        );
        write(base, "support/faq.md", "# FAQ\n\nFAQ body.");

        let loader = DocLoader::load(base, &config_for(base));
        let main = loader.get("main").unwrap();
        // The .md link must be rewritten to /docs/<slug>; the original
        // `../support/faq.md` path must not appear.
        assert!(main.html_content.contains("/docs/faq"),
            "link not rewritten: {}", main.html_content);
        assert!(!main.html_content.contains("faq.md"),
            "raw .md path leaked: {}", main.html_content);
    }

    #[test]
    fn get_returns_none_for_unknown_slug() {
        let tmp = tempfile::tempdir().unwrap();
        let base = tmp.path();
        write(base, "guide/exists.md", "# Exists\n\nbody");

        let loader = DocLoader::load(base, &config_for(base));
        assert!(loader.get("nope").is_none());
        assert!(loader.get("exists").is_some());
    }

    #[test]
    fn search_index_strips_html_and_preserves_metadata() {
        let tmp = tempfile::tempdir().unwrap();
        let base = tmp.path();
        write(
            base,
            "guide/with-html.md",
            "# Title\n\nA **bold** word and an `inline code` token.",
        );

        let loader = DocLoader::load(base, &config_for(base));
        let entries = loader.search_index();
        assert_eq!(entries.len(), 1);
        let e = &entries[0];
        assert_eq!(e.slug, "with-html");
        assert_eq!(e.title, "Title");
        assert_eq!(e.section, "Guide");
        // body_text must be plain text — no surviving tags.
        assert!(!e.body_text.contains('<'), "tag leaked into search: {}", e.body_text);
        assert!(e.body_text.contains("bold"));
        assert!(e.body_text.contains("inline code"));
    }

    // ── resolve_ui_examples ──

    #[test]
    fn resolve_ui_examples_inlines_file_contents_when_present() {
        let tmp = tempfile::tempdir().unwrap();
        let dir = tmp.path();
        std::fs::write(dir.join("cart.html"), "<button>Buy</button>").unwrap();

        let html = r#"<div class="doc-ui-frame" data-ui="cart"></div>"#;
        let result = resolve_ui_examples(html, Some(dir));
        assert!(result.contains("<button>Buy</button>"));
        // The data-ui attribute is consumed during inlining.
        assert!(!result.contains(r#"data-ui="cart""#));
    }

    #[test]
    fn resolve_ui_examples_falls_back_when_file_missing() {
        let tmp = tempfile::tempdir().unwrap();
        let dir = tmp.path();
        // examples_path exists but file does not.
        let html = r#"<div class="doc-ui-frame" data-ui="ghost"></div>"#;
        let result = resolve_ui_examples(html, Some(dir));
        assert!(result.contains("doc-ui-missing"));
        assert!(result.contains("[UI example: ghost]"));
    }

    #[test]
    fn resolve_ui_examples_falls_back_when_no_examples_path() {
        // Pins the `None` arm of the `match examples_path`.
        let html = r#"<div class="doc-ui-frame" data-ui="anything"></div>"#;
        let result = resolve_ui_examples(html, None);
        assert!(result.contains("doc-ui-missing"));
        assert!(result.contains("[UI example: anything]"));
    }

    #[test]
    fn resolve_ui_examples_short_circuits_when_no_placeholder() {
        // Pins the `if !html.contains("doc-ui-frame") { return html.to_string(); }` early return.
        let html = "<p>Just regular HTML, no placeholders.</p>";
        let result = resolve_ui_examples(html, None);
        assert_eq!(result, html);
    }
}