use std::collections::HashMap;
use std::path::Path;
use std::sync::LazyLock;

use regex::Regex;

/// Matches inline Markdown links of the form `[text](url)`.
///
/// Capture group 1 is the link text (one or more characters other than `]`)
/// and capture group 2 is the URL (one or more characters other than `)`).
/// The pattern is compiled lazily on first use.
static LINK_RE: LazyLock<Regex> =
    LazyLock::new(|| Regex::new(r"\[([^\]]+)\]\(([^)]+)\)").expect("valid regex"));

/// Configuration for the doc loader.
///
/// Derives `Clone` and `Debug` like the other public types in this module so
/// the configuration can be logged and shared between callers.
#[derive(Clone, Debug)]
pub struct DocLoaderConfig {
    /// Sections as `(directory_name, display_name)` pairs in display order.
    ///
    /// The directory name is joined onto the loader's base path; the display
    /// name is copied into every page loaded from that directory.
    pub sections: Vec<(String, String)>,
    /// URL prefix for rewritten links (e.g., "/docs").
    pub link_prefix: String,
    /// Pattern that identifies unpublished links to strip (e.g., "unpublished/").
    ///
    /// When `None`, no links are stripped.
    pub unpublished_pattern: Option<String>,
}

/// A single documentation page after Markdown rendering.
#[derive(Debug, Clone)]
pub struct DocPage {
    /// Human-readable page title.
    pub title: String,
    /// Identifier used to look the page up in the loader.
    pub slug: String,
    /// Display name of the section the page belongs to.
    pub section: String,
    /// Rendered HTML body of the page.
    pub html_content: String,
}

/// One row of the docs index, kept in display order by the loader.
#[derive(Debug, Clone)]
pub struct DocIndexEntry {
    /// Human-readable page title.
    pub title: String,
    /// Identifier of the page this entry points at.
    pub slug: String,
    /// Display name of the section the page belongs to.
    pub section: String,
}

/// One record of the full-text search index.
///
/// Derives `serde::Serialize` so the index can be serialised (e.g. to JSON for
/// client-side search). Field order is part of the serialised shape.
#[derive(Debug, Clone, serde::Serialize)]
pub struct DocSearchEntry {
    /// Identifier of the page this record describes.
    pub slug: String,
    /// Human-readable page title.
    pub title: String,
    /// Display name of the section the page belongs to.
    pub section: String,
    /// Plain-text body of the page used for matching queries.
    pub body_text: String,
}

/// In-memory collection of rendered documentation pages.
///
/// Instances are produced by `DocLoader::load`; the fields are private and
/// exposed read-only through the accessor methods.
#[derive(Debug, Clone)]
pub struct DocLoader {
    /// Rendered pages keyed by slug.
    pages: HashMap<String, DocPage>,
    /// Index entries in display order.
    index: Vec<DocIndexEntry>,
}

impl DocLoader {
    /// Load all `.md` files from `base_path`, rendering them into HTML.
    ///
    /// Expects subdirectories matching the configured sections. Sections are
    /// visited in the order given by `config.sections`; a section whose
    /// directory does not exist under `base_path` is skipped silently. Within
    /// a section, files are processed in file-name order and each page's slug
    /// is its file stem.
    ///
    /// Loading is best-effort: unreadable directories, directory entries, and
    /// files are logged with `tracing::warn!` and skipped rather than aborting
    /// the whole load. Files whose stem is not valid UTF-8 are skipped as
    /// well, because they cannot be addressed through a `&str` slug.
    ///
    /// Slugs share one namespace across all sections. If two files produce
    /// the same slug, the later one replaces the earlier one in both the page
    /// map and the index, and a warning is logged.
    pub fn load(base_path: &Path, config: &DocLoaderConfig) -> Self {
        let mut pages: HashMap<String, DocPage> = HashMap::new();
        let mut index: Vec<DocIndexEntry> = Vec::new();

        for (dir_name, section_display) in &config.sections {
            let section_path = base_path.join(dir_name);
            if !section_path.is_dir() {
                continue;
            }

            let read_dir = match std::fs::read_dir(&section_path) {
                Ok(rd) => rd,
                Err(e) => {
                    tracing::warn!(
                        path = %section_path.display(),
                        error = %e,
                        "Failed to read docs section directory"
                    );
                    continue;
                }
            };

            let mut entries: Vec<_> = read_dir
                .filter_map(|entry| match entry {
                    Ok(entry) => Some(entry),
                    Err(err) => {
                        // Report the failure instead of silently losing a page.
                        tracing::warn!(
                            path = %section_path.display(),
                            error = %err,
                            "Failed to read docs directory entry"
                        );
                        None
                    }
                })
                .filter(|entry| entry.path().extension().is_some_and(|ext| ext == "md"))
                .collect();

            // `file_name()` allocates, so compute each sort key only once.
            entries.sort_by_cached_key(|entry| entry.file_name());

            for entry in entries {
                let path = entry.path();
                let Some(slug) = path
                    .file_stem()
                    .and_then(|stem| stem.to_str())
                    .map(str::to_owned)
                else {
                    // An empty fallback slug would register the page under "".
                    tracing::warn!(
                        path = %path.display(),
                        "Skipping doc file whose name is not valid UTF-8"
                    );
                    continue;
                };

                let raw_md = match std::fs::read_to_string(&path) {
                    Ok(content) => content,
                    Err(e) => {
                        tracing::warn!(
                            path = %path.display(),
                            error = %e,
                            "Failed to read doc file"
                        );
                        continue;
                    }
                };

                // Fall back to the slug when the helper finds no title.
                let title =
                    crate::text::extract_title(&raw_md).unwrap_or_else(|| slug.clone());
                let rewritten_md = rewrite_links(
                    &raw_md,
                    &config.link_prefix,
                    config.unpublished_pattern.as_deref(),
                );
                let md_without_title = crate::text::strip_first_heading(&rewritten_md);
                let html_content = crate::render_permissive(&md_without_title);
                #[cfg(feature = "directives")]
                let html_content = crate::directives::post_process_directives(&html_content);

                let page = DocPage {
                    title,
                    slug,
                    section: section_display.clone(),
                    html_content,
                };

                let index_entry = DocIndexEntry {
                    title: page.title.clone(),
                    slug: page.slug.clone(),
                    section: page.section.clone(),
                };

                if let Some(previous) = pages.insert(page.slug.clone(), page) {
                    // Keep the index consistent with the page map: the
                    // replaced page must not stay listed under a slug that
                    // now resolves to a different page.
                    tracing::warn!(
                        slug = %previous.slug,
                        section = %previous.section,
                        "Duplicate doc slug; earlier page replaced"
                    );
                    index.retain(|existing| existing.slug != previous.slug);
                }
                index.push(index_entry);
            }
        }

        DocLoader { pages, index }
    }

    /// Look up a rendered page by slug.
    ///
    /// Returns `None` when no page with that slug was loaded.
    pub fn get(&self, slug: &str) -> Option<&DocPage> {
        self.pages.get(slug)
    }

    /// Get the full ordered index.
    pub fn index(&self) -> &[DocIndexEntry] {
        &self.index
    }

    /// Build a search index with HTML stripped to plain text.
    ///
    /// Entries follow the order of the index; an index entry without a
    /// matching page is omitted.
    pub fn search_index(&self) -> Vec<DocSearchEntry> {
        self.index
            .iter()
            .filter_map(|entry| {
                let page = self.pages.get(&entry.slug)?;
                Some(DocSearchEntry {
                    slug: entry.slug.clone(),
                    title: entry.title.clone(),
                    section: entry.section.clone(),
                    body_text: strip_html_tags(&page.html_content),
                })
            })
            .collect()
    }
}

/// Strip HTML tags from a string, returning plain text.
///
/// Everything between `<` and `>` is dropped and replaced by a single space so
/// that words separated only by tags do not run together. Runs of whitespace
/// are then collapsed to one space and leading/trailing whitespace is removed.
///
/// Finally, a small set of common HTML entities (`&lt;`, `&gt;`, `&quot;`,
/// `&#x27;`, `&#39;`, `&amp;`) is decoded so search indexes match plain-text
/// queries. Each entity is decoded exactly once: text that was escaped twice
/// in the HTML (e.g. `&amp;lt;`, which displays as the literal text `&lt;`)
/// comes out as `&lt;`, not `<`.
fn strip_html_tags(html: &str) -> String {
    let mut out = String::with_capacity(html.len());
    let mut in_tag = false;
    for ch in html.chars() {
        match ch {
            '<' => in_tag = true,
            '>' => {
                in_tag = false;
                // Add a space after closing tags to separate words.
                if !out.ends_with(' ') {
                    out.push(' ');
                }
            }
            _ if !in_tag => out.push(ch),
            _ => {}
        }
    }

    // Collapse runs of whitespace.
    let collapsed = out.split_whitespace().collect::<Vec<_>>().join(" ");

    // `&amp;` must be decoded last: decoding it first would turn `&amp;lt;`
    // into `&lt;`, which the following replacement would decode a second time.
    collapsed
        .replace("&lt;", "<")
        .replace("&gt;", ">")
        .replace("&quot;", "\"")
        .replace("&#x27;", "'")
        .replace("&#39;", "'")
        .replace("&amp;", "&")
}

/// Rewrite relative `.md` links to the configured prefix.
///
/// Every inline Markdown link `[text](url)` in `markdown` is handled as
/// follows, in this order:
///
/// 1. URLs starting with `http://`, `https://`, `mailto:` or `/` are left
///    untouched.
/// 2. If `unpublished_pattern` is a non-empty string contained in the URL, the
///    link is replaced by its plain text.
/// 3. If the file name of the URL path (the part before any `#anchor`, after
///    the last `/`) ends in `.md`, the link becomes
///    `{link_prefix}/{file name without .md}` with the anchor, if any,
///    re-appended. Directory components are discarded.
/// 4. Anything else is left untouched.
///
/// A trailing `/` on `link_prefix` is ignored so the result never contains a
/// doubled slash. Only a single trailing `.md` is removed from the file name,
/// which keeps the result equal to the file stem of the linked file.
fn rewrite_links(markdown: &str, link_prefix: &str, unpublished_pattern: Option<&str>) -> String {
    let link_prefix = link_prefix.trim_end_matches('/');
    // An empty pattern is contained in every string and would strip every
    // relative link, so treat it as "no pattern".
    let unpublished_pattern = unpublished_pattern.filter(|pattern| !pattern.is_empty());

    LINK_RE
        .replace_all(markdown, |caps: &regex::Captures| {
            let text = &caps[1];
            let url = &caps[2];

            // Preserve absolute URLs, mailto, and internal routes.
            if url.starts_with("http://")
                || url.starts_with("https://")
                || url.starts_with("mailto:")
                || url.starts_with('/')
            {
                return caps[0].to_string();
            }

            // Unpublished docs: strip link, keep text.
            if unpublished_pattern.is_some_and(|pattern| url.contains(pattern)) {
                return text.to_string();
            }

            // Split off any #anchor before inspecting the file name, so an
            // anchor that happens to contain ".md" cannot trigger a rewrite.
            let (path_part, anchor) = match url.split_once('#') {
                Some((path, anchor)) => (path, Some(anchor)),
                None => (url, None),
            };

            // Extract slug from filename: ../support/faq.md -> faq
            let file_name = path_part.rsplit('/').next().unwrap_or(path_part);
            let Some(slug) = file_name
                .strip_suffix(".md")
                .filter(|slug| !slug.is_empty())
            else {
                // Not a Markdown document (or no usable name): keep as is.
                return caps[0].to_string();
            };

            match anchor {
                Some(anchor) => format!("[{text}]({link_prefix}/{slug}#{anchor})"),
                None => format!("[{text}]({link_prefix}/{slug})"),
            }
        })
        .into_owned()
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Pattern most tests use to mark links into unpublished docs.
    const UNPUBLISHED: &str = "unpublished/";

    /// Runs `rewrite_links` with the `/docs` prefix and the unpublished pattern enabled.
    fn rewrite_docs(md: &str) -> String {
        rewrite_links(md, "/docs", Some(UNPUBLISHED))
    }

    /// Asserts that `rewrite_docs` leaves `md` exactly as it was.
    fn assert_untouched(md: &str) {
        assert_eq!(rewrite_docs(md), md);
    }

    #[test]
    fn rewrite_same_section_link() {
        assert_eq!(
            rewrite_docs("See [SLA](./guarantees.md) for details."),
            "See [SLA](/docs/guarantees) for details."
        );
    }

    #[test]
    fn rewrite_cross_section_link() {
        assert_eq!(
            rewrite_docs("Check [FAQ](../support/faq.md) for more."),
            "Check [FAQ](/docs/faq) for more."
        );
    }

    #[test]
    fn rewrite_unpublished_link_becomes_plain_text() {
        assert_eq!(
            rewrite_docs(
                "See [Content Moderation](../../unpublished/legal/moderation.md) for details."
            ),
            "See Content Moderation for details."
        );
    }

    #[test]
    fn rewrite_preserves_absolute_urls() {
        assert_untouched("Visit [our site](https://example.com) today.");
    }

    #[test]
    fn rewrite_preserves_mailto() {
        assert_untouched("Email [us](mailto:test@example.com)");
    }

    #[test]
    fn rewrite_preserves_internal_routes() {
        assert_untouched("Go to [pricing](/pricing) page.");
    }

    #[test]
    fn rewrite_link_with_anchor() {
        assert_eq!(
            rewrite_docs("See [section](./faq.md#billing)."),
            "See [section](/docs/faq#billing)."
        );
    }

    #[test]
    fn rewrite_public_cross_ref() {
        assert_eq!(
            rewrite_docs("See [Acceptable Use](../../public/legal/acceptable-use.md)."),
            "See [Acceptable Use](/docs/acceptable-use)."
        );
    }

    #[test]
    fn rewrite_custom_prefix() {
        assert_eq!(
            rewrite_links("See [FAQ](./faq.md) here.", "/help", None),
            "See [FAQ](/help/faq) here."
        );
    }

    #[test]
    fn rewrite_no_unpublished_pattern() {
        // Without the pattern, it just rewrites normally
        assert_eq!(
            rewrite_links("See [doc](../../unpublished/foo.md).", "/docs", None),
            "See [doc](/docs/foo)."
        );
    }

    #[test]
    fn rewrite_non_md_link_preserved() {
        let md = "See [image](./photo.png) here.";
        assert_eq!(rewrite_links(md, "/docs", None), md);
    }

    #[test]
    fn strip_html_tags_removes_tags() {
        assert_eq!(
            strip_html_tags("<p>Hello <strong>world</strong></p>"),
            "Hello world"
        );
    }

    #[test]
    fn strip_html_tags_empty_input() {
        assert_eq!(strip_html_tags(""), "");
    }

    #[test]
    fn strip_html_tags_decodes_entities() {
        // Each case is (input HTML, expected plain text).
        let cases = [
            ("<p>Price: $10 &amp; free</p>", "Price: $10 & free"),
            ("<p>a &lt; b &gt; c</p>", "a < b > c"),
            (
                "<p>&quot;hello&quot; &amp; &#x27;world&#39;</p>",
                "\"hello\" & 'world'",
            ),
        ];
        for (html, expected) in cases {
            assert_eq!(strip_html_tags(html), expected);
        }
    }

    #[test]
    fn strip_html_tags_nested_tags() {
        assert_eq!(
            strip_html_tags("<div><p>A <em>nested <strong>deep</strong></em> tag</p></div>"),
            "A nested deep tag"
        );
    }
}