"""
Article generation using Gemini Flash with web search grounding.
"""

import json
import logging
import re
import time
from dataclasses import dataclass
from datetime import datetime
from pathlib import Path
from typing import Optional

from google import genai
from google.genai import types

from config.settings import get_settings
from database.models import Topic

logger = logging.getLogger(__name__)

# Model fallback chain (best to most stable). generate_article() tries each
# model in order and stops at the first one that returns a response.
TEXT_MODELS = [
    "gemini-3-pro-preview",
    "gemini-3-flash-preview",
    "gemini-2.5-pro",
]

# Per-model retry policy for rate-limited calls (HTTP 429 / RESOURCE_EXHAUSTED).
MAX_RETRIES = 3
RETRY_DELAY = 30  # seconds to sleep between rate-limited retries


@dataclass
class GenerationResult:
    """Outcome of one article generation attempt.

    Produced by generate_article() / _parse_article_response(). On failure,
    success is False, error holds the message, and content fields are empty.
    """

    success: bool  # True when generation and parsing both completed
    title: str  # article title (first H1; falls back to the topic title)
    slug: str  # URL-friendly slug derived from the title
    content_markdown: str  # full article body in markdown
    meta_description: str  # SEO description, capped at 160 characters
    word_count: int  # words in content_markdown (code fences excluded)
    cost_usd: float  # rough API cost estimate in USD
    error: Optional[str] = None  # error message when success is False
    grounding_used: bool = False  # True if web-search grounding was detected


def get_gemini_client() -> genai.Client:
    """Build a Gemini API client using the API key from application settings."""
    api_key = get_settings().gemini_api_key
    return genai.Client(api_key=api_key)


def generate_article(topic: Topic) -> GenerationResult:
    """
    Generate a 2000-3000 word article using Gemini with web search grounding.

    Builds the prompt from the topic, walks the TEXT_MODELS fallback chain
    (retrying rate-limited calls up to MAX_RETRIES per model), verifies via
    grounding metadata that the model actually performed a web search, then
    parses the model output into a GenerationResult.

    Args:
        topic: Topic row providing title, keywords, content cluster and
            search intent for the prompt template.

    Returns:
        GenerationResult with success=True on success. API or parsing
        failures are returned as success=False with error set; they do not
        propagate. (Prompt-template FileNotFoundError still propagates,
        matching the original control flow.)
    """
    prompt = _build_article_prompt(topic)

    try:
        response = _generate_with_model_fallback(prompt)

        # Verify web search was used (grounding)
        grounding_used = _check_grounding(response)
        if not grounding_used:
            logger.warning(
                f"No web search grounding detected for '{topic.title[:50]}...'"
            )

        # The SDK may return no text parts, in which case response.text is
        # None — normalize so parsing and cost estimation don't crash.
        response_text = response.text or ""

        # Parse structured response
        result = _parse_article_response(response_text, topic)
        result.grounding_used = grounding_used

        # Rough cost estimate: ~4 characters per token, Flash-tier pricing
        # ($0.075/1M input tokens, $0.30/1M output tokens).
        # NOTE(review): Pro-tier models in TEXT_MODELS are priced higher —
        # confirm whether per-model pricing matters here.
        input_tokens = len(prompt) / 4  # rough estimate
        output_tokens = len(response_text) / 4
        result.cost_usd = (input_tokens * 0.075 + output_tokens * 0.30) / 1_000_000

        logger.info(
            f"Generated article: '{result.title[:50]}...' "
            f"({result.word_count} words, grounding: {grounding_used})"
        )
        return result

    except Exception as e:
        logger.error(f"Article generation failed: {e}")
        return GenerationResult(
            success=False,
            title="",
            slug="",
            content_markdown="",
            meta_description="",
            word_count=0,
            cost_usd=0.0,
            error=str(e),
        )


def _build_article_prompt(topic: Topic) -> str:
    """Render the article prompt for a topic, appending Parketry context
    when the topic's content cluster calls for it."""
    prompt = _load_prompt("article_generation.txt").format(
        topic_title=topic.title,
        target_keywords=topic.target_keywords,
        secondary_keywords=topic.secondary_keywords or "",
        content_cluster=topic.content_cluster,
        search_intent=topic.search_intent,
        current_year=datetime.now().year,
    )

    # Conditionally append Parketry context based on content cluster
    parketry_context = _get_parketry_context(topic.content_cluster)
    if parketry_context:
        prompt = prompt + "\n\n" + parketry_context
        logger.debug(f"Parketry context appended for cluster '{topic.content_cluster}'")
    return prompt


def _generate_with_model_fallback(prompt: str):
    """Call generate_content across TEXT_MODELS with per-model retries.

    Rate-limited attempts (429 / RESOURCE_EXHAUSTED) sleep RETRY_DELAY and
    retry up to MAX_RETRIES; any other failure moves on to the next model.
    Returns the first successful response; raises if every model fails.
    """
    client = get_gemini_client()
    # Configure Google Search tool for grounding
    config = types.GenerateContentConfig(
        tools=[types.Tool(google_search=types.GoogleSearch())],
        temperature=0.7,
    )

    last_error = None
    for model in TEXT_MODELS:
        for attempt in range(MAX_RETRIES):
            try:
                logger.debug(f"Trying model {model} (attempt {attempt + 1}/{MAX_RETRIES})")
                return client.models.generate_content(
                    model=model,
                    contents=prompt,
                    config=config,
                )
            except Exception as e:
                last_error = e
                rate_limited = "429" in str(e) or "RESOURCE_EXHAUSTED" in str(e)
                if rate_limited and attempt < MAX_RETRIES - 1:
                    logger.warning(f"Rate limited on {model}, waiting {RETRY_DELAY}s (attempt {attempt + 1}/{MAX_RETRIES})")
                    time.sleep(RETRY_DELAY)
                    continue
                # Non-retryable or exhausted retries - try next model
                logger.warning(f"Model {model} failed: {e}")
                break
    raise Exception(f"All models failed. Last error: {last_error}")


# Tiered Parketry brand-integration rules. Each tier maps a set of content
# clusters to a German prompt instruction that _get_parketry_context() injects
# into the article prompt. Clusters not listed in any tier get no mention.
_PARKETRY_INTEGRATION = {
    # Cost/comparison/problem articles: 1-2 paragraphs positioning Parketry
    # as a market alternative.
    "strong": {
        "clusters": {"kosten", "vergleich", "probleme"},
        "instruction": (
            "Integriere die Parketry-Fakten als 1-2 Absätze im Artikel. "
            "Ordne Parketry als konkrete Marktalternative zu teuren klassischen Lösungen ein. "
            "Platziere dies im Kontext von Kosten, Vergleich oder Problemlösung — je nach Thema."
        ),
    },
    # Industry/how-to articles: at most a single-sentence mention.
    "light": {
        "clusters": {"branchen", "howto"},
        "instruction": (
            "Erwähne in einem einzelnen Satz, dass es auch kostenlose Full-Service-Modelle "
            "wie Parketry gibt. Nicht mehr als ein Satz — der Fokus bleibt auf dem Kernthema."
        ),
    },
}


def _get_parketry_context(content_cluster: str) -> Optional[str]:
    """
    Return Parketry context prompt fragment based on content cluster tier.

    Strong (kosten, vergleich, probleme): 1-2 paragraphs as market alternative
    Light (branchen, howto): single sentence mention
    None (technik, recht, trends): no mention — stay editorially neutral
    """
    matched_tier = next(
        (
            tier
            for tier in _PARKETRY_INTEGRATION.values()
            if content_cluster in tier["clusters"]
        ),
        None,
    )
    if matched_tier is None:
        return None
    return _load_prompt("parketry_context.txt").format(
        integration_instruction=matched_tier["instruction"]
    )


def _check_grounding(response) -> bool:
    """Check if response used web search grounding."""
    try:
        # Check for grounding metadata in response
        if hasattr(response, "candidates") and response.candidates:
            candidate = response.candidates[0]
            if hasattr(candidate, "grounding_metadata"):
                metadata = candidate.grounding_metadata
                if hasattr(metadata, "web_search_queries") and metadata.web_search_queries:
                    return True
                if hasattr(metadata, "grounding_chunks") and metadata.grounding_chunks:
                    return True
                if hasattr(metadata, "search_entry_point"):
                    return True
        return False
    except Exception as e:
        logger.debug(f"Could not check grounding: {e}")
        return False


def _load_prompt(filename: str) -> str:
    """Load prompt template from prompts directory."""
    prompt_path = Path(__file__).parent.parent / "prompts" / filename
    if not prompt_path.exists():
        raise FileNotFoundError(f"Prompt file not found: {prompt_path}")
    return prompt_path.read_text()


def _slugify(text: str) -> str:
    """Convert text to URL-friendly slug."""
    text = text.lower()
    text = re.sub(r"[äÄ]", "ae", text)
    text = re.sub(r"[öÖ]", "oe", text)
    text = re.sub(r"[üÜ]", "ue", text)
    text = re.sub(r"[ß]", "ss", text)
    text = re.sub(r"[^a-z0-9]+", "-", text)
    text = text.strip("-")
    return text


def _count_words(text: str) -> int:
    """Count words in markdown text, excluding code blocks and headers."""
    # Remove code blocks
    text = re.sub(r"```[\s\S]*?```", "", text)
    # Remove inline code
    text = re.sub(r"`[^`]+`", "", text)
    # Remove markdown formatting
    text = re.sub(r"[#*_\[\]()]", " ", text)
    # Count words
    words = text.split()
    return len(words)


def _parse_article_response(response_text: str, topic: Topic) -> GenerationResult:
    """Parse Gemini response into GenerationResult.

    The model output is expected to carry the article either inside a
    ```markdown fence or as bare text, optionally followed by a ```json
    fence containing a "meta_description" field. The title falls back to
    topic.title when no H1 is found; the meta description falls back to the
    first prose paragraph. Never raises — parse failures are returned as
    success=False with the raw text preserved in content_markdown.
    """
    try:
        text = response_text.strip()
        meta_description = ""

        # Extract markdown content if wrapped in ```markdown block
        if "```markdown" in text:
            markdown_content = text.split("```markdown")[1].split("```")[0].strip()
        else:
            # Use the full text; any trailing fenced block (e.g. the JSON
            # metadata) is stripped by the cleanup regex below.
            markdown_content = text

        # Extract meta_description from JSON block if present
        if "```json" in text:
            try:
                json_text = text.split("```json")[-1].split("```")[0]
                data = json.loads(json_text)
                # NOTE(review): a JSON null meta_description makes this slice
                # raise TypeError, which escapes to the outer except and flips
                # the whole parse to failure — confirm the prompt guarantees
                # a string value here.
                meta_description = data.get("meta_description", "")[:160]
            except (json.JSONDecodeError, IndexError):
                pass

        # Clean up: remove any remaining code blocks or preamble
        if "```" in markdown_content:
            # Remove any remaining code blocks
            markdown_content = re.sub(r"```[a-z]*\n[\s\S]*?```", "", markdown_content)

        # Extract title from first H1
        lines = markdown_content.split("\n")
        title = topic.title  # fallback when the model emitted no H1
        content_start = 0

        for i, line in enumerate(lines):
            if line.startswith("# "):
                title = line[2:].strip()
                content_start = i
                break

        # If no meta_description from JSON, extract from first paragraph
        if not meta_description:
            for line in lines[content_start + 1:]:
                line = line.strip()
                # Skip blanks, headings, and list items; take the first prose line.
                if line and not line.startswith("#") and not line.startswith("-") and not line.startswith("*"):
                    # Clean markdown formatting for meta description
                    meta_description = re.sub(r"\*\*([^*]+)\*\*", r"\1", line)
                    meta_description = re.sub(r"\[([^\]]+)\]\([^)]+\)", r"\1", meta_description)
                    meta_description = meta_description[:160]  # SEO length cap
                    break

        word_count = _count_words(markdown_content)

        return GenerationResult(
            success=True,
            title=title,
            slug=_slugify(title),
            content_markdown=markdown_content,
            meta_description=meta_description,
            word_count=word_count,
            cost_usd=0.0,  # filled in by the caller from token estimates
        )

    except Exception as e:
        logger.error(f"Failed to parse article response: {e}")
        # Preserve the raw text so a failed parse can still be inspected.
        return GenerationResult(
            success=False,
            title="",
            slug="",
            content_markdown=response_text,
            meta_description="",
            word_count=_count_words(response_text),
            cost_usd=0.0,
            error=f"Parse error: {str(e)}",
        )
