#!/usr/bin/env python3
"""
Daily scheduler for content pipeline.
Runs at a random time between 08:00 and 19:00.

Usage:
    - Schedule this script to run early morning via cron (e.g., 00:05)
    - It will sleep until a random time in the configured window
    - Then execute the pipeline

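Cron entry (the script appends to logs/scheduler.log itself, so cron's
captured stdout/stderr goes to a separate file to avoid duplicated lines):
    5 0 * * * cd /var/www/html/content-pipeline && ./venv/bin/python daily_scheduler.py >> logs/cron.log 2>&1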
"""

import logging
import random
import sys
import time
from datetime import datetime, timedelta
from pathlib import Path

# Publish window, expressed as whole hours of the day
PUBLISH_WINDOW_START_HOUR = 8   # window opens at 08:00
PUBLISH_WINDOW_END_HOUR = 19    # window closes at 19:00 (exclusive, so the latest pick is 18:59)

# Logs live in a "logs" directory next to this script; create it if missing
PROJECT_DIR = Path(__file__).parent
LOG_DIR = PROJECT_DIR.joinpath("logs")
LOG_DIR.mkdir(exist_ok=True)

# Route every record at INFO or above to both the log file and a stream handler
_LOG_FORMAT = "%(asctime)s - %(levelname)s - %(message)s"
_log_handlers = [
    logging.FileHandler(LOG_DIR / "scheduler.log"),
    logging.StreamHandler(),
]
logging.basicConfig(level=logging.INFO, format=_LOG_FORMAT, handlers=_log_handlers)
logger = logging.getLogger(__name__)


def get_random_publish_time() -> datetime:
    """Pick a random minute of today that falls inside the publish window.

    The window runs from PUBLISH_WINDOW_START_HOUR:00 (inclusive) to
    PUBLISH_WINDOW_END_HOUR:00 (exclusive), at one-minute resolution.

    Returns:
        A naive datetime on today's date. It may already lie in the past;
        no adjustment is made here, so the caller decides how to handle that.
    """
    midnight = datetime.now().replace(hour=0, minute=0, second=0, microsecond=0)

    # Window bounds as minutes after midnight; the upper bound is exclusive
    window_open = PUBLISH_WINDOW_START_HOUR * 60
    window_close = PUBLISH_WINDOW_END_HOUR * 60

    offset = timedelta(minutes=random.randrange(window_open, window_close))
    return midnight + offset


def sleep_until(target_time: datetime) -> bool:
    """Block until the wall clock reaches ``target_time``.

    The wait is done in chunks of at most five minutes so the process stays
    interruptible. After every chunk the remaining time is recomputed from
    the current clock rather than counted down, so an overshooting
    ``time.sleep``, time spent suspended, or a clock adjustment does not
    push the wake-up past the target.

    Args:
        target_time: Naive datetime, compared against ``datetime.now()``.

    Returns:
        True if the function had to wait, False if ``target_time`` was
        already reached when it was called.
    """
    now = datetime.now()

    if now >= target_time:
        logger.info(f"Target time {target_time.strftime('%H:%M')} has passed, running immediately")
        return False

    remaining_seconds = (target_time - now).total_seconds()
    hours, leftover = divmod(int(remaining_seconds), 3600)
    minutes = leftover // 60

    logger.info(f"Sleeping for {hours}h {minutes}m until {target_time.strftime('%H:%M')}")

    chunk_size = 300  # 5 minutes
    while remaining_seconds > 0:
        time.sleep(min(chunk_size, remaining_seconds))

        # Re-read the clock instead of subtracting the requested duration:
        # the actual time slept can differ from what was asked for.
        remaining_seconds = (target_time - datetime.now()).total_seconds()

        if remaining_seconds > 0:
            remaining = timedelta(seconds=int(remaining_seconds))
            logger.debug(f"Still waiting... {remaining} remaining")

    return True


def run_pipeline() -> int:
    """Import ``cron_runner`` from the project directory and run its ``main``.

    Returns:
        Whatever ``cron_runner.main()`` returns, or 1 if importing or running
        it raises an ``Exception`` (the traceback is logged).
    """
    logger.info("Starting content pipeline")

    try:
        # Put the project directory first on the import path so the
        # import below can find cron_runner.
        sys.path.insert(0, str(PROJECT_DIR))
        from cron_runner import main as pipeline_entry

        exit_code = pipeline_entry()
    except Exception as exc:
        logger.exception(f"Pipeline execution failed: {exc}")
        return 1

    return exit_code


def main():
    """Run one scheduler cycle: pick a time, record it, wait, run the pipeline.

    Returns:
        The value returned by ``run_pipeline()``.
    """
    banner = "=" * 60
    logger.info(banner)
    logger.info("Daily Content Scheduler Started")
    logger.info(f"Publish window: {PUBLISH_WINDOW_START_HOUR:02d}:00 - {PUBLISH_WINDOW_END_HOUR:02d}:00")
    logger.info(banner)

    # Choose today's publish time
    publish_time = get_random_publish_time()
    logger.info(f"Selected publish time: {publish_time.strftime('%Y-%m-%d %H:%M')}")

    # Record the chosen time in the log directory so it can be checked from outside
    (LOG_DIR / "scheduled_time.txt").write_text(f"{publish_time.isoformat()}\n")

    # Wait for the chosen time (returns at once if it has already passed)
    sleep_until(publish_time)

    exit_code = run_pipeline()
    logger.info(f"Scheduler finished with exit code: {exit_code}")
    return exit_code


# Run only when executed as a script; main()'s return value becomes the
# process exit status.
if __name__ == "__main__":
    sys.exit(main())
