import random
import time
from datetime import datetime, timedelta

from flask import Blueprint, jsonify, render_template, request, flash

from ..models import (
    ScheduleConfig,
    VolumeConfig,
    ActivityLog,
    PaperMetadata,
    ActivityCategory,
)
from ..db import db
from ..celery import celery

bp = Blueprint("scraper", __name__, url_prefix="/scraper")

# Global flags tracking scraper state. NOTE: these are per-process, so the
# Flask app and each Celery worker hold their own independent copy.
SCRAPER_ACTIVE = False
SCRAPER_PAUSED = False


@bp.route("/")
def index():
    """Render the scraper control panel."""
    volume_config = VolumeConfig.query.first()

    # Ensure we have a volume config
    if not volume_config:
        volume_config = VolumeConfig(volume=100)  # Default value
        db.session.add(volume_config)
        db.session.commit()

    # Ensure we have a schedule config for all 24 hours
    existing_hours = {record.hour: record for record in ScheduleConfig.query.all()}
    schedule_config = {}

    for hour in range(24):
        if hour in existing_hours:
            schedule_config[hour] = existing_hours[hour].weight
        else:
            # Create default schedule entry (weight 1.0)
            new_config = ScheduleConfig(hour=hour, weight=1.0)
            db.session.add(new_config)
            schedule_config[hour] = 1.0

    if len(existing_hours) < 24:
        db.session.commit()

    return render_template(
        "scraper.html.jinja",
        volume_config=volume_config,
        schedule_config=schedule_config,
        scraper_active=SCRAPER_ACTIVE,
        scraper_paused=SCRAPER_PAUSED,
    )


@bp.route("/start", methods=["POST"])
def start_scraper():
    """Start the scraper."""
    global SCRAPER_ACTIVE, SCRAPER_PAUSED

    if not SCRAPER_ACTIVE:
        SCRAPER_ACTIVE = True
        SCRAPER_PAUSED = False

        # Log the action
        ActivityLog.log_scraper_command(
            action="start_scraper",
            status="success",
            description="Scraper started manually",
        )

        # Start the scheduler task
        task = dummy_scraper_scheduler.delay()

        return jsonify({
            "success": True,
            "message": "Scraper started",
            "task_id": task.id,
        })
    else:
        return jsonify({
            "success": False,
            "message": "Scraper is already running",
        })


@bp.route("/stop", methods=["POST"])
def stop_scraper():
    """Stop the scraper."""
    global SCRAPER_ACTIVE, SCRAPER_PAUSED

    if SCRAPER_ACTIVE:
        SCRAPER_ACTIVE = False
        SCRAPER_PAUSED = False

        ActivityLog.log_scraper_command(
            action="stop_scraper",
            status="success",
            description="Scraper stopped manually",
        )

        return jsonify({
            "success": True,
            "message": "Scraper stopped",
        })
    else:
        return jsonify({
            "success": False,
            "message": "Scraper is not running",
        })


@bp.route("/pause", methods=["POST"])
def pause_scraper():
    """Pause or resume the scraper."""
    global SCRAPER_ACTIVE, SCRAPER_PAUSED

    if SCRAPER_ACTIVE and not SCRAPER_PAUSED:
        SCRAPER_PAUSED = True

        ActivityLog.log_scraper_command(
            action="pause_scraper",
            status="success",
            description="Scraper paused manually",
        )

        return jsonify({
            "success": True,
            "message": "Scraper paused",
        })
    elif SCRAPER_ACTIVE and SCRAPER_PAUSED:
        SCRAPER_PAUSED = False

        ActivityLog.log_scraper_command(
            action="resume_scraper",
            status="success",
            description="Scraper resumed manually",
        )

        return jsonify({
            "success": True,
            "message": "Scraper resumed",
        })
    else:
        return jsonify({
            "success": False,
            "message": "Scraper is not running",
        })


@bp.route("/status")
def scraper_status():
    """Get the current status of the scraper."""
    return jsonify({
        "active": SCRAPER_ACTIVE,
        "paused": SCRAPER_PAUSED,
        "current_hour": datetime.now().hour,
    })
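
# Illustrative usage of the control endpoints above. The host and port are
# assumptions (a default local dev server), not part of this module:
#
#   curl -X POST http://localhost:5000/scraper/start
#   curl -X POST http://localhost:5000/scraper/pause   # a second call resumes
#   curl -X POST http://localhost:5000/scraper/stop
#   curl http://localhost:5000/scraper/status
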
@bp.route("/stats")
def scraper_stats():
    """Get scraper statistics for the dashboard."""
    # Default to the last 24 hours of activity. The stats are keyed by hour
    # of day, so the window is capped at 24 to avoid colliding buckets.
    hours = 24
    if request.args.get("hours"):
        try:
            hours = int(request.args.get("hours"))
        except ValueError:
            pass
    hours = max(1, min(hours, 24))

    cutoff_time = datetime.utcnow().replace(minute=0, second=0, microsecond=0)

    # Get activity logs for scraper actions
    logs = ActivityLog.query.filter(
        ActivityLog.category == ActivityCategory.SCRAPER_ACTIVITY.value,
        ActivityLog.timestamp >= cutoff_time - timedelta(hours=hours),
    ).all()

    # Group by hour and status
    stats = {}
    for hour in range(hours):
        target_hour = (cutoff_time.hour - hour) % 24
        stats[target_hour] = {
            "success": 0,
            "error": 0,
            "pending": 0,
            "hour": target_hour,
        }

    for log in logs:
        hour = log.timestamp.hour
        if hour in stats:
            if log.status == "success":
                stats[hour]["success"] += 1
            elif log.status == "error":
                stats[hour]["error"] += 1
            elif log.status == "pending":
                stats[hour]["pending"] += 1

    # Convert to a list for easier consumption by JavaScript
    result = [stats[hour] for hour in sorted(stats.keys())]
    return jsonify(result)


@bp.route("/update_config", methods=["POST"])
def update_config():
    """Update scraper configuration."""
    data = request.json

    try:
        if "volume" in data:
            try:
                new_volume = float(data["volume"])

                # Validate volume value (range carried over from schedule.py)
                if new_volume < 1 or new_volume > 1000:
                    return jsonify({
                        "success": False,
                        "message": "Volume must be between 1 and 1000",
                    })

                volume_config = VolumeConfig.query.first()
                if not volume_config:
                    volume_config = VolumeConfig(volume=new_volume)
                    db.session.add(volume_config)
                else:
                    old_value = volume_config.volume
                    volume_config.volume = new_volume
                    ActivityLog.log_config_change(
                        config_key="scraper_volume",
                        old_value=old_value,
                        new_value=new_volume,
                        description="Updated scraper volume",
                    )
                db.session.commit()
            except (ValueError, TypeError):
                return jsonify({
                    "success": False,
                    "message": "Invalid volume value",
                })

        if "schedule" in data:
            try:
                schedule = data["schedule"]

                # Validate the entire schedule before writing anything
                for hour_str, weight in schedule.items():
                    try:
                        hour = int(hour_str)
                        weight = float(weight)

                        if hour < 0 or hour > 23:
                            return jsonify({
                                "success": False,
                                "message": f"Hour value must be between 0 and 23, got {hour}",
                            })

                        if weight < 0.1 or weight > 5:
                            return jsonify({
                                "success": False,
                                "message": f"Weight for hour {hour} must be between 0.1 and 5, got {weight}",
                            })
                    except ValueError:
                        return jsonify({
                            "success": False,
                            "message": f"Invalid data format for hour {hour_str}",
                        })

                # Update the schedule only after validation passes
                for hour_str, weight in schedule.items():
                    hour = int(hour_str)
                    weight = float(weight)

                    schedule_config = ScheduleConfig.query.get(hour)
                    if not schedule_config:
                        schedule_config = ScheduleConfig(hour=hour, weight=weight)
                        db.session.add(schedule_config)
                    else:
                        old_value = schedule_config.weight
                        schedule_config.weight = weight
                        ActivityLog.log_config_change(
                            config_key=f"schedule_hour_{hour}",
                            old_value=old_value,
                            new_value=weight,
                            description=f"Updated schedule weight for hour {hour}",
                        )
                db.session.commit()
            except Exception as e:
                db.session.rollback()
                return jsonify({
                    "success": False,
                    "message": f"Error updating schedule: {str(e)}",
                })

        return jsonify({"success": True, "message": "Configuration updated"})
    except Exception as e:
        db.session.rollback()
        return jsonify({"success": False, "message": f"Unexpected error: {str(e)}"})
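
# Illustrative payloads for the two JSON endpoints above (values are
# examples only, not defaults):
#
# GET /scraper/stats?hours=24 returns one entry per hour of day, e.g.:
#
#   [{"hour": 0, "success": 3, "error": 1, "pending": 0}, ...]
#
# POST /scraper/update_config accepts either or both keys:
#
#   {
#       "volume": 150,
#       "schedule": {"0": 0.5, "8": 2.0, "23": 1.0}
#   }
#
# Schedule keys are hour strings mapping to weights in [0.1, 5]; hours not
# present in the payload keep their current weight.
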
@bp.route("/schedule", methods=["GET", "POST"])
def schedule():
    """Legacy route to maintain compatibility with the schedule blueprint."""
    # For GET requests, render the scraper index with the schedule tab active
    if request.method == "GET":
        return index()

    # For POST requests, handle form data like the original schedule blueprint
    if request.method == "POST":
        try:
            # Check whether we're updating the volume or the schedule
            if "total_volume" in request.form:
                # Volume update
                try:
                    new_volume = float(request.form.get("total_volume", 0))
                    if new_volume < 1 or new_volume > 1000:
                        raise ValueError("Volume must be between 1 and 1000")

                    volume_config = VolumeConfig.query.first()
                    if not volume_config:
                        volume_config = VolumeConfig(volume=new_volume)
                        db.session.add(volume_config)
                    else:
                        volume_config.volume = new_volume

                    db.session.commit()
                    flash("Volume updated successfully!", "success")
                except ValueError as e:
                    db.session.rollback()
                    flash(f"Error updating volume: {str(e)}", "error")
            else:
                # Schedule update: validate all form data first
                for hour in range(24):
                    key = f"hour_{hour}"
                    if key not in request.form:
                        raise ValueError(f"Missing data for hour {hour}")

                    # Parse and range-check separately so the range error
                    # message is not swallowed by the parse handler
                    try:
                        weight = float(request.form.get(key, 0))
                    except ValueError:
                        raise ValueError(f"Invalid weight value for hour {hour}")
                    if weight < 0 or weight > 5:
                        raise ValueError(
                            f"Weight for hour {hour} must be between 0 and 5"
                        )

                # Update the database only if validation passed
                for hour in range(24):
                    key = f"hour_{hour}"
                    weight = float(request.form.get(key, 0))

                    config = ScheduleConfig.query.get(hour)
                    if config:
                        config.weight = weight
                    else:
                        db.session.add(ScheduleConfig(hour=hour, weight=weight))

                db.session.commit()
                flash("Schedule updated successfully!", "success")
        except ValueError as e:
            db.session.rollback()
            flash(f"Error updating schedule: {str(e)}", "error")

        # Re-render the scraper page
        return index()


# Calculate schedule information for visualization/decision making
def get_schedule_stats():
    """Get statistics about the current schedule configuration."""
    volume_config = VolumeConfig.query.first()
    if not volume_config:
        return {"error": "No volume configuration found"}

    total_volume = volume_config.volume

    schedule_configs = ScheduleConfig.query.all()
    if not schedule_configs:
        return {"error": "No schedule configuration found"}

    # Calculate total weight
    total_weight = sum(config.weight for config in schedule_configs)

    # Calculate papers per hour, proportional to each hour's weight
    papers_per_hour = {}
    for config in schedule_configs:
        weight_ratio = config.weight / total_weight if total_weight > 0 else 0
        papers = weight_ratio * total_volume
        papers_per_hour[config.hour] = papers

    return {
        "total_volume": total_volume,
        "total_weight": total_weight,
        "papers_per_hour": papers_per_hour,
    }


# Enhanced API route to get schedule information
@bp.route("/schedule_info")
def schedule_info():
    """Get information about the current schedule configuration."""
    stats = get_schedule_stats()
    return jsonify(stats)
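
# Worked example for get_schedule_stats(): with total_volume = 100 and all 24
# weights at the default 1.0, total_weight = 24 and every hour gets
# (1.0 / 24) * 100 ≈ 4.17 papers. Raising one hour's weight to 2.0 makes
# total_weight = 25, so that hour gets (2.0 / 25) * 100 = 8.0 papers while
# each remaining hour drops to (1.0 / 25) * 100 = 4.0.
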
# Define the Celery tasks
@celery.task(bind=True)
def dummy_scraper_scheduler(self):
    """Main scheduler task for the dummy scraper."""
    global SCRAPER_ACTIVE, SCRAPER_PAUSED

    if not SCRAPER_ACTIVE:
        return {"status": "Scraper not active"}

    if SCRAPER_PAUSED:
        return {"status": "Scraper paused"}

    # Calculate how many papers to scrape based on the current hour and config
    current_hour = datetime.now().hour
    hour_config = ScheduleConfig.query.get(current_hour)
    volume_config = VolumeConfig.query.first()

    if not hour_config or not volume_config:
        return {"status": "Missing configuration"}

    # Calculate papers to scrape this hour: scale the base hourly rate by the
    # hour's weight, so higher-weighted hours scrape more papers (consistent
    # with get_schedule_stats above)
    hourly_rate = volume_config.volume / 24  # Base rate per hour
    adjusted_rate = hourly_rate * hour_config.weight
    papers_to_scrape = int(adjusted_rate)

    # Log the scheduling decision
    ActivityLog.log_scraper_activity(
        action="schedule_papers",
        status="success",
        description=f"Scheduled {papers_to_scrape} papers for scraping at hour {current_hour}",
        hourly_rate=hourly_rate,
        weight=hour_config.weight,
        adjusted_rate=adjusted_rate,
    )

    # Launch individual scraping tasks
    for _ in range(papers_to_scrape):
        if not SCRAPER_ACTIVE or SCRAPER_PAUSED:
            break

        # Schedule a new paper to be scraped
        dummy_scrape_paper.delay()

    # Schedule the next run in 5 minutes if still active
    if SCRAPER_ACTIVE:
        dummy_scraper_scheduler.apply_async(countdown=300)  # 5 minutes

    return {"status": "success", "papers_scheduled": papers_to_scrape}


@celery.task(bind=True)
def dummy_scrape_paper(self):
    """Simulate scraping a single paper."""
    # Simulate success or failure
    success = random.random() > 0.3  # 70% success rate

    # Simulate processing time
    time.sleep(random.randint(2, 5))  # 2-5 seconds

    if success:
        # Create a dummy paper
        new_paper = PaperMetadata(
            title=f"Dummy Paper {random.randint(1000, 9999)}",
            doi=f"10.1234/dummy.{random.randint(1000, 9999)}",
            journal=random.choice([
                "Nature", "Science", "PLOS ONE",
                "Journal of Dummy Research",
                "Proceedings of the Dummy Society",
                "Cell", "Dummy Review Letters",
            ]),
            type="article",
            language="en",
            published_online=datetime.now().date(),
            status="Done",
            file_path="/path/to/dummy/paper.pdf",
        )
        db.session.add(new_paper)
        db.session.commit()

        # Log the successful scrape
        ActivityLog.log_scraper_activity(
            action="scrape_paper",
            paper_id=new_paper.id,
            status="success",
            description=f"Successfully scraped paper {new_paper.doi}",
        )

        return {
            "success": True,
            "paper_id": new_paper.id,
            "title": new_paper.title,
            "doi": new_paper.doi,
        }
    else:
        # Log the failed scrape
        error_message = random.choice([
            "Connection timeout",
            "404 Not Found",
            "Access denied",
            "Invalid DOI format",
            "PDF download failed",
            "Rate limited by publisher",
        ])

        ActivityLog.log_scraper_activity(
            action="scrape_paper",
            status="error",
            description=f"Failed to scrape paper: {error_message}",
        )

        return {
            "success": False,
            "error": error_message,
        }
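
# Illustrative smoke test: run one scrape task synchronously in-process,
# without a Celery worker. The app-factory import (`create_app` in the
# package root) is an assumption, not defined in this module:
#
#   from .. import create_app   # hypothetical factory location
#   app = create_app()
#   with app.app_context():
#       result = dummy_scrape_paper.apply().get()
#       print(result)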