"""Flask API routes for controlling the faction scraper and for viewing,
downloading, and deleting its log and data files."""

from flask import jsonify, request, send_from_directory, current_app
import threading
import os
from datetime import datetime

from app.util import create_zip, delete_old_zips, tail
from app.forms import ScrapingForm
from app.tasks import start_scraping_task, stop_scraping_task, get_redis

# Legacy globals from the pre-Celery threaded implementation. The Celery
# tasks now own the scraping lifecycle; `scraper` is only consulted by
# delete_files() to avoid removing an in-use data file.
scraping_thread = None
scraper = None
scrape_lock = threading.Lock()


def register_api(app):
    @app.route('/start_scraping', methods=['POST'])
    def start_scraping():
        form = ScrapingForm()
        if form.validate_on_submit():
            redis_client = get_redis()
            faction_id = form.faction_id.data
            # Check whether scraping is already active for this faction
            if redis_client.hget(f"scraper:{faction_id}", "scraping_active") == "1":
                return jsonify({"status": "Scraping already in progress"})
            # Pass only the serializable config values the task needs
            config_dict = {
                'DATA': {'DATA_DIR': current_app.config['DATA']['DATA_DIR']},
                'DEFAULT': {'API_KEY': current_app.config['DEFAULT']['API_KEY']},
            }
            start_scraping_task.delay(
                faction_id,
                int(form.fetch_interval.data),  # ensure these are ints
                int(form.run_interval.data),
                config_dict,
            )
            return jsonify({"status": "Scraping started"})
        return jsonify({"status": "Invalid form data"})

    @app.route('/stop_scraping', methods=['POST'])
    def stop_scraping():
        redis_client = get_redis()
        faction_id = redis_client.get("current_faction_id")
        if not faction_id:
            return jsonify({"status": "No active scraping session"})
        stop_scraping_task.delay(faction_id)
        return jsonify({"status": "Stopping scraping"})

    @app.route('/logfile', methods=['GET'])
    def logfile():
        log_file_path = current_app.logger.handlers[0].baseFilename
        page = int(request.args.get('page', 0))  # page number
        lines_per_page = int(request.args.get(
            'lines_per_page', current_app.config['LOGGING']['VIEW_PAGE_LINES']))
        if not os.path.isfile(log_file_path):
            current_app.logger.error("Log file not found")
            return jsonify({"error": "Log file not found"}), 404
        # Read at most VIEW_MAX_LINES from the end of the file, newest first
        log_lines = list(tail(log_file_path, current_app.config['LOGGING']['VIEW_MAX_LINES']))
        log_lines = log_lines[::-1]
        start = page * lines_per_page
        end = start + lines_per_page
        paginated_lines = log_lines[start:end] if start < len(log_lines) else []
        return jsonify({
            "log": paginated_lines,
            "total_lines": len(log_lines),
            "pages": (len(log_lines) + lines_per_page - 1) // lines_per_page,
            "start_line": len(log_lines) - start,
        })

    @app.route('/download_files', methods=['POST'])
    def download_files():
        delete_old_zips()  # clean up old zip files
        file_paths = request.json.get('file_paths')
        if not file_paths:
            return jsonify({"error": "No files specified"}), 400
        # Resolve requested paths relative to the project root (the parent
        # of the app package); only /data/ and /log/ paths are accepted
        parent_dir = os.path.abspath(os.path.join(app.root_path, os.pardir))
        valid_file_paths = []
        for file_path in file_paths:
            if file_path.startswith(('/data/', '/log/')):
                full_path = os.path.join(parent_dir, file_path.lstrip('/'))
                if os.path.isfile(full_path):
                    valid_file_paths.append(full_path)
        if not valid_file_paths:
            return jsonify({"error": "No valid files specified"}), 400
        # Create a uniquely named zip in the temp directory and serve it
        zip_name = f"files_{datetime.now().strftime('%Y%m%d%H%M%S')}.zip"
        create_zip(valid_file_paths, zip_name, app)
        current_app.logger.debug(f"Sending file from directory: temp, file: {zip_name}")
        return download_tmp_file(zip_name)

    @app.route('/delete_files', methods=['POST'])
    def delete_files():
        log_file_name = current_app.logger.handlers[0].baseFilename
        file_paths = request.json.get('file_paths', [])
        if not file_paths:
            return jsonify({"error": "No files specified"}), 400
        errors = []
        data_dir = os.path.abspath(current_app.config['DATA']['DATA_DIR'])
        log_dir = os.path.abspath(current_app.config['LOGGING']['LOG_DIR'])
        for file_path in file_paths:
            # str.lstrip() strips a character set, not a prefix, so slice
            # off the literal prefix instead
            if file_path.startswith('/data/'):
                full_file_path = os.path.join(data_dir, file_path[len('/data/'):])
            elif file_path.startswith('/log/'):
                full_file_path = os.path.join(log_dir, file_path[len('/log/'):])
            else:
                errors.append({"file": file_path, "error": "File not in allowed directory"})
                continue
            # Confirm the resolved path stays inside the data or log folder
            full_file_path = os.path.abspath(full_file_path)
            if not (full_file_path.startswith(data_dir + os.sep)
                    or full_file_path.startswith(log_dir + os.sep)):
                errors.append({"file": file_path, "error": "File not in allowed directory"})
                continue
            # Never delete the currently active log file
            if full_file_path == log_file_name:
                errors.append({"file": file_path, "error": "Cannot delete active log file."})
                continue
            # Never delete a data file the scraper is still writing to
            if scraper and scraper.data_file_name == full_file_path:
                errors.append({"file": file_path, "error": "Cannot delete active data file."})
                continue
            if not os.path.isfile(full_file_path):
                errors.append({"file": file_path, "error": "File not found"})
                continue
            try:
                os.remove(full_file_path)
            except Exception as e:
                errors.append({"file": file_path, "error": str(e)})
        if errors:
            return jsonify({"errors": errors}), 207  # Multi-Status response
        return jsonify({"success": True}), 200

    @app.route('/data/<path:filename>')
    def download_data_file(filename):
        data_dir = os.path.abspath(current_app.config['DATA']['DATA_DIR'])
        return send_from_directory(directory=data_dir, path=filename, as_attachment=True)

    @app.route('/log/<path:filename>')
    def download_log_file(filename):
        log_dir = os.path.abspath(current_app.config['LOGGING']['LOG_DIR'])
        return send_from_directory(directory=log_dir, path=filename, as_attachment=True)

    @app.route('/tmp/<path:filename>')
    def download_tmp_file(filename):
        tmp_dir = os.path.abspath(current_app.config['TEMP']['TEMP_DIR'])
        return send_from_directory(directory=tmp_dir, path=filename, as_attachment=True)

    @app.route('/config/lines_per_page')
    def get_lines_per_page():
        return jsonify({"lines_per_page": current_app.config['LOGGING']['VIEW_PAGE_LINES']})

    @app.route('/scraping_status', methods=['GET'])
    def scraping_status():
        redis_client = get_redis()
        current_faction_id = redis_client.get("current_faction_id")
        if not current_faction_id:
            return jsonify({"scraping_active": False})
        scraping_active = redis_client.hget(f"scraper:{current_faction_id}", "scraping_active")
        # A faction_id without an active scrape is stale state; clean it up
        if not scraping_active or scraping_active == "0":
            redis_client.delete("current_faction_id")
            return jsonify({"scraping_active": False})
        return jsonify({
            "scraping_active": True,
            "faction_id": current_faction_id,
        })

    @app.route('/scraping_get_end_time')
    def scraping_get_end_time():
        redis_client = get_redis()
        current_faction_id = redis_client.get("current_faction_id")
        if not current_faction_id:
            return jsonify({"scraping_active": False})
        end_time = redis_client.hget(f"scraper:{current_faction_id}", "end_time")
        if not end_time:
            return jsonify({"scraping_active": False})
        return jsonify({
            "end_time": end_time,
            "faction_id": current_faction_id,
        })
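

if __name__ == "__main__":
    # Minimal manual-test harness -- a sketch, not part of the original
    # module. It builds a bare Flask app carrying the nested config keys
    # the routes above read (the layout is inferred from the
    # current_app.config[...] lookups; the literal values here are
    # placeholders), registers the routes, and serves them with the dev
    # server. Assumes execution from the project root so the app.*
    # imports resolve, e.g. `python -m app.api`.
    from flask import Flask

    demo_app = Flask(__name__)
    demo_app.config.update({
        'SECRET_KEY': 'dev-only',  # needed by ScrapingForm's CSRF check
        'DATA': {'DATA_DIR': 'data'},
        'DEFAULT': {'API_KEY': 'changeme'},
        'LOGGING': {'LOG_DIR': 'log', 'VIEW_PAGE_LINES': 50, 'VIEW_MAX_LINES': 1000},
        'TEMP': {'TEMP_DIR': 'temp'},
    })
    register_api(demo_app)
    demo_app.run(debug=True)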