from flask import jsonify, request, send_from_directory, current_app
import threading
import os
from datetime import datetime

from app.util import create_zip, delete_old_zips, tail
from app.forms import ScrapingForm
from app.tasks import start_scraping_task, stop_scraping_task, get_redis

# Globals left over from the earlier threaded implementation; `scraper` is
# still consulted in delete_files() below to guard the active data file.
scraping_thread = None
scraper = None
scrape_lock = threading.Lock()
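
# A minimal sketch of what the imported get_redis() might look like (an
# assumption for illustration; the real helper lives in app.tasks). The
# decode_responses=True flag matters: the handlers below compare hget()
# results to str values such as "1", which requires decoded responses.
import redis  # assumed to be installed, since app.tasks talks to Redis


def _get_redis_example():
    # Connection details are illustrative defaults, not project config.
    return redis.Redis(host="localhost", port=6379, db=0, decode_responses=True)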


def register_api(app):
    @app.route('/start_scraping', methods=['POST'])
    def start_scraping():
        form = ScrapingForm()
        if form.validate_on_submit():
            redis_client = get_redis()
            faction_id = form.faction_id.data
            # Check if scraping is already active for this faction
            if redis_client.hget(f"scraper:{faction_id}", "scraping_active") == "1":
                return jsonify({"status": "Scraping already in progress"})
            # Reduce the config to a serializable dict with only the values
            # the task needs
            config_dict = {
                'DATA': {'DATA_DIR': current_app.config['DATA']['DATA_DIR']},
                'DEFAULT': {'API_KEY': current_app.config['DEFAULT']['API_KEY']}
            }
            start_scraping_task.delay(
                faction_id,
                int(form.fetch_interval.data),  # ensure ints cross the task boundary
                int(form.run_interval.data),
                config_dict
            )
            return jsonify({"status": "Scraping started"})
        return jsonify({"status": "Invalid form data"})

    @app.route('/stop_scraping', methods=['POST'])
    def stop_scraping():
        redis_client = get_redis()
        faction_id = redis_client.get("current_faction_id")
        if not faction_id:
            return jsonify({"status": "No active scraping session"})
        stop_scraping_task.delay(faction_id)
        return jsonify({"status": "Stopping scraping"})

    @app.route('/logfile', methods=['GET'])
    def logfile():
        # Assumes the logger's first handler is a FileHandler for the active log
        log_file_path = current_app.logger.handlers[0].baseFilename
        page = int(request.args.get('page', 0))
        lines_per_page = int(request.args.get(
            'lines_per_page', current_app.config['LOGGING']['VIEW_PAGE_LINES']))
        if not os.path.isfile(log_file_path):
            current_app.logger.error("Log file not found")
            return jsonify({"error": "Log file not found"}), 404
        # Newest lines first
        log_lines = list(tail(log_file_path, current_app.config['LOGGING']['VIEW_MAX_LINES']))
        log_lines = log_lines[::-1]
        start = page * lines_per_page
        end = start + lines_per_page
        paginated_lines = log_lines[start:end] if start < len(log_lines) else []
        return jsonify({
            "log": paginated_lines,
            "total_lines": len(log_lines),
            "pages": (len(log_lines) + lines_per_page - 1) // lines_per_page,
            "start_line": len(log_lines) - start
        })
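
    # Illustrative sketch (assumption): a tail() like the one imported from
    # app.util, returning the last max_lines lines of a file. The real
    # implementation may differ; this version reads blocks backwards from
    # the end so large logs are never loaded fully into memory.
    def _tail_example(path, max_lines, block_size=4096):
        with open(path, 'rb') as f:
            f.seek(0, os.SEEK_END)
            end = f.tell()
            data = b''
            # Prepend blocks until we have enough newlines or hit the start
            while end > 0 and data.count(b'\n') <= max_lines:
                start = max(0, end - block_size)
                f.seek(start)
                data = f.read(end - start) + data
                end = start
        lines = data.splitlines()[-max_lines:]
        return [line.decode('utf-8', errors='replace') for line in lines]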

    @app.route('/download_files', methods=['POST'])
    def download_files():
        delete_old_zips()  # Clean up old zip files
        file_paths = request.json.get('file_paths')
        if not file_paths:
            return jsonify({"error": "No files specified"}), 400
        # Absolute path of the project's parent directory, which contains
        # the data/ and log/ folders
        parent_dir = os.path.abspath(os.path.join(app.root_path, os.pardir))
        # Validate the requested paths: only files under /data/ or /log/
        # that actually exist are accepted
        valid_file_paths = []
        for file_path in file_paths:
            if file_path.startswith(('/data/', '/log/')):
                full_path = os.path.join(parent_dir, file_path.lstrip('/'))
                if os.path.isfile(full_path):
                    valid_file_paths.append(full_path)
        if not valid_file_paths:
            return jsonify({"error": "No valid files specified"}), 400
        # Create a unique zip file name and build the archive in the temp dir
        zip_name = f"files_{datetime.now().strftime('%Y%m%d%H%M%S')}.zip"
        create_zip(valid_file_paths, zip_name, app)
        current_app.logger.debug(f"Sending file from directory: temp, file: {zip_name}")
        return download_tmp_file(zip_name)
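
    # Illustrative sketch (assumption): a create_zip() like the one imported
    # from app.util, writing the archive into the app's temp directory so
    # the /tmp/<filename> route can serve it. The real helper may differ.
    import zipfile

    def _create_zip_example(file_paths, zip_name, app):
        tmp_dir = os.path.abspath(app.config['TEMP']['TEMP_DIR'])
        os.makedirs(tmp_dir, exist_ok=True)
        zip_path = os.path.join(tmp_dir, zip_name)
        with zipfile.ZipFile(zip_path, 'w', zipfile.ZIP_DEFLATED) as zf:
            for path in file_paths:
                # Store each file under its basename to avoid leaking
                # absolute server paths inside the archive
                zf.write(path, arcname=os.path.basename(path))
        return zip_path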

    @app.route('/delete_files', methods=['POST'])
    def delete_files():
        log_file_name = current_app.logger.handlers[0].baseFilename
        file_paths = request.json.get('file_paths', [])
        if not file_paths:
            return jsonify({"error": "No files specified"}), 400
        errors = []
        data_dir = os.path.abspath(current_app.config['DATA']['DATA_DIR'])
        log_dir = os.path.abspath(current_app.config['LOGGING']['LOG_DIR'])
        for file_path in file_paths:
            # str.lstrip would strip a *character set* rather than a prefix
            # (mangling names like 'data.csv'), so slice the prefix off instead
            if file_path.startswith('/data/'):
                full_file_path = os.path.join(data_dir, file_path[len('/data/'):])
            elif file_path.startswith('/log/'):
                full_file_path = os.path.join(log_dir, file_path[len('/log/'):])
            else:
                errors.append({"file": file_path, "error": "File not in allowed directory"})
                continue
            # Keep the resolved path inside the data or log folder (guards
            # against traversal such as '/data/../secret')
            full_file_path = os.path.normpath(full_file_path)
            if not (full_file_path.startswith(data_dir + os.sep) or
                    full_file_path.startswith(log_dir + os.sep)):
                errors.append({"file": file_path, "error": "File not in allowed directory"})
                continue
            # Check if it's the currently active log file
            if full_file_path == log_file_name:
                errors.append({"file": file_path, "error": "Cannot delete active log file."})
                continue
            # Check if it's an active data file
            if scraper and scraper.data_file_name == full_file_path:
                errors.append({"file": file_path, "error": "Cannot delete active data file."})
                continue
            if not os.path.isfile(full_file_path):
                errors.append({"file": file_path, "error": "File not found"})
                continue
            try:
                os.remove(full_file_path)
            except Exception as e:
                errors.append({"file": file_path, "error": str(e)})
        if errors:
            return jsonify({"errors": errors}), 207  # Multi-Status response
        return jsonify({"success": True}), 200

    @app.route('/data/<path:filename>')
    def download_data_file(filename):
        # send_from_directory resolves the path safely within the directory
        data_dir = os.path.abspath(current_app.config['DATA']['DATA_DIR'])
        return send_from_directory(directory=data_dir, path=filename, as_attachment=True)

    @app.route('/log/<path:filename>')
    def download_log_file(filename):
        log_dir = os.path.abspath(current_app.config['LOGGING']['LOG_DIR'])
        return send_from_directory(directory=log_dir, path=filename, as_attachment=True)

    @app.route('/tmp/<path:filename>')
    def download_tmp_file(filename):
        tmp_dir = os.path.abspath(current_app.config['TEMP']['TEMP_DIR'])
        return send_from_directory(directory=tmp_dir, path=filename, as_attachment=True)

    @app.route('/config/lines_per_page')
    def get_lines_per_page():
        lines_per_page = current_app.config['LOGGING']['VIEW_PAGE_LINES']
        return jsonify({"lines_per_page": lines_per_page})

    @app.route('/scraping_status', methods=['GET'])
    def scraping_status():
        redis_client = get_redis()
        current_faction_id = redis_client.get("current_faction_id")
        if not current_faction_id:
            return jsonify({"scraping_active": False})
        scraping_active = redis_client.hget(f"scraper:{current_faction_id}", "scraping_active")
        # If we have a faction_id but scraping is not active, clean up the stale state
        if not scraping_active or scraping_active == "0":
            redis_client.delete("current_faction_id")
            return jsonify({"scraping_active": False})
        return jsonify({
            "scraping_active": True,
            "faction_id": current_faction_id
        })

    @app.route('/scraping_get_end_time')
    def scraping_get_end_time():
        redis_client = get_redis()
        current_faction_id = redis_client.get("current_faction_id")
        if not current_faction_id:
            return jsonify({"scraping_active": False})
        end_time = redis_client.hget(f"scraper:{current_faction_id}", "end_time")
        if not end_time:
            return jsonify({"scraping_active": False})
        return jsonify({
            "end_time": end_time,
            "faction_id": current_faction_id
        })
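

# Illustrative usage sketch (assumption): how register_api() would typically
# be wired up. The project's actual application factory may differ in name
# and location, and a real app would also need the DATA/LOGGING/TEMP config
# keys that the routes above read.
if __name__ == "__main__":
    from flask import Flask

    demo_app = Flask(__name__)
    register_api(demo_app)
    # Routes are now registered; run the dev server for a quick check.
    demo_app.run(debug=True)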