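"""API routes for the scraper web application.

Registers Flask endpoints for starting and stopping the Celery-backed
scraping task, paging through the application log, and downloading or
deleting data, log, and temporary zip files.
"""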

from flask import jsonify, request, send_from_directory, current_app

import threading
import os
from datetime import datetime

from app.util import create_zip, delete_old_zips, tail
from app.forms import ScrapingForm
from app.tasks import start_scraping_task, stop_scraping_task, get_redis

# Legacy module-level state; `scraper` is still consulted in delete_files()
scraping_thread = None
scraper = None
scrape_lock = threading.Lock()
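
# Redis keys used by the routes below:
#   current_faction_id   -> faction id of the active scraping session
#   scraper:{faction_id} -> hash with fields such as "scraping_active"
#                           ("1"/"0") and "end_time"
# The string comparisons assume get_redis() returns a client created with
# decode_responses=True; otherwise get()/hget() would return bytes.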


def register_api(app):
    @app.route('/start_scraping', methods=['POST'])
    def start_scraping():
        form = ScrapingForm()
        if form.validate_on_submit():
            redis_client = get_redis()
            faction_id = form.faction_id.data

            # Bail out if scraping is already active for this faction
            if redis_client.hget(f"scraper:{faction_id}", "scraping_active") == "1":
                return jsonify({"status": "Scraping already in progress"})

            # Pass only the needed config values as a serializable dict
            config_dict = {
                'DATA': {'DATA_DIR': current_app.config['DATA']['DATA_DIR']},
                'DEFAULT': {'API_KEY': current_app.config['DEFAULT']['API_KEY']}
            }

            start_scraping_task.delay(
                faction_id,
                int(form.fetch_interval.data),  # Celery arguments must be plain ints
                int(form.run_interval.data),
                config_dict
            )
            return jsonify({"status": "Scraping started"})
        return jsonify({"status": "Invalid form data"})
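
    # Hypothetical invocation sketch -- the field names come from ScrapingForm
    # above; host, port, and CSRF handling are assumptions:
    #
    #   curl -X POST http://localhost:5000/start_scraping \
    #        -d "faction_id=12345" -d "fetch_interval=30" -d "run_interval=3600"
    #
    # -> {"status": "Scraping started"}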

    @app.route('/stop_scraping', methods=['POST'])
    def stop_scraping():
        redis_client = get_redis()
        faction_id = redis_client.get("current_faction_id")
        if not faction_id:
            return jsonify({"status": "No active scraping session"})

        stop_scraping_task.delay(faction_id)
        return jsonify({"status": "Stopping scraping"})

    @app.route('/logfile', methods=['GET'])
    def logfile():
        # Assumes the first logger handler is the file handler
        log_file_path = current_app.logger.handlers[0].baseFilename

        page = int(request.args.get('page', 0))
        lines_per_page = int(request.args.get(
            'lines_per_page', current_app.config['LOGGING']['VIEW_PAGE_LINES']))

        if not os.path.isfile(log_file_path):
            current_app.logger.error("Log file not found")
            return jsonify({"error": "Log file not found"}), 404

        # Read the newest lines and reverse them so page 0 is the most recent
        log_lines = list(tail(log_file_path, current_app.config['LOGGING']['VIEW_MAX_LINES']))
        log_lines = log_lines[::-1]

        start = page * lines_per_page
        end = start + lines_per_page
        paginated_lines = log_lines[start:end] if start < len(log_lines) else []

        return jsonify({
            "log": paginated_lines,
            "total_lines": len(log_lines),
            "pages": (len(log_lines) + lines_per_page - 1) // lines_per_page,
            "start_line": len(log_lines) - start
        })
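
    # Example response shape (values illustrative):
    #   {"log": ["<newest line>", "..."], "total_lines": 120,
    #    "pages": 6, "start_line": 120}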

    @app.route('/download_files', methods=['POST'])
    def download_files():
        delete_old_zips()  # Clean up old zip files first

        file_paths = request.json.get('file_paths')
        if not file_paths:
            return jsonify({"error": "No files specified"}), 400

        # Absolute path of the application's parent directory
        parent_dir = os.path.abspath(os.path.join(app.root_path, os.pardir))

        # Keep only paths that point at existing files under /data/ or /log/
        valid_file_paths = []
        for file_path in file_paths:
            if file_path.startswith(('/data/', '/log/')):
                full_path = os.path.join(parent_dir, file_path.lstrip('/'))
                if os.path.isfile(full_path):
                    valid_file_paths.append(full_path)

        if not valid_file_paths:
            return jsonify({"error": "No valid files specified"}), 400

        # Create a uniquely named zip and serve it from the temp directory
        zip_name = f"files_{datetime.now().strftime('%Y%m%d%H%M%S')}.zip"
        create_zip(valid_file_paths, zip_name, app)

        current_app.logger.debug(f"Sending file from directory: temp, file: {zip_name}")
        return download_tmp_file(zip_name)
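
    # The '/data/' and '/log/' prefix checks above restrict which directories
    # are reachable, but a crafted "../" segment could still escape them.
    # A minimal containment check, shown as a sketch only (the helper is
    # hypothetical and not wired into the routes above):
    def _is_safely_within(base_dir, candidate_path):
        base = os.path.realpath(base_dir)
        target = os.path.realpath(candidate_path)
        # Safe only if, after resolving symlinks and "..", the candidate
        # still lives under the base directory
        return os.path.commonpath([base, target]) == base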

    @app.route('/delete_files', methods=['POST'])
    def delete_files():
        # Assumes the first logger handler is the active file handler
        log_file_name = current_app.logger.handlers[0].baseFilename
        file_paths = request.json.get('file_paths', [])

        if not file_paths:
            return jsonify({"error": "No files specified"}), 400

        errors = []
        data_dir = os.path.abspath(current_app.config['DATA']['DATA_DIR'])
        log_dir = os.path.abspath(current_app.config['LOGGING']['LOG_DIR'])

        for file_path in file_paths:
            # Strip the literal prefix by slicing; str.lstrip() would strip a
            # character set and mangle names such as "/data/data.csv"
            if file_path.startswith('/data/'):
                full_file_path = os.path.join(data_dir, file_path[len('/data/'):])
            elif file_path.startswith('/log/'):
                full_file_path = os.path.join(log_dir, file_path[len('/log/'):])
            else:
                errors.append({"file": file_path, "error": "File not in allowed directory"})
                continue

            # Never delete the currently active log file
            if full_file_path == log_file_name:
                errors.append({"file": file_path, "error": "Cannot delete active log file."})
                continue

            # Never delete a data file the scraper is still writing to
            if scraper and scraper.data_file_name == full_file_path:
                errors.append({"file": file_path, "error": "Cannot delete active data file."})
                continue

            if not os.path.isfile(full_file_path):
                errors.append({"file": file_path, "error": "File not found"})
                continue

            try:
                os.remove(full_file_path)
            except OSError as e:
                errors.append({"file": file_path, "error": str(e)})

        if errors:
            return jsonify({"errors": errors}), 207  # Multi-Status response
        return jsonify({"success": True}), 200
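
    # Example partial-failure response (207 Multi-Status, values illustrative):
    #   {"errors": [{"file": "/log/app.log", "error": "Cannot delete active log file."}]}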

    @app.route('/data/<path:filename>')
    def download_data_file(filename):
        # send_from_directory rejects paths that would escape the directory
        data_dir = os.path.abspath(current_app.config['DATA']['DATA_DIR'])
        return send_from_directory(directory=data_dir, path=filename, as_attachment=True)

    @app.route('/log/<path:filename>')
    def download_log_file(filename):
        log_dir = os.path.abspath(current_app.config['LOGGING']['LOG_DIR'])
        return send_from_directory(directory=log_dir, path=filename, as_attachment=True)

    @app.route('/tmp/<path:filename>')
    def download_tmp_file(filename):
        tmp_dir = os.path.abspath(current_app.config['TEMP']['TEMP_DIR'])
        return send_from_directory(directory=tmp_dir, path=filename, as_attachment=True)

    @app.route('/config/lines_per_page')
    def get_lines_per_page():
        lines_per_page = current_app.config['LOGGING']['VIEW_PAGE_LINES']
        return jsonify({"lines_per_page": lines_per_page})

    @app.route('/scraping_status', methods=['GET'])
    def scraping_status():
        redis_client = get_redis()
        current_faction_id = redis_client.get("current_faction_id")

        if not current_faction_id:
            return jsonify({"scraping_active": False})

        scraping_active = redis_client.hget(f"scraper:{current_faction_id}", "scraping_active")

        # A faction id without an active flag is stale state; clean it up
        if not scraping_active or scraping_active == "0":
            redis_client.delete("current_faction_id")
            return jsonify({"scraping_active": False})

        return jsonify({
            "scraping_active": True,
            "faction_id": current_faction_id
        })
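
    # Example responses (faction id illustrative):
    #   active:   {"scraping_active": true, "faction_id": "12345"}
    #   inactive: {"scraping_active": false}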

    @app.route('/scraping_get_end_time')
    def scraping_get_end_time():
        redis_client = get_redis()
        current_faction_id = redis_client.get("current_faction_id")

        if not current_faction_id:
            return jsonify({"scraping_active": False})

        end_time = redis_client.hget(f"scraper:{current_faction_id}", "end_time")
        if not end_time:
            return jsonify({"scraping_active": False})

        return jsonify({
            "end_time": end_time,
            "faction_id": current_faction_id
        })
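
    # Example response while a session is running (values illustrative; the
    # end_time format is whatever the scraping task stored in Redis):
    #   {"end_time": "1718035200", "faction_id": "12345"}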