Michael Beck 5e98a0ba47 init
2025-02-22 16:55:41 +01:00

45 lines
1.7 KiB
Python

from flask import current_app, url_for
import os
import pandas as pd
def prepare_data(df):
    """
    Enrich a user-activity DataFrame with the derived columns used for analysis.

    The frame is modified in place and the same object is returned.

    Parameters:
        df (pd.DataFrame): Activity records. Must provide the columns
            ``user_id``, ``timestamp`` and ``last_action``; any other
            columns (e.g. ``name``, ``status``) are left untouched.

    Returns:
        pd.DataFrame: ``df`` itself, where ``timestamp`` and ``last_action``
        have been converted to datetimes and these columns have been added:

        - ``prev_timestamp``: the ``timestamp`` of the preceding row that
          has the same ``user_id`` (``NaT`` for a user's first row).
        - ``was_active``: ``True`` when ``timestamp - last_action`` is at
          most 60 seconds, otherwise ``False``.
        - ``hour``: the hour component of ``timestamp``.
    """
    # Parse both time columns so that datetime arithmetic works below.
    for time_column in ("timestamp", "last_action"):
        df[time_column] = pd.to_datetime(df[time_column])

    # Previous observation per user, following the current row order.
    per_user_timestamps = df.groupby("user_id")["timestamp"]
    df["prev_timestamp"] = per_user_timestamps.shift(1)

    # A user counts as active when the last action lies no more than
    # 60 seconds before the observation; missing results become False.
    activity_window = pd.Timedelta(seconds=60)
    idle_time = df["timestamp"] - df["last_action"]
    df["was_active"] = idle_time.le(activity_window).fillna(False)

    df['hour'] = df['timestamp'].dt.hour
    return df
def mk_plotdir(output_filename):
    """
    Ensure the static plot directory exists and resolve where a plot lives.

    The directory is ``<current_app.root_path>/static/plots``; it is created
    if it does not exist yet. Relies on Flask's ``current_app`` and
    ``url_for`` as used in the body.

    Parameters:
        output_filename (str): File name of the plot inside the plot directory.

    Returns:
        dict: ``{'output_path': ..., 'plot_url': ...}`` where ``output_path``
        is the filesystem path of the plot file and ``plot_url`` is the URL
        built by ``url_for`` for the ``static`` endpoint with
        ``_external=True``.
    """
    target_dir = os.path.join(current_app.root_path, "static", "plots")
    # exist_ok=True makes repeated calls harmless once the directory exists.
    os.makedirs(target_dir, exist_ok=True)

    return {
        'output_path': os.path.join(target_dir, output_filename),
        'plot_url': url_for(
            'static',
            filename=f'plots/{output_filename}',
            _external=True,
        ),
    }