Michael Beck 2025-02-22 16:55:41 +01:00
parent 71103b0186
commit 5e98a0ba47
59 changed files with 3829 additions and 0 deletions

.gitignore vendored Normal file
@@ -0,0 +1,199 @@
# Created by https://www.toptal.com/developers/gitignore/api/flask
# Edit at https://www.toptal.com/developers/gitignore?templates=flask
### Flask ###
instance/*
!instance/.gitignore
.webassets-cache
.env
### Flask.Python Stack ###
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class
# C extensions
*.so
# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST
# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/
cover/
# Translations
*.mo
*.pot
# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal
# Flask stuff:
instance/
# Scrapy stuff:
.scrapy
# Sphinx documentation
docs/_build/
# PyBuilder
.pybuilder/
target/
# Jupyter Notebook
.ipynb_checkpoints
# IPython
profile_default/
ipython_config.py
# pyenv
# For a library or package, you might want to ignore these files since the code is
# intended to run in multiple environments; otherwise, check them in:
# .python-version
# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock
# poetry
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
# This is especially recommended for binary packages to ensure reproducibility, and is more
# commonly ignored for libraries.
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
#poetry.lock
# pdm
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
#pdm.lock
# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
# in version control.
# https://pdm.fming.dev/#use-with-ide
.pdm.toml
# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
__pypackages__/
# Celery stuff
celerybeat-schedule
celerybeat.pid
# SageMath parsed files
*.sage.py
# Environments
.venv
env/
venv/
ENV/
env.bak/
venv.bak/
# Spyder project settings
.spyderproject
.spyproject
# Rope project settings
.ropeproject
# mkdocs documentation
/site
# mypy
.mypy_cache/
.dmypy.json
dmypy.json
# Pyre type checker
.pyre/
# pytype static type analyzer
.pytype/
# Cython debug symbols
cython_debug/
# PyCharm
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
# and can be added to the global gitignore or merged into this file. For a more nuclear
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
#.idea/
# End of https://www.toptal.com/developers/gitignore/api/flask
# Created by https://www.toptal.com/developers/gitignore/api/visualstudiocode
# Edit at https://www.toptal.com/developers/gitignore?templates=visualstudiocode
### VisualStudioCode ###
.vscode/*
!.vscode/settings.json
!.vscode/tasks.json
!.vscode/launch.json
!.vscode/extensions.json
!.vscode/*.code-snippets
# Local History for Visual Studio Code
.history/
# Built Visual Studio Code Extensions
*.vsix
### VisualStudioCode Patch ###
# Ignore all local history of files
.history
.ionide
# End of https://www.toptal.com/developers/gitignore/api/visualstudiocode
# Exclude data files
*.csv
*.zip
config.ini

Dockerfile Normal file
@@ -0,0 +1,21 @@
FROM python:3.9-alpine
ENV PATH="$PATH:/home/app/.local/bin"
# doas is installed so the doas.conf written below takes effect; build
# dependencies are grouped as the virtual package .build-deps.
# (Previously py3-pip/python3-dev were consumed as --virtual group names
# and never actually installed.)
RUN apk update \
    && apk add --no-cache netcat-openbsd doas py3-pip build-base \
    && apk add --no-cache --virtual .build-deps python3-dev gcc libc-dev linux-headers pcre-dev
WORKDIR /app
COPY . /app
RUN chmod +x /app/entrypoint.sh
RUN adduser -D app
RUN echo 'permit app as root' > /etc/doas.conf
USER app
RUN pip install --upgrade pip \
&& pip install -r requirements.txt
EXPOSE 8000

LICENSE Normal file
@@ -0,0 +1,426 @@
Attribution-ShareAlike 4.0 International
=======================================================================
Creative Commons Corporation ("Creative Commons") is not a law firm and
does not provide legal services or legal advice. Distribution of
Creative Commons public licenses does not create a lawyer-client or
other relationship. Creative Commons makes its licenses and related
information available on an "as-is" basis. Creative Commons gives no
warranties regarding its licenses, any material licensed under their
terms and conditions, or any related information. Creative Commons
disclaims all liability for damages resulting from their use to the
fullest extent possible.
Using Creative Commons Public Licenses
Creative Commons public licenses provide a standard set of terms and
conditions that creators and other rights holders may use to share
original works of authorship and other material subject to copyright
and certain other rights specified in the public license below. The
following considerations are for informational purposes only, are not
exhaustive, and do not form part of our licenses.
Considerations for licensors: Our public licenses are
intended for use by those authorized to give the public
permission to use material in ways otherwise restricted by
copyright and certain other rights. Our licenses are
irrevocable. Licensors should read and understand the terms
and conditions of the license they choose before applying it.
Licensors should also secure all rights necessary before
applying our licenses so that the public can reuse the
material as expected. Licensors should clearly mark any
material not subject to the license. This includes other CC-
licensed material, or material used under an exception or
limitation to copyright. More considerations for licensors:
wiki.creativecommons.org/Considerations_for_licensors
Considerations for the public: By using one of our public
licenses, a licensor grants the public permission to use the
licensed material under specified terms and conditions. If
the licensor's permission is not necessary for any reason--for
example, because of any applicable exception or limitation to
copyright--then that use is not regulated by the license. Our
licenses grant only permissions under copyright and certain
other rights that a licensor has authority to grant. Use of
the licensed material may still be restricted for other
reasons, including because others have copyright or other
rights in the material. A licensor may make special requests,
such as asking that all changes be marked or described.
Although not required by our licenses, you are encouraged to
respect those requests where reasonable. More considerations
for the public:
wiki.creativecommons.org/Considerations_for_licensees
=======================================================================
Creative Commons Attribution-ShareAlike 4.0 International Public
License
By exercising the Licensed Rights (defined below), You accept and agree
to be bound by the terms and conditions of this Creative Commons
Attribution-ShareAlike 4.0 International Public License ("Public
License"). To the extent this Public License may be interpreted as a
contract, You are granted the Licensed Rights in consideration of Your
acceptance of these terms and conditions, and the Licensor grants You
such rights in consideration of benefits the Licensor receives from
making the Licensed Material available under these terms and
conditions.
Section 1 -- Definitions.
a. Adapted Material means material subject to Copyright and Similar
Rights that is derived from or based upon the Licensed Material
and in which the Licensed Material is translated, altered,
arranged, transformed, or otherwise modified in a manner requiring
permission under the Copyright and Similar Rights held by the
Licensor. For purposes of this Public License, where the Licensed
Material is a musical work, performance, or sound recording,
Adapted Material is always produced where the Licensed Material is
synched in timed relation with a moving image.
b. Adapter's License means the license You apply to Your Copyright
and Similar Rights in Your contributions to Adapted Material in
accordance with the terms and conditions of this Public License.
c. BY-SA Compatible License means a license listed at
creativecommons.org/compatiblelicenses, approved by Creative
Commons as essentially the equivalent of this Public License.
d. Copyright and Similar Rights means copyright and/or similar rights
closely related to copyright including, without limitation,
performance, broadcast, sound recording, and Sui Generis Database
Rights, without regard to how the rights are labeled or
categorized. For purposes of this Public License, the rights
specified in Section 2(b)(1)-(2) are not Copyright and Similar
Rights.
e. Effective Technological Measures means those measures that, in the
absence of proper authority, may not be circumvented under laws
fulfilling obligations under Article 11 of the WIPO Copyright
Treaty adopted on December 20, 1996, and/or similar international
agreements.
f. Exceptions and Limitations means fair use, fair dealing, and/or
any other exception or limitation to Copyright and Similar Rights
that applies to Your use of the Licensed Material.
g. License Elements means the license attributes listed in the name
of a Creative Commons Public License. The License Elements of this
Public License are Attribution and ShareAlike.
h. Licensed Material means the artistic or literary work, database,
or other material to which the Licensor applied this Public
License.
i. Licensed Rights means the rights granted to You subject to the
terms and conditions of this Public License, which are limited to
all Copyright and Similar Rights that apply to Your use of the
Licensed Material and that the Licensor has authority to license.
j. Licensor means the individual(s) or entity(ies) granting rights
under this Public License.
k. Share means to provide material to the public by any means or
process that requires permission under the Licensed Rights, such
as reproduction, public display, public performance, distribution,
dissemination, communication, or importation, and to make material
available to the public including in ways that members of the
public may access the material from a place and at a time
individually chosen by them.
l. Sui Generis Database Rights means rights other than copyright
resulting from Directive 96/9/EC of the European Parliament and of
the Council of 11 March 1996 on the legal protection of databases,
as amended and/or succeeded, as well as other essentially
equivalent rights anywhere in the world.
m. You means the individual or entity exercising the Licensed Rights
under this Public License. Your has a corresponding meaning.
Section 2 -- Scope.
a. License grant.
1. Subject to the terms and conditions of this Public License,
the Licensor hereby grants You a worldwide, royalty-free,
non-sublicensable, non-exclusive, irrevocable license to
exercise the Licensed Rights in the Licensed Material to:
a. reproduce and Share the Licensed Material, in whole or
in part; and
b. produce, reproduce, and Share Adapted Material.
2. Exceptions and Limitations. For the avoidance of doubt, where
Exceptions and Limitations apply to Your use, this Public
License does not apply, and You do not need to comply with
its terms and conditions.
3. Term. The term of this Public License is specified in Section
6(a).
4. Media and formats; technical modifications allowed. The
Licensor authorizes You to exercise the Licensed Rights in
all media and formats whether now known or hereafter created,
and to make technical modifications necessary to do so. The
Licensor waives and/or agrees not to assert any right or
authority to forbid You from making technical modifications
necessary to exercise the Licensed Rights, including
technical modifications necessary to circumvent Effective
Technological Measures. For purposes of this Public License,
simply making modifications authorized by this Section 2(a)
(4) never produces Adapted Material.
5. Downstream recipients.
a. Offer from the Licensor -- Licensed Material. Every
recipient of the Licensed Material automatically
receives an offer from the Licensor to exercise the
Licensed Rights under the terms and conditions of this
Public License.
b. Additional offer from the Licensor -- Adapted Material.
Every recipient of Adapted Material from You
automatically receives an offer from the Licensor to
exercise the Licensed Rights in the Adapted Material
under the conditions of the Adapter's License You apply.
c. No downstream restrictions. You may not offer or impose
any additional or different terms or conditions on, or
apply any Effective Technological Measures to, the
Licensed Material if doing so restricts exercise of the
Licensed Rights by any recipient of the Licensed
Material.
6. No endorsement. Nothing in this Public License constitutes or
may be construed as permission to assert or imply that You
are, or that Your use of the Licensed Material is, connected
with, or sponsored, endorsed, or granted official status by,
the Licensor or others designated to receive attribution as
provided in Section 3(a)(1)(A)(i).
b. Other rights.
1. Moral rights, such as the right of integrity, are not
licensed under this Public License, nor are publicity,
privacy, and/or other similar personality rights; however, to
the extent possible, the Licensor waives and/or agrees not to
assert any such rights held by the Licensor to the limited
extent necessary to allow You to exercise the Licensed
Rights, but not otherwise.
2. Patent and trademark rights are not licensed under this
Public License.
3. To the extent possible, the Licensor waives any right to
collect royalties from You for the exercise of the Licensed
Rights, whether directly or through a collecting society
under any voluntary or waivable statutory or compulsory
licensing scheme. In all other cases the Licensor expressly
reserves any right to collect such royalties.
Section 3 -- License Conditions.
Your exercise of the Licensed Rights is expressly made subject to the
following conditions.
a. Attribution.
1. If You Share the Licensed Material (including in modified
form), You must:
a. retain the following if it is supplied by the Licensor
with the Licensed Material:
i. identification of the creator(s) of the Licensed
Material and any others designated to receive
attribution, in any reasonable manner requested by
the Licensor (including by pseudonym if
designated);
ii. a copyright notice;
iii. a notice that refers to this Public License;
iv. a notice that refers to the disclaimer of
warranties;
v. a URI or hyperlink to the Licensed Material to the
extent reasonably practicable;
b. indicate if You modified the Licensed Material and
retain an indication of any previous modifications; and
c. indicate the Licensed Material is licensed under this
Public License, and include the text of, or the URI or
hyperlink to, this Public License.
2. You may satisfy the conditions in Section 3(a)(1) in any
reasonable manner based on the medium, means, and context in
which You Share the Licensed Material. For example, it may be
reasonable to satisfy the conditions by providing a URI or
hyperlink to a resource that includes the required
information.
3. If requested by the Licensor, You must remove any of the
information required by Section 3(a)(1)(A) to the extent
reasonably practicable.
b. ShareAlike.
In addition to the conditions in Section 3(a), if You Share
Adapted Material You produce, the following conditions also apply.
1. The Adapter's License You apply must be a Creative Commons
license with the same License Elements, this version or
later, or a BY-SA Compatible License.
2. You must include the text of, or the URI or hyperlink to, the
Adapter's License You apply. You may satisfy this condition
in any reasonable manner based on the medium, means, and
context in which You Share Adapted Material.
3. You may not offer or impose any additional or different terms
or conditions on, or apply any Effective Technological
Measures to, Adapted Material that restrict exercise of the
rights granted under the Adapter's License You apply.
Section 4 -- Sui Generis Database Rights.
Where the Licensed Rights include Sui Generis Database Rights that
apply to Your use of the Licensed Material:
a. for the avoidance of doubt, Section 2(a)(1) grants You the right
to extract, reuse, reproduce, and Share all or a substantial
portion of the contents of the database;
b. if You include all or a substantial portion of the database
contents in a database in which You have Sui Generis Database
Rights, then the database in which You have Sui Generis Database
Rights (but not its individual contents) is Adapted Material,
including for purposes of Section 3(b); and
c. You must comply with the conditions in Section 3(a) if You Share
all or a substantial portion of the contents of the database.
For the avoidance of doubt, this Section 4 supplements and does not
replace Your obligations under this Public License where the Licensed
Rights include other Copyright and Similar Rights.
Section 5 -- Disclaimer of Warranties and Limitation of Liability.
a. UNLESS OTHERWISE SEPARATELY UNDERTAKEN BY THE LICENSOR, TO THE
EXTENT POSSIBLE, THE LICENSOR OFFERS THE LICENSED MATERIAL AS-IS
AND AS-AVAILABLE, AND MAKES NO REPRESENTATIONS OR WARRANTIES OF
ANY KIND CONCERNING THE LICENSED MATERIAL, WHETHER EXPRESS,
IMPLIED, STATUTORY, OR OTHER. THIS INCLUDES, WITHOUT LIMITATION,
WARRANTIES OF TITLE, MERCHANTABILITY, FITNESS FOR A PARTICULAR
PURPOSE, NON-INFRINGEMENT, ABSENCE OF LATENT OR OTHER DEFECTS,
ACCURACY, OR THE PRESENCE OR ABSENCE OF ERRORS, WHETHER OR NOT
KNOWN OR DISCOVERABLE. WHERE DISCLAIMERS OF WARRANTIES ARE NOT
ALLOWED IN FULL OR IN PART, THIS DISCLAIMER MAY NOT APPLY TO YOU.
b. TO THE EXTENT POSSIBLE, IN NO EVENT WILL THE LICENSOR BE LIABLE
TO YOU ON ANY LEGAL THEORY (INCLUDING, WITHOUT LIMITATION,
NEGLIGENCE) OR OTHERWISE FOR ANY DIRECT, SPECIAL, INDIRECT,
INCIDENTAL, CONSEQUENTIAL, PUNITIVE, EXEMPLARY, OR OTHER LOSSES,
COSTS, EXPENSES, OR DAMAGES ARISING OUT OF THIS PUBLIC LICENSE OR
USE OF THE LICENSED MATERIAL, EVEN IF THE LICENSOR HAS BEEN
ADVISED OF THE POSSIBILITY OF SUCH LOSSES, COSTS, EXPENSES, OR
DAMAGES. WHERE A LIMITATION OF LIABILITY IS NOT ALLOWED IN FULL OR
IN PART, THIS LIMITATION MAY NOT APPLY TO YOU.
c. The disclaimer of warranties and limitation of liability provided
above shall be interpreted in a manner that, to the extent
possible, most closely approximates an absolute disclaimer and
waiver of all liability.
Section 6 -- Term and Termination.
a. This Public License applies for the term of the Copyright and
Similar Rights licensed here. However, if You fail to comply with
this Public License, then Your rights under this Public License
terminate automatically.
b. Where Your right to use the Licensed Material has terminated under
Section 6(a), it reinstates:
1. automatically as of the date the violation is cured, provided
it is cured within 30 days of Your discovery of the
violation; or
2. upon express reinstatement by the Licensor.
For the avoidance of doubt, this Section 6(b) does not affect any
right the Licensor may have to seek remedies for Your violations
of this Public License.
c. For the avoidance of doubt, the Licensor may also offer the
Licensed Material under separate terms or conditions or stop
distributing the Licensed Material at any time; however, doing so
will not terminate this Public License.
d. Sections 1, 5, 6, 7, and 8 survive termination of this Public
License.
Section 7 -- Other Terms and Conditions.
a. The Licensor shall not be bound by any additional or different
terms or conditions communicated by You unless expressly agreed.
b. Any arrangements, understandings, or agreements regarding the
Licensed Material not stated herein are separate from and
independent of the terms and conditions of this Public License.
Section 8 -- Interpretation.
a. For the avoidance of doubt, this Public License does not, and
shall not be interpreted to, reduce, limit, restrict, or impose
conditions on any use of the Licensed Material that could lawfully
be made without permission under this Public License.
b. To the extent possible, if any provision of this Public License is
deemed unenforceable, it shall be automatically reformed to the
minimum extent necessary to make it enforceable. If the provision
cannot be reformed, it shall be severed from this Public License
without affecting the enforceability of the remaining terms and
conditions.
c. No term or condition of this Public License will be waived and no
failure to comply consented to unless expressly agreed to by the
Licensor.
d. Nothing in this Public License constitutes or may be interpreted
as a limitation upon, or waiver of, any privileges and immunities
that apply to the Licensor or You, including from the legal
processes of any jurisdiction or authority.
=======================================================================
Creative Commons is not a party to its public
licenses. Notwithstanding, Creative Commons may elect to apply one of
its public licenses to material it publishes and in those instances
will be considered the “Licensor.” The text of the Creative Commons
public licenses is dedicated to the public domain under the CC0 Public
Domain Dedication. Except for the limited purpose of indicating that
material is shared under a Creative Commons public license or as
otherwise permitted by the Creative Commons policies published at
creativecommons.org/policies, Creative Commons does not authorize the
use of the trademark "Creative Commons" or any other trademark or logo
of Creative Commons without its prior written consent including,
without limitation, in connection with any unauthorized modifications
to any of its public licenses or any other arrangements,
understandings, or agreements concerning use of licensed material. For
the avoidance of doubt, this paragraph does not form part of the
public licenses.
Creative Commons may be contacted at creativecommons.org.

README.md Normal file
@@ -0,0 +1,257 @@
# Torn User Activity Tracker
> [!WARNING]
> **Development is still in its early stages; do not put it to productive use!**
## Features
Multiple users can control a single activity tracker that uses Torn's API.
- Start and stop scraping user activity data
- View real-time logs
- Download data and log files
- View scraping results
- Plugin-based analysis system
- Toggle between light and dark mode
**Note:** Many features are not fully implemented yet, but the activity tracker/grabber works as intended.
## Planned Features
- Additional analysis plugins
- Selector for Torn API fields to choose which data is tracked
- Log viewer
## Requirements
- Python 3.8+
- Flask
- Flask-Bootstrap
- Flask-WTF
- Pandas
- Requests
- Redis
- Celery
- uWSGI
Redis currently has to run locally; this will change in the future. See `tasks.py`:
```python
# tasks.py
def get_redis():
return redis.StrictRedis(
host='localhost',
port=6379,
db=0,
decode_responses=True
)
```
## Installation
### Docker
#### Prerequisites
- Docker
- Docker Compose
#### Steps to Deploy
1. Clone the repository:
```bash
git clone <repository-url>
cd TornActivityTracker
```
2. Configure environment variables:
- Copy the example .env file and modify if needed
```bash
cp .env.example .env
```
3. Build and start the containers:
```bash
docker-compose up -d --build
```
This will start:
- The main Flask application
- Redis for task queue management
- Nginx as reverse proxy
The application will be available at `http://localhost:80`
#### Maintenance
To view logs:
```bash
docker-compose logs -f
```
To stop the application:
```bash
docker-compose down
```
To rebuild and restart:
```bash
docker-compose up -d --build
```
### Manual
1. Clone the repository:
```sh
git clone https://github.com/MichaelB7/TornActivityTracker.git
cd TornActivityTracker
```
2. Create a virtual environment and activate it:
```sh
python3 -m venv venv
source venv/bin/activate # On Windows use: .\venv\Scripts\activate
```
3. Install the required packages:
```sh
pip install -r requirements.txt
```
4. Start Redis server locally:
```sh
redis-server
```
5. Set up your configuration:
Create a `config.ini` file in the root directory by copying `example_config.ini`:
```sh
cp example_config.ini config.ini
```
Then edit `config.ini` with your settings:
```ini
[DEFAULT]
SECRET_KEY = your_secret_key
API_KEY = your_api_key
# ...rest of the config settings...
```
6. Start the Celery worker:
```sh
celery -A app.celery_worker worker --loglevel=info
```
7. Run the Flask application:
```sh
flask run
```
The application will be available at `http://127.0.0.1:5000/`
## Adding an Analysis Module
This guide explains how to add a new analysis module using the provided base classes: `BasePlotlyAnalysis` and `BasePlotAnalysis`. These base classes ensure a structured workflow for data preparation, transformation, and visualization.
### 1. Choosing the Right Base Class
Before implementing an analysis module, decide on the appropriate base class:
- **`BasePlotlyAnalysis`**: Use this for interactive plots with **Plotly** that generate **HTML** outputs.
- **`BasePlotAnalysis`**: Use this for static plots with **Matplotlib/Seaborn** that generate **PNG** image files.
- **`BaseAnalysis`**: Use this for any other type of analysis with **text** or **HTML** output, for maximum flexibility (a minimal sketch follows below).
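For reference, here is a minimal sketch of a plain `BaseAnalysis` module. Only the abstract `execute` method from `app/analysis/base.py` is assumed; the class name `CountRows` and its output are illustrative.
```python
import pandas as pd
from .base import BaseAnalysis

class CountRows(BaseAnalysis):
    """Hypothetical text-output analysis: reports the number of records."""
    name = "Row Count"
    description = "Returns the number of rows in the scraped data."

    def execute(self, df: pd.DataFrame):
        # Plain BaseAnalysis modules return their text/HTML fragment directly.
        return f"<p>{len(df)} records analyzed.</p>"
```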
### 2. Naming Convention
Follow a structured naming convention for consistency:
- **File name:** `plotly_<analysis_name>.py` for Plotly analyses, `plot_<analysis_name>.py` for Matplotlib-based analyses.
- **Class name:** Use PascalCase and a descriptive suffix:
- Example for Plotly: `PlotlyActivityHeatmap`
- Example for Matplotlib: `PlotUserSessionDuration`
### 3. Data Structure
The following DataFrame structure is passed to analysis classes:
| user_id | name | last_action | status | timestamp | prev_timestamp | was_active | hour |
|----------|-----------|----------------------|--------|-----------------------------|----------------|------------|------|
| XXXXXXX | UserA | 2025-02-08 17:58:11 | Okay | 2025-02-08 18:09:41.867984056 | NaT | False | 18 |
| XXXXXXX | UserB | 2025-02-08 17:00:10 | Okay | 2025-02-08 18:09:42.427846909 | NaT | False | 18 |
| XXXXXXX | UserC | 2025-02-08 16:31:52 | Okay | 2025-02-08 18:09:42.823201895 | NaT | False | 18 |
| XXXXXXX | UserD | 2025-02-06 23:57:24 | Okay | 2025-02-08 18:09:43.179914951 | NaT | False | 18 |
| XXXXXXX | UserE | 2025-02-06 06:33:40 | Okay | 2025-02-08 18:09:43.434650898 | NaT | False | 18 |
Note that the first row for each member will always contain an empty value (`NaT`) in `prev_timestamp`, since the first observation of a user has no previous timestamp.
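The empty values follow directly from how `prev_timestamp` is computed: a per-user shift, as in `prepare_data` (see `data_utils.py`). The self-contained sketch below uses made-up data to illustrate this.
```python
import pandas as pd

# Illustrative data: two observations for user 1, one for user 2.
df = pd.DataFrame({
    "user_id": [1, 1, 2],
    "timestamp": pd.to_datetime([
        "2025-02-08 18:09", "2025-02-08 18:10", "2025-02-08 18:09",
    ]),
})
# Same computation as prepare_data(): shift timestamps within each user.
df["prev_timestamp"] = df.groupby("user_id")["timestamp"].shift(1)
print(df)  # the first row of each user_id shows NaT in prev_timestamp
```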
### 4. Implementing an Analysis Module
Each analysis module should define two key methods:
- `transform_data(self, df: pd.DataFrame) -> pd.DataFrame`: Processes the input data for plotting.
- `plot_data(self, df: pd.DataFrame)`: Generates and saves the plot.
#### Example: Adding a Plotly Heatmap
Below is an example of how to create a new analysis module using `BasePlotlyAnalysis`.
```python
import pandas as pd
import plotly.graph_objects as go
from .basePlotlyAnalysis import BasePlotlyAnalysis
class PlotlyActivityHeatmap(BasePlotlyAnalysis):
"""
Displays user activity trends over multiple days using an interactive heatmap.
"""
name = "Activity Heatmap (Interactive)"
description = "Displays user activity trends over multiple days."
plot_filename = "activity_heatmap.html"
def transform_data(self, df: pd.DataFrame) -> pd.DataFrame:
df['hour'] = df['timestamp'].dt.hour
active_counts = df[df['was_active']].pivot_table(
index='name',
columns='hour',
values='was_active',
aggfunc='sum',
fill_value=0
).reset_index()
return active_counts.melt(id_vars='name', var_name='hour', value_name='activity_count')
def plot_data(self, df: pd.DataFrame):
df = df.pivot(index='name', columns='hour', values='activity_count').fillna(0)
self.fig = go.Figure(data=go.Heatmap(
z=df.values, x=df.columns, y=df.index, colorscale='Viridis',
colorbar=dict(title='Activity Count')
))
self.fig.update_layout(title='User Activity Heatmap', xaxis_title='Hour', yaxis_title='User')
```
#### Example: Adding a Static Matplotlib Plot
Below is an example of a Matplotlib-based analysis module using `BasePlotAnalysis`.
```python
import pandas as pd
import matplotlib.pyplot as plt
from .basePlotAnalysis import BasePlotAnalysis
class PlotUserSessionDuration(BasePlotAnalysis):
"""
Displays a histogram of user session durations.
"""
name = "User Session Duration Histogram"
description = "Histogram of session durations."
plot_filename = "session_duration.png"
def transform_data(self, df: pd.DataFrame) -> pd.DataFrame:
df['session_duration'] = (df['last_action'] - df['timestamp']).dt.total_seconds()
return df
def plot_data(self, df: pd.DataFrame):
plt.figure(figsize=(10, 6))
plt.hist(df['session_duration'].dropna(), bins=30, edgecolor='black')
plt.xlabel('Session Duration (seconds)')
plt.ylabel('Frequency')
plt.title('User Session Duration Histogram')
```
## License
All assets and code are licensed under [CC BY-SA 4.0](http://creativecommons.org/licenses/by-sa/4.0/) unless specified otherwise.

app/__init__.py Normal file
@@ -0,0 +1,56 @@
import os
from flask import Flask
from flask_bootstrap import Bootstrap5
from datetime import datetime
from app.views import register_views
from app.api import register_api
from app.config import load_config
from app.filters import register_filters
from app.tasks import celery
from app.logging_config import init_logger
def create_app(config=None):
app = Flask(__name__)
if config is None:
config = load_config()
app.config.update(config)
os.environ['TZ'] = 'UTC'
app.config['SECRET_KEY'] = config['DEFAULT']['SECRET_KEY']
# Move bootstrap settings to root level
for key, value in config.get('BOOTSTRAP', {}).items():
app.config[key.upper()] = value
# Initialize Celery
celery.conf.update(app.config)
bootstrap = Bootstrap5(app)
# Store the entire config in Flask app
app.config.update(config)
# Initialize other settings
app.config['SCRAPING_ACTIVE'] = False
app.config['SCRAPING_THREAD'] = None
app.config['DATA_FILE_NAME'] = None
app.config['LOG_FILE_NAME'] = "log/" + datetime.now().strftime('%Y-%m-%d-%H-%M') + '.log'
# Initialize logging
app.logger = init_logger(app.config)
# Register routes
register_views(app)
register_api(app)
register_filters(app)
@app.context_processor
def inject_main_config():
main_config = app.config.get('MAIN', {})
return dict(main_config=main_config)
return app

app/analysis/__init__.py Normal file
@@ -0,0 +1,34 @@
import os
import pkgutil
import importlib
import inspect
from abc import ABC
from .base import BaseAnalysis
import pandas as pd
def load_analysis_modules():
analysis_modules = []
package_path = __path__[0]
for _, module_name, _ in pkgutil.iter_modules([package_path]):
module = importlib.import_module(f"app.analysis.{module_name}")
for _, obj in inspect.getmembers(module, inspect.isclass):
# Exclude abstract classes (like BasePlotAnalysis)
if issubclass(obj, BaseAnalysis) and obj is not BaseAnalysis and not inspect.isabstract(obj):
analysis_modules.append(obj()) # Instantiate only concrete classes
return analysis_modules
def load_data(file_path: str) -> pd.DataFrame:
"""Loads the scraped data from a CSV file into a Pandas DataFrame."""
if not os.path.exists(file_path):
raise FileNotFoundError(f"File {file_path} not found.")
df = pd.read_csv(file_path)
df["timestamp"] = pd.to_datetime(df["timestamp"], errors="coerce")
df["last_action"] = pd.to_datetime(df["last_action"], errors="coerce")
return df
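# Usage sketch (illustrative, not part of the public API contract):
#   df = load_data("data/example.csv")        # hypothetical CSV path
#   for analysis in load_analysis_modules():
#       html_fragment = analysis.execute(df)  # each module returns text/HTML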

app/analysis/base.py Normal file
@@ -0,0 +1,11 @@
from abc import ABC, abstractmethod
import pandas as pd
class BaseAnalysis(ABC):
name = "Base Analysis"
description = "This is a base analysis module."
@abstractmethod
def execute(self, df: pd.DataFrame):
"""Run analysis on the given DataFrame"""
pass

app/analysis/basePlotAnalysis.py Normal file
@@ -0,0 +1,77 @@
import os
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from abc import ABC, abstractmethod
from .base import BaseAnalysis
from app.analysis.data_utils import prepare_data, mk_plotdir
import matplotlib
matplotlib.use('Agg')
# -------------------------------------------
# Base Class for All Plot Analyses
# -------------------------------------------
class BasePlotAnalysis(BaseAnalysis, ABC):
"""
Base class for all plot-based analyses.
It enforces a structure for:
- Data preparation
- Transformation
- Plot generation
- Memory cleanup
Attributes:
plot_filename (str): The filename for the output plot.
alt_text (str): The alt text for the plot.
"""
plot_filename = "default_plot.png"
alt_text = "Default Alt Text"
def execute(self, df: pd.DataFrame):
"""
Executes the full analysis pipeline.
Parameters:
df (pd.DataFrame): The input DataFrame containing user activity data.
Returns:
str: HTML img tag containing the URL to the generated plot.
"""
df = prepare_data(df) # Step 1: Prepare data
paths = mk_plotdir(self.plot_filename)
self.output_path, self.plot_url = paths['output_path'], paths['plot_url']
df = self.transform_data(df) # Step 2: Transform data (implemented by subclass)
self.plot_data(df) # Step 3: Create the plot
plt.savefig(self.output_path, bbox_inches="tight")
plt.close()
del df # Step 4: Free memory
return f'<img src="{self.plot_url}" alt="{self.alt_text}">'  # alt_text is the documented attribute; self.note is not defined on the base class
@abstractmethod
def transform_data(self, df: pd.DataFrame) -> pd.DataFrame:
"""
Subclasses must define how they transform the data.
Parameters:
df (pd.DataFrame): The input DataFrame containing user activity data.
Returns:
pd.DataFrame: The transformed DataFrame.
"""
pass
@abstractmethod
def plot_data(self, df: pd.DataFrame):
"""
Subclasses must define how they generate the plot.
Parameters:
df (pd.DataFrame): The transformed DataFrame containing data to be plotted.
"""
pass

app/analysis/basePlotlyAnalysis.py Normal file
@@ -0,0 +1,73 @@
import os
import pandas as pd
import plotly.graph_objects as go
from abc import ABC, abstractmethod
from .base import BaseAnalysis
from app.analysis.data_utils import prepare_data, mk_plotdir
# -------------------------------------------
# Base Class for All Plotly Plot Analyses
# -------------------------------------------
class BasePlotlyAnalysis(BaseAnalysis, ABC):
"""
Base class for all Plotly plot-based analyses.
It enforces a structure for:
- Data preparation
- Transformation
- Plot generation
- Memory cleanup
Attributes:
plot_filename (str): The filename for the output plot.
alt_text (str): The alt text for the plot.
"""
plot_filename = "default_plot.html"
alt_text = "Default Alt Text"
def execute(self, df: pd.DataFrame):
"""
Executes the full analysis pipeline.
Parameters:
df (pd.DataFrame): The input DataFrame containing user activity data.
Returns:
str: HTML iframe containing the URL to the generated plot.
"""
df = prepare_data(df) # Step 1: Prepare data
paths = mk_plotdir(self.plot_filename)
self.output_path, self.plot_url = paths['output_path'], paths['plot_url']
df = self.transform_data(df) # Step 2: Transform data (implemented by subclass)
self.plot_data(df) # Step 3: Create the plot
# Save the plot as an HTML file
self.fig.write_html(self.output_path)
del df # Step 4: Free memory
return f'<iframe src="{self.plot_url}" width="100%" height="600"></iframe>'
@abstractmethod
def transform_data(self, df: pd.DataFrame) -> pd.DataFrame:
"""
Subclasses must define how they transform the data.
Parameters:
df (pd.DataFrame): The input DataFrame containing user activity data.
Returns:
pd.DataFrame: The transformed DataFrame.
"""
pass
@abstractmethod
def plot_data(self, df: pd.DataFrame):
"""
Subclasses must define how they generate the plot.
Parameters:
df (pd.DataFrame): The transformed DataFrame containing data to be plotted.
"""
pass

app/analysis/data_utils.py Normal file
@@ -0,0 +1,45 @@
from flask import current_app, url_for
import os
import pandas as pd
def prepare_data(df):
"""
Prepares the data for analysis by converting timestamps, calculating previous timestamps,
determining active status, and extracting the hour from the timestamp.
Parameters:
df (pd.DataFrame): The input DataFrame containing user activity data.
Returns:
pd.DataFrame: The processed DataFrame with additional columns for analysis.
The returned DataFrame will have the following columns:
user_id name last_action status timestamp prev_timestamp was_active hour
0 12345678 UserName 2025-02-08 17:58:11 Okay 2025-02-08 18:09:41.867984056 NaT False 18
"""
df["timestamp"] = pd.to_datetime(df["timestamp"])
df["last_action"] = pd.to_datetime(df["last_action"])
df["prev_timestamp"] = df.groupby("user_id")["timestamp"].shift(1)
df["was_active"] = (df["timestamp"] - df["last_action"]) <= pd.Timedelta(seconds=60)
df["was_active"] = df["was_active"].fillna(False)
df['hour'] = df['timestamp'].dt.hour
return df
def mk_plotdir(output_filename):
"""
Creates the directory for storing plots and generates the output path and URL for the plot.
Parameters:
output_filename (str): The filename for the output plot.
Returns:
dict: A dictionary containing the output path and plot URL.
"""
plots_dir = os.path.join(current_app.root_path, "static", "plots")
os.makedirs(plots_dir, exist_ok=True)
output_path = os.path.join(plots_dir, output_filename)
plot_url = url_for('static', filename=f'plots/{output_filename}', _external=True)
return {'output_path': output_path, 'plot_url': plot_url}

@@ -0,0 +1,51 @@
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from .basePlotAnalysis import BasePlotAnalysis
from flask import current_app, url_for
import matplotlib
matplotlib.use('Agg')
class PlotTopActiveUsers(BasePlotAnalysis):
"""
Class for analyzing the most active users and generating a bar chart.
Attributes:
name (str): The name of the analysis.
description (str): A brief description of the analysis.
plot_filename (str): The filename for the output plot.
note (str): Additional notes for the analysis.
"""
name = "Top Active Users"
description = "Displays the most active users based on their number of recorded actions."
plot_filename = "bar_activity-per-user.png"
note = ""
def transform_data(self, df: pd.DataFrame) -> pd.DataFrame:
"""
Transform data for the bar plot.
Parameters:
df (pd.DataFrame): The input DataFrame containing user activity data.
Returns:
pd.DataFrame: The transformed DataFrame with active counts per user.
"""
df = df[df['was_active'] == True].groupby('name').size().reset_index(name='active_count')
return df
def plot_data(self, df: pd.DataFrame):
"""
Generate bar plot.
Parameters:
df (pd.DataFrame): The transformed DataFrame containing active counts per user.
"""
# create a barplot from active counts sorted by active count
plt.figure(figsize=(10, 6))
sns.barplot(x='active_count', y='name', data=df.sort_values('active_count', ascending=False))
plt.xticks(rotation=90)
plt.title('Minutes Active')
plt.xlabel('Active Count')
plt.ylabel('Player')

@@ -0,0 +1,53 @@
import os
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from .basePlotAnalysis import BasePlotAnalysis
import matplotlib
matplotlib.use('Agg')
class PlotPeakHours(BasePlotAnalysis):
"""
Class for analyzing peak activity hours and generating a bar chart.
Attributes:
name (str): The name of the analysis.
description (str): A brief description of the analysis.
plot_filename (str): The filename for the output plot.
note (str): Additional notes for the analysis.
"""
name = "Peak Hours Analysis"
description = "Identifies peak activity hours using a bar chart."
plot_filename = "peak_hours.png"
note = ""
def transform_data(self, df: pd.DataFrame) -> pd.DataFrame:
"""
Return the data unchanged; the was_active and hour columns are already added by prepare_data. See data_utils.py.
Parameters:
df (pd.DataFrame): The input DataFrame containing user activity data.
Returns:
pd.DataFrame: The transformed DataFrame with additional columns for analysis.
"""
return df
def plot_data(self, df: pd.DataFrame):
"""
Generate bar chart for peak hours.
Parameters:
df (pd.DataFrame): The transformed DataFrame containing user activity data.
"""
peak_hours = df[df["was_active"]]["hour"].value_counts().sort_index()
plt.figure(figsize=(12, 5))
sns.barplot(x=peak_hours.index, y=peak_hours.values, hue=peak_hours.values, palette="coolwarm")
plt.xlabel("Hour of the Day")
plt.ylabel("Activity Count")
plt.title("Peak Hours of User Activity")
plt.xticks(range(0, 24))

@@ -0,0 +1,55 @@
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from .basePlotAnalysis import BasePlotAnalysis
import matplotlib
matplotlib.use('Agg')
class PlotActivityHeatmap(BasePlotAnalysis):
"""
Class for analyzing user activity trends over multiple days and generating a heatmap.
Attributes:
name (str): The name of the analysis.
description (str): A brief description of the analysis.
plot_filename (str): The filename for the output plot.
note (str): Additional notes for the analysis.
"""
name = "Activity Heatmap"
description = "Displays user activity trends over multiple days using a heatmap. Generates a downloadable PNG image."
plot_filename = "activity_heatmap.png"
note = ""
def transform_data(self, df: pd.DataFrame) -> pd.DataFrame:
"""
Transform data for the heatmap.
Parameters:
df (pd.DataFrame): The input DataFrame containing user activity data.
Returns:
pd.DataFrame: The transformed DataFrame with activity counts by hour.
"""
active_counts = df[df['was_active']].pivot_table(
index='name',
columns='hour',
values='was_active',
aggfunc='sum',
fill_value=0
)
active_counts['total_active_minutes'] = active_counts.sum(axis=1)
return active_counts.sort_values(by='total_active_minutes', ascending=False)
def plot_data(self, df: pd.DataFrame):
"""
Generate heatmap plot.
Parameters:
df (pd.DataFrame): The transformed DataFrame containing activity counts by hour.
"""
plt.figure(figsize=(12, 8))
sns.heatmap(df.loc[:, df.columns != 'total_active_minutes'], cmap='viridis', cbar_kws={'label': 'Count of was_active == True'})
plt.xlabel('Hour of Day')
plt.ylabel('User ID')
plt.title('User Activity Heatmap')

@@ -0,0 +1,67 @@
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from .basePlotAnalysis import BasePlotAnalysis
from flask import current_app, url_for
import matplotlib
matplotlib.use('Agg')
class PlotLineActivityAllUsers(BasePlotAnalysis):
"""
Class for analyzing user activity trends over multiple days and generating a line graph.
Attributes:
name (str): The name of the analysis.
description (str): A brief description of the analysis.
plot_filename (str): The filename for the output plot.
note (str): Additional notes for the analysis.
"""
name = "Activity Line Graph (All Users)"
description = "This analysis shows the activity line graph for all users. Gneerates a downloadable PNG image."
plot_filename = "line_activity-all_users.png"
note = ""
def transform_data(self, df: pd.DataFrame) -> pd.DataFrame:
"""
Transform data for the line plot.
Parameters:
df (pd.DataFrame): The input DataFrame containing user activity data.
Returns:
pd.DataFrame: The transformed DataFrame with activity counts by hour.
"""
df['hour'] = df['timestamp'].dt.hour
df = df[df['was_active'] == True].pivot_table(index='name', columns='hour', values='was_active', aggfunc='sum', fill_value=0)
df['total_active_minutes'] = df.sum(axis=1)
df = df.sort_values(by='total_active_minutes', ascending=False).drop('total_active_minutes', axis=1)
cumulative_sum_row = df.cumsum().iloc[-1]
df.loc['Cumulative Sum'] = cumulative_sum_row
return df
def plot_data(self, df: pd.DataFrame):
"""
Generate line graph for user activity throughout the day.
Parameters:
df (pd.DataFrame): The transformed DataFrame containing activity counts by hour.
"""
plt.figure(figsize=(12, 6))
# Plot each user's activity
for index, row in df.iterrows():
if index == 'Cumulative Sum':
plt.plot(row.index, row.values, label=index, linewidth=3, color='black') # Bold line for cumulative sum
else:
plt.plot(row.index, row.values, label=index)
# Add labels and title
plt.xlabel('Hour of Day')
plt.ylabel('Activity Count')
plt.title('User Activity Throughout the Day')
plt.legend(loc='upper left', bbox_to_anchor=(1, 1))
plt.grid(True)

@@ -0,0 +1,82 @@
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from .basePlotlyAnalysis import BasePlotlyAnalysis
from flask import current_app, url_for
class PlotlyActivityHeatmap(BasePlotlyAnalysis):
"""
Class for analyzing user activity trends over multiple days and generating an interactive heatmap.
Attributes:
name (str): The name of the analysis.
description (str): A brief description of the analysis.
plot_filename (str): The filename for the output plot.
note (str): Additional notes for the analysis.
"""
name = "Activity Heatmap (Interactive)"
description = "Displays user activity trends over multiple days using an interactive heatmap."
plot_filename = "activity_heatmap.html"
note = ""
def transform_data(self, df: pd.DataFrame) -> pd.DataFrame:
"""
Transform data for the heatmap.
Parameters:
df (pd.DataFrame): The input DataFrame containing user activity data.
Returns:
pd.DataFrame: The transformed DataFrame with activity counts by hour.
"""
df['hour'] = df['timestamp'].dt.hour
active_counts = df[df['was_active']].pivot_table(
index='name',
columns='hour',
values='was_active',
aggfunc='sum',
fill_value=0
).reset_index()
# Ensure all hours are represented
all_hours = pd.DataFrame({'hour': range(24)})
active_counts = active_counts.melt(id_vars='name', var_name='hour', value_name='activity_count')
active_counts = active_counts.merge(all_hours, on='hour', how='right').fillna(0)
active_counts['hour'] = active_counts['hour'].astype(int) # Ensure hour is treated as numeric
return active_counts
def plot_data(self, df: pd.DataFrame):
"""
Generate heatmap plot.
Parameters:
df (pd.DataFrame): The transformed DataFrame containing activity counts by hour.
"""
df = df.pivot(index='name', columns='hour', values='activity_count').fillna(0)
# Create a Plotly heatmap
self.fig = go.Figure(data=go.Heatmap(
z=df.values,
x=df.columns,
y=df.index,
colorscale='Viridis',
colorbar=dict(title='Count of was_active == True')
))
# Update layout
self.fig.update_layout(
title='User Activity Heatmap',
xaxis_title='Hour of Day',
yaxis_title='User ID',
xaxis=dict(tickmode='linear', dtick=1, range=[0, 23]), # Ensure x-axis covers all hours
template='plotly_white'
)
self.fig.update_traces(
hovertemplate="<br>".join([
"Hour: %{x}",
"Name: %{y}",
"Activity: %{z}",
])
)

@@ -0,0 +1,65 @@
import pandas as pd
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from .basePlotlyAnalysis import BasePlotlyAnalysis
from flask import current_app, url_for
class PlotlyLineActivityAllUsers(BasePlotlyAnalysis):
"""
Class for analyzing user activity trends over multiple days and generating an interactive line graph.
Attributes:
name (str): The name of the analysis.
description (str): A brief description of the analysis.
plot_filename (str): The filename for the output plot.
note (str): Additional notes for the analysis.
"""
name = "Activity Line Graph (All Users, Interactive)"
description = "This analysis shows the activity line graph for all users. The graph is interactive and can be used to explore the data."
plot_filename = "line_activity-all_users.html"
note = ""
def transform_data(self, df: pd.DataFrame) -> pd.DataFrame:
"""
Transform data for the line plot.
Parameters:
df (pd.DataFrame): The input DataFrame containing user activity data.
Returns:
pd.DataFrame: The transformed DataFrame with activity counts by hour.
"""
df['hour'] = df['timestamp'].dt.hour
df = df[df['was_active'] == True].pivot_table(index='name', columns='hour', values='was_active', aggfunc='sum', fill_value=0)
df['total_active_minutes'] = df.sum(axis=1)
df = df.sort_values(by='total_active_minutes', ascending=False).drop('total_active_minutes', axis=1)
cumulative_sum_row = df.cumsum().iloc[-1]
df.loc['Cumulative Sum'] = cumulative_sum_row
return df
def plot_data(self, df: pd.DataFrame):
"""
Generate interactive line graph for user activity throughout the day.
Parameters:
df (pd.DataFrame): The transformed DataFrame containing activity counts by hour.
"""
self.fig = make_subplots()
# Plot each user's activity
for index, row in df.iterrows():
if index == 'Cumulative Sum':
self.fig.add_trace(go.Scatter(x=row.index, y=row.values, mode='lines', name=index, line=dict(width=3, color='black'))) # Bold line for cumulative sum
else:
self.fig.add_trace(go.Scatter(x=row.index, y=row.values, mode='lines', name=index))
self.fig.update_layout(
title='User Activity Throughout the Day',
xaxis_title='Hour of Day',
yaxis_title='Activity Count',
legend_title='User',
legend=dict(x=1, y=1),
template='plotly_white'
)

@@ -0,0 +1,31 @@
import pandas as pd
from .base import BaseAnalysis
from flask import render_template_string
class GenerateStatistics(BaseAnalysis):
name = "Test Statistics (Placeholder)"
description = "Generates activity statistics grouped by hour."
def execute(self, df: pd.DataFrame):
df["hour"] = df["timestamp"].dt.hour
statistics = df.groupby("hour").size().reset_index(name="count")
# Convert statistics DataFrame to HTML
table_html = statistics.to_html(classes="table table-bordered table-striped")
# Wrap it in Bootstrap styling
html_content = render_template_string(
"""
<div class="card mt-3">
<div class="card-header">
<h4>Activity Statistics</h4>
</div>
<div class="card-body">
{{ table_html | safe }}
</div>
</div>
""",
table_html=table_html
)
return html_content

app/api.py Normal file
@@ -0,0 +1,230 @@
from flask import jsonify, request, Response, send_from_directory, current_app
import threading
import os
import glob
from datetime import datetime
import pandas as pd
from app.models import Scraper
from app.util import create_zip, delete_old_zips, tail
from app.config import load_config
from app.forms import ScrapingForm
from app.tasks import start_scraping_task, stop_scraping_task, get_redis
scraping_thread = None
scraper = None
scrape_lock = threading.Lock()
def register_api(app):
@app.route('/start_scraping', methods=['POST'])
def start_scraping():
form = ScrapingForm()
if form.validate_on_submit():
redis_client = get_redis()
faction_id = form.faction_id.data
# Check if scraping is already active
if redis_client.hget(f"scraper:{faction_id}", "scraping_active") == "1":
return jsonify({"status": "Scraping already in progress"})
# Convert config to a serializable dict with only needed values
config_dict = {
'DATA': {'DATA_DIR': current_app.config['DATA']['DATA_DIR']},
'DEFAULT': {'API_KEY': current_app.config['DEFAULT']['API_KEY']}
}
start_scraping_task.delay(
faction_id,
int(form.fetch_interval.data), # Ensure this is an int
int(form.run_interval.data), # Ensure this is an int
config_dict
)
return jsonify({"status": "Scraping started"})
return jsonify({"status": "Invalid form data"})
@app.route('/stop_scraping', methods=['POST'])
def stop_scraping():
redis_client = get_redis()
faction_id = redis_client.get("current_faction_id")
if not faction_id:
return jsonify({"status": "No active scraping session"})
stop_scraping_task.delay(faction_id)
return jsonify({"status": "Stopping scraping"})
@app.route('/logfile', methods=['GET'])
def logfile():
log_file_name = current_app.logger.handlers[0].baseFilename
page = int(request.args.get('page', 0)) # Page number
lines_per_page = int(request.args.get('lines_per_page', current_app.config['LOGGING']['VIEW_PAGE_LINES'])) # Lines per page
log_file_path = log_file_name # Path to the current log file
if not os.path.isfile(log_file_path):
current_app.logger.error("Log file not found")
return jsonify({"error": "Log file not found"}), 404
log_lines = list(tail(log_file_path, current_app.config['LOGGING']['VIEW_MAX_LINES']))
log_lines = log_lines[::-1] # Reverse the list
start = page * lines_per_page
end = start + lines_per_page
paginated_lines = log_lines[start:end] if start < len(log_lines) else []
return jsonify({
"log": paginated_lines,
"total_lines": len(log_lines),
"pages": (len(log_lines) + lines_per_page - 1) // lines_per_page,
"start_line": len(log_lines) - start
})
@app.route('/download_files', methods=['POST'])
def download_files():
delete_old_zips() # Clean up old zip files
file_paths = request.json.get('file_paths')
if not file_paths:
return jsonify({"error": "No files specified"}), 400
# Get the absolute path of the parent directory
parent_dir = os.path.abspath(os.path.join(app.root_path, os.pardir))
# Validate and correct file paths
valid_file_paths = []
for file_path in file_paths:
if file_path.startswith('/data/'):
corrected_path = file_path.lstrip('/')
full_path = os.path.join(parent_dir, corrected_path)
if os.path.isfile(full_path):
valid_file_paths.append(full_path)
elif file_path.startswith('/log/'):
corrected_path = file_path.lstrip('/')
full_path = os.path.join(parent_dir, corrected_path)
if os.path.isfile(full_path):
valid_file_paths.append(full_path)
if not valid_file_paths:
return jsonify({"error": "No valid files specified"}), 400
# Create a unique zip file name
zip_name = f"files_{datetime.now().strftime('%Y%m%d%H%M%S')}.zip"
zip_path = create_zip(valid_file_paths, zip_name, app)
# Log the directory and file path for debugging
current_app.logger.debug(f"Sending file from directory: temp, file: {zip_name}")
return download_tmp_file(zip_name)
@app.route('/delete_files', methods=['POST'])
def delete_files():
log_file_name = current_app.logger.handlers[0].baseFilename
file_paths = request.json.get('file_paths', [])
if not file_paths:
return jsonify({"error": "No files specified"}), 400
errors = []
data_dir = os.path.abspath(current_app.config['DATA']['DATA_DIR'])
log_dir = os.path.abspath(current_app.config['LOGGING']['LOG_DIR'])
for file_path in file_paths:
if file_path.startswith('/data/'):
full_file_path = os.path.join(data_dir, file_path[len('/data/'):])  # lstrip('/data/') would strip a character set, not the prefix
elif file_path.startswith('/log/'):
full_file_path = os.path.join(log_dir, file_path[len('/log/'):])
else:
errors.append({"file": file_path, "error": "File not in allowed directory"})
continue
# Check if the file is in either the logs or the data files folder
#if not (full_file_path.startswith(data_dir) or full_file_path.startswith(log_dir)):
# errors.append({"file": file_path, "error": "File not in allowed directory"})
# continue
# Check if it's the currently active log file
if full_file_path == log_file_name:
errors.append({"file": file_path, "error": "Cannot delete active log file."})
continue
# Check if it's an active data file
if scraper and scraper.data_file_name == full_file_path:
errors.append({"file": file_path, "error": "Cannot delete active data file."})
continue
if not os.path.isfile(full_file_path):
errors.append({"file": file_path, "error": "File not found"})
continue
try:
os.remove(full_file_path)
except Exception as e:
errors.append({"file": file_path, "error": str(e)})
if errors:
return jsonify({"errors": errors}), 207 # Multi-Status response
return jsonify({"success": True}), 200
@app.route('/data/<path:filename>')
def download_data_file(filename):
data_dir = os.path.abspath(current_app.config['DATA']['DATA_DIR'])
file_path = os.path.join(data_dir, filename)
return send_from_directory(directory=data_dir, path=filename, as_attachment=True)
@app.route('/log/<path:filename>')
def download_log_file(filename):
log_dir = os.path.abspath(current_app.config['LOGGING']['LOG_DIR'])
file_path = os.path.join(log_dir, filename)
return send_from_directory(directory=log_dir, path=filename, as_attachment=True)
@app.route('/tmp/<path:filename>')
def download_tmp_file(filename):
tmp_dir = os.path.abspath(current_app.config['TEMP']['TEMP_DIR'])
file_path = os.path.join(tmp_dir, filename)
return send_from_directory(directory=tmp_dir, path=filename, as_attachment=True)
@app.route('/config/lines_per_page')
def get_lines_per_page():
lines_per_page = current_app.config['LOGGING']['VIEW_PAGE_LINES']
return jsonify({"lines_per_page": lines_per_page})
@app.route('/scraping_status', methods=['GET'])
def scraping_status():
redis_client = get_redis()
current_faction_id = redis_client.get("current_faction_id")
if not current_faction_id:
return jsonify({"scraping_active": False})
scraping_active = redis_client.hget(f"scraper:{current_faction_id}", "scraping_active")
# If we have a faction_id but scraping is not active, clean up the stale state
if not scraping_active or scraping_active == "0":
redis_client.delete("current_faction_id")
return jsonify({"scraping_active": False})
return jsonify({
"scraping_active": True,
"faction_id": current_faction_id
})
@app.route('/scraping_get_end_time')
def scraping_get_end_time():
redis_client = get_redis()
current_faction_id = redis_client.get("current_faction_id")
if not current_faction_id:
return jsonify({"scraping_active": False})
end_time = redis_client.hget(f"scraper:{current_faction_id}", "end_time")
if not end_time:
return jsonify({"scraping_active": False})
return jsonify({
"end_time": end_time,
"faction_id": current_faction_id
})
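For reference, the two status endpoints above can be exercised from a small client script. A minimal sketch, assuming the app is served on localhost:5000 (host and port are placeholders):

# status_check.py - minimal sketch; assumes the app runs on localhost:5000
import requests

BASE_URL = "http://localhost:5000"  # placeholder host/port

status = requests.get(f"{BASE_URL}/scraping_status").json()
if status.get("scraping_active"):
    # Only ask for the end time when a scrape is actually running
    end = requests.get(f"{BASE_URL}/scraping_get_end_time").json()
    print(f"Faction {status['faction_id']} scraping until {end.get('end_time')}")
else:
    print("No scrape in progress")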

8
app/config.py Normal file
View File

@ -0,0 +1,8 @@
from configobj import ConfigObj
import os
def load_config():
config_path = os.path.join(os.path.dirname(__file__), '..', 'config.ini')
# Load config while preserving sections as nested dicts
return ConfigObj(config_path)
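Since load_config returns a ConfigObj, sections behave like nested dicts, but values come back as strings and need explicit conversion; a minimal usage sketch (section and key names match example_config.ini below):

# sketch: reading nested sections from the loaded config
from app.config import load_config

config = load_config()
data_dir = config['DATA']['DATA_DIR']                    # e.g. 'data/'
page_lines = int(config['LOGGING']['VIEW_PAGE_LINES'])   # ConfigObj returns strings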

15
app/filters.py Normal file
View File

@ -0,0 +1,15 @@
from flask import Blueprint, request, jsonify
from datetime import datetime
def register_filters(app):
@app.template_filter('datetimeformat')
def datetimeformat(value):
"""Convert datetime or timestamp to formatted string"""
if isinstance(value, datetime):
dt = value
else:
try:
dt = datetime.fromtimestamp(float(value))
except (ValueError, TypeError):
return str(value)
return dt.strftime('%Y-%m-%d %H:%M:%S')
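Once registered, the filter is available to every template, and it also accepts raw UNIX timestamps, falling back to str() for anything unparseable. A sketch of exercising it outside a template via the Jinja environment (the timestamp value is illustrative):

# sketch: calling the registered filter directly through app.jinja_env
from flask import Flask
from app.filters import register_filters

app = Flask(__name__)
register_filters(app)
fmt = app.jinja_env.filters['datetimeformat']
print(fmt(1740236141.0))   # numeric timestamps are accepted
print(fmt("not a date"))   # unparseable input falls back to str()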

9
app/forms.py Normal file
View File

@ -0,0 +1,9 @@
from flask_wtf import FlaskForm
from wtforms import StringField, IntegerField, SubmitField
from wtforms.validators import DataRequired
class ScrapingForm(FlaskForm):
faction_id = StringField('Faction ID', validators=[DataRequired()], default='9686')
fetch_interval = IntegerField('Fetch Interval (seconds)', validators=[DataRequired()], default=60)
run_interval = IntegerField('Run Interval (days)', validators=[DataRequired()], default=1)
submit = SubmitField('Start')
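FlaskForm pulls its data from the current request, so validating outside a view needs a request context; a minimal sketch with hypothetical values (SECRET_KEY is a placeholder and CSRF is disabled only for the demonstration):

# sketch: validating ScrapingForm outside a view; values are hypothetical
from flask import Flask
from app.forms import ScrapingForm

app = Flask(__name__)
app.config.update(SECRET_KEY="dev", WTF_CSRF_ENABLED=False)  # demo only

with app.test_request_context(method="POST", data={
    "faction_id": "9686", "fetch_interval": "60", "run_interval": "1"
}):
    form = ScrapingForm()
    print(form.validate(), form.fetch_interval.data)  # True 60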

34
app/logging_config.py Normal file
View File

@ -0,0 +1,34 @@
import logging
from logging.handlers import QueueHandler
from queue import Queue
import os
from datetime import datetime
from flask import current_app
def init_logger(config):
LOG_DIR = config.get('LOGGING', {}).get('LOG_DIR', 'log')
if not os.path.exists(LOG_DIR):
os.makedirs(LOG_DIR)
log_file_name = os.path.join(LOG_DIR, datetime.now().strftime('%Y-%m-%d-%H-%M') + '.log')
logger = logging.getLogger(__name__)
logger.setLevel(logging.DEBUG)
file_handler = logging.FileHandler(log_file_name, mode='w')
file_handler.setLevel(logging.DEBUG)
formatter = logging.Formatter('%(asctime)s - %(levelname)s: %(message)s',
datefmt='%m/%d/%Y %I:%M:%S %p')
file_handler.setFormatter(formatter)
logger.addHandler(file_handler)
log_queue = Queue()
queue_handler = QueueHandler(log_queue)
queue_handler.setLevel(logging.DEBUG)
logger.addHandler(queue_handler)
logger.debug("Logger initialized")
return logger
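The QueueHandler only enqueues records; something still has to drain log_queue. The standard pairing is logging.handlers.QueueListener; a sketch of what a consumer could look like, assuming the queue object is made accessible (init_logger currently keeps it local):

# sketch: draining the log queue with a QueueListener
import logging
from logging.handlers import QueueListener
from queue import Queue

log_queue = Queue()            # stand-in for the queue created in init_logger
console = logging.StreamHandler()
listener = QueueListener(log_queue, console, respect_handler_level=True)
listener.start()               # records put on the queue now reach the console
# ... application runs ...
listener.stop()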

194
app/models.py Normal file
View File

@ -0,0 +1,194 @@
from typing import List, Dict, Optional
import requests
import pandas as pd
import os
import time
from datetime import datetime, timedelta
from requests.exceptions import ConnectionError, Timeout, RequestException
import redis
import threading
from flask import current_app
class Scraper:
_instances = {} # Track all instances by faction_id
_lock = threading.Lock()
def __new__(cls, faction_id, *args, **kwargs):
with cls._lock:
# Stop any existing instance for this faction
if faction_id in cls._instances:
old_instance = cls._instances[faction_id]
old_instance.stop_scraping()
instance = super().__new__(cls)
cls._instances[faction_id] = instance
return instance
def __init__(self, faction_id, fetch_interval, run_interval, config):
# Only initialize if not already initialized
if not hasattr(self, 'faction_id'):
self.redis_client = redis.StrictRedis(
host='localhost', port=6379, db=0, decode_responses=True
)
self.faction_id = faction_id
self.fetch_interval = fetch_interval
self.run_interval = run_interval
self.API_KEY = config['DEFAULT']['API_KEY']
self.data_file_name = os.path.join(
config['DATA']['DATA_DIR'],
f"{faction_id}-{datetime.now().strftime('%Y-%m-%d-%H-%M')}.csv"
)
self.end_time = datetime.now() + timedelta(days=int(run_interval))
# Store scraper state in Redis
self.redis_client.hmset(f"scraper:{faction_id}", {
"faction_id": faction_id,
"fetch_interval": fetch_interval,
"run_interval": run_interval,
"end_time": self.end_time.isoformat(),
"data_file_name": self.data_file_name,
"scraping_active": "0",
"api_key": self.API_KEY
})
    @property
    def scraping_active(self):
        value = self.redis_client.hget(f"scraper:{self.faction_id}", "scraping_active")
        return value == "1"  # hget returns None once the hash has been cleaned up
@scraping_active.setter
def scraping_active(self, value):
self.redis_client.hset(f"scraper:{self.faction_id}", "scraping_active", "1" if value else "0")
def fetch_faction_data(self):
url = f"https://api.torn.com/faction/{self.faction_id}?selections=&key={self.API_KEY}"
response = requests.get(url)
if response.status_code == 200:
return response.json()
current_app.logger.warning(f"Failed to fetch faction data for faction ID {self.faction_id}. Response: {response.text}")
return None
def fetch_user_activity(self, user_id):
url = f"https://api.torn.com/user/{user_id}?selections=basic,profile&key={self.API_KEY}"
retries = 3
for attempt in range(retries):
try:
response = requests.get(url, timeout=10)
response.raise_for_status()
return response.json()
except ConnectionError as e:
current_app.logger.error(f"Connection error while fetching user activity for user ID {user_id}: {e}")
except Timeout as e:
current_app.logger.error(f"Timeout error while fetching user activity for user ID {user_id}: {e}")
except RequestException as e:
current_app.logger.error(f"Error while fetching user activity for user ID {user_id}: {e}")
if attempt < retries - 1:
current_app.logger.debug(f"Retrying {attempt + 1}/{retries} for user {user_id}")
time.sleep(2 ** attempt) # Exponential backoff
return None
def start_scraping(self) -> None:
"""Starts the scraping process until the end time is reached or stopped manually."""
self.scraping_active = True
current_app.logger.info(f"Starting scraping for faction ID {self.faction_id}")
current_app.logger.debug(f"Fetch interval: {self.fetch_interval}s, Run interval: {self.run_interval} days, End time: {self.end_time}")
MAX_FAILURES = 5
failure_count = 0
while datetime.now() < self.end_time and self.scraping_active:
current_app.logger.info(f"Fetching data at {datetime.now()}")
faction_data = self.fetch_faction_data()
if not faction_data or "members" not in faction_data:
current_app.logger.warning(f"No faction data found for ID {self.faction_id} (Failure {failure_count + 1}/{MAX_FAILURES})")
failure_count += 1
if failure_count >= MAX_FAILURES:
current_app.logger.error(f"Max failures reached ({MAX_FAILURES}). Stopping scraping.")
break
time.sleep(self.fetch_interval)
continue
current_app.logger.info(f"Fetched {len(faction_data['members'])} members for faction {self.faction_id}")
failure_count = 0 # Reset failure count on success
user_activity_data = self.process_faction_members(faction_data["members"])
self.save_data(user_activity_data)
current_app.logger.info(f"Data appended to {self.data_file_name}")
time.sleep(self.fetch_interval)
self.handle_scraping_end()
def process_faction_members(self, members: Dict[str, Dict]) -> List[Dict]:
"""Processes and retrieves user activity for all faction members."""
user_activity_data = []
for user_id in members.keys():
user_activity = self.fetch_user_activity(user_id)
if user_activity:
user_activity_data.append({
"user_id": user_id,
"name": user_activity.get("name", ""),
"last_action": user_activity.get("last_action", {}).get("timestamp", 0),
"status": user_activity.get("status", {}).get("state", ""),
"timestamp": datetime.now().timestamp(),
})
current_app.logger.info(f"Fetched data for user {user_id} ({user_activity.get('name', '')})")
else:
current_app.logger.warning(f"Failed to fetch data for user {user_id}")
return user_activity_data
def save_data(self, user_activity_data: List[Dict]) -> None:
"""Saves user activity data to a CSV file."""
if not user_activity_data:
current_app.logger.warning("No data to save.")
return
df = pd.DataFrame(user_activity_data)
df["last_action"] = pd.to_datetime(df["last_action"], unit="s")
df["timestamp"] = pd.to_datetime(df["timestamp"], unit="s")
file_exists = os.path.isfile(self.data_file_name)
try:
            df.to_csv(self.data_file_name, mode="a" if file_exists else "w", header=not file_exists, index=False)
current_app.logger.info(f"Data successfully saved to {self.data_file_name}")
except Exception as e:
current_app.logger.error(f"Error saving data to {self.data_file_name}: {e}")
def cleanup_redis_state(self):
"""Clean up all Redis state for this scraper instance"""
if hasattr(self, 'faction_id'):
self.redis_client.delete(f"scraper:{self.faction_id}")
current_id = self.redis_client.get("current_faction_id")
if current_id and current_id == str(self.faction_id):
self.redis_client.delete("current_faction_id")
# Remove from instances tracking
with self._lock:
if self.faction_id in self._instances:
del self._instances[self.faction_id]
def handle_scraping_end(self) -> None:
"""Handles cleanup and logging when scraping ends."""
if not self.scraping_active:
current_app.logger.warning(f"Scraping stopped manually at {datetime.now()}")
elif datetime.now() >= self.end_time:
current_app.logger.warning(f"Scraping stopped due to timeout at {datetime.now()} (Run interval: {self.run_interval} days)")
else:
current_app.logger.error(f"Unexpected stop at {datetime.now()}")
current_app.logger.info("Scraping completed.")
self.scraping_active = False
self.cleanup_redis_state()
def stop_scraping(self):
self.scraping_active = False
self.cleanup_redis_state()
current_app.logger.debug(f"Scraping stopped for faction {self.faction_id}")
def __del__(self):
"""Ensure Redis cleanup on object destruction"""
self.cleanup_redis_state()
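Because __new__ tracks instances per faction, constructing a second Scraper for the same faction stops and replaces the first. A sketch of the intended lifecycle, assuming a Flask app context and a local Redis server (the config dict mirrors example_config.ini):

# sketch: Scraper lifecycle; assumes an app context and a local Redis server
from app.models import Scraper

config = {"DEFAULT": {"API_KEY": "your_api_key"}, "DATA": {"DATA_DIR": "data/"}}
scraper = Scraper(faction_id="9686", fetch_interval=60, run_interval=1, config=config)
# Re-instantiating for the same faction stops the old instance first
scraper = Scraper(faction_id="9686", fetch_interval=30, run_interval=1, config=config)
scraper.stop_scraping()   # flips the Redis flag and cleans up state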

26
app/static/color_mode.js Normal file
View File

@ -0,0 +1,26 @@
document.addEventListener('DOMContentLoaded', () => {
const themeToggle = document.getElementById('bd-theme');
// Check if a theme preference is saved in localStorage
const savedTheme = localStorage.getItem('theme');
if (savedTheme === 'dark') {
themeToggle.checked = true;
document.documentElement.setAttribute('data-bs-theme', 'dark');
} else {
themeToggle.checked = false;
document.documentElement.setAttribute('data-bs-theme', 'light');
}
// Add event listener to toggle theme on checkbox change
themeToggle.addEventListener('change', () => {
if (themeToggle.checked) {
document.documentElement.setAttribute('data-bs-theme', 'dark');
localStorage.setItem('theme', 'dark');
} else {
document.documentElement.setAttribute('data-bs-theme', 'light');
localStorage.setItem('theme', 'light');
}
});
});

38
app/static/common.js Normal file
View File

@ -0,0 +1,38 @@
import { ScraperUtils } from './scraper_utils.js';
class Common {
constructor() {
this.utils = new ScraperUtils();
this.addEventListeners();
this.scheduleUpdates();
}
scheduleUpdates() {
// Ensure server time updates every minute but only after initial fetch
setTimeout(() => {
setInterval(() => this.utils.updateServerTime(), 60000);
}, 5000); // Delay first scheduled update to prevent duplicate initial request
}
addEventListeners() {
if (this.utils.stopButton) {
this.utils.stopButton.addEventListener('click', () => this.utils.checkScrapingStatus());
}
}
}
document.addEventListener('DOMContentLoaded', () => {
new Common();
});
window.checkAllCheckboxes = function(tableId, checkAllId) {
var table = document.getElementById(tableId);
var checkAll = document.getElementById(checkAllId);
var checkboxes = table.querySelectorAll('input[type="checkbox"]');
checkboxes.forEach(function(checkbox) {
if (!checkbox.disabled) {
checkbox.checked = checkAll.checked;
}
});
};

View File

@ -0,0 +1,96 @@
async function deleteFiles(filePaths) {
try {
const response = await fetch('/delete_files', {
method: 'POST',
headers: {
'Content-Type': 'application/json',
},
body: JSON.stringify({ file_paths: filePaths })
});
const data = await response.json();
if (data.success) {
alert('Files deleted successfully');
location.reload();
} else {
alert(`Error deleting files: ${JSON.stringify(data.errors)}`);
}
} catch (error) {
console.error('Error:', error);
alert('An error occurred while deleting files.');
}
}
function getSelectedFiles() {
return Array.from(document.querySelectorAll('input[name="fileCheckbox"]:checked'))
.map(checkbox => checkbox.value);
}
function deleteSelectedFiles() {
const selectedFiles = getSelectedFiles();
if (selectedFiles.length > 0) {
deleteFiles(selectedFiles);
} else {
alert('No files selected');
}
}
async function downloadSelectedFiles() {
const selectedFiles = getSelectedFiles();
if (selectedFiles.length === 0) {
alert('No files selected');
return;
}
try {
const response = await fetch('/download_files', {
method: 'POST',
headers: {
'Content-Type': 'application/json',
},
body: JSON.stringify({ file_paths: selectedFiles })
});
if (!response.ok) {
const errorData = await response.json();
throw new Error(errorData.error || 'Failed to download files.');
}
const blob = await response.blob();
if (blob.type !== 'application/zip') {
throw new Error('Received invalid ZIP file.');
}
const url = window.URL.createObjectURL(blob);
const a = document.createElement('a');
a.href = url;
a.download = 'files.zip';
document.body.appendChild(a);
a.click();
a.remove();
window.URL.revokeObjectURL(url);
} catch (error) {
console.error('Download error:', error);
alert(`Error: ${error.message}`);
}
}
function sortTable(columnIndex, tableId) {
const table = document.getElementById(tableId);
const tbody = table.querySelector('tbody');
const rows = Array.from(tbody.rows);
const isAscending = table.dataset.sortAsc === 'true';
rows.sort((rowA, rowB) => {
const cellA = rowA.cells[columnIndex].innerText.trim().toLowerCase();
const cellB = rowB.cells[columnIndex].innerText.trim().toLowerCase();
return cellA.localeCompare(cellB) * (isAscending ? 1 : -1);
});
// Toggle sorting order for next click
table.dataset.sortAsc = !isAscending;
// Reinsert sorted rows
rows.forEach(row => tbody.appendChild(row));
}

56
app/static/index.js Normal file
View File

@ -0,0 +1,56 @@
import { ScraperUtils } from './scraper_utils.js';
class ScraperApp {
constructor() {
this.utils = new ScraperUtils();
this.form = document.getElementById('scrapingForm');
this.stopButton = document.getElementById('stopButton');
this.startButton = document.getElementById('startButton');
this.init();
}
init() {
this.utils.checkScrapingStatus();
this.addEventListeners();
}
async startScraping(event) {
event.preventDefault(); // Prevent default form submission
const formData = new FormData(this.form);
try {
const response = await fetch('/start_scraping', {
method: 'POST',
body: formData
});
const data = await response.json();
if (data.status === "Scraping started") {
this.utils.checkScrapingStatus(); // Update UI
}
} catch (error) {
console.error('Error starting scraping:', error);
}
}
async stopScraping() {
try {
const response = await fetch('/stop_scraping', {
method: 'POST'
});
const data = await response.json();
if (data.status === "Scraping stopped") {
this.utils.checkScrapingStatus(); // Update UI
}
} catch (error) {
console.error('Error stopping scraping:', error);
}
}
addEventListeners() {
this.form.addEventListener('submit', (event) => this.startScraping(event));
this.stopButton.addEventListener('click', () => this.stopScraping());
}
}
document.addEventListener('DOMContentLoaded', () => {
new ScraperApp();
});

97
app/static/log_viewer.js Normal file
View File

@ -0,0 +1,97 @@
class LogViewerApp {
constructor() {
this.logsElement = document.getElementById('logs');
this.prevPageButton = document.getElementById('prevPage');
this.nextPageButton = document.getElementById('nextPage');
this.pageInfo = document.getElementById('pageInfo');
this.currentPage = 0;
this.linesPerPage = null;
this.autoRefreshInterval = null;
this.init();
}
async init() {
await this.fetchConfig();
await this.checkScrapingStatus();
this.addEventListeners();
}
async fetchConfig() {
try {
const response = await fetch('/config/lines_per_page');
const data = await response.json();
this.linesPerPage = data.lines_per_page;
this.fetchLogs(this.currentPage);
} catch (error) {
console.error('Error fetching config:', error);
}
}
async fetchLogs(page) {
try {
const response = await fetch(`/logfile?page=${page}&lines_per_page=${this.linesPerPage}`);
const data = await response.json();
if (data.error) {
this.logsElement.textContent = data.error;
} else {
this.logsElement.innerHTML = data.log.map((line, index) => {
const lineNumber = data.start_line - index;
return `<span class="line-number">${lineNumber}</span> ${line}`;
}).join('');
this.updatePagination(data.total_lines);
}
} catch (error) {
console.error('Error fetching logs:', error);
}
}
updatePagination(totalLines) {
this.prevPageButton.disabled = this.currentPage === 0;
this.nextPageButton.disabled = (this.currentPage + 1) * this.linesPerPage >= totalLines;
this.pageInfo.textContent = `Page ${this.currentPage + 1} of ${Math.ceil(totalLines / this.linesPerPage)}`;
}
startAutoRefresh() {
this.autoRefreshInterval = setInterval(() => this.fetchLogs(this.currentPage), 5000);
}
stopAutoRefresh() {
clearInterval(this.autoRefreshInterval);
}
async checkScrapingStatus() {
try {
const response = await fetch('/scraping_status');
const data = await response.json();
if (data.scraping_active) {
this.startAutoRefresh();
} else {
this.stopAutoRefresh();
}
this.fetchLogs(this.currentPage);
} catch (error) {
console.error('Error checking scraping status:', error);
}
}
addEventListeners() {
this.prevPageButton.addEventListener('click', () => {
if (this.currentPage > 0) {
this.currentPage--;
this.fetchLogs(this.currentPage);
}
});
this.nextPageButton.addEventListener('click', () => {
this.currentPage++;
this.fetchLogs(this.currentPage);
});
}
}
// Initialize the application when DOM is fully loaded
document.addEventListener('DOMContentLoaded', () => new LogViewerApp());

203
app/static/scraper_utils.js Normal file
View File

@ -0,0 +1,203 @@
export class ScraperUtils {
constructor() {
this.activityIndicator = document.getElementById('activity_indicator');
this.endTimeElement = document.getElementById('end_time');
this.serverTimeElement = document.getElementById('server_time');
this.timeLeftElement = document.getElementById('time-left'); // New element for countdown
this.stopButton = document.getElementById('stopButton');
this.startButton = document.getElementById('startButton');
this.statusContainer = document.getElementById('status_container');
this.loadingIndicator = document.getElementById('loading_indicator');
this.statusContent = document.querySelectorAll('#status_content');
this.serverTime = null;
this.endTime = null;
        this.pollInterval = null; // Handle for the status polling timer
this.init();
}
async init() {
this.showLoadingIndicator();
try {
await Promise.all([
this.updateServerTime(),
this.checkScrapingStatus()
]);
} catch (error) {
console.error("Error during initialization:", error);
}
// Start polling for status updates
this.startPolling();
// Only start the clock and wait for end time if scraping is active
if (this.activityIndicator.textContent === 'Active') {
if (!this.endTime) {
try {
await this.fetchEndTime();
} catch (error) {
console.error("Error fetching end time:", error);
}
}
if (this.serverTime && this.endTime) {
this.startClock();
}
}
// Hide loading indicator regardless of scraping status
this.hideLoadingIndicator();
}
startPolling() {
// Poll every 2 seconds
this.pollInterval = setInterval(async () => {
await this.checkScrapingStatus();
}, 2000);
}
stopPolling() {
if (this.pollInterval) {
clearInterval(this.pollInterval);
this.pollInterval = null;
}
}
showLoadingIndicator() {
this.statusContainer.classList.remove('d-none');
this.loadingIndicator.classList.remove('d-none');
this.statusContent.forEach(element => element.classList.add('d-none'));
}
hideLoadingIndicator() {
this.loadingIndicator.classList.add('d-none');
this.statusContent.forEach(element => element.classList.remove('d-none'));
}
async checkScrapingStatus() {
try {
const response = await fetch('/scraping_status');
const data = await response.json();
if (data.scraping_active) {
if (this.startButton) this.startButton.disabled = true;
if (this.stopButton) this.stopButton.disabled = false;
this.activityIndicator.classList.remove('text-bg-danger');
this.activityIndicator.classList.add('text-bg-success');
this.activityIndicator.textContent = 'Active';
// Fetch end time if we don't have it yet
if (!this.endTime) {
await this.fetchEndTime();
}
this.endTimeElement.classList.remove('d-none');
this.timeLeftElement.classList.remove('d-none');
} else {
if (this.startButton) this.startButton.disabled = false;
if (this.stopButton) this.stopButton.disabled = true;
this.activityIndicator.classList.remove('text-bg-success');
this.activityIndicator.classList.add('text-bg-danger');
this.activityIndicator.textContent = 'Inactive';
this.endTimeElement.classList.add('d-none');
this.timeLeftElement.classList.add('d-none');
// Reset end time when inactive
this.endTime = null;
}
} catch (error) {
console.error('Error checking scraping status:', error);
}
}
async updateServerTime() {
try {
const response = await fetch('/server_time');
const data = await response.json();
this.serverTime = new Date(data.server_time.replace(' ', 'T'));
this.serverTimeElement.textContent = `Server Time (TCT): ${this.formatDateToHHMMSS(this.serverTime)}`;
} catch (error) {
console.error('Error fetching server time:', error);
}
}
async fetchEndTime() {
if (this.endTime) return;
try {
const response = await fetch('/scraping_get_end_time');
const data = await response.json();
if (data.end_time) {
this.endTime = new Date(data.end_time);
this.endTimeElement.textContent = `Running until ${this.formatDateToYYYYMMDDHHMMSS(this.endTime)} TCT`;
}
} catch (error) {
this.endTimeElement.textContent = 'Error fetching end time';
console.error('Error fetching end time:', error);
}
}
startClock() {
const updateClock = () => {
if (this.serverTime) {
this.serverTime.setSeconds(this.serverTime.getSeconds() + 1);
this.serverTimeElement.textContent = `Server Time (TCT): ${this.formatDateToHHMMSS(this.serverTime)}`;
}
if (this.endTime && this.serverTime) {
const timeLeft = this.endTime - this.serverTime;
this.timeLeftElement.textContent = `Time Left: ${timeLeft > 0 ? this.formatMillisecondsToHHMMSS(timeLeft) : '00:00:00'}`;
}
};
// Immediately update the clock
updateClock();
// Continue updating every second
setInterval(updateClock, 1000);
}
formatDateToYYYYMMDDHHMMSS(date) {
if (!(date instanceof Date) || isNaN(date)) {
console.error('Invalid date:', date);
return '';
}
return `${date.getFullYear()}-${String(date.getMonth() + 1).padStart(2, '0')}-${String(date.getDate()).padStart(2, '0')} ` +
`${String(date.getHours()).padStart(2, '0')}:${String(date.getMinutes()).padStart(2, '0')}:${String(date.getSeconds()).padStart(2, '0')}`;
}
formatDateToHHMMSS(date) {
if (!(date instanceof Date) || isNaN(date)) {
console.error('Invalid date:', date);
return '';
}
return `${String(date.getHours()).padStart(2, '0')}:${String(date.getMinutes()).padStart(2, '0')}:${String(date.getSeconds()).padStart(2, '0')}`;
}
formatMillisecondsToHHMMSS(ms) {
const totalSeconds = Math.floor(ms / 1000);
const hours = Math.floor(totalSeconds / 3600);
const minutes = Math.floor((totalSeconds % 3600) / 60);
const seconds = totalSeconds % 60;
return `${String(hours).padStart(2, '0')}:${String(minutes).padStart(2, '0')}:${String(seconds).padStart(2, '0')}`;
}
    // Stop background polling when the page is torn down
cleanup() {
this.stopPolling();
}
}
// Add event listener for page unload
window.addEventListener('unload', () => {
if (window.scraperUtils) {
window.scraperUtils.cleanup();
}
});

217
app/static/style.css Normal file
View File

@ -0,0 +1,217 @@
/* LIGHT MODE (default) */
:root {
--bs-body-bg: #f8f9fa; /* Light background */
--bs-body-color: #212529; /* Dark text */
--bs-primary: #007bff;
--bs-primary-bg-subtle: #cce5ff;
--bs-primary-border-subtle: #80bdff;
--bs-primary-text-emphasis: #004085;
--bs-secondary: #6c757d;
--bs-secondary-bg-subtle: #e2e3e5;
--bs-secondary-border-subtle: #c8cbcf;
--bs-secondary-text-emphasis: #383d41;
--bs-success: #198754;
--bs-success-bg-subtle: #d4edda;
--bs-success-border-subtle: #a3cfbb;
--bs-success-text-emphasis: #155724;
--bs-danger: #dc3545;
--bs-danger-bg-subtle: #f8d7da;
--bs-danger-border-subtle: #f1aeb5;
--bs-danger-text-emphasis: #721c24;
--bs-warning: #ffc107;
--bs-warning-bg-subtle: #fff3cd;
--bs-warning-border-subtle: #ffeeba;
--bs-warning-text-emphasis: #856404;
--bs-info: #17a2b8;
--bs-info-bg-subtle: #d1ecf1;
--bs-info-border-subtle: #bee5eb;
--bs-info-text-emphasis: #0c5460;
--bs-light: #f8f9fa;
--bs-light-bg-subtle: #ffffff;
--bs-light-border-subtle: #d6d8db;
--bs-light-text-emphasis: #6c757d;
--bs-dark: #343a40;
--bs-dark-bg-subtle: #212529;
--bs-dark-border-subtle: #1d2124;
--bs-dark-text-emphasis: #ffffff;
--bs-border-color: #dee2e6; /* Default border color */
}
/* DARK MODE */
[data-bs-theme="dark"] {
--bs-body-bg: #121212;
--bs-body-color: #e9ecef;
--bs-primary: #1e90ff;
--bs-primary-bg-subtle: #1c2b36;
--bs-primary-border-subtle: #374b58;
--bs-primary-text-emphasis: #a0c4ff;
--bs-secondary: #adb5bd;
--bs-secondary-bg-subtle: #2d3238;
--bs-secondary-border-subtle: #3e444a;
--bs-secondary-text-emphasis: #ced4da;
--bs-success: #00c851;
--bs-success-bg-subtle: #1b3425;
--bs-success-border-subtle: #3b6147;
--bs-success-text-emphasis: #b9f6ca;
--bs-danger: #ff4444;
--bs-danger-bg-subtle: #381717;
--bs-danger-border-subtle: #633030;
--bs-danger-text-emphasis: #ffcccb;
--bs-warning: #ffbb33;
--bs-warning-bg-subtle: #3a2b19;
--bs-warning-border-subtle: #67512e;
--bs-warning-text-emphasis: #ffd700;
--bs-info: #33b5e5;
--bs-info-bg-subtle: #182e38;
--bs-info-border-subtle: #305564;
--bs-info-text-emphasis: #66d1ff;
--bs-light: #343a40;
--bs-light-bg-subtle: #2c3137;
--bs-light-border-subtle: #464b50;
--bs-light-text-emphasis: #e9ecef;
--bs-dark: #ffffff;
--bs-dark-bg-subtle: #f8f9fa;
--bs-dark-border-subtle: #e9ecef;
--bs-dark-text-emphasis: #121212;
--bs-border-color: #495057;
}
[data-bs-theme="dark"] .shadow {
box-shadow: var(--bs-box-shadow) !important;
}
[data-bs-theme="dark"] .shadow-sm {
box-shadow: var(--bs-box-shadow-sm) !important;
}
[data-bs-theme="dark"] .shadow-lg {
box-shadow: var(--bs-box-shadow-lg) !important;
}
/* Dark Mode Toggle Button */
/* Hide the default checkbox */
#color-mode-toggle input[type=checkbox] {
height: 0;
width: 0;
visibility: hidden;
}
/* Style the switch */
#color-mode-toggle label {
cursor: pointer;
width: 70px;
height: 30px;
background: grey;
display: flex;
align-items: center;
justify-content: space-between;
border-radius: 30px;
position: relative;
padding: 5px 15px;
box-shadow: inset 0 0 5px rgba(0, 0, 0, 0.3);
}
/* The moving toggle circle */
#color-mode-toggle label:after {
content: '';
position: absolute;
top: 5px;
left: 5px;
width: 20px;
height: 20px;
background: white;
border-radius: 50%;
transition: 0.3s;
}
/* Sun and Moon Icons */
.icon {
font-size: 15px;
position: absolute;
transition: 0.3s;
}
/* Position Sun on the left */
.sun {
    left: 10px;
    color: var(--sun-color, var(--bs-dark)); /* fall back if --sun-color is unset */
}
/* Position Moon on the right */
.moon {
    right: 10px;
    color: var(--sun-color, var(--bs-light)); /* fall back if --sun-color is unset */
}
/* Move the toggle circle when checked */
#color-mode-toggle input:checked + label {
background: var(--bs-light);
}
#color-mode-toggle input:checked + label:after {
left: calc(100% - 25px);
background: var(--bs-dark);
}
/* Hide the moon when in dark mode */
#color-mode-toggle input:checked + label .sun {
    opacity: 1;
}
#color-mode-toggle input:checked + label .moon {
    opacity: 0;
}
/* Hide the sun when in light mode */
#color-mode-toggle input:not(:checked) + label .moon {
    opacity: 1;
}
#color-mode-toggle input:not(:checked) + label .sun {
    opacity: 0;
}
.line-number {
display: inline-block;
width: 30px;
text-align: right;
margin-right: 10px;
color: #888;
}

93
app/tasks.py Normal file
View File

@ -0,0 +1,93 @@
from celery import Celery
from app.models import Scraper
import redis
from datetime import timedelta
from flask import current_app
def create_celery():
celery = Celery('tasks', broker='redis://localhost:6379/0')
celery.conf.update(
task_serializer='json',
accept_content=['json'],
result_serializer='json',
timezone='UTC'
)
return celery
def init_celery(app):
"""Initialize Celery with Flask app context"""
celery = create_celery()
celery.conf.update(app.config)
class ContextTask(celery.Task):
def __call__(self, *args, **kwargs):
with app.app_context():
return self.run(*args, **kwargs)
celery.Task = ContextTask
return celery
celery = create_celery() # This will be initialized properly in app/__init__.py
def get_redis():
return redis.StrictRedis(
host='localhost',
port=6379,
db=0,
decode_responses=True
)
@celery.task
def start_scraping_task(faction_id, fetch_interval, run_interval, config_dict):
"""
Start scraping task with serializable parameters
Args:
faction_id: ID of the faction to scrape
fetch_interval: Interval between fetches in seconds
run_interval: How long to run the scraper in days
config_dict: Dictionary containing configuration
"""
try:
redis_client = get_redis()
# Set current faction ID at task start
redis_client.set("current_faction_id", str(faction_id))
scraper = Scraper(
faction_id=faction_id,
fetch_interval=int(fetch_interval),
run_interval=int(run_interval),
config=config_dict
)
scraper.start_scraping()
return {"status": "success"}
except Exception as e:
# Clean up Redis state on error
redis_client = get_redis()
redis_client.delete("current_faction_id")
return {"status": "error", "message": str(e)}
@celery.task
def stop_scraping_task(faction_id):
"""Stop scraping task and clean up Redis state"""
try:
redis_client = get_redis()
# Clean up Redis state
redis_client.hset(f"scraper:{faction_id}", "scraping_active", "0")
redis_client.delete(f"scraper:{faction_id}")
# Clean up current_faction_id if it matches
current_id = redis_client.get("current_faction_id")
if current_id and current_id == str(faction_id):
redis_client.delete("current_faction_id")
        # The scraping loop polls "scraping_active" and exits on its own.
        # (Revoking celery.current_task here would terminate this stop task,
        # not the scraping task, so no revoke is issued.)
return {"status": "success", "message": f"Stopped scraping for faction {faction_id}"}
except Exception as e:
return {"status": "error", "message": str(e)}

100
app/templates/analyze.html Normal file
View File

@ -0,0 +1,100 @@
{% extends 'base.html' %}
{% block content %}
<section class="container-fluid d-flex justify-content-center">
<div class="container-md my-5 mb-3 mx-2 shadow-lg p-4">
<div class="container-sm">
<div class="row">
<div class="col">
<h2>User Activity Distribution</h2>
</div>
</div>
<div class="row">
<div class="col">
<form method="POST" action="{{ url_for('views.analyze') }}">
<!-- Dropdown for selecting data file -->
<label for="data_file" class="form-label">Choose Data File:</label>
<select name="data_file" id="data_file" class="form-select">
{% if data_files %}
{% for file in data_files %}
<option value="{{ file }}" {% if file == selected_file %}selected{% endif %}>{{ file.split('/')[-1] }}</option>
{% endfor %}
{% else %}
<option disabled>No CSV files found</option>
{% endif %}
</select>
<!-- Analysis Selection Table -->
<label for="analyses" class="form-label">Select Analyses:</label>
<table id="analysesTable" class="table table-bordered table-striped">
<thead>
<tr>
<th width="2%"><input type="checkbox" id="checkAllAnalyses" class="form-check-input" onclick="checkAllCheckboxes('analysesTable', 'checkAllAnalyses')"></th>
<th>Analysis Name</th>
<th>Description</th>
</tr>
</thead>
<tbody>
{% if analyses %}
{% for analysis in analyses %}
<tr>
<td>
<input class="form-check-input" type="checkbox" name="analyses" value="{{ analysis.name }}"
{% if analysis.name in selected_analyses %}checked{% endif %}>
</td>
<td>{{ analysis.name }}</td>
<td>{{ analysis.description }}</td>
</tr>
{% endfor %}
{% else %}
<tr>
<td colspan="3" class="text-center">No analyses available</td>
</tr>
{% endif %}
</tbody>
</table>
<button type="submit" class="btn btn-primary mt-3">Run Analyses</button>
</form>
</div>
</div>
{% include 'includes/error.html' %}
</div>
</div>
</section>
{% if plot_url %}
<section class="container-fluid d-flex justify-content-center">
<div class="container-md my-1 mx-2 shadow-lg p-4">
<div class="container-sm">
<div class="row mt-4">
<div class="col">
<h4>Selected File: {{ selected_file.split('/')[-1] }}</h4>
<img src="{{ plot_url }}" class="img-fluid rounded shadow" alt="User Activity Distribution">
</div>
</div>
</div>
</div>
</section>
{% endif %}
{% if results %}
{% for analysis_name, result in results.items() %}
<section class="container-fluid d-flex justify-content-center">
<div class="container-md my-2 mx-2 shadow p-4 pt-0">
<div class="container-sm">
<div class="results mt-4">
<h3>{{ analysis_name }}</h3>
<div class="analysis-output">
{{ result | safe }} <!-- This allows HTML output -->
</div>
</div>
</div>
</div>
</section>
{% endfor %}
{% endif %}
{% endblock %}

32
app/templates/base.html Normal file
View File

@ -0,0 +1,32 @@
<!-- app/templates/base.html -->
<!DOCTYPE html>
<html lang="en">
<head>
{% block head %}
<meta charset="UTF-8">
<title>TornActivityTracker{% block title %}{% endblock %}</title>
<meta charset="utf-8">
<meta name="viewport" content="width=device-width, initial-scale=1, shrink-to-fit=no">
{% block styles %}
{{ bootstrap.load_css() }}
<link rel="stylesheet" href="{{url_for('static', filename='style.css')}}">
<link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/bootstrap-icons/font/bootstrap-icons.css">
{% endblock %}
{% endblock %}
</head>
<body>
<header>
{% include 'includes/navigation.html' %}
</header>
<main>
{% block content %}
{% endblock %}
</main>
<footer>
{% include 'includes/footer.html' %}
</footer>
{% block scripts %}
{% include 'includes/scripts.html' %}
{% endblock %}
</body>
</html>

View File

@ -0,0 +1,102 @@
{% extends 'base.html' %}
{% block content %}
<section class="container-fluid d-flex justify-content-center">
<div class="container-md my-5 mx-2 shadow-lg p-4 ">
<div class="container-sm">
<div class="row">
<div class="col">
<h2>Data Files</h2>
</div>
<div class="col text-end">
<div class="btn-group btn-group-sm">
<button class="btn btn-warning" onclick="deleteSelectedFiles()">Delete Selected Files</button>
<button class="btn btn-success" onclick="downloadSelectedFiles()">Download Selected Files</button>
</div>
</div>
</div>
</div>
<table id="dataFilesTable" class="table table-striped table-bordered table-hover">
<thead>
<tr>
<th width="2%"><input type="checkbox" class="form-check-input" id="checkAllData" onclick="checkAllCheckboxes('dataFilesTable', 'checkAllData')"></th>
<th onclick="sortTable(1, 'dataFilesTable')">File Name</th>
<th onclick="sortTable(2, 'dataFilesTable')">Last Modified</th>
<th onclick="sortTable(3, 'dataFilesTable')">Created</th>
<th onclick="sortTable(4, 'dataFilesTable')">Size</th>
<th>Action</th>
<th>Status</th>
</tr>
</thead>
<tbody>
{% for file in files.data %}
<tr>
<td><input type="checkbox" name="fileCheckbox" class="form-check-input" value="{{ url_for('download_data_file', filename=file.name_display) }}"{{ ' disabled' if file.active }}></td>
<td><a href="{{ url_for('download_data_file', filename=file.name_display) }}" target="_blank">{{ file.name_display }}</a></td>
<td>{{ file.last_modified | datetimeformat }}</td>
<td>{{ file.created | datetimeformat }}</td>
<td>{{ file.size }}</td>
<td>
<button class="btn btn-sm btn-warning" onclick="deleteFiles(['{{ file.name }}'])"{{ ' disabled' if file.active }}>Delete</button>
</td>
<td>
<span id="status-{{ file.name_display }}" class="badge {{ 'bg-danger' if file.active else 'bg-success' }}">
{{ 'In Use' if file.active else 'Available' }}
</span>
</td>
</tr>
{% endfor %}
</tbody>
</table>
</div>
</section>
<section class="container-fluid d-flex justify-content-center">
<div class="container-md my-5 mx-2 shadow-lg p-4 ">
<div class="container-sm">
<div class="row">
<div class="col">
<h2>Log Files</h2>
</div>
<div class="col">
<div class="btn-group btn-group-sm">
<button class="btn btn-warning" onclick="deleteSelectedFiles()">Delete Selected Files</button>
<button class="btn btn-success" onclick="downloadSelectedFiles()">Download Selected Files</button>
</div>
</div>
</div>
</div>
<table id="logFilesTable" class="table table-striped table-bordered table-hover">
<thead>
<tr>
<th width="2%"><input type="checkbox" id="checkAllLog" class="form-check-input" onclick="checkAllCheckboxes('logFilesTable', 'checkAllLog')"></th>
<th onclick="sortTable(1, 'logFilesTable')">File Name</th>
<th onclick="sortTable(2, 'logFilesTable')">Last Modified</th>
<th onclick="sortTable(3, 'logFilesTable')">Created</th>
<th onclick="sortTable(4, 'logFilesTable')">Size</th>
<th>Action</th>
<th>Status</th>
</tr>
</thead>
<tbody>
{% for file in files.log %}
<tr>
<td><input type="checkbox" name="fileCheckbox" class="form-check-input" value="{{ url_for('download_log_file', filename=file.name_display) }}"{{ ' disabled' if file.active }}></td>
<td><a href="{{ url_for('download_log_file', filename=file.name_display) }}" target="_blank">{{ file.name_display }}</a></td>
<td>{{ file.last_modified | datetimeformat }}</td>
<td>{{ file.created | datetimeformat }}</td>
<td>{{ file.size }}</td>
<td>
<button class="btn btn-sm btn-warning" onclick="deleteFiles(['{{ file.name }}'])"{{ ' disabled' if file.active }}>Delete</button>
</td>
<td>
<span id="status-{{ file.name_display }}" class="badge {{ 'bg-danger' if file.active else 'bg-success' }}">
{{ 'In Use' if file.active else 'Available' }}
</span>
</td>
</tr>
{% endfor %}
</tbody>
</table>
</div>
</section>
<script src="{{url_for('.static', filename='download_results.js')}}"></script>
{% endblock %}

View File

@ -0,0 +1,6 @@
{% if error %}
<div class="alert alert-danger alert-dismissible fade show mt-3" role="alert">
<strong>Error:</strong> {{ error }}
<button type="button" class="btn-close" data-bs-dismiss="alert" aria-label="Close"></button>
</div>
{% endif %}

View File

View File

@ -0,0 +1,39 @@
<nav class="navbar navbar-nav navbar-expand-md bg-primary">
<div class="container-fluid">
<a class="navbar-brand" href="/">{{ main_config.APP_TITLE }}</a>
{% from 'bootstrap4/nav.html' import render_nav_item %}
{{ render_nav_item('views.analyze', 'Data Visualization') }}
{{ render_nav_item('download_results', 'Files') }}
{{ render_nav_item('log_viewer', 'Logs') }}
<div class="d-flex" id="color-mode-toggle">
<input type="checkbox" id="bd-theme" />
<label for="bd-theme">
<span class="icon sun"><i class="bi bi-brightness-high"></i></span>
<span class="icon moon"><i class="bi bi-moon-stars"></i></span>
</label>
</div>
</div>
</nav>
<div id="status_container" class="container-fluid d-flex justify-content-center">
<div class="container-md my-1 shadow p-4 pb-0 m-1 w-50" id="status_badges">
<div id="loading_indicator" class="alert alert-info">Loading...</div>
<div id="status_content">
<div class="row justify-content-center">
<div class="col col-6 p-1">
<div id="activity_indicator" class="alert alert-danger fw-bolder">Inactive</div>
</div>
<div class="col col-6 p-1">
<div id="server_time" class="alert alert-primary">Server Time (TCT):</div>
</div>
</div>
<div class="row justify-content-center">
<div class="col col-6 p-1">
<div id="end_time" class="alert alert-info">Running until:</div>
</div>
<div class="col p-1">
<div id="time-left" class="alert alert-info">Time Left:</div>
</div>
</div>
</div>
</div>
</div>

View File

@ -0,0 +1,3 @@
{{ bootstrap.load_js() }}
<script src="{{url_for('static', filename='color_mode.js')}}"></script>
<script type="module" src="{{ url_for('static', filename='common.js') }}"></script>

34
app/templates/index.html Normal file
View File

@ -0,0 +1,34 @@
{% extends 'base.html' %}
{% block content %}
<section id="scrapingFormContainer" class="container-fluid d-flex justify-content-center">
<div class="container-md my-5 mx-2 shadow-lg p-4 ">
<div class="row">
<div class="col">
<h2>Scraper</h2>
</div>
<div class="col text-end">
</div>
</div>
<form id="scrapingForm" method="POST" action="{{ url_for('start_scraping') }}">
{{ form.hidden_tag() }}
<div class="form-group">
{{ form.faction_id.label(class="form-control-label") }}
{{ form.faction_id(class="form-control") }}
</div>
<div class="form-group">
{{ form.fetch_interval.label(class="form-control-label") }}
{{ form.fetch_interval(class="form-control") }}
</div>
<div class="form-group">
{{ form.run_interval.label(class="form-control-label") }}
{{ form.run_interval(class="form-control") }}
</div>
</form>
<div class="btn-group btn-group m-2" role="group">
{{ form.submit(class="btn btn-success", type="submit", id="startButton", form="scrapingForm") }}
<button class="btn btn-warning" type="button" id="stopButton">Stop</button>
</div>
</div>
</section>
<script type="module" src="{{url_for('static', filename='index.js')}}"></script>
{% endblock content %}

View File

@ -0,0 +1,22 @@
{% extends 'base.html' %}
{% block content %}
<section id="resultsContainer" class="container-fluid d-flex justify-content-center">
<div class="container-md my-5 mx-2 shadow-lg p-4" style="height: 500px;">
<div class="row">
<div class="col-8">
<h2>Logs</h2>
<pre id="logs" class="pre-scrollable" style="height: 350px; overflow:scroll;"><code></code></pre>
<div class="btn-group btn-group-sm">
<button class="btn btn-primary" id="prevPage">Previous</button>
<button class="btn btn-primary" id="pageInfo" disabled>Page 1 of 1</button>
<button class="btn btn-primary" id="nextPage">Next</button>
</div>
</div>
<div class="col">
<h2>Stats</h2>
</div>
</div>
</div>
</section>
<script src="{{url_for('static', filename='log_viewer.js')}}"></script>
{% endblock content %}

82
app/util.py Normal file
View File

@ -0,0 +1,82 @@
import os
import zipfile
from datetime import datetime, timedelta
from flask import current_app
from app.config import load_config
def create_zip(file_paths, zip_name, app):
temp_dir = os.path.abspath(app.config['TEMP']['TEMP_DIR'])
zip_path = os.path.join(temp_dir, zip_name)
with zipfile.ZipFile(zip_path, 'w') as zipf:
for file_path in file_paths:
zipf.write(file_path, os.path.basename(file_path))
print(f"Zip file created: {zip_path}")
return zip_path
def delete_old_zips():
temp_dir = os.path.abspath(current_app.config['TEMP']['TEMP_DIR'])
now = datetime.now()
for filename in os.listdir(temp_dir):
if filename.endswith('.zip'):
file_path = os.path.join(temp_dir, filename)
if now - datetime.fromtimestamp(os.path.getmtime(file_path)) > timedelta(hours=1):
os.remove(file_path)
def tail(filename, n):
    """Yield the last n lines of filename, reading the file backwards in pages."""
    stat = os.stat(filename)
n = int(n)
if stat.st_size == 0 or n == 0:
yield ''
return
page_size = int(current_app.config['LOGGING']['TAIL_PAGE_SIZE'])
offsets = []
count = _n = n if n >= 0 else -n
last_byte_read = last_nl_byte = starting_offset = stat.st_size - 1
with open(filename, 'r') as f:
while count > 0:
starting_byte = last_byte_read - page_size
if last_byte_read == 0:
offsets.append(0)
break
elif starting_byte < 0:
f.seek(0)
text = f.read(last_byte_read)
else:
f.seek(starting_byte)
text = f.read(page_size)
for i in range(-1, -1*len(text)-1, -1):
last_byte_read -= 1
if text[i] == '\n':
last_nl_byte = last_byte_read
starting_offset = last_nl_byte + 1
offsets.append(starting_offset)
count -= 1
offsets = offsets[len(offsets)-_n:]
offsets.reverse()
with open(filename, 'r') as f:
for i, offset in enumerate(offsets):
f.seek(offset)
if i == len(offsets) - 1:
yield f.read()
else:
bytes_to_read = offsets[i+1] - offset
yield f.read(bytes_to_read)
def get_size(path):
size = os.path.getsize(path)
if size < 1024:
return f"{size} bytes"
elif size < pow(1024,2):
return f"{round(size/1024, 2)} KB"
elif size < pow(1024,3):
return f"{round(size/(pow(1024,2)), 2)} MB"
    elif size < pow(1024,4):
        return f"{round(size/(pow(1024,3)), 2)} GB"
    return f"{round(size/(pow(1024,4)), 2)} TB"

146
app/views.py Normal file
View File

@ -0,0 +1,146 @@
import os
import glob
from flask import render_template, Blueprint, current_app, request
from app.tasks import get_redis
from app.forms import ScrapingForm
from app.util import get_size
from app.config import load_config
from app.api import scraper as scraper
from app.analysis import load_data, load_analysis_modules
from datetime import datetime, timezone
views_bp = Blueprint("views", __name__)
def sizeof_fmt(num, suffix="B"):
"""Convert bytes to human readable format"""
for unit in ["", "Ki", "Mi", "Gi", "Ti", "Pi", "Ei", "Zi"]:
if abs(num) < 1024.0:
return f"{num:3.1f} {unit}{suffix}"
num /= 1024.0
return f"{num:.1f} Yi{suffix}"
def register_views(app):
@app.route('/')
def index():
form = ScrapingForm()
return render_template('index.html', form=form)
@app.route('/results')
def results():
return render_template('results.html')
@app.route('/log_viewer')
def log_viewer():
return render_template('log_viewer.html')
@app.route('/download_results')
def download_results():
# Get the current active log file and data file from Redis and app config
redis_client = get_redis()
current_faction_id = redis_client.get("current_faction_id")
active_data_file = None
if current_faction_id:
active_data_file = redis_client.hget(f"scraper:{current_faction_id}", "data_file_name")
active_log_file = app.config['LOG_FILE_NAME']
def get_file_info(file_path, file_type='data'):
stats = os.stat(file_path)
name = os.path.basename(file_path)
# Determine if file is active
is_active = False
if file_type == 'data' and active_data_file:
is_active = os.path.abspath(file_path) == os.path.abspath(active_data_file)
elif file_type == 'log' and active_log_file:
is_active = os.path.basename(file_path) == os.path.basename(active_log_file)
return {
'name': file_path, # Full path for internal use
'name_display': name, # Just filename for display
'last_modified': stats.st_mtime, # Send timestamp instead of datetime
'created': stats.st_ctime, # Send timestamp instead of datetime
'size': sizeof_fmt(stats.st_size),
'active': is_active
}
data_files = []
log_files = []
# Get data files
data_dir = os.path.abspath(app.config['DATA']['DATA_DIR'])
if os.path.exists(data_dir):
for file in glob.glob(os.path.join(data_dir, "*.csv")):
data_files.append(get_file_info(file, 'data'))
# Get log files
log_dir = os.path.abspath(app.config['LOGGING']['LOG_DIR'])
if os.path.exists(log_dir):
for file in glob.glob(os.path.join(log_dir, "*.log")):
log_files.append(get_file_info(file, 'log'))
# Sort files by modification time, newest first
data_files.sort(key=lambda x: x['last_modified'], reverse=True)
log_files.sort(key=lambda x: x['last_modified'], reverse=True)
files = {
'data': data_files,
'log': log_files
}
return render_template('download_results.html', files=files)
views_bp = Blueprint("views", __name__)
@views_bp.route("/analyze", methods=["GET", "POST"])
def analyze():
analysis_modules = load_analysis_modules() # Load available analyses
data_dir = current_app.config.get("DATA", {}).get("DATA_DIR")
selected_file = None
selected_analyses = []
# Find all available CSV files
data_files = sorted(
glob.glob(os.path.join(data_dir, "*.csv")),
key=os.path.getmtime,
reverse=True
) if data_dir else []
context = {
"data_files": data_files,
"analyses": analysis_modules,
"selected_file": selected_file,
"selected_analyses": selected_analyses
}
if request.method == "POST":
selected_analyses = request.form.getlist("analyses")
selected_file = request.form.get("data_file")
if not selected_file:
context["error"] = "No file selected."
return render_template("analyze.html", **context)
df = load_data(selected_file)
results = {}
for analysis in analysis_modules:
if analysis.name in selected_analyses:
results[analysis.name] = analysis.execute(df) # Some may return HTML
context["results"] = results
return render_template("analyze.html", **context)
@views_bp.route('/server_time')
def server_time():
    current_time = datetime.now(timezone.utc).strftime('%Y-%m-%d %H:%M:%S')
return {'server_time': current_time}
app.register_blueprint(views_bp)
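sizeof_fmt divides by 1024 per step and uses binary unit prefixes; a quick check of its output:

# sketch: sizeof_fmt output for a few sizes
from app.views import sizeof_fmt

print(sizeof_fmt(512))        # '512.0 B'
print(sizeof_fmt(123456))     # '120.6 KiB'
print(sizeof_fmt(5_000_000))  # '4.8 MiB'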

0
data/.gitkeep Normal file
View File

34
docker-compose.yml Normal file
View File

@ -0,0 +1,34 @@
version: '3.8'
services:
redis:
image: redis:alpine
restart: always
volumes:
- .:/app/redis
app:
build: .
container_name: app
restart: always
command: sh ./entrypoint.sh
depends_on:
- redis
volumes:
- .:/app
expose:
- 8000
env_file:
- ./.env
nginx:
build:
context: .
dockerfile: nginx/Dockerfile
container_name: nginx
ports:
- "80:80"
volumes:
- .:/app/nginx
depends_on:
- app

BIN
dump.rdb Normal file

Binary file not shown.

18
entrypoint.sh Normal file
View File

@ -0,0 +1,18 @@
#!/bin/sh
if [ "$BROKER" = "redis" ]
then
echo "Waiting for redis..."
while ! nc -zv $BROKER_HOST $BROKER_PORT; do
sleep 10
done
echo "Redis started"
fi
echo "Starting app..."
cd /app
rm -f celery.pid
touch celery.pid
/home/app/.local/bin/uwsgi --ini uwsgi.ini
exec "$@"

34
example_config.ini Normal file
View File

@ -0,0 +1,34 @@
# All main config options will be passed to template engine
[MAIN]
APP_TITLE = 'Torn User Activity Grabber'
[DEFAULT]
SECRET_KEY = your_secret_key
API_KEY = your_api_key
[LOGGING]
VIEW_MAX_LINES = 500
VIEW_PAGE_LINES = 50
TAIL_PAGE_SIZE = 100
LOG_DIR = log/
[BOOTSTRAP]
BOOTSTRAP_SERVE_LOCAL = False
BOOTSTRAP_BTN_STYLE = 'primary'
BOOTSTRAP_BTN_SIZE = 'sm'
BOOTSTRAP_ICON_SIZE = '1em'
BOOTSTRAP_ICON_COLOR = None
BOOTSTRAP_BOOTSWATCH_THEME = litera
BOOTSTRAP_MSG_CATEGORY = 'primary'
BOOTSTRAP_TABLE_VIEW_TITLE = 'View'
BOOTSTRAP_TABLE_EDIT_TITLE = 'Edit'
BOOTSTRAP_TABLE_DELETE_TITLE = 'Delete'
BOOTSTRAP_TABLE_NEW_TITLE = 'New'
BOOTSTRAP_FORM_GROUP_CLASSES = 'mb-3'
BOOTSTRAP_FORM_INLINE_CLASSES = 'row row-cols-lg-auto g-3 align-items-center'
[DATA]
DATA_DIR = data/
[TEMP]
TEMP_DIR = temp/

20
fly.toml Normal file
View File

@ -0,0 +1,20 @@
# fly.toml app configuration file generated for tornactivitytracker on 2025-02-11T02:59:23+01:00
#
# See https://fly.io/docs/reference/configuration/ for information about how to use this file.
#
app = 'tornactivitytracker'
primary_region = 'fra'
[build]
[http_service]
internal_port = 8080
force_https = true
auto_stop_machines = 'stop'
auto_start_machines = true
min_machines_running = 0
processes = ['app']
[[vm]]
size = 'shared-cpu-2x'

7
howToTest.txt Normal file
View File

@ -0,0 +1,7 @@
celery -A app.celery worker --loglevel=info
redis-server
python run.py
python stop_scraping.py

0
log/.gitkeep Normal file
View File

4
nginx/Dockerfile Normal file
View File

@ -0,0 +1,4 @@
FROM nginx:stable-alpine
COPY ./nginx/nginx.conf /etc/nginx/conf.d/default.conf
EXPOSE 80

7
nginx/nginx.conf Normal file
View File

@ -0,0 +1,7 @@
server {
listen 80;
location / {
include uwsgi_params;
uwsgi_pass app:8000;
}
}

15
requirements.in Normal file
View File

@ -0,0 +1,15 @@
# requirements.in
Flask
Flask-WTF
Bootstrap-Flask
pandas
requests
matplotlib
seaborn
configparser
plotly
configobj
redis
celery
gunicorn
uWSGI

1
requirements.sh Normal file
View File

@ -0,0 +1 @@
pip-compile requirements.in > requirements.txt

135
requirements.txt Normal file
View File

@ -0,0 +1,135 @@
#
# This file is autogenerated by pip-compile with Python 3.13
# by the following command:
#
# pip-compile
#
amqp==5.3.1
# via kombu
billiard==4.2.1
# via celery
blinker==1.9.0
# via flask
bootstrap-flask==2.4.1
# via -r requirements.in
celery==5.4.0
# via -r requirements.in
certifi==2025.1.31
# via requests
charset-normalizer==3.4.1
# via requests
click==8.1.8
# via
# celery
# click-didyoumean
# click-plugins
# click-repl
# flask
click-didyoumean==0.3.1
# via celery
click-plugins==1.1.1
# via celery
click-repl==0.3.0
# via celery
configobj==5.0.9
# via -r requirements.in
configparser==7.1.0
# via -r requirements.in
contourpy==1.3.1
# via matplotlib
cycler==0.12.1
# via matplotlib
flask==3.1.0
# via
# -r requirements.in
# bootstrap-flask
# flask-wtf
flask-wtf==1.2.2
# via -r requirements.in
fonttools==4.56.0
# via matplotlib
gunicorn==23.0.0
# via -r requirements.in
idna==3.10
# via requests
itsdangerous==2.2.0
# via
# flask
# flask-wtf
jinja2==3.1.5
# via flask
kiwisolver==1.4.8
# via matplotlib
kombu==5.4.2
# via celery
markupsafe==3.0.2
# via
# jinja2
# werkzeug
# wtforms
matplotlib==3.10.0
# via
# -r requirements.in
# seaborn
narwhals==1.27.1
# via plotly
numpy==2.2.3
# via
# contourpy
# matplotlib
# pandas
# seaborn
packaging==24.2
# via
# gunicorn
# matplotlib
# plotly
pandas==2.2.3
# via
# -r requirements.in
# seaborn
pillow==11.1.0
# via matplotlib
plotly==6.0.0
# via -r requirements.in
prompt-toolkit==3.0.50
# via click-repl
pyparsing==3.2.1
# via matplotlib
python-dateutil==2.9.0.post0
# via
# celery
# matplotlib
# pandas
pytz==2025.1
# via pandas
redis==5.2.1
# via -r requirements.in
requests==2.32.3
# via -r requirements.in
seaborn==0.13.2
# via -r requirements.in
six==1.17.0
# via python-dateutil
tzdata==2025.1
# via
# celery
# kombu
# pandas
urllib3==2.3.0
# via requests
uwsgi==2.0.28
# via -r requirements.in
vine==5.1.0
# via
# amqp
# celery
# kombu
wcwidth==0.2.13
# via prompt-toolkit
werkzeug==3.1.3
# via flask
wtforms==3.2.1
# via
# bootstrap-flask
# flask-wtf

6
run.py Normal file
View File

@ -0,0 +1,6 @@
from app import create_app
app = create_app()
if __name__ == '__main__':
app.run(debug=True)

50
stop_scraping.py Normal file
View File

@ -0,0 +1,50 @@
import redis
import argparse
def get_redis():
return redis.StrictRedis(
host='localhost',
port=6379,
db=0,
decode_responses=True
)
def stop_scraping(flush=False, force=False):
redis_client = get_redis()
if flush:
redis_client.flushall()
print("Flushed all Redis data")
return True
current_faction_id = redis_client.get("current_faction_id")
if not current_faction_id:
print("No active scraping session found.")
        return force
redis_client.hset(f"scraper:{current_faction_id}", "scraping_active", "0")
print(f"Sent stop signal to scraping process for faction {current_faction_id}")
return True
if __name__ == "__main__":
parser = argparse.ArgumentParser(description='Stop the Torn Activity Tracker scraping process.')
parser.add_argument('--force', action='store_true', help='Force stop even if no active session is found')
parser.add_argument('--flush', action='store_true', help='Flush all Redis data (WARNING: This will clear ALL Redis data)')
args = parser.parse_args()
if args.flush:
if input("WARNING: This will delete ALL Redis data. Are you sure? (y/N) ").lower() != 'y':
print("Operation cancelled.")
exit(0)
success = stop_scraping(flush=args.flush, force=args.force)
if not success and args.force:
print("Forcing stop for all potential scraping processes...")
redis_client = get_redis()
# Get all scraper keys
for key in redis_client.keys("scraper:*"):
redis_client.hset(key, "scraping_active", "0")
print("Sent stop signal to all potential scraping processes.")

0
temp/.gitkeep Normal file
View File

13
uwsgi.ini Normal file
View File

@ -0,0 +1,13 @@
[uwsgi]
module = run:app
callable = app
wsgi-file = run.py
master = true
processes = 4
smart-attach-daemon=./celery.pid /home/app/.local/bin/celery -A app.celery_worker worker --loglevel=info --pidfile=./celery.pid
socket = :8000
vacuum = true
die-on-term = true