diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..0c8d3ac --- /dev/null +++ b/.gitignore @@ -0,0 +1,199 @@ +# Created by https://www.toptal.com/developers/gitignore/api/flask +# Edit at https://www.toptal.com/developers/gitignore?templates=flask + +### Flask ### +instance/* +!instance/.gitignore +.webassets-cache +.env + +### Flask.Python Stack ### +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ +cover/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +.pybuilder/ +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +# For a library or package, you might want to ignore these files since the code is +# intended to run in multiple environments; otherwise, check them in: +# .python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# poetry +# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. +# This is especially recommended for binary packages to ensure reproducibility, and is more +# commonly ignored for libraries. +# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control +#poetry.lock + +# pdm +# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. +#pdm.lock +# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it +# in version control. +# https://pdm.fming.dev/#use-with-ide +.pdm.toml + +# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Environments +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +# pytype static type analyzer +.pytype/ + +# Cython debug symbols +cython_debug/ + +# PyCharm +# JetBrains specific template is maintained in a separate JetBrains.gitignore that can +# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore +# and can be added to the global gitignore or merged into this file. For a more nuclear +# option (not recommended) you can uncomment the following to ignore the entire idea folder. 
+#.idea/
+
+# End of https://www.toptal.com/developers/gitignore/api/flask
+
+# Created by https://www.toptal.com/developers/gitignore/api/visualstudiocode
+# Edit at https://www.toptal.com/developers/gitignore?templates=visualstudiocode
+
+### VisualStudioCode ###
+.vscode/*
+!.vscode/settings.json
+!.vscode/tasks.json
+!.vscode/launch.json
+!.vscode/extensions.json
+!.vscode/*.code-snippets
+
+# Local History for Visual Studio Code
+.history/
+
+# Built Visual Studio Code Extensions
+*.vsix
+
+### VisualStudioCode Patch ###
+# Ignore all local history of files
+.history
+.ionide
+
+# End of https://www.toptal.com/developers/gitignore/api/visualstudiocode
+
+# Exclude data files
+*.csv
+*.zip
+config.ini
\ No newline at end of file
diff --git a/Dockerfile b/Dockerfile
new file mode 100644
index 0000000..3638426
--- /dev/null
+++ b/Dockerfile
@@ -0,0 +1,21 @@
+FROM python:3.9-alpine
+
+ENV PATH="$PATH:/home/app/.local/bin"
+RUN apk update \
+    && apk add netcat-openbsd py3-pip \
+    && apk add --virtual .build-deps build-base \
+       python3-dev gcc libc-dev linux-headers pcre-dev
+
+WORKDIR /app
+COPY . /app
+RUN chmod +x /app/entrypoint.sh
+
+RUN adduser -D app
+RUN echo 'permit app as root' > /etc/doas.conf
+
+USER app
+
+RUN pip install --upgrade pip \
+    && pip install -r requirements.txt
+
+EXPOSE 8000
diff --git a/LICENSE b/LICENSE
new file mode 100644
index 0000000..80b2e74
--- /dev/null
+++ b/LICENSE
@@ -0,0 +1,426 @@
+Attribution-ShareAlike 4.0 International
+
+=======================================================================
+
+Creative Commons Corporation ("Creative Commons") is not a law firm and
+does not provide legal services or legal advice. Distribution of
+Creative Commons public licenses does not create a lawyer-client or
+other relationship. Creative Commons makes its licenses and related
+information available on an "as-is" basis. Creative Commons gives no
+warranties regarding its licenses, any material licensed under their
+terms and conditions, or any related information. Creative Commons
+disclaims all liability for damages resulting from their use to the
+fullest extent possible.
+
+Using Creative Commons Public Licenses
+
+Creative Commons public licenses provide a standard set of terms and
+conditions that creators and other rights holders may use to share
+original works of authorship and other material subject to copyright
+and certain other rights specified in the public license below. The
+following considerations are for informational purposes only, are not
+exhaustive, and do not form part of our licenses.
+
+     Considerations for licensors: Our public licenses are
+     intended for use by those authorized to give the public
+     permission to use material in ways otherwise restricted by
+     copyright and certain other rights. Our licenses are
+     irrevocable. Licensors should read and understand the terms
+     and conditions of the license they choose before applying it.
+     Licensors should also secure all rights necessary before
+     applying our licenses so that the public can reuse the
+     material as expected. Licensors should clearly mark any
+     material not subject to the license. This includes other CC-
+     licensed material, or material used under an exception or
+     limitation to copyright. More considerations for licensors:
+     wiki.creativecommons.org/Considerations_for_licensors
+
+     Considerations for the public: By using one of our public
+     licenses, a licensor grants the public permission to use the
+     licensed material under specified terms and conditions.
If + the licensor's permission is not necessary for any reason--for + example, because of any applicable exception or limitation to + copyright--then that use is not regulated by the license. Our + licenses grant only permissions under copyright and certain + other rights that a licensor has authority to grant. Use of + the licensed material may still be restricted for other + reasons, including because others have copyright or other + rights in the material. A licensor may make special requests, + such as asking that all changes be marked or described. + Although not required by our licenses, you are encouraged to + respect those requests where reasonable. More considerations + for the public: + wiki.creativecommons.org/Considerations_for_licensees + +======================================================================= + +Creative Commons Attribution-ShareAlike 4.0 International Public +License + +By exercising the Licensed Rights (defined below), You accept and agree +to be bound by the terms and conditions of this Creative Commons +Attribution-ShareAlike 4.0 International Public License ("Public +License"). To the extent this Public License may be interpreted as a +contract, You are granted the Licensed Rights in consideration of Your +acceptance of these terms and conditions, and the Licensor grants You +such rights in consideration of benefits the Licensor receives from +making the Licensed Material available under these terms and +conditions. + + +Section 1 -- Definitions. + + a. Adapted Material means material subject to Copyright and Similar + Rights that is derived from or based upon the Licensed Material + and in which the Licensed Material is translated, altered, + arranged, transformed, or otherwise modified in a manner requiring + permission under the Copyright and Similar Rights held by the + Licensor. For purposes of this Public License, where the Licensed + Material is a musical work, performance, or sound recording, + Adapted Material is always produced where the Licensed Material is + synched in timed relation with a moving image. + + b. Adapter's License means the license You apply to Your Copyright + and Similar Rights in Your contributions to Adapted Material in + accordance with the terms and conditions of this Public License. + + c. BY-SA Compatible License means a license listed at + creativecommons.org/compatiblelicenses, approved by Creative + Commons as essentially the equivalent of this Public License. + + d. Copyright and Similar Rights means copyright and/or similar rights + closely related to copyright including, without limitation, + performance, broadcast, sound recording, and Sui Generis Database + Rights, without regard to how the rights are labeled or + categorized. For purposes of this Public License, the rights + specified in Section 2(b)(1)-(2) are not Copyright and Similar + Rights. + + e. Effective Technological Measures means those measures that, in the + absence of proper authority, may not be circumvented under laws + fulfilling obligations under Article 11 of the WIPO Copyright + Treaty adopted on December 20, 1996, and/or similar international + agreements. + + f. Exceptions and Limitations means fair use, fair dealing, and/or + any other exception or limitation to Copyright and Similar Rights + that applies to Your use of the Licensed Material. + + g. License Elements means the license attributes listed in the name + of a Creative Commons Public License. The License Elements of this + Public License are Attribution and ShareAlike. + + h. 
Licensed Material means the artistic or literary work, database, + or other material to which the Licensor applied this Public + License. + + i. Licensed Rights means the rights granted to You subject to the + terms and conditions of this Public License, which are limited to + all Copyright and Similar Rights that apply to Your use of the + Licensed Material and that the Licensor has authority to license. + + j. Licensor means the individual(s) or entity(ies) granting rights + under this Public License. + + k. Share means to provide material to the public by any means or + process that requires permission under the Licensed Rights, such + as reproduction, public display, public performance, distribution, + dissemination, communication, or importation, and to make material + available to the public including in ways that members of the + public may access the material from a place and at a time + individually chosen by them. + + l. Sui Generis Database Rights means rights other than copyright + resulting from Directive 96/9/EC of the European Parliament and of + the Council of 11 March 1996 on the legal protection of databases, + as amended and/or succeeded, as well as other essentially + equivalent rights anywhere in the world. + + m. You means the individual or entity exercising the Licensed Rights + under this Public License. Your has a corresponding meaning. + + +Section 2 -- Scope. + + a. License grant. + + 1. Subject to the terms and conditions of this Public License, + the Licensor hereby grants You a worldwide, royalty-free, + non-sublicensable, non-exclusive, irrevocable license to + exercise the Licensed Rights in the Licensed Material to: + + a. reproduce and Share the Licensed Material, in whole or + in part; and + + b. produce, reproduce, and Share Adapted Material. + + 2. Exceptions and Limitations. For the avoidance of doubt, where + Exceptions and Limitations apply to Your use, this Public + License does not apply, and You do not need to comply with + its terms and conditions. + + 3. Term. The term of this Public License is specified in Section + 6(a). + + 4. Media and formats; technical modifications allowed. The + Licensor authorizes You to exercise the Licensed Rights in + all media and formats whether now known or hereafter created, + and to make technical modifications necessary to do so. The + Licensor waives and/or agrees not to assert any right or + authority to forbid You from making technical modifications + necessary to exercise the Licensed Rights, including + technical modifications necessary to circumvent Effective + Technological Measures. For purposes of this Public License, + simply making modifications authorized by this Section 2(a) + (4) never produces Adapted Material. + + 5. Downstream recipients. + + a. Offer from the Licensor -- Licensed Material. Every + recipient of the Licensed Material automatically + receives an offer from the Licensor to exercise the + Licensed Rights under the terms and conditions of this + Public License. + + b. Additional offer from the Licensor -- Adapted Material. + Every recipient of Adapted Material from You + automatically receives an offer from the Licensor to + exercise the Licensed Rights in the Adapted Material + under the conditions of the Adapter's License You apply. + + c. No downstream restrictions. 
You may not offer or impose + any additional or different terms or conditions on, or + apply any Effective Technological Measures to, the + Licensed Material if doing so restricts exercise of the + Licensed Rights by any recipient of the Licensed + Material. + + 6. No endorsement. Nothing in this Public License constitutes or + may be construed as permission to assert or imply that You + are, or that Your use of the Licensed Material is, connected + with, or sponsored, endorsed, or granted official status by, + the Licensor or others designated to receive attribution as + provided in Section 3(a)(1)(A)(i). + + b. Other rights. + + 1. Moral rights, such as the right of integrity, are not + licensed under this Public License, nor are publicity, + privacy, and/or other similar personality rights; however, to + the extent possible, the Licensor waives and/or agrees not to + assert any such rights held by the Licensor to the limited + extent necessary to allow You to exercise the Licensed + Rights, but not otherwise. + + 2. Patent and trademark rights are not licensed under this + Public License. + + 3. To the extent possible, the Licensor waives any right to + collect royalties from You for the exercise of the Licensed + Rights, whether directly or through a collecting society + under any voluntary or waivable statutory or compulsory + licensing scheme. In all other cases the Licensor expressly + reserves any right to collect such royalties. + + +Section 3 -- License Conditions. + +Your exercise of the Licensed Rights is expressly made subject to the +following conditions. + + a. Attribution. + + 1. If You Share the Licensed Material (including in modified + form), You must: + + a. retain the following if it is supplied by the Licensor + with the Licensed Material: + + i. identification of the creator(s) of the Licensed + Material and any others designated to receive + attribution, in any reasonable manner requested by + the Licensor (including by pseudonym if + designated); + + ii. a copyright notice; + + iii. a notice that refers to this Public License; + + iv. a notice that refers to the disclaimer of + warranties; + + v. a URI or hyperlink to the Licensed Material to the + extent reasonably practicable; + + b. indicate if You modified the Licensed Material and + retain an indication of any previous modifications; and + + c. indicate the Licensed Material is licensed under this + Public License, and include the text of, or the URI or + hyperlink to, this Public License. + + 2. You may satisfy the conditions in Section 3(a)(1) in any + reasonable manner based on the medium, means, and context in + which You Share the Licensed Material. For example, it may be + reasonable to satisfy the conditions by providing a URI or + hyperlink to a resource that includes the required + information. + + 3. If requested by the Licensor, You must remove any of the + information required by Section 3(a)(1)(A) to the extent + reasonably practicable. + + b. ShareAlike. + + In addition to the conditions in Section 3(a), if You Share + Adapted Material You produce, the following conditions also apply. + + 1. The Adapter's License You apply must be a Creative Commons + license with the same License Elements, this version or + later, or a BY-SA Compatible License. + + 2. You must include the text of, or the URI or hyperlink to, the + Adapter's License You apply. You may satisfy this condition + in any reasonable manner based on the medium, means, and + context in which You Share Adapted Material. + + 3. 
You may not offer or impose any additional or different terms + or conditions on, or apply any Effective Technological + Measures to, Adapted Material that restrict exercise of the + rights granted under the Adapter's License You apply. + + +Section 4 -- Sui Generis Database Rights. + +Where the Licensed Rights include Sui Generis Database Rights that +apply to Your use of the Licensed Material: + + a. for the avoidance of doubt, Section 2(a)(1) grants You the right + to extract, reuse, reproduce, and Share all or a substantial + portion of the contents of the database; + + b. if You include all or a substantial portion of the database + contents in a database in which You have Sui Generis Database + Rights, then the database in which You have Sui Generis Database + Rights (but not its individual contents) is Adapted Material, + including for purposes of Section 3(b); and + + c. You must comply with the conditions in Section 3(a) if You Share + all or a substantial portion of the contents of the database. + +For the avoidance of doubt, this Section 4 supplements and does not +replace Your obligations under this Public License where the Licensed +Rights include other Copyright and Similar Rights. + + +Section 5 -- Disclaimer of Warranties and Limitation of Liability. + + a. UNLESS OTHERWISE SEPARATELY UNDERTAKEN BY THE LICENSOR, TO THE + EXTENT POSSIBLE, THE LICENSOR OFFERS THE LICENSED MATERIAL AS-IS + AND AS-AVAILABLE, AND MAKES NO REPRESENTATIONS OR WARRANTIES OF + ANY KIND CONCERNING THE LICENSED MATERIAL, WHETHER EXPRESS, + IMPLIED, STATUTORY, OR OTHER. THIS INCLUDES, WITHOUT LIMITATION, + WARRANTIES OF TITLE, MERCHANTABILITY, FITNESS FOR A PARTICULAR + PURPOSE, NON-INFRINGEMENT, ABSENCE OF LATENT OR OTHER DEFECTS, + ACCURACY, OR THE PRESENCE OR ABSENCE OF ERRORS, WHETHER OR NOT + KNOWN OR DISCOVERABLE. WHERE DISCLAIMERS OF WARRANTIES ARE NOT + ALLOWED IN FULL OR IN PART, THIS DISCLAIMER MAY NOT APPLY TO YOU. + + b. TO THE EXTENT POSSIBLE, IN NO EVENT WILL THE LICENSOR BE LIABLE + TO YOU ON ANY LEGAL THEORY (INCLUDING, WITHOUT LIMITATION, + NEGLIGENCE) OR OTHERWISE FOR ANY DIRECT, SPECIAL, INDIRECT, + INCIDENTAL, CONSEQUENTIAL, PUNITIVE, EXEMPLARY, OR OTHER LOSSES, + COSTS, EXPENSES, OR DAMAGES ARISING OUT OF THIS PUBLIC LICENSE OR + USE OF THE LICENSED MATERIAL, EVEN IF THE LICENSOR HAS BEEN + ADVISED OF THE POSSIBILITY OF SUCH LOSSES, COSTS, EXPENSES, OR + DAMAGES. WHERE A LIMITATION OF LIABILITY IS NOT ALLOWED IN FULL OR + IN PART, THIS LIMITATION MAY NOT APPLY TO YOU. + + c. The disclaimer of warranties and limitation of liability provided + above shall be interpreted in a manner that, to the extent + possible, most closely approximates an absolute disclaimer and + waiver of all liability. + + +Section 6 -- Term and Termination. + + a. This Public License applies for the term of the Copyright and + Similar Rights licensed here. However, if You fail to comply with + this Public License, then Your rights under this Public License + terminate automatically. + + b. Where Your right to use the Licensed Material has terminated under + Section 6(a), it reinstates: + + 1. automatically as of the date the violation is cured, provided + it is cured within 30 days of Your discovery of the + violation; or + + 2. upon express reinstatement by the Licensor. + + For the avoidance of doubt, this Section 6(b) does not affect any + right the Licensor may have to seek remedies for Your violations + of this Public License. + + c. 
For the avoidance of doubt, the Licensor may also offer the
+     Licensed Material under separate terms or conditions or stop
+     distributing the Licensed Material at any time; however, doing so
+     will not terminate this Public License.
+
+  d. Sections 1, 5, 6, 7, and 8 survive termination of this Public
+     License.
+
+
+Section 7 -- Other Terms and Conditions.
+
+  a. The Licensor shall not be bound by any additional or different
+     terms or conditions communicated by You unless expressly agreed.
+
+  b. Any arrangements, understandings, or agreements regarding the
+     Licensed Material not stated herein are separate from and
+     independent of the terms and conditions of this Public License.
+
+
+Section 8 -- Interpretation.
+
+  a. For the avoidance of doubt, this Public License does not, and
+     shall not be interpreted to, reduce, limit, restrict, or impose
+     conditions on any use of the Licensed Material that could lawfully
+     be made without permission under this Public License.
+
+  b. To the extent possible, if any provision of this Public License is
+     deemed unenforceable, it shall be automatically reformed to the
+     minimum extent necessary to make it enforceable. If the provision
+     cannot be reformed, it shall be severed from this Public License
+     without affecting the enforceability of the remaining terms and
+     conditions.
+
+  c. No term or condition of this Public License will be waived and no
+     failure to comply consented to unless expressly agreed to by the
+     Licensor.
+
+  d. Nothing in this Public License constitutes or may be interpreted
+     as a limitation upon, or waiver of, any privileges and immunities
+     that apply to the Licensor or You, including from the legal
+     processes of any jurisdiction or authority.
+
+
+=======================================================================
+
+Creative Commons is not a party to its public
+licenses. Notwithstanding, Creative Commons may elect to apply one of
+its public licenses to material it publishes and in those instances
+will be considered the "Licensor." The text of the Creative Commons
+public licenses is dedicated to the public domain under the CC0 Public
+Domain Dedication. Except for the limited purpose of indicating that
+material is shared under a Creative Commons public license or as
+otherwise permitted by the Creative Commons policies published at
+creativecommons.org/policies, Creative Commons does not authorize the
+use of the trademark "Creative Commons" or any other trademark or logo
+of Creative Commons without its prior written consent including,
+without limitation, in connection with any unauthorized modifications
+to any of its public licenses or any other arrangements,
+understandings, or agreements concerning use of licensed material. For
+the avoidance of doubt, this paragraph does not form part of the
+public licenses.
+
+Creative Commons may be contacted at creativecommons.org.
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..1b29f41
--- /dev/null
+++ b/README.md
@@ -0,0 +1,257 @@
+# Torn User Activity Tracker
+
+> [!WARNING]
+> **Development is still in its early stages; do not use this in production yet!**
+
+## Features
+
+Multiple users can control a single activity tracker that collects member activity data through Torn's API.
+
+- Start and stop scraping user activity data
+- View real-time logs
+- Download data and log files
+- View scraping results
+- Plugin-based analysis system
+- Toggle between light and dark mode
+
+**Note:** Many features are not fully implemented yet, but the activity tracker/grabber works as intended.
+
+## Planned Features
+
+- Additional analysis plugins
+- A selector for choosing which Torn API data gets tracked
+- Log viewer
+
+## Requirements
+
+- Python 3.8+
+- Flask
+- Flask-Bootstrap
+- Flask-WTF
+- Pandas
+- Matplotlib
+- Seaborn
+- Plotly
+- Requests
+- Redis
+- Celery
+- ConfigObj
+- uWSGI
+
+Redis currently has to run locally; this will change in the future. See `tasks.py`:
+
+```python
+# tasks.py
+def get_redis():
+    return redis.StrictRedis(
+        host='localhost',
+        port=6379,
+        db=0,
+        decode_responses=True
+    )
+```
+
+## Installation
+
+### Docker
+
+#### Prerequisites
+- Docker
+- Docker Compose
+
+#### Steps to Deploy
+
+1. Clone the repository:
+```bash
+git clone https://github.com/MichaelB7/TornActivityTracker.git
+cd TornActivityTracker
+```
+
+2. Configure environment variables:
+- Copy the example .env file and modify it if needed
+```bash
+cp .env.example .env
+```
+
+3. Build and start the containers:
+```bash
+docker-compose up -d --build
+```
+
+This will start:
+- The main Flask application
+- Redis for task queue management
+- Nginx as reverse proxy
+
+The application will be available at `http://localhost:80`
+
+#### Maintenance
+
+To view logs:
+```bash
+docker-compose logs -f
+```
+
+To stop the application:
+```bash
+docker-compose down
+```
+
+To rebuild and restart:
+```bash
+docker-compose up -d --build
+```
+
+### Manual
+
+1. Clone the repository:
+
+```sh
+git clone https://github.com/MichaelB7/TornActivityTracker.git
+cd TornActivityTracker
+```
+
+2. Create a virtual environment and activate it:
+
+```sh
+python3 -m venv venv
+source venv/bin/activate  # On Windows use: .\venv\Scripts\activate
+```
+
+3. Install the required packages:
+
+```sh
+pip install -r requirements.txt
+```
+
+4. Start the Redis server locally:
+```sh
+redis-server
+```
+
+5. Set up your configuration:
+Create a `config.ini` file in the root directory by copying `example_config.ini`:
+
+```sh
+cp example_config.ini config.ini
+```
+
+Then edit `config.ini` with your settings:
+
+```ini
+[DEFAULT]
+SECRET_KEY = your_secret_key
+API_KEY = your_api_key
+# ...rest of the config settings...
+```
+
+6. Start the Celery worker:
+```sh
+celery -A app.celery_worker worker --loglevel=info
+```
+
+7. Run the Flask application:
+```sh
+flask run
+```
+
+The application will be available at `http://127.0.0.1:5000/`
+
+## Adding an Analysis Module
+
+This guide explains how to add a new analysis module using the provided base classes: `BasePlotlyAnalysis` and `BasePlotAnalysis`. These base classes ensure a structured workflow for data preparation, transformation, and visualization.
+
+### 1. Choosing the Right Base Class
+Before implementing an analysis module, decide on the appropriate base class:
+- **`BasePlotlyAnalysis`**: Use this for interactive plots with **Plotly** that generate **HTML** outputs.
+- **`BasePlotAnalysis`**: Use this for static plots with **Matplotlib/Seaborn** that generate **PNG** image files.
+- **`BaseAnalysis`**: Use this for any other type of analysis with **text** or **HTML** output, for maximum flexibility.
+
+### 2. Naming Convention
+Follow a structured naming convention for consistency:
+- **File name:** `plotly_<name>.py` for Plotly analyses, `plot_<name>.py` for Matplotlib-based analyses.
+- **Class name:** Use PascalCase and a descriptive suffix:
+  - Example for Plotly: `PlotlyActivityHeatmap`
+  - Example for Matplotlib: `PlotUserSessionDuration`
+
+### 3. Data Structure
+The following DataFrame structure is passed to analysis classes:
+
+| user_id  | name   | last_action         | status | timestamp                     | prev_timestamp | was_active | hour |
+|----------|--------|---------------------|--------|-------------------------------|----------------|------------|------|
+| XXXXXXX  | UserA  | 2025-02-08 17:58:11 | Okay   | 2025-02-08 18:09:41.867984056 | NaT            | False      | 18   |
+| XXXXXXX  | UserB  | 2025-02-08 17:00:10 | Okay   | 2025-02-08 18:09:42.427846909 | NaT            | False      | 18   |
+| XXXXXXX  | UserC  | 2025-02-08 16:31:52 | Okay   | 2025-02-08 18:09:42.823201895 | NaT            | False      | 18   |
+| XXXXXXX  | UserD  | 2025-02-06 23:57:24 | Okay   | 2025-02-08 18:09:43.179914951 | NaT            | False      | 18   |
+| XXXXXXX  | UserE  | 2025-02-06 06:33:40 | Okay   | 2025-02-08 18:09:43.434650898 | NaT            | False      | 18   |
+
+Note that the first rows (one per tracked member) will always contain `NaT` in `prev_timestamp`, since a user's first record has no earlier timestamp to refer to.
+
+### 4. Implementing an Analysis Module
+Each analysis module should define two key methods:
+- `transform_data(self, df: pd.DataFrame) -> pd.DataFrame`: Processes the input data for plotting.
+- `plot_data(self, df: pd.DataFrame)`: Generates and saves the plot.
+
+#### Example: Adding a Plotly Heatmap
+Below is an example of how to create a new analysis module using `BasePlotlyAnalysis`.
+
+```python
+import pandas as pd
+import plotly.graph_objects as go
+from .basePlotlyAnalysis import BasePlotlyAnalysis
+
+class PlotlyActivityHeatmap(BasePlotlyAnalysis):
+    """
+    Displays user activity trends over multiple days using an interactive heatmap.
+    """
+    name = "Activity Heatmap (Interactive)"
+    description = "Displays user activity trends over multiple days."
+    plot_filename = "activity_heatmap.html"
+
+    def transform_data(self, df: pd.DataFrame) -> pd.DataFrame:
+        df['hour'] = df['timestamp'].dt.hour
+        active_counts = df[df['was_active']].pivot_table(
+            index='name',
+            columns='hour',
+            values='was_active',
+            aggfunc='sum',
+            fill_value=0
+        ).reset_index()
+        return active_counts.melt(id_vars='name', var_name='hour', value_name='activity_count')
+
+    def plot_data(self, df: pd.DataFrame):
+        df = df.pivot(index='name', columns='hour', values='activity_count').fillna(0)
+        self.fig = go.Figure(data=go.Heatmap(
+            z=df.values, x=df.columns, y=df.index, colorscale='Viridis',
+            colorbar=dict(title='Activity Count')
+        ))
+        self.fig.update_layout(title='User Activity Heatmap', xaxis_title='Hour', yaxis_title='User')
+```
+
+#### Example: Adding a Static Matplotlib Plot
+Below is an example of a Matplotlib-based analysis module using `BasePlotAnalysis`.
+
+```python
+import pandas as pd
+import matplotlib.pyplot as plt
+from .basePlotAnalysis import BasePlotAnalysis
+
+class PlotUserSessionDuration(BasePlotAnalysis):
+    """
+    Displays a histogram of user session durations.
+    """
+    name = "User Session Duration Histogram"
+    description = "Histogram of session durations."
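+    # required by BasePlotAnalysis.execute(), which appends self.note to the
+    # returned HTML; left empty here because this example adds no extra note
+    note = ""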
+    plot_filename = "session_duration.png"
+
+    def transform_data(self, df: pd.DataFrame) -> pd.DataFrame:
+        df['session_duration'] = (df['last_action'] - df['timestamp']).dt.total_seconds()
+        return df
+
+    def plot_data(self, df: pd.DataFrame):
+        plt.figure(figsize=(10, 6))
+        plt.hist(df['session_duration'].dropna(), bins=30, edgecolor='black')
+        plt.xlabel('Session Duration (seconds)')
+        plt.ylabel('Frequency')
+        plt.title('User Session Duration Histogram')
+```
+
+## License
+
+All assets and code are licensed under [CC BY-SA 4.0](http://creativecommons.org/licenses/by-sa/4.0/) unless specified otherwise.
\ No newline at end of file
diff --git a/app/__init__.py b/app/__init__.py
new file mode 100644
index 0000000..9356270
--- /dev/null
+++ b/app/__init__.py
@@ -0,0 +1,56 @@
+import os
+from flask import Flask
+from flask_bootstrap import Bootstrap5
+from datetime import datetime
+
+from app.views import register_views
+from app.api import register_api
+from app.config import load_config
+from app.filters import register_filters
+from app.tasks import celery
+
+from app.logging_config import init_logger
+
+def create_app(config=None):
+    app = Flask(__name__)
+
+    if config is None:
+        config = load_config()
+    app.config.update(config)
+
+    os.environ['TZ'] = 'UTC'
+
+    app.config['SECRET_KEY'] = config['DEFAULT']['SECRET_KEY']
+
+    # Move bootstrap settings to root level
+    for key, value in config.get('BOOTSTRAP', {}).items():
+        app.config[key.upper()] = value
+
+    # Initialize Celery
+    celery.conf.update(app.config)
+
+    bootstrap = Bootstrap5(app)
+
+    # Store the entire config in Flask app
+    app.config.update(config)
+
+    # Initialize other settings
+    app.config['SCRAPING_ACTIVE'] = False
+    app.config['SCRAPING_THREAD'] = None
+    app.config['DATA_FILE_NAME'] = None
+    app.config['LOG_FILE_NAME'] = "log/" + datetime.now().strftime('%Y-%m-%d-%H-%M') + '.log'
+
+    # Initialize logging
+    app.logger = init_logger(app.config)
+
+    # Register routes
+    register_views(app)
+    register_api(app)
+    register_filters(app)
+
+    @app.context_processor
+    def inject_main_config():
+        main_config = app.config.get('MAIN', {})
+        return dict(main_config=main_config)
+
+    return app
\ No newline at end of file
diff --git a/app/analysis/__init__.py b/app/analysis/__init__.py
new file mode 100644
index 0000000..5853f86
--- /dev/null
+++ b/app/analysis/__init__.py
@@ -0,0 +1,34 @@
+import os
+import pkgutil
+import importlib
+import inspect
+from abc import ABC
+
+from .base import BaseAnalysis
+
+import pandas as pd
+
+def load_analysis_modules():
+    analysis_modules = []
+    package_path = __path__[0]
+
+    for _, module_name, _ in pkgutil.iter_modules([package_path]):
+        module = importlib.import_module(f"app.analysis.{module_name}")
+
+        for _, obj in inspect.getmembers(module, inspect.isclass):
+            # Exclude abstract classes (like BasePlotAnalysis)
+            if issubclass(obj, BaseAnalysis) and obj is not BaseAnalysis and not inspect.isabstract(obj):
+                analysis_modules.append(obj())  # Instantiate only concrete classes
+
+    return analysis_modules
+
+def load_data(file_path: str) -> pd.DataFrame:
+    """Loads the scraped data from a CSV file into a Pandas DataFrame."""
+    if not os.path.exists(file_path):
+        raise FileNotFoundError(f"File {file_path} not found.")
+
+    df = pd.read_csv(file_path)
+    df["timestamp"] = pd.to_datetime(df["timestamp"], errors="coerce")
+    df["last_action"] = pd.to_datetime(df["last_action"], errors="coerce")
+
+    return df
\ No newline at end of file
diff --git a/app/analysis/base.py
b/app/analysis/base.py
new file mode 100644
index 0000000..3730b19
--- /dev/null
+++ b/app/analysis/base.py
@@ -0,0 +1,11 @@
+from abc import ABC, abstractmethod
+import pandas as pd
+
+class BaseAnalysis(ABC):
+    name = "Base Analysis"
+    description = "This is a base analysis module."
+
+    @abstractmethod
+    def execute(self, df: pd.DataFrame):
+        """Run analysis on the given DataFrame"""
+        pass
diff --git a/app/analysis/basePlotAnalysis.py b/app/analysis/basePlotAnalysis.py
new file mode 100644
index 0000000..262bc5f
--- /dev/null
+++ b/app/analysis/basePlotAnalysis.py
@@ -0,0 +1,77 @@
+import os
+import pandas as pd
+import matplotlib.pyplot as plt
+import seaborn as sns
+from abc import ABC, abstractmethod
+
+from .base import BaseAnalysis
+from app.analysis.data_utils import prepare_data, mk_plotdir
+
+import matplotlib
+matplotlib.use('Agg')
+
+# -------------------------------------------
+# Base Class for All Plot Analyses
+# -------------------------------------------
+class BasePlotAnalysis(BaseAnalysis, ABC):
+    """
+    Base class for all plot-based analyses.
+    It enforces a structure for:
+    - Data preparation
+    - Transformation
+    - Plot generation
+    - Memory cleanup
+
+    Attributes:
+        plot_filename (str): The filename for the output plot.
+        alt_text (str): The alt text for the plot.
+    """
+    plot_filename = "default_plot.png"
+    alt_text = "Default Alt Text"
+
+    def execute(self, df: pd.DataFrame):
+        """
+        Executes the full analysis pipeline.
+
+        Parameters:
+            df (pd.DataFrame): The input DataFrame containing user activity data.
+
+        Returns:
+            str: HTML img tag containing the URL to the generated plot.
+        """
+        df = prepare_data(df)  # Step 1: Prepare data
+
+        paths = mk_plotdir(self.plot_filename)
+        self.output_path, self.plot_url = paths['output_path'], paths['plot_url']
+
+        df = self.transform_data(df)  # Step 2: Transform data (implemented by subclass)
+        self.plot_data(df)  # Step 3: Create the plot
+
+        plt.savefig(self.output_path, bbox_inches="tight")
+        plt.close()
+
+        del df  # Step 4: Free memory
+        return f'<img src="{self.plot_url}" alt="{self.alt_text}">{self.note}'
+
+    @abstractmethod
+    def transform_data(self, df: pd.DataFrame) -> pd.DataFrame:
+        """
+        Subclasses must define how they transform the data.
+
+        Parameters:
+            df (pd.DataFrame): The input DataFrame containing user activity data.
+
+        Returns:
+            pd.DataFrame: The transformed DataFrame.
+        """
+        pass
+
+    @abstractmethod
+    def plot_data(self, df: pd.DataFrame):
+        """
+        Subclasses must define how they generate the plot.
+
+        Parameters:
+            df (pd.DataFrame): The transformed DataFrame containing data to be plotted.
+        """
+        pass
diff --git a/app/analysis/basePlotlyAnalysis.py b/app/analysis/basePlotlyAnalysis.py
new file mode 100644
index 0000000..bc04660
--- /dev/null
+++ b/app/analysis/basePlotlyAnalysis.py
@@ -0,0 +1,73 @@
+import os
+import pandas as pd
+import plotly.graph_objects as go
+from abc import ABC, abstractmethod
+
+from .base import BaseAnalysis
+from app.analysis.data_utils import prepare_data, mk_plotdir
+
+# -------------------------------------------
+# Base Class for All Plotly Plot Analyses
+# -------------------------------------------
+class BasePlotlyAnalysis(BaseAnalysis, ABC):
+    """
+    Base class for all Plotly plot-based analyses.
+    It enforces a structure for:
+    - Data preparation
+    - Transformation
+    - Plot generation
+    - Memory cleanup
+
+    Attributes:
+        plot_filename (str): The filename for the output plot.
+        alt_text (str): The alt text for the plot.
+    """
+    plot_filename = "default_plot.html"
+    alt_text = "Default Alt Text"
+
+    def execute(self, df: pd.DataFrame):
+        """
+        Executes the full analysis pipeline.
+
+        Parameters:
+            df (pd.DataFrame): The input DataFrame containing user activity data.
+
+        Returns:
+            str: HTML iframe containing the URL to the generated plot.
+        """
+        df = prepare_data(df)  # Step 1: Prepare data
+
+        paths = mk_plotdir(self.plot_filename)
+        self.output_path, self.plot_url = paths['output_path'], paths['plot_url']
+
+        df = self.transform_data(df)  # Step 2: Transform data (implemented by subclass)
+        self.plot_data(df)  # Step 3: Create the plot
+
+        # Save the plot as an HTML file
+        self.fig.write_html(self.output_path)
+
+        del df  # Step 4: Free memory
+        return f'<iframe src="{self.plot_url}" title="{self.alt_text}"></iframe>'
+
+    @abstractmethod
+    def transform_data(self, df: pd.DataFrame) -> pd.DataFrame:
+        """
+        Subclasses must define how they transform the data.
+
+        Parameters:
+            df (pd.DataFrame): The input DataFrame containing user activity data.
+
+        Returns:
+            pd.DataFrame: The transformed DataFrame.
+        """
+        pass
+
+    @abstractmethod
+    def plot_data(self, df: pd.DataFrame):
+        """
+        Subclasses must define how they generate the plot.
+
+        Parameters:
+            df (pd.DataFrame): The transformed DataFrame containing data to be plotted.
+        """
+        pass
\ No newline at end of file
diff --git a/app/analysis/data_utils.py b/app/analysis/data_utils.py
new file mode 100644
index 0000000..c48dfc1
--- /dev/null
+++ b/app/analysis/data_utils.py
@@ -0,0 +1,45 @@
+from flask import current_app, url_for
+import os
+import pandas as pd
+
+def prepare_data(df):
+    """
+    Prepares the data for analysis by converting timestamps, calculating previous timestamps,
+    determining active status, and extracting the hour from the timestamp.
+
+    Parameters:
+        df (pd.DataFrame): The input DataFrame containing user activity data.
+
+    Returns:
+        pd.DataFrame: The processed DataFrame with additional columns for analysis.
+
+    The returned DataFrame will have the following columns:
+        user_id  name  last_action  status  timestamp  prev_timestamp  was_active  hour
+        0  12345678  UserName  2025-02-08 17:58:11  Okay  2025-02-08 18:09:41.867984056  NaT  False  18
+    """
+    df["timestamp"] = pd.to_datetime(df["timestamp"])
+    df["last_action"] = pd.to_datetime(df["last_action"])
+    df["prev_timestamp"] = df.groupby("user_id")["timestamp"].shift(1)
+    df["was_active"] = (df["timestamp"] - df["last_action"]) <= pd.Timedelta(seconds=60)
+    df["was_active"] = df["was_active"].fillna(False)
+    df['hour'] = df['timestamp'].dt.hour
+    return df
+
+def mk_plotdir(output_filename):
+    """
+    Creates the directory for storing plots and generates the output path and URL for the plot.
+
+    Parameters:
+        output_filename (str): The filename for the output plot.
+
+    Returns:
+        dict: A dictionary containing the output path and plot URL.
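+
+    Example (illustrative values; the actual paths depend on the deployment):
+        mk_plotdir("peak_hours.png") returns something like
+        {'output_path': '/app/app/static/plots/peak_hours.png',
+         'plot_url': 'http://localhost/static/plots/peak_hours.png'}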
+    """
+    plots_dir = os.path.join(current_app.root_path, "static", "plots")
+    os.makedirs(plots_dir, exist_ok=True)
+
+    output_path = os.path.join(plots_dir, output_filename)
+
+    plot_url = url_for('static', filename=f'plots/{output_filename}', _external=True)
+
+    return {'output_path': output_path, 'plot_url': plot_url}
\ No newline at end of file
diff --git a/app/analysis/plot_bar_activity-user.py b/app/analysis/plot_bar_activity-user.py
new file mode 100644
index 0000000..2f78ca7
--- /dev/null
+++ b/app/analysis/plot_bar_activity-user.py
@@ -0,0 +1,51 @@
+import pandas as pd
+import matplotlib.pyplot as plt
+import seaborn as sns
+from .basePlotAnalysis import BasePlotAnalysis
+from flask import current_app, url_for
+
+import matplotlib
+matplotlib.use('Agg')
+
+class PlotTopActiveUsers(BasePlotAnalysis):
+    """
+    Class for analyzing the most active users and generating a bar chart.
+
+    Attributes:
+        name (str): The name of the analysis.
+        description (str): A brief description of the analysis.
+        plot_filename (str): The filename for the output plot.
+        note (str): Additional notes for the analysis.
+    """
+    name = "Top Active Users"
+    description = "Displays the most active users based on their number of recorded actions."
+    plot_filename = "bar_activity-per-user.png"
+    note = ""
+
+    def transform_data(self, df: pd.DataFrame) -> pd.DataFrame:
+        """
+        Transform data for the bar plot.
+
+        Parameters:
+            df (pd.DataFrame): The input DataFrame containing user activity data.
+
+        Returns:
+            pd.DataFrame: The transformed DataFrame with active counts per user.
+        """
+        df = df[df['was_active']].groupby('name').size().reset_index(name='active_count')
+        return df
+
+    def plot_data(self, df: pd.DataFrame):
+        """
+        Generate bar plot.
+
+        Parameters:
+            df (pd.DataFrame): The transformed DataFrame containing active counts per user.
+        """
+        # create a horizontal barplot from active counts sorted by active count
+        plt.figure(figsize=(10, 6))
+        sns.barplot(x='active_count', y='name', data=df.sort_values('active_count', ascending=False))
+        plt.title('Minutes Active')
+        plt.xlabel('Active Count')
+        plt.ylabel('Player')
\ No newline at end of file
diff --git a/app/analysis/plot_bar_peak_hours.py b/app/analysis/plot_bar_peak_hours.py
new file mode 100644
index 0000000..a090b56
--- /dev/null
+++ b/app/analysis/plot_bar_peak_hours.py
@@ -0,0 +1,53 @@
+import os
+import pandas as pd
+import matplotlib.pyplot as plt
+import seaborn as sns
+from .basePlotAnalysis import BasePlotAnalysis
+
+import matplotlib
+matplotlib.use('Agg')
+
+class PlotPeakHours(BasePlotAnalysis):
+    """
+    Class for analyzing peak activity hours and generating a bar chart.
+
+    Attributes:
+        name (str): The name of the analysis.
+        description (str): A brief description of the analysis.
+        plot_filename (str): The filename for the output plot.
+        note (str): Additional notes for the analysis.
+    """
+
+    name = "Peak Hours Analysis"
+    description = "Identifies peak activity hours using a bar chart."
+    plot_filename = "peak_hours.png"
+    note = ""
+
+    def transform_data(self, df: pd.DataFrame) -> pd.DataFrame:
+        """
+        No further transformation is needed; prepare_data() has already added the
+        was_active and hour columns. See data_utils.py.
+
+        Parameters:
+            df (pd.DataFrame): The input DataFrame containing user activity data.
+
+        Returns:
+            pd.DataFrame: The DataFrame, unchanged.
+        """
+        return df
+
+    def plot_data(self, df: pd.DataFrame):
+        """
+        Generate bar chart for peak hours.
+
+        Parameters:
+            df (pd.DataFrame): The transformed DataFrame containing user activity data.
+        """
+        peak_hours = df[df["was_active"]]["hour"].value_counts().sort_index()
+
+        plt.figure(figsize=(12, 5))
+        sns.barplot(x=peak_hours.index, y=peak_hours.values, hue=peak_hours.values, palette="coolwarm")
+
+        plt.xlabel("Hour of the Day")
+        plt.ylabel("Activity Count")
+        plt.title("Peak Hours of User Activity")
+        plt.xticks(range(0, 24))
diff --git a/app/analysis/plot_heat_user-activity-hour.py b/app/analysis/plot_heat_user-activity-hour.py
new file mode 100644
index 0000000..05de909
--- /dev/null
+++ b/app/analysis/plot_heat_user-activity-hour.py
@@ -0,0 +1,55 @@
+import pandas as pd
+import matplotlib.pyplot as plt
+import seaborn as sns
+from .basePlotAnalysis import BasePlotAnalysis
+
+import matplotlib
+matplotlib.use('Agg')
+
+class PlotActivityHeatmap(BasePlotAnalysis):
+    """
+    Class for analyzing user activity trends over multiple days and generating a heatmap.
+
+    Attributes:
+        name (str): The name of the analysis.
+        description (str): A brief description of the analysis.
+        plot_filename (str): The filename for the output plot.
+        note (str): Additional notes for the analysis.
+    """
+    name = "Activity Heatmap"
+    description = "Displays user activity trends over multiple days using a heatmap. Generates a downloadable PNG image."
+    plot_filename = "activity_heatmap.png"
+    note = ""
+
+    def transform_data(self, df: pd.DataFrame) -> pd.DataFrame:
+        """
+        Transform data for the heatmap.
+
+        Parameters:
+            df (pd.DataFrame): The input DataFrame containing user activity data.
+
+        Returns:
+            pd.DataFrame: The transformed DataFrame with activity counts by hour.
+        """
+        active_counts = df[df['was_active']].pivot_table(
+            index='name',
+            columns='hour',
+            values='was_active',
+            aggfunc='sum',
+            fill_value=0
+        )
+        active_counts['total_active_minutes'] = active_counts.sum(axis=1)
+        return active_counts.sort_values(by='total_active_minutes', ascending=False)
+
+    def plot_data(self, df: pd.DataFrame):
+        """
+        Generate heatmap plot.
+
+        Parameters:
+            df (pd.DataFrame): The transformed DataFrame containing activity counts by hour.
+        """
+        plt.figure(figsize=(12, 8))
+        sns.heatmap(df.loc[:, df.columns != 'total_active_minutes'], cmap='viridis', cbar_kws={'label': 'Count of was_active == True'})
+        plt.xlabel('Hour of Day')
+        plt.ylabel('User')
+        plt.title('User Activity Heatmap')
diff --git a/app/analysis/plot_line_activity-user.py b/app/analysis/plot_line_activity-user.py
new file mode 100644
index 0000000..0396d46
--- /dev/null
+++ b/app/analysis/plot_line_activity-user.py
@@ -0,0 +1,67 @@
+import pandas as pd
+import matplotlib.pyplot as plt
+import seaborn as sns
+from .basePlotAnalysis import BasePlotAnalysis
+from flask import current_app, url_for
+
+import matplotlib
+matplotlib.use('Agg')
+
+class PlotLineActivityAllUsers(BasePlotAnalysis):
+    """
+    Class for analyzing user activity trends over multiple days and generating a line graph.
+
+    Attributes:
+        name (str): The name of the analysis.
+        description (str): A brief description of the analysis.
+        plot_filename (str): The filename for the output plot.
+        note (str): Additional notes for the analysis.
+    """
+    name = "Activity Line Graph (All Users)"
+    description = "This analysis shows the activity line graph for all users. Generates a downloadable PNG image."
+    plot_filename = "line_activity-all_users.png"
+    note = ""
+
+    def transform_data(self, df: pd.DataFrame) -> pd.DataFrame:
+        """
+        Transform data for the line plot.
+ + Parameters: + df (pd.DataFrame): The input DataFrame containing user activity data. + + Returns: + pd.DataFrame: The transformed DataFrame with activity counts by hour. + """ + df['hour'] = df['timestamp'].dt.hour + df = df[df['was_active'] == True].pivot_table(index='name', columns='hour', values='was_active', aggfunc='sum', fill_value=0) + df['total_active_minutes'] = df.sum(axis=1) + df = df.sort_values(by='total_active_minutes', ascending=False).drop('total_active_minutes', axis=1) + + cumulative_sum_row = df.cumsum().iloc[-1] + df.loc['Cumulative Sum'] = cumulative_sum_row + + return df + + def plot_data(self, df: pd.DataFrame): + """ + Generate line graph for user activity throughout the day. + + Parameters: + df (pd.DataFrame): The transformed DataFrame containing activity counts by hour. + """ + plt.figure(figsize=(12, 6)) + + # Plot each user's activity + for index, row in df.iterrows(): + if index == 'Cumulative Sum': + plt.plot(row.index, row.values, label=index, linewidth=3, color='black') # Bold line for cumulative sum + else: + plt.plot(row.index, row.values, label=index) + + # Add labels and title + plt.xlabel('Hour of Day') + plt.ylabel('Activity Count') + plt.title('User Activity Throughout the Day') + plt.legend(loc='upper left', bbox_to_anchor=(1, 1)) + + plt.grid(True) diff --git a/app/analysis/plotly_heat_user-activity.py b/app/analysis/plotly_heat_user-activity.py new file mode 100644 index 0000000..51462aa --- /dev/null +++ b/app/analysis/plotly_heat_user-activity.py @@ -0,0 +1,82 @@ +import pandas as pd +import plotly.express as px +import plotly.graph_objects as go + +from .basePlotlyAnalysis import BasePlotlyAnalysis +from flask import current_app, url_for + +class PlotlyActivityHeatmap(BasePlotlyAnalysis): + """ + Class for analyzing user activity trends over multiple days and generating an interactive heatmap. + + Attributes: + name (str): The name of the analysis. + description (str): A brief description of the analysis. + plot_filename (str): The filename for the output plot. + note (str): Additional notes for the analysis. + """ + name = "Activity Heatmap (Interactive)" + description = "Displays user activity trends over multiple days using an interactive heatmap." + plot_filename = "activity_heatmap.html" + note = "" + + def transform_data(self, df: pd.DataFrame) -> pd.DataFrame: + """ + Transform data for the heatmap. + + Parameters: + df (pd.DataFrame): The input DataFrame containing user activity data. + + Returns: + pd.DataFrame: The transformed DataFrame with activity counts by hour. + """ + df['hour'] = df['timestamp'].dt.hour + active_counts = df[df['was_active']].pivot_table( + index='name', + columns='hour', + values='was_active', + aggfunc='sum', + fill_value=0 + ).reset_index() + + # Ensure all hours are represented + all_hours = pd.DataFrame({'hour': range(24)}) + active_counts = active_counts.melt(id_vars='name', var_name='hour', value_name='activity_count') + active_counts = active_counts.merge(all_hours, on='hour', how='right').fillna(0) + active_counts['hour'] = active_counts['hour'].astype(int) # Ensure hour is treated as numeric + return active_counts + + def plot_data(self, df: pd.DataFrame): + """ + Generate heatmap plot. + + Parameters: + df (pd.DataFrame): The transformed DataFrame containing activity counts by hour. 
+ """ + df = df.pivot(index='name', columns='hour', values='activity_count').fillna(0) + + # Create a Plotly heatmap + self.fig = go.Figure(data=go.Heatmap( + z=df.values, + x=df.columns, + y=df.index, + colorscale='Viridis', + colorbar=dict(title='Count of was_active == True') + )) + + # Update layout + self.fig.update_layout( + title='User Activity Heatmap', + xaxis_title='Hour of Day', + yaxis_title='User ID', + xaxis=dict(tickmode='linear', dtick=1, range=[0, 23]), # Ensure x-axis covers all hours + template='plotly_white' + ) + + self.fig.update_traces( + hovertemplate="
".join([ + "Hour: %{x}", + "Name: %{y}", + "Activity: %{z}", + ]) +) diff --git a/app/analysis/plotly_line_activity-user.py b/app/analysis/plotly_line_activity-user.py new file mode 100644 index 0000000..0e49e5d --- /dev/null +++ b/app/analysis/plotly_line_activity-user.py @@ -0,0 +1,65 @@ +import pandas as pd +import plotly.graph_objects as go +from plotly.subplots import make_subplots +from .basePlotlyAnalysis import BasePlotlyAnalysis +from flask import current_app, url_for + +class PlotlyLineActivityAllUsers(BasePlotlyAnalysis): + """ + Class for analyzing user activity trends over multiple days and generating an interactive line graph. + + Attributes: + name (str): The name of the analysis. + description (str): A brief description of the analysis. + plot_filename (str): The filename for the output plot. + note (str): Additional notes for the analysis. + """ + name = "Activity Line Graph (All Users, Interactive)" + description = "This analysis shows the activity line graph for all users. The graph is interactive and can be used to explore the data." + plot_filename = "line_activity-all_users.html" + note = "" + + def transform_data(self, df: pd.DataFrame) -> pd.DataFrame: + """ + Transform data for the line plot. + + Parameters: + df (pd.DataFrame): The input DataFrame containing user activity data. + + Returns: + pd.DataFrame: The transformed DataFrame with activity counts by hour. + """ + df['hour'] = df['timestamp'].dt.hour + df = df[df['was_active'] == True].pivot_table(index='name', columns='hour', values='was_active', aggfunc='sum', fill_value=0) + df['total_active_minutes'] = df.sum(axis=1) + df = df.sort_values(by='total_active_minutes', ascending=False).drop('total_active_minutes', axis=1) + + cumulative_sum_row = df.cumsum().iloc[-1] + df.loc['Cumulative Sum'] = cumulative_sum_row + + return df + + def plot_data(self, df: pd.DataFrame): + """ + Generate interactive line graph for user activity throughout the day. + + Parameters: + df (pd.DataFrame): The transformed DataFrame containing activity counts by hour. + """ + self.fig = make_subplots() + + # Plot each user's activity + for index, row in df.iterrows(): + if index == 'Cumulative Sum': + self.fig.add_trace(go.Scatter(x=row.index, y=row.values, mode='lines', name=index, line=dict(width=3, color='black'))) # Bold line for cumulative sum + else: + self.fig.add_trace(go.Scatter(x=row.index, y=row.values, mode='lines', name=index)) + + self.fig.update_layout( + title='User Activity Throughout the Day', + xaxis_title='Hour of Day', + yaxis_title='Activity Count', + legend_title='User', + legend=dict(x=1, y=1), + template='plotly_white' + ) \ No newline at end of file diff --git a/app/analysis/table_statistics.py b/app/analysis/table_statistics.py new file mode 100644 index 0000000..49769bf --- /dev/null +++ b/app/analysis/table_statistics.py @@ -0,0 +1,31 @@ +import pandas as pd +from .base import BaseAnalysis +from flask import render_template_string + +class GenerateStatistics(BaseAnalysis): + name = "Test Statistics (Placeholder)" + description = "Generates activity statistics grouped by hour." + + def execute(self, df: pd.DataFrame): + df["hour"] = df["timestamp"].dt.hour + statistics = df.groupby("hour").size().reset_index(name="count") + + # Convert statistics DataFrame to HTML + table_html = statistics.to_html(classes="table table-bordered table-striped") + + # Wrap it in Bootstrap styling + html_content = render_template_string( + """ +
+            <div class="card">
+                <div class="card-header">
+                    Activity Statistics
+                </div>
+                <div class="card-body">
+                    {{ table_html | safe }}
+                </div>
+            </div>
+ """, + table_html=table_html + ) + + return html_content diff --git a/app/api.py b/app/api.py new file mode 100644 index 0000000..a5177c4 --- /dev/null +++ b/app/api.py @@ -0,0 +1,230 @@ +from flask import jsonify, request, Response, send_from_directory, current_app +import threading +import os +import glob +from datetime import datetime +import pandas as pd + +from app.models import Scraper +from app.util import create_zip, delete_old_zips, tail +from app.config import load_config +from app.forms import ScrapingForm +from app.tasks import start_scraping_task, stop_scraping_task, get_redis + +scraping_thread = None +scraper = None +scrape_lock = threading.Lock() + +def register_api(app): + @app.route('/start_scraping', methods=['POST']) + def start_scraping(): + form = ScrapingForm() + if form.validate_on_submit(): + redis_client = get_redis() + faction_id = form.faction_id.data + + # Check if scraping is already active + if redis_client.hget(f"scraper:{faction_id}", "scraping_active") == "1": + return jsonify({"status": "Scraping already in progress"}) + + # Convert config to a serializable dict with only needed values + config_dict = { + 'DATA': {'DATA_DIR': current_app.config['DATA']['DATA_DIR']}, + 'DEFAULT': {'API_KEY': current_app.config['DEFAULT']['API_KEY']} + } + + start_scraping_task.delay( + faction_id, + int(form.fetch_interval.data), # Ensure this is an int + int(form.run_interval.data), # Ensure this is an int + config_dict + ) + return jsonify({"status": "Scraping started"}) + return jsonify({"status": "Invalid form data"}) + + @app.route('/stop_scraping', methods=['POST']) + def stop_scraping(): + redis_client = get_redis() + faction_id = redis_client.get("current_faction_id") + if not faction_id: + return jsonify({"status": "No active scraping session"}) + + stop_scraping_task.delay(faction_id) + return jsonify({"status": "Stopping scraping"}) + + @app.route('/logfile', methods=['GET']) + def logfile(): + log_file_name = current_app.logger.handlers[0].baseFilename + + page = int(request.args.get('page', 0)) # Page number + lines_per_page = int(request.args.get('lines_per_page', current_app.config['LOGGING']['VIEW_PAGE_LINES'])) # Lines per page + log_file_path = log_file_name # Path to the current log file + + if not os.path.isfile(log_file_path): + current_app.logger.error("Log file not found") + return jsonify({"error": "Log file not found"}), 404 + + log_lines = list(tail(log_file_path, current_app.config['LOGGING']['VIEW_MAX_LINES'])) + + log_lines = log_lines[::-1] # Reverse the list + + start = page * lines_per_page + end = start + lines_per_page + paginated_lines = log_lines[start:end] if start < len(log_lines) else [] + + return jsonify({ + "log": paginated_lines, + "total_lines": len(log_lines), + "pages": (len(log_lines) + lines_per_page - 1) // lines_per_page, + "start_line": len(log_lines) - start + }) + + + @app.route('/download_files', methods=['POST']) + def download_files(): + delete_old_zips() # Clean up old zip files + + file_paths = request.json.get('file_paths') + if not file_paths: + return jsonify({"error": "No files specified"}), 400 + + # Get the absolute path of the parent directory + parent_dir = os.path.abspath(os.path.join(app.root_path, os.pardir)) + + # Validate and correct file paths + valid_file_paths = [] + for file_path in file_paths: + if file_path.startswith('/data/'): + corrected_path = file_path.lstrip('/') + full_path = os.path.join(parent_dir, corrected_path) + if os.path.isfile(full_path): + valid_file_paths.append(full_path) + 
elif file_path.startswith('/log/'):
+                corrected_path = file_path.lstrip('/')
+                full_path = os.path.join(parent_dir, corrected_path)
+                if os.path.isfile(full_path):
+                    valid_file_paths.append(full_path)
+
+        if not valid_file_paths:
+            return jsonify({"error": "No valid files specified"}), 400
+
+        # Create a unique zip file name
+        zip_name = f"files_{datetime.now().strftime('%Y%m%d%H%M%S')}.zip"
+        zip_path = create_zip(valid_file_paths, zip_name, app)
+
+        # Log the directory and file path for debugging
+        current_app.logger.debug(f"Sending file from directory: temp, file: {zip_name}")
+
+        return download_tmp_file(zip_name)
+
+    @app.route('/delete_files', methods=['POST'])
+    def delete_files():
+        log_file_name = current_app.logger.handlers[0].baseFilename
+        file_paths = request.json.get('file_paths', [])
+
+        if not file_paths:
+            return jsonify({"error": "No files specified"}), 400
+
+        errors = []
+        data_dir = os.path.abspath(current_app.config['DATA']['DATA_DIR'])
+        log_dir = os.path.abspath(current_app.config['LOGGING']['LOG_DIR'])
+
+        for file_path in file_paths:
+            # str.lstrip() strips a character set, not a prefix, and would
+            # mangle names such as 'data-export.csv'; slice the prefix off instead
+            if file_path.startswith('/data/'):
+                full_file_path = os.path.join(data_dir, file_path[len('/data/'):])
+            elif file_path.startswith('/log/'):
+                full_file_path = os.path.join(log_dir, file_path[len('/log/'):])
+            else:
+                errors.append({"file": file_path, "error": "File not in allowed directory"})
+                continue
+
+            # Check if the file is in either the logs or the data files folder
+            #if not (full_file_path.startswith(data_dir) or full_file_path.startswith(log_dir)):
+            #    errors.append({"file": file_path, "error": "File not in allowed directory"})
+            #    continue
+
+            # Check if it's the currently active log file
+            if full_file_path == log_file_name:
+                errors.append({"file": file_path, "error": "Cannot delete active log file."})
+                continue
+
+            # Check if it's an active data file
+            if scraper and scraper.data_file_name == full_file_path:
+                errors.append({"file": file_path, "error": "Cannot delete active data file."})
+                continue
+
+            if not os.path.isfile(full_file_path):
+                errors.append({"file": file_path, "error": "File not found"})
+                continue
+
+            try:
+                os.remove(full_file_path)
+            except Exception as e:
+                errors.append({"file": file_path, "error": str(e)})
+
+        if errors:
+            return jsonify({"errors": errors}), 207  # Multi-Status response
+        return jsonify({"success": True}), 200
+
+    @app.route('/data/<filename>')
+    def download_data_file(filename):
+        data_dir = os.path.abspath(current_app.config['DATA']['DATA_DIR'])
+        file_path = os.path.join(data_dir, filename)
+
+        return send_from_directory(directory=data_dir, path=filename, as_attachment=True)
+
+    @app.route('/log/<filename>')
+    def download_log_file(filename):
+        log_dir = os.path.abspath(current_app.config['LOGGING']['LOG_DIR'])
+        file_path = os.path.join(log_dir, filename)
+
+        return send_from_directory(directory=log_dir, path=filename, as_attachment=True)
+
+    @app.route('/tmp/<filename>')
+    def download_tmp_file(filename):
+        tmp_dir = os.path.abspath(current_app.config['TEMP']['TEMP_DIR'])
+        file_path = os.path.join(tmp_dir, filename)
+
+        return send_from_directory(directory=tmp_dir, path=filename, as_attachment=True)
+
+    @app.route('/config/lines_per_page')
+    def get_lines_per_page():
+        lines_per_page = current_app.config['LOGGING']['VIEW_PAGE_LINES']
+        return jsonify({"lines_per_page": lines_per_page})
+
+    @app.route('/scraping_status', methods=['GET'])
+    def scraping_status():
+        redis_client = get_redis()
+        current_faction_id = redis_client.get("current_faction_id")
+
+        if not current_faction_id:
+            return 
jsonify({"scraping_active": False}) + + scraping_active = redis_client.hget(f"scraper:{current_faction_id}", "scraping_active") + + # If we have a faction_id but scraping is not active, clean up the stale state + if not scraping_active or scraping_active == "0": + redis_client.delete("current_faction_id") + return jsonify({"scraping_active": False}) + + return jsonify({ + "scraping_active": True, + "faction_id": current_faction_id + }) + + @app.route('/scraping_get_end_time') + def scraping_get_end_time(): + redis_client = get_redis() + current_faction_id = redis_client.get("current_faction_id") + + if not current_faction_id: + return jsonify({"scraping_active": False}) + + end_time = redis_client.hget(f"scraper:{current_faction_id}", "end_time") + if not end_time: + return jsonify({"scraping_active": False}) + + return jsonify({ + "end_time": end_time, + "faction_id": current_faction_id + }) diff --git a/app/config.py b/app/config.py new file mode 100644 index 0000000..71ef4bd --- /dev/null +++ b/app/config.py @@ -0,0 +1,8 @@ +from configobj import ConfigObj +import os + +def load_config(): + config_path = os.path.join(os.path.dirname(__file__), '..', 'config.ini') + + # Load config while preserving sections as nested dicts + return ConfigObj(config_path) diff --git a/app/filters.py b/app/filters.py new file mode 100644 index 0000000..60ded1f --- /dev/null +++ b/app/filters.py @@ -0,0 +1,15 @@ +from flask import Blueprint, request, jsonify +from datetime import datetime + +def register_filters(app): + @app.template_filter('datetimeformat') + def datetimeformat(value): + """Convert datetime or timestamp to formatted string""" + if isinstance(value, datetime): + dt = value + else: + try: + dt = datetime.fromtimestamp(float(value)) + except (ValueError, TypeError): + return str(value) + return dt.strftime('%Y-%m-%d %H:%M:%S') \ No newline at end of file diff --git a/app/forms.py b/app/forms.py new file mode 100644 index 0000000..b4c5039 --- /dev/null +++ b/app/forms.py @@ -0,0 +1,9 @@ +from flask_wtf import FlaskForm +from wtforms import StringField, IntegerField, SubmitField +from wtforms.validators import DataRequired + +class ScrapingForm(FlaskForm): + faction_id = StringField('Faction ID', validators=[DataRequired()], default='9686') + fetch_interval = IntegerField('Fetch Interval (seconds)', validators=[DataRequired()], default=60) + run_interval = IntegerField('Run Interval (days)', validators=[DataRequired()], default=1) + submit = SubmitField('Start') \ No newline at end of file diff --git a/app/logging_config.py b/app/logging_config.py new file mode 100644 index 0000000..3f9f0b1 --- /dev/null +++ b/app/logging_config.py @@ -0,0 +1,34 @@ +import logging +from logging.handlers import QueueHandler +from queue import Queue +import os +from datetime import datetime + +from flask import current_app + +def init_logger(config): + LOG_DIR = config.get('LOGGING', {}).get('LOG_DIR', 'log') + + if not os.path.exists(LOG_DIR): + os.makedirs(LOG_DIR) + + log_file_name = os.path.join(LOG_DIR, datetime.now().strftime('%Y-%m-%d-%H-%M') + '.log') + + logger = logging.getLogger(__name__) + logger.setLevel(logging.DEBUG) + + file_handler = logging.FileHandler(log_file_name, mode='w') + file_handler.setLevel(logging.DEBUG) + formatter = logging.Formatter('%(asctime)s - %(levelname)s: %(message)s', + datefmt='%m/%d/%Y %I:%M:%S %p') + file_handler.setFormatter(formatter) + logger.addHandler(file_handler) + + log_queue = Queue() + queue_handler = QueueHandler(log_queue) + 
queue_handler.setLevel(logging.DEBUG)
+    logger.addHandler(queue_handler)
+
+    logger.debug("Logger initialized")
+
+    return logger
\ No newline at end of file
diff --git a/app/models.py b/app/models.py
new file mode 100644
index 0000000..4c2647a
--- /dev/null
+++ b/app/models.py
@@ -0,0 +1,194 @@
+from typing import List, Dict, Optional
+import requests
+import pandas as pd
+import os
+import time
+from datetime import datetime, timedelta
+from requests.exceptions import ConnectionError, Timeout, RequestException
+import redis
+import threading
+
+from flask import current_app
+
+class Scraper:
+    _instances = {}  # Track all instances by faction_id
+    _lock = threading.Lock()
+
+    def __new__(cls, faction_id, *args, **kwargs):
+        with cls._lock:
+            # Stop any existing instance for this faction
+            if faction_id in cls._instances:
+                old_instance = cls._instances[faction_id]
+                old_instance.stop_scraping()
+
+            instance = super().__new__(cls)
+            cls._instances[faction_id] = instance
+            return instance
+
+    def __init__(self, faction_id, fetch_interval, run_interval, config):
+        # Only initialize if not already initialized
+        if not hasattr(self, 'faction_id'):
+            self.redis_client = redis.StrictRedis(
+                host='localhost', port=6379, db=0, decode_responses=True
+            )
+            self.faction_id = faction_id
+            self.fetch_interval = fetch_interval
+            self.run_interval = run_interval
+            self.API_KEY = config['DEFAULT']['API_KEY']
+            self.data_file_name = os.path.join(
+                config['DATA']['DATA_DIR'],
+                f"{faction_id}-{datetime.now().strftime('%Y-%m-%d-%H-%M')}.csv"
+            )
+            self.end_time = datetime.now() + timedelta(days=int(run_interval))
+
+            # Store scraper state in Redis; hset(..., mapping=...) replaces
+            # the hmset call deprecated in redis-py 4+
+            self.redis_client.hset(f"scraper:{faction_id}", mapping={
+                "faction_id": faction_id,
+                "fetch_interval": fetch_interval,
+                "run_interval": run_interval,
+                "end_time": self.end_time.isoformat(),
+                "data_file_name": self.data_file_name,
+                "scraping_active": "0",
+                "api_key": self.API_KEY
+            })
+
+    @property
+    def scraping_active(self):
+        # hget returns None once the hash has been cleaned up, so compare
+        # against the stored flag instead of casting blindly
+        return self.redis_client.hget(f"scraper:{self.faction_id}", "scraping_active") == "1"
+
+    @scraping_active.setter
+    def scraping_active(self, value):
+        self.redis_client.hset(f"scraper:{self.faction_id}", "scraping_active", "1" if value else "0")
+
+    def fetch_faction_data(self):
+        url = f"https://api.torn.com/faction/{self.faction_id}?selections=&key={self.API_KEY}"
+        response = requests.get(url)
+        if response.status_code == 200:
+            return response.json()
+        current_app.logger.warning(f"Failed to fetch faction data for faction ID {self.faction_id}. 
Response: {response.text}") + return None + + def fetch_user_activity(self, user_id): + url = f"https://api.torn.com/user/{user_id}?selections=basic,profile&key={self.API_KEY}" + retries = 3 + for attempt in range(retries): + try: + response = requests.get(url, timeout=10) + response.raise_for_status() + return response.json() + except ConnectionError as e: + current_app.logger.error(f"Connection error while fetching user activity for user ID {user_id}: {e}") + except Timeout as e: + current_app.logger.error(f"Timeout error while fetching user activity for user ID {user_id}: {e}") + except RequestException as e: + current_app.logger.error(f"Error while fetching user activity for user ID {user_id}: {e}") + if attempt < retries - 1: + current_app.logger.debug(f"Retrying {attempt + 1}/{retries} for user {user_id}") + time.sleep(2 ** attempt) # Exponential backoff + return None + + def start_scraping(self) -> None: + """Starts the scraping process until the end time is reached or stopped manually.""" + self.scraping_active = True + + current_app.logger.info(f"Starting scraping for faction ID {self.faction_id}") + current_app.logger.debug(f"Fetch interval: {self.fetch_interval}s, Run interval: {self.run_interval} days, End time: {self.end_time}") + + MAX_FAILURES = 5 + failure_count = 0 + + while datetime.now() < self.end_time and self.scraping_active: + current_app.logger.info(f"Fetching data at {datetime.now()}") + faction_data = self.fetch_faction_data() + + if not faction_data or "members" not in faction_data: + current_app.logger.warning(f"No faction data found for ID {self.faction_id} (Failure {failure_count + 1}/{MAX_FAILURES})") + failure_count += 1 + if failure_count >= MAX_FAILURES: + current_app.logger.error(f"Max failures reached ({MAX_FAILURES}). 
Stopping scraping.") + break + time.sleep(self.fetch_interval) + continue + + current_app.logger.info(f"Fetched {len(faction_data['members'])} members for faction {self.faction_id}") + failure_count = 0 # Reset failure count on success + user_activity_data = self.process_faction_members(faction_data["members"]) + self.save_data(user_activity_data) + + current_app.logger.info(f"Data appended to {self.data_file_name}") + time.sleep(self.fetch_interval) + + self.handle_scraping_end() + + + def process_faction_members(self, members: Dict[str, Dict]) -> List[Dict]: + """Processes and retrieves user activity for all faction members.""" + user_activity_data = [] + for user_id in members.keys(): + user_activity = self.fetch_user_activity(user_id) + if user_activity: + user_activity_data.append({ + "user_id": user_id, + "name": user_activity.get("name", ""), + "last_action": user_activity.get("last_action", {}).get("timestamp", 0), + "status": user_activity.get("status", {}).get("state", ""), + "timestamp": datetime.now().timestamp(), + }) + current_app.logger.info(f"Fetched data for user {user_id} ({user_activity.get('name', '')})") + else: + current_app.logger.warning(f"Failed to fetch data for user {user_id}") + + return user_activity_data + + def save_data(self, user_activity_data: List[Dict]) -> None: + """Saves user activity data to a CSV file.""" + if not user_activity_data: + current_app.logger.warning("No data to save.") + return + + df = pd.DataFrame(user_activity_data) + df["last_action"] = pd.to_datetime(df["last_action"], unit="s") + df["timestamp"] = pd.to_datetime(df["timestamp"], unit="s") + + file_exists = os.path.isfile(self.data_file_name) + + try: + with open(self.data_file_name, "a" if file_exists else "w") as f: + df.to_csv(f, mode="a" if file_exists else "w", header=not file_exists, index=False) + current_app.logger.info(f"Data successfully saved to {self.data_file_name}") + except Exception as e: + current_app.logger.error(f"Error saving data to {self.data_file_name}: {e}") + + def cleanup_redis_state(self): + """Clean up all Redis state for this scraper instance""" + if hasattr(self, 'faction_id'): + self.redis_client.delete(f"scraper:{self.faction_id}") + current_id = self.redis_client.get("current_faction_id") + if current_id and current_id == str(self.faction_id): + self.redis_client.delete("current_faction_id") + # Remove from instances tracking + with self._lock: + if self.faction_id in self._instances: + del self._instances[self.faction_id] + + def handle_scraping_end(self) -> None: + """Handles cleanup and logging when scraping ends.""" + if not self.scraping_active: + current_app.logger.warning(f"Scraping stopped manually at {datetime.now()}") + elif datetime.now() >= self.end_time: + current_app.logger.warning(f"Scraping stopped due to timeout at {datetime.now()} (Run interval: {self.run_interval} days)") + else: + current_app.logger.error(f"Unexpected stop at {datetime.now()}") + + current_app.logger.info("Scraping completed.") + self.scraping_active = False + self.cleanup_redis_state() + + def stop_scraping(self): + self.scraping_active = False + self.cleanup_redis_state() + current_app.logger.debug(f"Scraping stopped for faction {self.faction_id}") + + def __del__(self): + """Ensure Redis cleanup on object destruction""" + self.cleanup_redis_state() \ No newline at end of file diff --git a/app/static/color_mode.js b/app/static/color_mode.js new file mode 100644 index 0000000..35f9945 --- /dev/null +++ b/app/static/color_mode.js @@ -0,0 +1,26 @@ + 
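Everything `Scraper` persists lives in a single Redis hash per faction, which makes its state easy to inspect from a REPL. A minimal sketch, assuming a local Redis and using the form's default faction id 9686 (field names as written to the hash in app/models.py above):

    import redis

    r = redis.StrictRedis(host='localhost', port=6379, db=0, decode_responses=True)
    state = r.hgetall('scraper:9686')                 # all fields come back as strings
    is_active = state.get('scraping_active') == '1'   # '1'/'0' flag set by the scraper
    print(state.get('end_time'), is_active)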
+document.addEventListener('DOMContentLoaded', () => { + const themeToggle = document.getElementById('bd-theme'); + + // Check if a theme preference is saved in localStorage + const savedTheme = localStorage.getItem('theme'); + + if (savedTheme === 'dark') { + themeToggle.checked = true; + document.documentElement.setAttribute('data-bs-theme', 'dark'); + } else { + themeToggle.checked = false; + document.documentElement.setAttribute('data-bs-theme', 'light'); + } + + // Add event listener to toggle theme on checkbox change + themeToggle.addEventListener('change', () => { + if (themeToggle.checked) { + document.documentElement.setAttribute('data-bs-theme', 'dark'); + localStorage.setItem('theme', 'dark'); + } else { + document.documentElement.setAttribute('data-bs-theme', 'light'); + localStorage.setItem('theme', 'light'); + } + }); +}); \ No newline at end of file diff --git a/app/static/common.js b/app/static/common.js new file mode 100644 index 0000000..92cc934 --- /dev/null +++ b/app/static/common.js @@ -0,0 +1,38 @@ +import { ScraperUtils } from './scraper_utils.js'; + +class Common { + constructor() { + this.utils = new ScraperUtils(); + this.addEventListeners(); + this.scheduleUpdates(); + } + + scheduleUpdates() { + // Ensure server time updates every minute but only after initial fetch + setTimeout(() => { + setInterval(() => this.utils.updateServerTime(), 60000); + }, 5000); // Delay first scheduled update to prevent duplicate initial request + } + + addEventListeners() { + if (this.utils.stopButton) { + this.utils.stopButton.addEventListener('click', () => this.utils.checkScrapingStatus()); + } + } +} + +document.addEventListener('DOMContentLoaded', () => { + new Common(); +}); + +window.checkAllCheckboxes = function(tableId, checkAllId) { + var table = document.getElementById(tableId); + var checkAll = document.getElementById(checkAllId); + var checkboxes = table.querySelectorAll('input[type="checkbox"]'); + + checkboxes.forEach(function(checkbox) { + if (!checkbox.disabled) { + checkbox.checked = checkAll.checked; + } + }); +}; \ No newline at end of file diff --git a/app/static/download_results.js b/app/static/download_results.js new file mode 100644 index 0000000..b5d18e5 --- /dev/null +++ b/app/static/download_results.js @@ -0,0 +1,96 @@ +async function deleteFiles(filePaths) { + try { + const response = await fetch('/delete_files', { + method: 'POST', + headers: { + 'Content-Type': 'application/json', + }, + body: JSON.stringify({ file_paths: filePaths }) + }); + + const data = await response.json(); + + if (data.success) { + alert('Files deleted successfully'); + location.reload(); + } else { + alert(`Error deleting files: ${JSON.stringify(data.errors)}`); + } + } catch (error) { + console.error('Error:', error); + alert('An error occurred while deleting files.'); + } +} + +function getSelectedFiles() { + return Array.from(document.querySelectorAll('input[name="fileCheckbox"]:checked')) + .map(checkbox => checkbox.value); +} + +function deleteSelectedFiles() { + const selectedFiles = getSelectedFiles(); + if (selectedFiles.length > 0) { + deleteFiles(selectedFiles); + } else { + alert('No files selected'); + } +} + +async function downloadSelectedFiles() { + const selectedFiles = getSelectedFiles(); + if (selectedFiles.length === 0) { + alert('No files selected'); + return; + } + + try { + const response = await fetch('/download_files', { + method: 'POST', + headers: { + 'Content-Type': 'application/json', + }, + body: JSON.stringify({ file_paths: selectedFiles }) + }); + 
+        if (!response.ok) {
+            const errorData = await response.json();
+            throw new Error(errorData.error || 'Failed to download files.');
+        }
+
+        const blob = await response.blob();
+        if (blob.type !== 'application/zip') {
+            throw new Error('Received invalid ZIP file.');
+        }
+
+        const url = window.URL.createObjectURL(blob);
+        const a = document.createElement('a');
+        a.href = url;
+        a.download = 'files.zip';
+        document.body.appendChild(a);
+        a.click();
+        a.remove();
+        window.URL.revokeObjectURL(url);
+    } catch (error) {
+        console.error('Download error:', error);
+        alert(`Error: ${error.message}`);
+    }
+}
+
+function sortTable(columnIndex, tableId) {
+    const table = document.getElementById(tableId);
+    const tbody = table.querySelector('tbody');
+    const rows = Array.from(tbody.rows);
+    const isAscending = table.dataset.sortAsc === 'true';
+
+    rows.sort((rowA, rowB) => {
+        const cellA = rowA.cells[columnIndex].innerText.trim().toLowerCase();
+        const cellB = rowB.cells[columnIndex].innerText.trim().toLowerCase();
+        return cellA.localeCompare(cellB) * (isAscending ? 1 : -1);
+    });
+
+    // Toggle sorting order for next click
+    table.dataset.sortAsc = !isAscending;
+
+    // Reinsert sorted rows
+    rows.forEach(row => tbody.appendChild(row));
+}
\ No newline at end of file
diff --git a/app/static/index.js b/app/static/index.js
new file mode 100644
index 0000000..9c9aed1
--- /dev/null
+++ b/app/static/index.js
@@ -0,0 +1,56 @@
+import { ScraperUtils } from './scraper_utils.js';
+
+class ScraperApp {
+    constructor() {
+        this.utils = new ScraperUtils();
+        this.form = document.getElementById('scrapingForm');
+        this.stopButton = document.getElementById('stopButton');
+        this.startButton = document.getElementById('startButton');
+        this.init();
+    }
+
+    init() {
+        this.utils.checkScrapingStatus();
+        this.addEventListeners();
+    }
+
+    async startScraping(event) {
+        event.preventDefault(); // Prevent default form submission
+        const formData = new FormData(this.form);
+        try {
+            const response = await fetch('/start_scraping', {
+                method: 'POST',
+                body: formData
+            });
+            const data = await response.json();
+            if (data.status === "Scraping started") {
+                this.utils.checkScrapingStatus(); // Update UI
+            }
+        } catch (error) {
+            console.error('Error starting scraping:', error);
+        }
+    }
+
+    async stopScraping() {
+        try {
+            const response = await fetch('/stop_scraping', {
+                method: 'POST'
+            });
+            const data = await response.json();
+            // The endpoint answers with "Stopping scraping" (see /stop_scraping
+            // in app/api.py), so match that status, not a string it never returns
+            if (data.status === "Stopping scraping") {
+                this.utils.checkScrapingStatus(); // Update UI
+            }
+        } catch (error) {
+            console.error('Error stopping scraping:', error);
+        }
+    }
+
+    addEventListeners() {
+        this.form.addEventListener('submit', (event) => this.startScraping(event));
+        this.stopButton.addEventListener('click', () => this.stopScraping());
+    }
+}
+
+document.addEventListener('DOMContentLoaded', () => {
+    new ScraperApp();
+});
diff --git a/app/static/log_viewer.js b/app/static/log_viewer.js
new file mode 100644
index 0000000..8f40880
--- /dev/null
+++ b/app/static/log_viewer.js
@@ -0,0 +1,97 @@
+class LogViewerApp {
+    constructor() {
+        this.logsElement = document.getElementById('logs');
+        this.prevPageButton = document.getElementById('prevPage');
+        this.nextPageButton = document.getElementById('nextPage');
+        this.pageInfo = document.getElementById('pageInfo');
+
+        this.currentPage = 0;
+        this.linesPerPage = null;
+        this.autoRefreshInterval = null;
+
+        this.init();
+    }
+
+    async init() {
+        await this.fetchConfig();
+        await this.checkScrapingStatus();
+        this.addEventListeners();
+    }
+
+    async 
fetchConfig() { + try { + const response = await fetch('/config/lines_per_page'); + const data = await response.json(); + this.linesPerPage = data.lines_per_page; + this.fetchLogs(this.currentPage); + } catch (error) { + console.error('Error fetching config:', error); + } + } + + async fetchLogs(page) { + try { + const response = await fetch(`/logfile?page=${page}&lines_per_page=${this.linesPerPage}`); + const data = await response.json(); + + if (data.error) { + this.logsElement.textContent = data.error; + } else { + this.logsElement.innerHTML = data.log.map((line, index) => { + const lineNumber = data.start_line - index; + return `${lineNumber} ${line}`; + }).join(''); + + this.updatePagination(data.total_lines); + } + } catch (error) { + console.error('Error fetching logs:', error); + } + } + + updatePagination(totalLines) { + this.prevPageButton.disabled = this.currentPage === 0; + this.nextPageButton.disabled = (this.currentPage + 1) * this.linesPerPage >= totalLines; + this.pageInfo.textContent = `Page ${this.currentPage + 1} of ${Math.ceil(totalLines / this.linesPerPage)}`; + } + + startAutoRefresh() { + this.autoRefreshInterval = setInterval(() => this.fetchLogs(this.currentPage), 5000); + } + + stopAutoRefresh() { + clearInterval(this.autoRefreshInterval); + } + + async checkScrapingStatus() { + try { + const response = await fetch('/scraping_status'); + const data = await response.json(); + if (data.scraping_active) { + this.startAutoRefresh(); + } else { + this.stopAutoRefresh(); + } + this.fetchLogs(this.currentPage); + } catch (error) { + console.error('Error checking scraping status:', error); + } + } + + addEventListeners() { + this.prevPageButton.addEventListener('click', () => { + if (this.currentPage > 0) { + this.currentPage--; + this.fetchLogs(this.currentPage); + } + }); + + this.nextPageButton.addEventListener('click', () => { + this.currentPage++; + this.fetchLogs(this.currentPage); + }); + } +} + +// Initialize the application when DOM is fully loaded +document.addEventListener('DOMContentLoaded', () => new LogViewerApp()); \ No newline at end of file diff --git a/app/static/scraper_utils.js b/app/static/scraper_utils.js new file mode 100644 index 0000000..b7d137d --- /dev/null +++ b/app/static/scraper_utils.js @@ -0,0 +1,203 @@ +export class ScraperUtils { + constructor() { + this.activityIndicator = document.getElementById('activity_indicator'); + this.endTimeElement = document.getElementById('end_time'); + this.serverTimeElement = document.getElementById('server_time'); + this.timeLeftElement = document.getElementById('time-left'); // New element for countdown + this.stopButton = document.getElementById('stopButton'); + this.startButton = document.getElementById('startButton'); + this.statusContainer = document.getElementById('status_container'); + this.loadingIndicator = document.getElementById('loading_indicator'); + this.statusContent = document.querySelectorAll('#status_content'); + + this.serverTime = null; + this.endTime = null; + this.pollInterval = null; // Add this line + + this.init(); + } + + async init() { + this.showLoadingIndicator(); + + try { + await Promise.all([ + this.updateServerTime(), + this.checkScrapingStatus() + ]); + } catch (error) { + console.error("Error during initialization:", error); + } + + // Start polling for status updates + this.startPolling(); + + // Only start the clock and wait for end time if scraping is active + if (this.activityIndicator.textContent === 'Active') { + if (!this.endTime) { + try { + await 
this.fetchEndTime(); + } catch (error) { + console.error("Error fetching end time:", error); + } + } + + if (this.serverTime && this.endTime) { + this.startClock(); + } + } + + // Hide loading indicator regardless of scraping status + this.hideLoadingIndicator(); + } + + startPolling() { + // Poll every 2 seconds + this.pollInterval = setInterval(async () => { + await this.checkScrapingStatus(); + }, 2000); + } + + stopPolling() { + if (this.pollInterval) { + clearInterval(this.pollInterval); + this.pollInterval = null; + } + } + + showLoadingIndicator() { + this.statusContainer.classList.remove('d-none'); + this.loadingIndicator.classList.remove('d-none'); + this.statusContent.forEach(element => element.classList.add('d-none')); + } + + hideLoadingIndicator() { + this.loadingIndicator.classList.add('d-none'); + this.statusContent.forEach(element => element.classList.remove('d-none')); + } + + async checkScrapingStatus() { + try { + const response = await fetch('/scraping_status'); + const data = await response.json(); + + if (data.scraping_active) { + if (this.startButton) this.startButton.disabled = true; + if (this.stopButton) this.stopButton.disabled = false; + + this.activityIndicator.classList.remove('text-bg-danger'); + this.activityIndicator.classList.add('text-bg-success'); + this.activityIndicator.textContent = 'Active'; + + // Fetch end time if we don't have it yet + if (!this.endTime) { + await this.fetchEndTime(); + } + + this.endTimeElement.classList.remove('d-none'); + this.timeLeftElement.classList.remove('d-none'); + } else { + if (this.startButton) this.startButton.disabled = false; + if (this.stopButton) this.stopButton.disabled = true; + + this.activityIndicator.classList.remove('text-bg-success'); + this.activityIndicator.classList.add('text-bg-danger'); + this.activityIndicator.textContent = 'Inactive'; + + this.endTimeElement.classList.add('d-none'); + this.timeLeftElement.classList.add('d-none'); + + // Reset end time when inactive + this.endTime = null; + } + } catch (error) { + console.error('Error checking scraping status:', error); + } + } + + async updateServerTime() { + try { + const response = await fetch('/server_time'); + const data = await response.json(); + this.serverTime = new Date(data.server_time.replace(' ', 'T')); + + this.serverTimeElement.textContent = `Server Time (TCT): ${this.formatDateToHHMMSS(this.serverTime)}`; + } catch (error) { + console.error('Error fetching server time:', error); + } + } + + + async fetchEndTime() { + if (this.endTime) return; + + try { + const response = await fetch('/scraping_get_end_time'); + const data = await response.json(); + if (data.end_time) { + this.endTime = new Date(data.end_time); + this.endTimeElement.textContent = `Running until ${this.formatDateToYYYYMMDDHHMMSS(this.endTime)} TCT`; + } + } catch (error) { + this.endTimeElement.textContent = 'Error fetching end time'; + console.error('Error fetching end time:', error); + } + } + + startClock() { + const updateClock = () => { + if (this.serverTime) { + this.serverTime.setSeconds(this.serverTime.getSeconds() + 1); + this.serverTimeElement.textContent = `Server Time (TCT): ${this.formatDateToHHMMSS(this.serverTime)}`; + } + + if (this.endTime && this.serverTime) { + const timeLeft = this.endTime - this.serverTime; + this.timeLeftElement.textContent = `Time Left: ${timeLeft > 0 ? 
this.formatMillisecondsToHHMMSS(timeLeft) : '00:00:00'}`; + } + }; + + // Immediately update the clock + updateClock(); + + // Continue updating every second + setInterval(updateClock, 1000); + } + + formatDateToYYYYMMDDHHMMSS(date) { + if (!(date instanceof Date) || isNaN(date)) { + console.error('Invalid date:', date); + return ''; + } + return `${date.getFullYear()}-${String(date.getMonth() + 1).padStart(2, '0')}-${String(date.getDate()).padStart(2, '0')} ` + + `${String(date.getHours()).padStart(2, '0')}:${String(date.getMinutes()).padStart(2, '0')}:${String(date.getSeconds()).padStart(2, '0')}`; + } + + formatDateToHHMMSS(date) { + if (!(date instanceof Date) || isNaN(date)) { + console.error('Invalid date:', date); + return ''; + } + return `${String(date.getHours()).padStart(2, '0')}:${String(date.getMinutes()).padStart(2, '0')}:${String(date.getSeconds()).padStart(2, '0')}`; + } + + formatMillisecondsToHHMMSS(ms) { + const totalSeconds = Math.floor(ms / 1000); + const hours = Math.floor(totalSeconds / 3600); + const minutes = Math.floor((totalSeconds % 3600) / 60); + const seconds = totalSeconds % 60; + return `${String(hours).padStart(2, '0')}:${String(minutes).padStart(2, '0')}:${String(seconds).padStart(2, '0')}`; + } + + // Add cleanup method + cleanup() { + this.stopPolling(); + } +} + +// Add event listener for page unload +window.addEventListener('unload', () => { + if (window.scraperUtils) { + window.scraperUtils.cleanup(); + } +}); diff --git a/app/static/style.css b/app/static/style.css new file mode 100644 index 0000000..4742a63 --- /dev/null +++ b/app/static/style.css @@ -0,0 +1,217 @@ +/* LIGHT MODE (default) */ +:root { + --bs-body-bg: #f8f9fa; /* Light background */ + --bs-body-color: #212529; /* Dark text */ + + --bs-primary: #007bff; + --bs-primary-bg-subtle: #cce5ff; + --bs-primary-border-subtle: #80bdff; + --bs-primary-text-emphasis: #004085; + + --bs-secondary: #6c757d; + --bs-secondary-bg-subtle: #e2e3e5; + --bs-secondary-border-subtle: #c8cbcf; + --bs-secondary-text-emphasis: #383d41; + + --bs-success: #198754; + --bs-success-bg-subtle: #d4edda; + --bs-success-border-subtle: #a3cfbb; + --bs-success-text-emphasis: #155724; + + --bs-danger: #dc3545; + --bs-danger-bg-subtle: #f8d7da; + --bs-danger-border-subtle: #f1aeb5; + --bs-danger-text-emphasis: #721c24; + + --bs-warning: #ffc107; + --bs-warning-bg-subtle: #fff3cd; + --bs-warning-border-subtle: #ffeeba; + --bs-warning-text-emphasis: #856404; + + --bs-info: #17a2b8; + --bs-info-bg-subtle: #d1ecf1; + --bs-info-border-subtle: #bee5eb; + --bs-info-text-emphasis: #0c5460; + + --bs-light: #f8f9fa; + --bs-light-bg-subtle: #ffffff; + --bs-light-border-subtle: #d6d8db; + --bs-light-text-emphasis: #6c757d; + + --bs-dark: #343a40; + --bs-dark-bg-subtle: #212529; + --bs-dark-border-subtle: #1d2124; + --bs-dark-text-emphasis: #ffffff; + + --bs-border-color: #dee2e6; /* Default border color */ +} + +/* DARK MODE */ +[data-bs-theme="dark"] { + --bs-body-bg: #121212; + --bs-body-color: #e9ecef; + + --bs-primary: #1e90ff; + --bs-primary-bg-subtle: #1c2b36; + --bs-primary-border-subtle: #374b58; + --bs-primary-text-emphasis: #a0c4ff; + + --bs-secondary: #adb5bd; + --bs-secondary-bg-subtle: #2d3238; + --bs-secondary-border-subtle: #3e444a; + --bs-secondary-text-emphasis: #ced4da; + + --bs-success: #00c851; + --bs-success-bg-subtle: #1b3425; + --bs-success-border-subtle: #3b6147; + --bs-success-text-emphasis: #b9f6ca; + + --bs-danger: #ff4444; + --bs-danger-bg-subtle: #381717; + --bs-danger-border-subtle: #633030; + 
--bs-danger-text-emphasis: #ffcccb; + + --bs-warning: #ffbb33; + --bs-warning-bg-subtle: #3a2b19; + --bs-warning-border-subtle: #67512e; + --bs-warning-text-emphasis: #ffd700; + + --bs-info: #33b5e5; + --bs-info-bg-subtle: #182e38; + --bs-info-border-subtle: #305564; + --bs-info-text-emphasis: #66d1ff; + + --bs-light: #343a40; + --bs-light-bg-subtle: #2c3137; + --bs-light-border-subtle: #464b50; + --bs-light-text-emphasis: #e9ecef; + + --bs-dark: #ffffff; + --bs-dark-bg-subtle: #f8f9fa; + --bs-dark-border-subtle: #e9ecef; + --bs-dark-text-emphasis: #121212; + + --bs-border-color: #495057; +} + + +[data-bs-theme="dark"] .shadow { + box-shadow: var(--bs-box-shadow) !important; +} + +[data-bs-theme="dark"] .shadow-sm { + box-shadow: var(--bs-box-shadow-sm) !important; +} + +[data-bs-theme="dark"] .shadow-lg { + box-shadow: var(--bs-box-shadow-lg) !important; +} + +:root { + --bs-primary: var(--primary); + --bs-secondary: var(--secondary); + --bs-body-bg: var(--background); + --bs-body-color: var(--text-color); +} + +[data-bs-theme="dark"] { + --bs-primary: var(--primary); + --bs-secondary: var(--secondary); + --bs-body-bg: var(--background); + --bs-body-color: var(--text-color); +} + + +/* Dark Mode Toggle Button */ +/* Hide the default checkbox */ +#color-mode-toggle input[type=checkbox] { + height: 0; + width: 0; + visibility: hidden; +} + + +/* Style the switch */ +#color-mode-toggle label { + cursor: pointer; + width: 70px; + height: 30px; + background: grey; + display: flex; + align-items: center; + justify-content: space-between; + border-radius: 30px; + position: relative; + padding: 5px 15px; + box-shadow: inset 0 0 5px rgba(0, 0, 0, 0.3); +} + +/* The moving toggle circle */ +#color-mode-toggle label:after { + content: ''; + position: absolute; + top: 5px; + left: 5px; + width: 20px; + height: 20px; + background: white; + border-radius: 50%; + transition: 0.3s; +} + +/* Sun and Moon Icons */ +.icon { + font-size: 15px; + position: absolute; + transition: 0.3s; +} + +/* Position Sun on the left */ +.sun { + left: 10px; + /* color: var(--bs-dark) */ + color: var(--sun-color); +} + +/* Position Moon on the right */ +.moon { + right: 10px; + /* color: var(--bs-light); */ + color: var(--sun-color); +} + +/* Move the toggle circle when checked */ +#color-mode-toggle input:checked + label { + background: var(--bs-light); +} + +#color-mode-toggle input:checked + label:after { + left: calc(100% - 25px); + background: var(--bs-dark); +} + +/* Hide moon when in dark mode */ +#color-mode-toggle input:checked + label .sun { + opacity: 100; +} + +#color-mode-toggle input:checked + label .moon { + opacity: 0; +} + +/* Hide sun when in light mode */ +#color-mode-toggle input:not(:checked) + label .moon { + opacity: 100; +} + +#color-mode-toggle input:not(:checked) + label .sun { + opacity: 0; +} + +.line-number { + display: inline-block; + width: 30px; + text-align: right; + margin-right: 10px; + color: #888; +} \ No newline at end of file diff --git a/app/tasks.py b/app/tasks.py new file mode 100644 index 0000000..895c18e --- /dev/null +++ b/app/tasks.py @@ -0,0 +1,93 @@ +from celery import Celery +from app.models import Scraper +import redis +from datetime import timedelta +from flask import current_app + +def create_celery(): + celery = Celery('tasks', broker='redis://localhost:6379/0') + celery.conf.update( + task_serializer='json', + accept_content=['json'], + result_serializer='json', + timezone='UTC' + ) + return celery + +def init_celery(app): + """Initialize Celery with Flask app context""" 
+    celery = create_celery()
+    celery.conf.update(app.config)
+
+    class ContextTask(celery.Task):
+        def __call__(self, *args, **kwargs):
+            with app.app_context():
+                return self.run(*args, **kwargs)
+
+    celery.Task = ContextTask
+    return celery
+
+celery = create_celery()  # This will be initialized properly in app/__init__.py
+
+def get_redis():
+    return redis.StrictRedis(
+        host='localhost',
+        port=6379,
+        db=0,
+        decode_responses=True
+    )
+
+@celery.task
+def start_scraping_task(faction_id, fetch_interval, run_interval, config_dict):
+    """
+    Start scraping task with serializable parameters
+    Args:
+        faction_id: ID of the faction to scrape
+        fetch_interval: Interval between fetches in seconds
+        run_interval: How long to run the scraper in days
+        config_dict: Dictionary containing configuration
+    """
+    try:
+        redis_client = get_redis()
+        # Set current faction ID at task start
+        redis_client.set("current_faction_id", str(faction_id))
+
+        scraper = Scraper(
+            faction_id=faction_id,
+            fetch_interval=int(fetch_interval),
+            run_interval=int(run_interval),
+            config=config_dict
+        )
+        scraper.start_scraping()
+        return {"status": "success"}
+    except Exception as e:
+        # Clean up Redis state on error
+        redis_client = get_redis()
+        redis_client.delete("current_faction_id")
+        return {"status": "error", "message": str(e)}
+
+@celery.task
+def stop_scraping_task(faction_id):
+    """Stop scraping task and clean up Redis state"""
+    try:
+        redis_client = get_redis()
+
+        # Flip the flag and drop the hash: the loop in Scraper.start_scraping
+        # polls "scraping_active" and exits on its own. Revoking
+        # celery.current_task here would terminate this stop task itself,
+        # not the scraping task, so no revoke is issued.
+        redis_client.hset(f"scraper:{faction_id}", "scraping_active", "0")
+        redis_client.delete(f"scraper:{faction_id}")
+
+        # Clean up current_faction_id if it matches
+        current_id = redis_client.get("current_faction_id")
+        if current_id and current_id == str(faction_id):
+            redis_client.delete("current_faction_id")
+
+        return {"status": "success", "message": f"Stopped scraping for faction {faction_id}"}
+    except Exception as e:
+        return {"status": "error", "message": str(e)}
diff --git a/app/templates/analyze.html b/app/templates/analyze.html
new file mode 100644
index 0000000..753099c
--- /dev/null
+++ b/app/templates/analyze.html
@@ -0,0 +1,100 @@
+{% extends 'base.html' %}
+
+{% block content %}
+
+
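One detail worth noting about app/tasks.py above: Celery serializes task arguments as JSON (per the `task_serializer` setting in `create_celery`), which is why the API layer hands `start_scraping_task` a plain dict instead of the ConfigObj instance. A hedged sketch of the dispatch, mirroring the call in app/api.py (the API key placeholder comes from example_config.ini later in this diff):

    from app.tasks import start_scraping_task

    config_dict = {
        'DATA': {'DATA_DIR': 'data/'},           # only JSON-safe values survive the broker
        'DEFAULT': {'API_KEY': 'your_api_key'},
    }
    start_scraping_task.delay('9686', 60, 1, config_dict)  # queued on redis://localhost:6379/0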
+
+
+
+
+

User Activity Distribution

+
+
+
+
+
+ + + + + + + + + + + + + + + + {% if analyses %} + {% for analysis in analyses %} + + + + + + {% endfor %} + {% else %} + + + + {% endif %} + +
<th>Analysis Name</th> <th>Description</th>
+ + <td>{{ analysis.name }}</td> <td>{{ analysis.description }}</td>
No analyses available
+ +
+
+
+ {% include 'includes/error.html' %} +
+
+
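The view behind this form (the `/analyze` route in app/views.py, later in this diff) reads a `data_file` field plus a multi-valued `analyses` field via `request.form.getlist`. A hypothetical client call, assuming the dev server on localhost:5000 and a made-up CSV name:

    import requests

    resp = requests.post('http://localhost:5000/analyze', data={
        'data_file': 'data/9686-2025-02-11-01-00.csv',   # hypothetical file
        'analyses': ['User Activity Distribution'],      # list -> repeated form field
    })
    print(resp.status_code)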
+ +{% if plot_url %} +
+
+
+
+
+

Selected File: {{ selected_file.split('/')[-1] }}

+ User Activity Distribution +
+
+
+
+
+{% endif %} + +{% if results %} +{% for analysis_name, result in results.items() %} +
+
+
+
+

{{ analysis_name }}

+
+ {{ result | safe }} +
+
+
+
+
+{% endfor %} +{% endif %} + +{% endblock %} diff --git a/app/templates/base.html b/app/templates/base.html new file mode 100644 index 0000000..c2029a7 --- /dev/null +++ b/app/templates/base.html @@ -0,0 +1,32 @@ + + + + + {% block head %} + + TornActivityTracker{% block title %}{% endblock %} + + + {% block styles %} + {{ bootstrap.load_css() }} + + + {% endblock %} + {% endblock %} + + +
+ {% include 'includes/navigation.html' %} +
+
+ {% block content %} + {% endblock %} +
+
+ {% include 'includes/footer.html' %} +
+ {% block scripts %} + {% include 'includes/scripts.html' %} + {% endblock %} + + \ No newline at end of file diff --git a/app/templates/download_results.html b/app/templates/download_results.html new file mode 100644 index 0000000..4eecbd5 --- /dev/null +++ b/app/templates/download_results.html @@ -0,0 +1,102 @@ +{% extends 'base.html' %} +{% block content %} +
+
+
+
+
+

Data Files

+
+
+
+ + +
+
+
+
+ + + + + + + + + + + + + + {% for file in files.data %} + + + + + + + + + + {% endfor %} + +
<th>File Name</th> <th>Last Modified</th> <th>Created</th> <th>Size</th> <th>Action</th> <th>Status</th>
<td>{{ file.name_display }}</td> <td>{{ file.last_modified | datetimeformat }}</td> <td>{{ file.created | datetimeformat }}</td> <td>{{ file.size }}</td> + + + + {{ 'In Use' if file.active else 'Available' }} + 
+
+
+
+
+
+
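Both file tables format raw `os.stat` timestamps through the `datetimeformat` filter registered in app/filters.py. Its behavior in isolation looks like this (a standalone sketch of the same logic, not an import of the app code):

    from datetime import datetime

    def datetimeformat(value):
        # mirrors app/filters.py: accept datetimes or epoch numbers, else echo back
        if isinstance(value, datetime):
            dt = value
        else:
            try:
                dt = datetime.fromtimestamp(float(value))
            except (ValueError, TypeError):
                return str(value)
        return dt.strftime('%Y-%m-%d %H:%M:%S')

    print(datetimeformat(1739236800))   # a local-time string such as '2025-02-11 01:00:00'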
+
+

Log Files

+
+
+
+ + +
+
+
+
+ + + + + + + + + + + + + + {% for file in files.log %} + + + + + + + + + + {% endfor %} + +
<th>File Name</th> <th>Last Modified</th> <th>Created</th> <th>Size</th> <th>Action</th> <th>Status</th>
<td>{{ file.name_display }}</td> <td>{{ file.last_modified | datetimeformat }}</td> <td>{{ file.created | datetimeformat }}</td> <td>{{ file.size }}</td> + + + + {{ 'In Use' if file.active else 'Available' }} + 
+
+
+ +{% endblock %} diff --git a/app/templates/includes/error.html b/app/templates/includes/error.html new file mode 100644 index 0000000..c260bc8 --- /dev/null +++ b/app/templates/includes/error.html @@ -0,0 +1,6 @@ +{% if error %} + +{% endif %} diff --git a/app/templates/includes/footer.html b/app/templates/includes/footer.html new file mode 100644 index 0000000..e69de29 diff --git a/app/templates/includes/navigation.html b/app/templates/includes/navigation.html new file mode 100644 index 0000000..73f9b5b --- /dev/null +++ b/app/templates/includes/navigation.html @@ -0,0 +1,39 @@ + +
+
+
Loading...
+
+
+
+
Inactive
+
+
+
Server Time (TCT):
+
+
+
+
+
Running until:
+
+
+
Time Left:
+
+
+
+
+
\ No newline at end of file diff --git a/app/templates/includes/scripts.html b/app/templates/includes/scripts.html new file mode 100644 index 0000000..f560c92 --- /dev/null +++ b/app/templates/includes/scripts.html @@ -0,0 +1,3 @@ +{{ bootstrap.load_js() }} + + \ No newline at end of file diff --git a/app/templates/index.html b/app/templates/index.html new file mode 100644 index 0000000..b04c4d0 --- /dev/null +++ b/app/templates/index.html @@ -0,0 +1,34 @@ +{% extends 'base.html' %} +{% block content %} +
+
+
+
+

Scraper

+
+
+
+
+
+ {{ form.hidden_tag() }} +
+ {{ form.faction_id.label(class="form-control-label") }} + {{ form.faction_id(class="form-control") }} +
+
+ {{ form.fetch_interval.label(class="form-control-label") }} + {{ form.fetch_interval(class="form-control") }} +
+
+ {{ form.run_interval.label(class="form-control-label") }} + {{ form.run_interval(class="form-control") }} +
+
+
+ {{ form.submit(class="btn btn-success", type="submit", id="startButton", form="scrapingForm") }} + +
+
+
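A note on driving this form programmatically: `/start_scraping` validates a `ScrapingForm`, so a request must echo the CSRF token that `form.hidden_tag()` renders into the page; without it the endpoint answers "Invalid form data". A minimal sketch, assuming the dev server on localhost:5000:

    import requests

    resp = requests.post('http://localhost:5000/start_scraping', data={
        'faction_id': '9686',
        'fetch_interval': 60,
        'run_interval': 1,
        # 'csrf_token': ...   # must be scraped from the rendered page; omitted here
    })
    print(resp.json())   # -> {'status': 'Invalid form data'} until the token is supplied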
+ +{% endblock content %} \ No newline at end of file diff --git a/app/templates/log_viewer.html b/app/templates/log_viewer.html new file mode 100644 index 0000000..35ff6ad --- /dev/null +++ b/app/templates/log_viewer.html @@ -0,0 +1,22 @@ +{% extends 'base.html' %} +{% block content %} +
+
+
+
+

Logs

+
+
+ + + +
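`LogViewerApp` pages through the `/logfile` endpoint defined in app/api.py, and the same contract can be exercised outside the browser. A minimal sketch, assuming the dev server on localhost:5000:

    import requests

    def iter_log_lines(base='http://localhost:5000', lines_per_page=50):
        page = 0
        while True:
            data = requests.get(f'{base}/logfile',
                                params={'page': page, 'lines_per_page': lines_per_page}).json()
            yield from data['log']      # newest lines first; the API reverses the tail
            page += 1
            if page >= data['pages']:
                return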
+
+
+

Stats

+
+
+
+
+ +{% endblock content %} \ No newline at end of file diff --git a/app/util.py b/app/util.py new file mode 100644 index 0000000..29025bc --- /dev/null +++ b/app/util.py @@ -0,0 +1,82 @@ +import os +import zipfile +from datetime import datetime, timedelta +from flask import current_app + +from app.config import load_config + +def create_zip(file_paths, zip_name, app): + temp_dir = os.path.abspath(app.config['TEMP']['TEMP_DIR']) + zip_path = os.path.join(temp_dir, zip_name) + with zipfile.ZipFile(zip_path, 'w') as zipf: + for file_path in file_paths: + zipf.write(file_path, os.path.basename(file_path)) + print(f"Zip file created: {zip_path}") + return zip_path + +def delete_old_zips(): + temp_dir = os.path.abspath(current_app.config['TEMP']['TEMP_DIR']) + now = datetime.now() + for filename in os.listdir(temp_dir): + if filename.endswith('.zip'): + file_path = os.path.join(temp_dir, filename) + if now - datetime.fromtimestamp(os.path.getmtime(file_path)) > timedelta(hours=1): + os.remove(file_path) + +def tail(filename, n): + stat = os.stat(filename) + n = int(n) + if stat.st_size == 0 or n == 0: + yield '' + return + + page_size = int(current_app.config['LOGGING']['TAIL_PAGE_SIZE']) + offsets = [] + count = _n = n if n >= 0 else -n + + last_byte_read = last_nl_byte = starting_offset = stat.st_size - 1 + + with open(filename, 'r') as f: + while count > 0: + starting_byte = last_byte_read - page_size + if last_byte_read == 0: + offsets.append(0) + break + elif starting_byte < 0: + f.seek(0) + text = f.read(last_byte_read) + else: + f.seek(starting_byte) + text = f.read(page_size) + + for i in range(-1, -1*len(text)-1, -1): + last_byte_read -= 1 + if text[i] == '\n': + last_nl_byte = last_byte_read + starting_offset = last_nl_byte + 1 + offsets.append(starting_offset) + count -= 1 + + offsets = offsets[len(offsets)-_n:] + offsets.reverse() + + with open(filename, 'r') as f: + for i, offset in enumerate(offsets): + f.seek(offset) + + if i == len(offsets) - 1: + yield f.read() + else: + bytes_to_read = offsets[i+1] - offset + yield f.read(bytes_to_read) + +def get_size(path): + size = os.path.getsize(path) + if size < 1024: + return f"{size} bytes" + elif size < pow(1024,2): + return f"{round(size/1024, 2)} KB" + elif size < pow(1024,3): + return f"{round(size/(pow(1024,2)), 2)} MB" + elif size < pow(1024,4): + return f"{round(size/(pow(1024,3)), 2)} GB" \ No newline at end of file diff --git a/app/views.py b/app/views.py new file mode 100644 index 0000000..e89671c --- /dev/null +++ b/app/views.py @@ -0,0 +1,146 @@ +import os +import glob +from flask import render_template, Blueprint, current_app, request + +from app.tasks import get_redis + +from app.forms import ScrapingForm +from app.util import get_size +from app.config import load_config +from app.api import scraper as scraper + +from app.analysis import load_data, load_analysis_modules + +from datetime import datetime + +views_bp = Blueprint("views", __name__) + +def sizeof_fmt(num, suffix="B"): + """Convert bytes to human readable format""" + for unit in ["", "Ki", "Mi", "Gi", "Ti", "Pi", "Ei", "Zi"]: + if abs(num) < 1024.0: + return f"{num:3.1f} {unit}{suffix}" + num /= 1024.0 + return f"{num:.1f} Yi{suffix}" + +def register_views(app): + @app.route('/') + def index(): + form = ScrapingForm() + return render_template('index.html', form=form) + + @app.route('/results') + def results(): + return render_template('results.html') + + @app.route('/log_viewer') + def log_viewer(): + return render_template('log_viewer.html') + + 
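One caveat about `tail()` in app/util.py above: it reads `LOGGING.TAIL_PAGE_SIZE` from `current_app.config`, so it only works inside an application context. A hedged usage sketch (the log file name is hypothetical):

    from app import create_app
    from app.util import tail

    app = create_app()
    with app.app_context():
        last_lines = list(tail('log/2025-02-11-01-00.log', 50))   # last 50 lines, in file order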
@app.route('/download_results') + def download_results(): + # Get the current active log file and data file from Redis and app config + redis_client = get_redis() + current_faction_id = redis_client.get("current_faction_id") + + active_data_file = None + if current_faction_id: + active_data_file = redis_client.hget(f"scraper:{current_faction_id}", "data_file_name") + + active_log_file = app.config['LOG_FILE_NAME'] + + def get_file_info(file_path, file_type='data'): + stats = os.stat(file_path) + name = os.path.basename(file_path) + + # Determine if file is active + is_active = False + if file_type == 'data' and active_data_file: + is_active = os.path.abspath(file_path) == os.path.abspath(active_data_file) + elif file_type == 'log' and active_log_file: + is_active = os.path.basename(file_path) == os.path.basename(active_log_file) + + return { + 'name': file_path, # Full path for internal use + 'name_display': name, # Just filename for display + 'last_modified': stats.st_mtime, # Send timestamp instead of datetime + 'created': stats.st_ctime, # Send timestamp instead of datetime + 'size': sizeof_fmt(stats.st_size), + 'active': is_active + } + + data_files = [] + log_files = [] + + # Get data files + data_dir = os.path.abspath(app.config['DATA']['DATA_DIR']) + if os.path.exists(data_dir): + for file in glob.glob(os.path.join(data_dir, "*.csv")): + data_files.append(get_file_info(file, 'data')) + + # Get log files + log_dir = os.path.abspath(app.config['LOGGING']['LOG_DIR']) + if os.path.exists(log_dir): + for file in glob.glob(os.path.join(log_dir, "*.log")): + log_files.append(get_file_info(file, 'log')) + + # Sort files by modification time, newest first + data_files.sort(key=lambda x: x['last_modified'], reverse=True) + log_files.sort(key=lambda x: x['last_modified'], reverse=True) + + files = { + 'data': data_files, + 'log': log_files + } + + return render_template('download_results.html', files=files) + + views_bp = Blueprint("views", __name__) + + @views_bp.route("/analyze", methods=["GET", "POST"]) + def analyze(): + analysis_modules = load_analysis_modules() # Load available analyses + data_dir = current_app.config.get("DATA", {}).get("DATA_DIR") + + selected_file = None + selected_analyses = [] + + # Find all available CSV files + data_files = sorted( + glob.glob(os.path.join(data_dir, "*.csv")), + key=os.path.getmtime, + reverse=True + ) if data_dir else [] + + context = { + "data_files": data_files, + "analyses": analysis_modules, + "selected_file": selected_file, + "selected_analyses": selected_analyses + } + + if request.method == "POST": + selected_analyses = request.form.getlist("analyses") + selected_file = request.form.get("data_file") + + if not selected_file: + context["error"] = "No file selected." 
+            return render_template("analyze.html", **context)
+
+        df = load_data(selected_file)
+        results = {}
+
+        for analysis in analysis_modules:
+            if analysis.name in selected_analyses:
+                results[analysis.name] = analysis.execute(df)  # Some may return HTML
+
+        context["results"] = results
+
+        return render_template("analyze.html", **context)
+
+    @views_bp.route('/server_time')
+    def server_time():
+        # datetime here is the class (from datetime import datetime), so
+        # datetime.timezone does not exist; import timezone explicitly
+        from datetime import timezone
+        current_time = datetime.now(timezone.utc).strftime('%Y-%m-%d %H:%M:%S')
+        return {'server_time': current_time}
+
+    app.register_blueprint(views_bp)
diff --git a/data/.gitkeep b/data/.gitkeep
new file mode 100644
index 0000000..e69de29
diff --git a/docker-compose.yml b/docker-compose.yml
new file mode 100644
index 0000000..324036a
--- /dev/null
+++ b/docker-compose.yml
@@ -0,0 +1,34 @@
+version: '3.8'
+
+services:
+  redis:
+    image: redis:alpine
+    restart: always
+    volumes:
+      - .:/app/redis
+
+  app:
+    build: .
+    container_name: app
+    restart: always
+    command: sh ./entrypoint.sh
+    depends_on:
+      - redis
+    volumes:
+      - .:/app
+    expose:
+      - 8000
+    env_file:
+      - ./.env
+
+  nginx:
+    build:
+      context: .
+      dockerfile: nginx/Dockerfile
+    container_name: nginx
+    ports:
+      - "80:80"
+    volumes:
+      - .:/app/nginx
+    depends_on:
+      - app
diff --git a/dump.rdb b/dump.rdb
new file mode 100644
index 0000000..d4d5666
Binary files /dev/null and b/dump.rdb differ
diff --git a/entrypoint.sh b/entrypoint.sh
new file mode 100644
index 0000000..938eee9
--- /dev/null
+++ b/entrypoint.sh
@@ -0,0 +1,18 @@
+#!/bin/sh
+
+if [ "$BROKER" = "redis" ]
+then
+    echo "Waiting for redis..."
+    while ! nc -zv $BROKER_HOST $BROKER_PORT; do
+        sleep 10
+    done
+    echo "Redis started"
+fi
+
+echo "Starting app..."
+cd /app
+rm -f celery.pid
+touch celery.pid
+/home/app/.local/bin/uwsgi --ini uwsgi.ini
+
+exec "$@"
diff --git a/example_config.ini b/example_config.ini
new file mode 100644
index 0000000..7fc6a9d
--- /dev/null
+++ b/example_config.ini
@@ -0,0 +1,34 @@
+# All main config options will be passed to template engine
+[MAIN]
+APP_TITLE = 'Torn User Activity Grabber'
+
+[DEFAULT]
+SECRET_KEY = your_secret_key
+API_KEY = your_api_key
+
+[LOGGING]
+VIEW_MAX_LINES = 500
+VIEW_PAGE_LINES = 50
+TAIL_PAGE_SIZE = 100
+LOG_DIR = log/
+
+[BOOTSTRAP]
+BOOTSTRAP_SERVE_LOCAL = False
+BOOTSTRAP_BTN_STYLE = 'primary'
+BOOTSTRAP_BTN_SIZE = 'sm'
+BOOTSTRAP_ICON_SIZE = '1em'
+BOOTSTRAP_ICON_COLOR = None
+BOOTSTRAP_BOOTSWATCH_THEME = litera
+BOOTSTRAP_MSG_CATEGORY = 'primary'
+BOOTSTRAP_TABLE_VIEW_TITLE = 'View'
+BOOTSTRAP_TABLE_EDIT_TITLE = 'Edit'
+BOOTSTRAP_TABLE_DELETE_TITLE = 'Delete'
+BOOTSTRAP_TABLE_NEW_TITLE = 'New'
+BOOTSTRAP_FORM_GROUP_CLASSES = 'mb-3'
+BOOTSTRAP_FORM_INLINE_CLASSES = 'row row-cols-lg-auto g-3 align-items-center'
+
+[DATA]
+DATA_DIR = data/
+
+[TEMP]
+TEMP_DIR = temp/
diff --git a/fly.toml b/fly.toml
new file mode 100644
index 0000000..0defc87
--- /dev/null
+++ b/fly.toml
@@ -0,0 +1,20 @@
+# fly.toml app configuration file generated for tornactivitytracker on 2025-02-11T02:59:23+01:00
+#
+# See https://fly.io/docs/reference/configuration/ for information about how to use this file. 
+# + +app = 'tornactivitytracker' +primary_region = 'fra' + +[build] + +[http_service] + internal_port = 8080 + force_https = true + auto_stop_machines = 'stop' + auto_start_machines = true + min_machines_running = 0 + processes = ['app'] + +[[vm]] + size = 'shared-cpu-2x' diff --git a/howToTest.txt b/howToTest.txt new file mode 100644 index 0000000..112063f --- /dev/null +++ b/howToTest.txt @@ -0,0 +1,7 @@ +celery -A app.celery worker --loglevel=info + +redis-server + +run.py + +python stop_scraping.py diff --git a/log/.gitkeep b/log/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/nginx/Dockerfile b/nginx/Dockerfile new file mode 100644 index 0000000..40df99d --- /dev/null +++ b/nginx/Dockerfile @@ -0,0 +1,4 @@ +FROM nginx:stable-alpine + +COPY ./nginx/nginx.conf /etc/nginx/conf.d/default.conf +EXPOSE 80 diff --git a/nginx/nginx.conf b/nginx/nginx.conf new file mode 100644 index 0000000..bbeb399 --- /dev/null +++ b/nginx/nginx.conf @@ -0,0 +1,7 @@ +server { + listen 80; + location / { + include uwsgi_params; + uwsgi_pass app:8000; + } +} diff --git a/requirements.in b/requirements.in new file mode 100644 index 0000000..4228a9b --- /dev/null +++ b/requirements.in @@ -0,0 +1,15 @@ +# requirements.in +Flask +Flask-WTF +Bootstrap-Flask +pandas +requests +matplotlib +seaborn +configparser +plotly +configobj +redis +celery +gunicorn +uWSGI \ No newline at end of file diff --git a/requirements.sh b/requirements.sh new file mode 100644 index 0000000..9bc0d8d --- /dev/null +++ b/requirements.sh @@ -0,0 +1 @@ +pip-compile requirements.in > requirements.txt \ No newline at end of file diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..f6d49bf --- /dev/null +++ b/requirements.txt @@ -0,0 +1,135 @@ +# +# This file is autogenerated by pip-compile with Python 3.13 +# by the following command: +# +# pip-compile +# +amqp==5.3.1 + # via kombu +billiard==4.2.1 + # via celery +blinker==1.9.0 + # via flask +bootstrap-flask==2.4.1 + # via -r requirements.in +celery==5.4.0 + # via -r requirements.in +certifi==2025.1.31 + # via requests +charset-normalizer==3.4.1 + # via requests +click==8.1.8 + # via + # celery + # click-didyoumean + # click-plugins + # click-repl + # flask +click-didyoumean==0.3.1 + # via celery +click-plugins==1.1.1 + # via celery +click-repl==0.3.0 + # via celery +configobj==5.0.9 + # via -r requirements.in +configparser==7.1.0 + # via -r requirements.in +contourpy==1.3.1 + # via matplotlib +cycler==0.12.1 + # via matplotlib +flask==3.1.0 + # via + # -r requirements.in + # bootstrap-flask + # flask-wtf +flask-wtf==1.2.2 + # via -r requirements.in +fonttools==4.56.0 + # via matplotlib +gunicorn==23.0.0 + # via -r requirements.in +idna==3.10 + # via requests +itsdangerous==2.2.0 + # via + # flask + # flask-wtf +jinja2==3.1.5 + # via flask +kiwisolver==1.4.8 + # via matplotlib +kombu==5.4.2 + # via celery +markupsafe==3.0.2 + # via + # jinja2 + # werkzeug + # wtforms +matplotlib==3.10.0 + # via + # -r requirements.in + # seaborn +narwhals==1.27.1 + # via plotly +numpy==2.2.3 + # via + # contourpy + # matplotlib + # pandas + # seaborn +packaging==24.2 + # via + # gunicorn + # matplotlib + # plotly +pandas==2.2.3 + # via + # -r requirements.in + # seaborn +pillow==11.1.0 + # via matplotlib +plotly==6.0.0 + # via -r requirements.in +prompt-toolkit==3.0.50 + # via click-repl +pyparsing==3.2.1 + # via matplotlib +python-dateutil==2.9.0.post0 + # via + # celery + # matplotlib + # pandas +pytz==2025.1 + # via pandas +redis==5.2.1 + # via -r 
requirements.in +requests==2.32.3 + # via -r requirements.in +seaborn==0.13.2 + # via -r requirements.in +six==1.17.0 + # via python-dateutil +tzdata==2025.1 + # via + # celery + # kombu + # pandas +urllib3==2.3.0 + # via requests +uwsgi==2.0.28 + # via -r requirements.in +vine==5.1.0 + # via + # amqp + # celery + # kombu +wcwidth==0.2.13 + # via prompt-toolkit +werkzeug==3.1.3 + # via flask +wtforms==3.2.1 + # via + # bootstrap-flask + # flask-wtf diff --git a/run.py b/run.py new file mode 100644 index 0000000..523d51a --- /dev/null +++ b/run.py @@ -0,0 +1,6 @@ +from app import create_app + +app = create_app() + +if __name__ == '__main__': + app.run(debug=True) \ No newline at end of file diff --git a/stop_scraping.py b/stop_scraping.py new file mode 100644 index 0000000..13192bb --- /dev/null +++ b/stop_scraping.py @@ -0,0 +1,50 @@ +import redis +import argparse + +def get_redis(): + return redis.StrictRedis( + host='localhost', + port=6379, + db=0, + decode_responses=True + ) + +def stop_scraping(flush=False, force=False): + redis_client = get_redis() + + if flush: + redis_client.flushall() + print("Flushed all Redis data") + return True + + current_faction_id = redis_client.get("current_faction_id") + + if not current_faction_id: + print("No active scraping session found.") + return False if not force else True + + redis_client.hset(f"scraper:{current_faction_id}", "scraping_active", "0") + print(f"Sent stop signal to scraping process for faction {current_faction_id}") + return True + +if __name__ == "__main__": + parser = argparse.ArgumentParser(description='Stop the Torn Activity Tracker scraping process.') + parser.add_argument('--force', action='store_true', help='Force stop even if no active session is found') + parser.add_argument('--flush', action='store_true', help='Flush all Redis data (WARNING: This will clear ALL Redis data)') + + args = parser.parse_args() + + if args.flush: + if input("WARNING: This will delete ALL Redis data. Are you sure? (y/N) ").lower() != 'y': + print("Operation cancelled.") + exit(0) + + success = stop_scraping(flush=args.flush, force=args.force) + + if not success and args.force: + print("Forcing stop for all potential scraping processes...") + redis_client = get_redis() + # Get all scraper keys + for key in redis_client.keys("scraper:*"): + redis_client.hset(key, "scraping_active", "0") + print("Sent stop signal to all potential scraping processes.") diff --git a/temp/.gitkeep b/temp/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/uwsgi.ini b/uwsgi.ini new file mode 100644 index 0000000..a24684f --- /dev/null +++ b/uwsgi.ini @@ -0,0 +1,13 @@ +[uwsgi] +module = run:app +callable = app +wsgi-file = run.py + +master = true +processes = 4 +smart-attach-daemon=./celery.pid /home/app/.local/bin/celery -A app.celery_worker worker --loglevel=info --pidfile=./celery.pid + +socket = :8000 + +vacuum = true +die-on-term = true
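Finally, a quick way to sanity-check the wiring that howToTest.txt assumes (redis-server, a Celery worker, and the Flask app) is to ping the broker that both app/tasks.py and app/models.py point at. A minimal sketch against the default localhost:6379/0:

    import redis

    r = redis.StrictRedis(host='localhost', port=6379, db=0, decode_responses=True)
    r.ping()                               # raises ConnectionError if redis-server is down
    print(r.get('current_faction_id'))     # None unless a scrape is currently active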