
Add integration code for restricted algorithm implementations #50


Merged
merged 2 commits into from Sep 27, 2024
151 changes: 151 additions & 0 deletions arima/.gitignore
@@ -0,0 +1,151 @@
# ignore protected source code
ptsa/
setup.py

########################################
# Python.gitignore from github/gitignore
########################################

# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST

# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/
cover/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
.pybuilder/
target/

# Jupyter Notebook
.ipynb_checkpoints

# IPython
profile_default/
ipython_config.py

# pyenv
# For a library or package, you might want to ignore these files since the code is
# intended to run in multiple environments; otherwise, check them in:
# .python-version

# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock

# PEP 582; used by e.g. github.com/David-OConnor/pyflow
__pypackages__/

# Celery stuff
celerybeat-schedule
celerybeat.pid

# SageMath parsed files
*.sage.py

# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/

# Spyder project settings
.spyderproject
.spyproject

# Rope project settings
.ropeproject

# mkdocs documentation
/site

# mypy
.mypy_cache/
.dmypy.json
dmypy.json

# Pyre type checker
.pyre/

# pytype static type analyzer
.pytype/

# Cython debug symbols
cython_debug/

########################################


**.png
11 changes: 11 additions & 0 deletions arima/Dockerfile
@@ -0,0 +1,11 @@
FROM ghcr.io/timeeval/python3-base:0.3.0

LABEL maintainer="thorsten.papenbrock@hpi.de"

ENV ALGORITHM_MAIN="/app/algorithm.py"

COPY requirements.txt /app/
RUN pip install -r /app/requirements.txt;

COPY ptsa /app/ptsa
COPY algorithm.py /app/
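For local testing, one can build this image by hand. A minimal sketch, assuming Docker is available on the host and `ptsa` has been placed in the folder as described in the README below; the image tag `arima-timeeval` is an example, not an official name, and the snippet is kept in Python for consistency with the rest of this PR:

```python
# Hypothetical local build of the arima image; the tag is illustrative only.
import subprocess

subprocess.run(
    ["docker", "build", "-t", "arima-timeeval", "./arima"],
    check=True,  # raise CalledProcessError if the build fails
)
```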
3 changes: 2 additions & 1 deletion arima/README.md
@@ -8,9 +8,10 @@
| Source Code | https://github.com/johnpaparrizos/AnomalyDetection/tree/master/code/ptsa |
| Learning type | unsupervised |
| Input dimensionality | univariate |

|||

After receiving the original source code from the authors, place the directory `ptsa` into this folder.

## Notes

The ptsa algorithms require sklearn versions 0.19 to 0.23, which is checked in `utility.py`. Our Python image, however, ships a newer sklearn version (0.24.1 or higher), so we removed the check:
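(The removed snippet itself is truncated in this view. As a rough sketch, the guard in `utility.py` plausibly looked something like the following; this is an editor's illustration, not the authors' exact code:)

```python
# Hypothetical version guard of the kind removed from ptsa's utility.py.
import sklearn

# Compare only the (major, minor) prefix, e.g. "0.24.1" -> (0, 24).
version = tuple(int(part) for part in sklearn.__version__.split(".")[:2])
if not ((0, 19) <= version <= (0, 23)):
    raise ImportError(
        f"sklearn=={sklearn.__version__} is outside the supported range 0.19-0.23"
    )
```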
128 changes: 128 additions & 0 deletions arima/algorithm.py
@@ -0,0 +1,128 @@
#!/usr/bin/env python3

import json
import sys
import argparse
import numpy as np

from dataclasses import dataclass

from ptsa.models.arima import ARIMA
from ptsa.models.distance import (
    Euclidean, Mahalanobis, Garch, SSA, Fourier, DTW, EDRS, TWED
)


@dataclass
class CustomParameters:
    window_size: int = 20
    max_lag: int = 30000
    p_start: int = 1
    q_start: int = 1
    max_p: int = 5
    max_q: int = 5
    differencing_degree: int = 0
    distance_metric: str = "Euclidean"
    random_state: int = 42  # seed for randomness


class AlgorithmArgs(argparse.Namespace):
    @staticmethod
    def from_sys_args() -> 'AlgorithmArgs':
        if len(sys.argv) != 2:
            raise ValueError("Wrong number of arguments specified! A single JSON-string positional argument is expected.")
        args: dict = json.loads(sys.argv[1])
        custom_parameter_keys = dir(CustomParameters())
        filtered_parameters = dict(filter(lambda x: x[0] in custom_parameter_keys, args.get("customParameters", {}).items()))
        args["customParameters"] = CustomParameters(**filtered_parameters)
        return AlgorithmArgs(**args)


def set_random_state(config: AlgorithmArgs) -> None:
    seed = config.customParameters.random_state
    import random
    random.seed(seed)
    np.random.seed(seed)


def distance_to_measure(distance_metric):
    switcher = {
        "euclidean": Euclidean(),
        "mahalanobis": Mahalanobis(),
        "garch": Garch(),
        "ssa": SSA(),
        "fourier": Fourier(),
        "dtw": DTW(),
        "edrs": EDRS(),
        "twed": TWED()
    }
    return switcher.get(distance_metric.lower(), "missing")


def main():
    config = AlgorithmArgs.from_sys_args()
    ts_filename = config.dataInput  # "/data/dataset.csv"
    score_filename = config.dataOutput  # "/results/anomaly_window_scores.ts"

    print(f"Configuration: {config}")

    if config.executionType == "train":
        print("No training required!")
        sys.exit(0)

    if config.executionType != "execute":
        raise ValueError("Unknown executionType specified!")

    set_random_state(config)

    # read the single "value" column and the anomaly labels from the dataset
    print(f"Reading data from {ts_filename}")
    dataset = np.genfromtxt(ts_filename, skip_header=1, delimiter=",")
    data = dataset[:, 1]
    labels = dataset[:, -1]
    length = len(data)
    contamination = labels.sum() / length
    # Use smallest positive float as contamination if there are no anomalies in dataset
    contamination = np.nextafter(0, 1) if contamination == 0. else contamination

    # run ARIMA
    print("Executing ARIMA ...")
    model = ARIMA(
        window=config.customParameters.window_size,
        max_lag=config.customParameters.max_lag,
        p_start=config.customParameters.p_start,
        q_start=config.customParameters.q_start,
        max_p=config.customParameters.max_p,
        max_q=config.customParameters.max_q,
        d=config.customParameters.differencing_degree,
        contamination=contamination,
        neighborhood="all")
    model.fit(data)

    # get outlier scores
    measure = distance_to_measure(config.customParameters.distance_metric)
    if measure == "missing":
        raise ValueError(f"Distance measure '{config.customParameters.distance_metric}' not supported!")
    measure.detector = model
    measure.set_param()
    model.decision_function(measure=measure)
    scores = model.decision_scores_

    # from ptsa.utils.metrics import metricor
    # grader = metricor()
    # preds = grader.scale(scores, 0.1)

    print(f"Input size: {len(data)}\nOutput size: {len(scores)}")
    print("ARIMA result:", scores)

    print(f"Writing results to {score_filename}")
    np.savetxt(score_filename, scores, delimiter=",")


if __name__ == "__main__":
    main()
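As `AlgorithmArgs.from_sys_args` shows, the script expects its whole configuration as a single JSON-encoded positional argument. A hedged sketch of a manual local invocation; the paths and parameter values are examples taken from the comments above, and in production the TimeEval framework presumably issues an equivalent call inside the container:

```python
# Hypothetical manual call for local testing of algorithm.py.
import json
import subprocess

config = {
    "executionType": "execute",
    "dataInput": "/data/dataset.csv",
    "dataOutput": "/results/anomaly_window_scores.ts",
    "customParameters": {"window_size": 20, "distance_metric": "euclidean"},
}
subprocess.run(["python", "algorithm.py", json.dumps(config)], check=True)
```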
15 changes: 15 additions & 0 deletions arima/requirements.txt
@@ -0,0 +1,15 @@
combo
joblib
matplotlib
numpy>=1.13
numba>=0.35
pandas>=0.25
scipy>=0.19.1
scikit_learn>=0.19.1
six
statsmodels
suod
pmdarima
arch
tsfresh
hurst