Commit 3fb1f27

Author: Sebastian Schmidl

Add integration code for restricted algorithm implementations (#50)

* feat: add integration code for restricted algorithm implementations
* fix: base Docker image refs

1 parent b3bb180 · commit 3fb1f27

31 files changed: +1732 −2 lines changed

arima/.gitignore

Lines changed: 151 additions & 0 deletions
```gitignore
# ignore protected source code
ptsa/
setup.py

########################################
# Python.gitignore from github/gitignore
########################################

# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST

# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/
cover/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
.pybuilder/
target/

# Jupyter Notebook
.ipynb_checkpoints

# IPython
profile_default/
ipython_config.py

# pyenv
# For a library or package, you might want to ignore these files since the code is
# intended to run in multiple environments; otherwise, check them in:
# .python-version

# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock

# PEP 582; used by e.g. github.com/David-OConnor/pyflow
__pypackages__/

# Celery stuff
celerybeat-schedule
celerybeat.pid

# SageMath parsed files
*.sage.py

# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/

# Spyder project settings
.spyderproject
.spyproject

# Rope project settings
.ropeproject

# mkdocs documentation
/site

# mypy
.mypy_cache/
.dmypy.json
dmypy.json

# Pyre type checker
.pyre/

# pytype static type analyzer
.pytype/

# Cython debug symbols
cython_debug/

########################################

**.png
```

arima/Dockerfile

Lines changed: 11 additions & 0 deletions
```Dockerfile
FROM ghcr.io/timeeval/python3-base:0.3.0

LABEL maintainer="thorsten.papenbrock@hpi.de"

ENV ALGORITHM_MAIN="/app/algorithm.py"

COPY requirements.txt /app/
RUN pip install -r /app/requirements.txt;

COPY ptsa /app/ptsa
COPY algorithm.py /app/
```
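The base image is expected to pick up `ALGORITHM_MAIN` as the script to launch. A minimal Python sketch of that presumed contract (an assumption inferred from the `ENV` line above, not code taken from the `python3-base` image):

```python
# Sketch of the presumed entrypoint behavior of ghcr.io/timeeval/python3-base:
# run the script named by ALGORITHM_MAIN and forward the JSON config argument.
# This is an assumption inferred from the Dockerfile, not the image's source.
import os
import subprocess
import sys

algorithm_main = os.environ.get("ALGORITHM_MAIN", "/app/algorithm.py")
subprocess.run([sys.executable, algorithm_main, *sys.argv[1:]], check=True)
```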

arima/README.md

Lines changed: 2 additions & 1 deletion
```diff
@@ -8,9 +8,10 @@
 | Source Code | https://github.com/johnpaparrizos/AnomalyDetection/tree/master/code/ptsa |
 | Learning type | unsupervised |
 | Input dimensionality | univariate |
-
 |||
 
+After receiving the original source code from the authors, place the directory `ptsa` into this folder.
+
 ## Notes
 
 The ptsa algorithms require sklearn versions 0.19 to 0.23. This is checked in utility.py. Our Python image, however, ships a newer sklearn version (0.24.1 or higher), so we removed the check:
```
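The removed check itself is not shown in this hunk. For orientation, here is a hypothetical sketch of such a version guard; the exact code in `utility.py` may differ:

```python
# Hypothetical sklearn version guard of the kind described above; the actual
# code removed from utility.py is not part of this diff.
import sklearn

major, minor = (int(x) for x in sklearn.__version__.split(".")[:2])

# ptsa expects sklearn 0.19-0.23; the TimeEval base image ships 0.24.1+,
# so this guard fails there, which is why the integration drops it.
if major != 0 or not (19 <= minor <= 23):
    raise ImportError(f"sklearn {sklearn.__version__} is not supported by ptsa")
```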

arima/algorithm.py

Lines changed: 128 additions & 0 deletions
```python
#!/usr/bin/env python3

import json
import sys
import argparse
import numpy as np

from dataclasses import dataclass

from ptsa.models.arima import ARIMA
from ptsa.models.distance import Euclidean
from ptsa.models.distance import Mahalanobis
from ptsa.models.distance import Garch
from ptsa.models.distance import SSA
from ptsa.models.distance import Fourier
from ptsa.models.distance import DTW
from ptsa.models.distance import EDRS
from ptsa.models.distance import TWED


@dataclass
class CustomParameters:
    window_size: int = 20
    max_lag: int = 30000
    p_start: int = 1
    q_start: int = 1
    max_p: int = 5
    max_q: int = 5
    differencing_degree: int = 0
    distance_metric: str = "Euclidean"
    random_state: int = 42  # seed for randomness


class AlgorithmArgs(argparse.Namespace):
    @staticmethod
    def from_sys_args() -> 'AlgorithmArgs':
        if len(sys.argv) != 2:
            raise ValueError("Wrong number of arguments specified! Single JSON-string pos. argument expected.")
        args: dict = json.loads(sys.argv[1])
        custom_parameter_keys = dir(CustomParameters())
        filtered_parameters = dict(filter(lambda x: x[0] in custom_parameter_keys, args.get("customParameters", {}).items()))
        args["customParameters"] = CustomParameters(**filtered_parameters)
        return AlgorithmArgs(**args)


def set_random_state(config: AlgorithmArgs) -> None:
    seed = config.customParameters.random_state
    import random
    random.seed(seed)
    np.random.seed(seed)


def distance_to_measure(distance_metric):
    switcher = {
        "euclidean": Euclidean(),
        "mahalanobis": Mahalanobis(),
        "garch": Garch(),
        "ssa": SSA(),
        "fourier": Fourier(),
        "dtw": DTW(),
        "edrs": EDRS(),
        "twed": TWED()
    }
    return switcher.get(distance_metric.lower(), "missing")


def main():
    config = AlgorithmArgs.from_sys_args()
    ts_filename = config.dataInput  # "/data/dataset.csv"
    score_filename = config.dataOutput  # "/results/anomaly_window_scores.ts"

    print(f"Configuration: {config}")

    if config.executionType == "train":
        print("No training required!")
        exit(0)

    if config.executionType != "execute":
        raise ValueError("Unknown executionType specified!")

    set_random_state(config)

    # read only single "value" column from dataset
    print(f"Reading data from {ts_filename}")
    da = np.genfromtxt(ts_filename, skip_header=1, delimiter=",")
    data = da[:, 1]
    labels = da[:, -1]
    length = len(data)
    contamination = labels.sum() / length
    # Use smallest positive float as contamination if there are no anomalies in dataset
    contamination = np.nextafter(0, 1) if contamination == 0. else contamination

    # run ARIMA
    print("Executing ARIMA ...")
    model = ARIMA(
        window=config.customParameters.window_size,
        max_lag=config.customParameters.max_lag,
        p_start=config.customParameters.p_start,
        q_start=config.customParameters.q_start,
        max_p=config.customParameters.max_p,
        max_q=config.customParameters.max_q,
        d=config.customParameters.differencing_degree,
        contamination=contamination,
        neighborhood="all")
    model.fit(data)

    # get outlier scores
    measure = distance_to_measure(config.customParameters.distance_metric)
    if measure == "missing":
        raise ValueError(f"Distance measure '{config.customParameters.distance_metric}' not supported!")
    measure.detector = model
    measure.set_param()
    model.decision_function(measure=measure)
    scores = model.decision_scores_

    #from ptsa.utils.metrics import metricor
    #grader = metricor()
    #preds = grader.scale(scores, 0.1)

    print(f"Input size: {len(data)}\nOutput size: {len(scores)}")
    print("ARIMA result:", scores)

    print(f"Writing results to {score_filename}")
    np.savetxt(score_filename, scores, delimiter=",")


if __name__ == "__main__":
    main()
```
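As `AlgorithmArgs.from_sys_args` shows, the script expects exactly one positional argument: a JSON-encoded configuration with `executionType`, `dataInput`, `dataOutput`, and an optional `customParameters` object. An illustrative invocation (paths and parameter values are made up for the example; `ptsa` must be present as described in the README):

```python
# Illustrative invocation of algorithm.py; paths and parameter values are
# examples only, and the ptsa package must sit next to the script.
import json
import subprocess

config = {
    "executionType": "execute",
    "dataInput": "/data/dataset.csv",
    "dataOutput": "/results/anomaly_window_scores.ts",
    "customParameters": {"window_size": 20, "distance_metric": "mahalanobis"},
}
subprocess.run(["python", "algorithm.py", json.dumps(config)], check=True)
```

Note that `from_sys_args` silently drops any `customParameters` keys that are not fields of `CustomParameters`, and `distance_to_measure` lowercases the metric name before the lookup.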

arima/requirements.txt

Lines changed: 15 additions & 0 deletions
```text
combo
joblib
matplotlib
numpy>=1.13
numba>=0.35
pandas>=0.25
scipy>=0.19.1
scikit_learn>=0.19.1
six
statsmodels
suod
pmdarima
arch
tsfresh
hurst
```
