From 61e866b6878c21aa3e64247c5605c5aa5cf66e8e Mon Sep 17 00:00:00 2001 From: vc1492a Date: Fri, 31 Jul 2020 14:45:09 -0700 Subject: [PATCH 1/8] master to main --- readme.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/readme.md b/readme.md index 014861d..7035de3 100644 --- a/readme.md +++ b/readme.md @@ -7,8 +7,8 @@ scores in the range of [0,1] that are directly interpretable as the probability [![License](https://img.shields.io/badge/License-Apache%202.0-blue.svg)](https://opensource.org/licenses/Apache-2.0) [![PyPi](https://img.shields.io/badge/pypi-0.3.3-blue.svg)](https://pypi.python.org/pypi/PyNomaly/0.3.3) ![](https://img.shields.io/pypi/dm/PyNomaly.svg?logoColor=blue) -[![Build Status](https://travis-ci.org/vc1492a/PyNomaly.svg?branch=master)](https://travis-ci.org/vc1492a/PyNomaly) -[![Coverage Status](https://coveralls.io/repos/github/vc1492a/PyNomaly/badge.svg?branch=master)](https://coveralls.io/github/vc1492a/PyNomaly?branch=master) +[![Build Status](https://travis-ci.org/vc1492a/PyNomaly.svg?branch=main)](https://travis-ci.org/vc1492a/PyNomaly) +[![Coverage Status](https://coveralls.io/repos/github/vc1492a/PyNomaly/badge.svg?branch=main)](https://coveralls.io/github/vc1492a/PyNomaly?branch=main) [![JOSS](http://joss.theoj.org/papers/f4d2cfe680768526da7c1f6a2c103266/status.svg)](http://joss.theoj.org/papers/f4d2cfe680768526da7c1f6a2c103266) The outlier score of each sample is called the Local Outlier Probability. @@ -391,10 +391,10 @@ any changes to a branch which corresponds to an open issue. Hot fixes and bug fixes can be represented by branches with the prefix `fix/` versus `feature/` for new capabilities or code improvements. Pull requests will then be made from these branches into the repository's `dev` branch -prior to being pulled into `master`. Pull requests which are works in +prior to being pulled into `main`. 
Pull requests which are works in progress or ready for merging should be indicated by their respective prefixes ([WIP] and [MRG]). Pull requests with the [MRG] prefix will be -reviewed prior to being pulled into the `master` branch. +reviewed prior to being pulled into the `main` branch. ### Tests When contributing, please ensure to run unit tests and add additional tests as From 6f5077e57850f1814652860932aea9a82765b7c8 Mon Sep 17 00:00:00 2001 From: Valentino Constantinou Date: Tue, 18 Aug 2020 10:05:47 -0700 Subject: [PATCH 2/8] Fix image links in readme.md --- readme.md | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/readme.md b/readme.md index 7035de3..4786755 100644 --- a/readme.md +++ b/readme.md @@ -224,13 +224,13 @@ plt.close() Your results should look like the following: **LoOP Scores without Clustering** -![LoOP Scores without Clustering](https://github.com/vc1492a/PyNomaly/blob/master/images/scores.png) +![LoOP Scores without Clustering](https://github.com/vc1492a/PyNomaly/blob/main/images/scores.png) **LoOP Scores with Clustering** -![LoOP Scores with Clustering](https://github.com/vc1492a/PyNomaly/blob/master/images/scores_clust.png) +![LoOP Scores with Clustering](https://github.com/vc1492a/PyNomaly/blob/main/images/scores_clust.png) **DBSCAN Cluster Assignments** -![DBSCAN Cluster Assignments](https://github.com/vc1492a/PyNomaly/blob/master/images/cluster_assignments.png) +![DBSCAN Cluster Assignments](https://github.com/vc1492a/PyNomaly/blob/main/images/cluster_assignments.png) Note the differences between using LocalOutlierProbability with and without clustering. 
In the example without clustering, samples are @@ -302,7 +302,7 @@ scores = m.local_outlier_probabilities The below visualization shows the results by a few known distance metrics: **LoOP Scores by Distance Metric** -![DBSCAN Cluster Assignments](https://github.com/vc1492a/PyNomaly/blob/master/images/scores_by_distance_metric.png) +![DBSCAN Cluster Assignments](https://github.com/vc1492a/PyNomaly/blob/main/images/scores_by_distance_metric.png) ## Streaming Data @@ -373,7 +373,7 @@ plt.close() ``` **LoOP Scores using Stream Approach with n=10** -![LoOP Scores using Stream Approach with n=10](https://github.com/vc1492a/PyNomaly/blob/master/images/scores_stream.png) +![LoOP Scores using Stream Approach with n=10](https://github.com/vc1492a/PyNomaly/blob/main/images/scores_stream.png) ### Notes When calculating the LoOP score of incoming data, the original fitted scores are not updated. From 6da6c29614cdae39474b674456599fb758904946 Mon Sep 17 00:00:00 2001 From: Valentino Constantinou Date: Thu, 28 Sep 2023 13:42:25 -0700 Subject: [PATCH 3/8] Update setup.py Fix typo --- setup.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/setup.py b/setup.py index 9b9efc6..b66544a 100644 --- a/setup.py +++ b/setup.py @@ -3,14 +3,14 @@ setup( name='PyNomaly', packages=['PyNomaly'], - version='0.3.4', + version='0.3.3', description='A Python 3 implementation of LoOP: Local Outlier ' 'Probabilities, a local density based outlier detection ' 'method providing an outlier score in the range of [0,1].', author='Valentino Constantinou', author_email='vc@valentino.io', url='https://github.com/vc1492a/PyNomaly', - download_url='https://github.com/vc1492a/PyNomaly/archive/0.3.4.tar.gz', + download_url='https://github.com/vc1492a/PyNomaly/archive/0.3.3.tar.gz', keywords=['outlier', 'anomaly', 'detection', 'machine', 'learning', 'probability'], classifiers=[], From 4e35a37b555bea5f46fbb7b7cd2b165341640590 Mon Sep 17 00:00:00 2001 From: Valentino Constantinou Date: Thu, 28 
Sep 2023 13:42:47 -0700 Subject: [PATCH 4/8] Update readme.md Fix typo --- readme.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/readme.md b/readme.md index 7abfb42..1a23f85 100644 --- a/readme.md +++ b/readme.md @@ -5,7 +5,7 @@ LoOP is a local density based outlier detection method by Kriegel, Kröger, Schu scores in the range of [0,1] that are directly interpretable as the probability of a sample being an outlier. [![License](https://img.shields.io/badge/License-Apache%202.0-blue.svg)](https://opensource.org/licenses/Apache-2.0) -[![PyPi](https://img.shields.io/badge/pypi-0.3.4-blue.svg)](https://pypi.python.org/pypi/PyNomaly/0.3.4) +[![PyPi](https://img.shields.io/badge/pypi-0.3.3-blue.svg)](https://pypi.python.org/pypi/PyNomaly/0.3.3) ![](https://img.shields.io/pypi/dm/PyNomaly.svg?logoColor=blue) [![Build Status](https://travis-ci.org/vc1492a/PyNomaly.svg?branch=main)](https://travis-ci.org/vc1492a/PyNomaly) [![Coverage Status](https://coveralls.io/repos/github/vc1492a/PyNomaly/badge.svg?branch=main)](https://coveralls.io/github/vc1492a/PyNomaly?branch=main) From 19986fb040346c8ea4dc5d3c5656d01fca9b7dd0 Mon Sep 17 00:00:00 2001 From: Valentino Constantinou Date: Thu, 28 Sep 2023 13:43:05 -0700 Subject: [PATCH 5/8] Update loop.py Fix typo --- PyNomaly/loop.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/PyNomaly/loop.py b/PyNomaly/loop.py index a0c341e..8746dfb 100644 --- a/PyNomaly/loop.py +++ b/PyNomaly/loop.py @@ -11,7 +11,7 @@ pass __author__ = 'Valentino Constantinou' -__version__ = '0.3.4' +__version__ = '0.3.3' __license__ = 'Apache License, Version 2.0' From 887857243d1b4ac44c709ee1774af6b79a96c3d9 Mon Sep 17 00:00:00 2001 From: Valentino Constantinou Date: Sun, 1 Oct 2023 19:38:27 -0700 Subject: [PATCH 6/8] Update changelog.md --- changelog.md | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) diff --git a/changelog.md b/changelog.md index ec6bef6..4204181 100644 --- a/changelog.md +++ 
b/changelog.md @@ -4,15 +4,6 @@ All notable changes to PyNomaly will be documented in this Changelog. The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/) and adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.html). -## 0.3.4 -### Changed -- Unit tests from using the `sklearn.utils.testing` submodule -to standard Python assertions, as the submodule will be changed -to private functions after scikit-learn version 0.24. -- Logging statements or warnings when testing with numba disabled or -enabled (respectively) to reflect the effect of numba just-in-time -compilation on code coverage statistics. - ## 0.3.3 ### Changed - The implementation of the progress bar to support use when the number of @@ -226,4 +217,4 @@ in computing the neighborhood distance for each observation. ### Added - readme.md file documenting methodology, package dependencies, use cases, how to contribute, and acknowledgements. -- Initial open release of PyNomaly codebase on Github. \ No newline at end of file +- Initial open release of PyNomaly codebase on Github. 
From f0ff1ddab83940ee808f94e911398c65547e2b2e Mon Sep 17 00:00:00 2001 From: IroNEDR Date: Thu, 11 Apr 2024 19:32:33 +0200 Subject: [PATCH 7/8] extended gitignore --- .gitignore | 168 +++++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 162 insertions(+), 6 deletions(-) diff --git a/.gitignore b/.gitignore index cc65586..b80b260 100644 --- a/.gitignore +++ b/.gitignore @@ -6,12 +6,168 @@ nasaValve rel_research PyNomaly/loop_dev.py /PyNomaly.egg-info/ -.pytest_cache -build -htmlcov/ -*.egg *.pyc -.coverage *.coverage.* .coveragerc -venv/ \ No newline at end of file + +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ +cover/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +.pybuilder/ +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +# For a library or package, you might want to ignore these files since the code is +# intended to run in multiple environments; otherwise, check them in: +# .python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 
+# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# poetry +# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. +# This is especially recommended for binary packages to ensure reproducibility, and is more +# commonly ignored for libraries. +# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control +#poetry.lock + +# pdm +# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. +#pdm.lock +# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it +# in version control. +# https://pdm.fming.dev/#use-with-ide +.pdm.toml + +# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +# pytype static type analyzer +.pytype/ + +# Cython debug symbols +cython_debug/ + +# PyCharm +# JetBrains specific template is maintained in a separate JetBrains.gitignore that can +# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore +# and can be added to the global gitignore or merged into this file. For a more nuclear +# option (not recommended) you can uncomment the following to ignore the entire idea folder. 
+#.idea/ + From e24cc980860360b350b2efc5040dd28ff7d68afe Mon Sep 17 00:00:00 2001 From: IroNEDR Date: Thu, 11 Apr 2024 23:15:59 +0200 Subject: [PATCH 8/8] added regression test and test data via fixture --- tests/test_loop.py | 82 ++++++++++++++++++++++++++++++++-------------- 1 file changed, 58 insertions(+), 24 deletions(-) diff --git a/tests/test_loop.py b/tests/test_loop.py index 9e5850f..b7a880b 100644 --- a/tests/test_loop.py +++ b/tests/test_loop.py @@ -51,6 +51,28 @@ def X_n8() -> np.ndarray: return X +@pytest.fixture() +def X_n20_scores() -> tuple[np.ndarray, np.ndarray]: + """ + Fixture that returns a tuple containing a 20 element numpy array + and the precalculated LoOP scores based on that array. + :return: tuple(input_data,expected_scores) + """ + input_data = np.array([0.02059752, 0.32629926, 0.63036653, 0.94409321, + 0.63251097, 0.47598494, 0.80204026, 0.34845067, + 0.81556468, 0.89183, 0.25210317, 0.11460502, + 0.19953434, 0.36955067, 0.06038041, 0.34527368, + 0.56621582, 0.90533649, 0.33773613, 0.71573306]) + + expected_scores = np.array([0.6356276742921594, 0.0, 0.0, + 0.48490790006974044, 0.0, 0.0, 0.0, 0.0, + 0.021728288376168012, 0.28285086151683225, + 0.0, 0.18881886507113213, 0.0, 0.0, + 0.45350246469681843, 0.0, 0.07886635748113013, + 0.3349068501560546, 0.0, 0.0]) + return (input_data, expected_scores) + + @pytest.fixture() def X_n120() -> np.ndarray: """ @@ -122,6 +144,18 @@ def test_loop(X_n8) -> None: assert np.min(score[-2:]) > np.max(score[:-2]) +def test_regression(X_n20_scores) -> None: + """ + Tests for potential regression errors by comparing current results + to the expected results. 
Any changes to the code should still return + the same result given the same dataset + """ + input_data, expected_scores = X_n20_scores + clf = loop.LocalOutlierProbability(input_data).fit() + scores = clf.local_outlier_probabilities + assert np.array_equal(scores, expected_scores) + + def test_loop_performance(X_n120) -> None: """ Using a set of known anomalies (labels), tests the performance (using @@ -170,7 +204,7 @@ def test_input_nodata(X_n140_outliers) -> None: assert len(record) == 1 # check that the message matches assert record[0].message.args[ - 0] == "Data or a distance matrix must be provided." + 0] == "Data or a distance matrix must be provided." def test_input_incorrect_type(X_n140_outliers) -> None: @@ -192,8 +226,8 @@ def test_input_incorrect_type(X_n140_outliers) -> None: assert len(record) == 1 # check that the message matches assert record[0].message.args[ - 0] == "Argument 'n_neighbors' is not of type (, " \ - ")." + 0] == "Argument 'n_neighbors' is not of type (, " \ + ")." def test_input_neighbor_zero(X_n120) -> None: @@ -213,7 +247,7 @@ def test_input_neighbor_zero(X_n120) -> None: assert len(record) == 1 # check that the message matches assert record[0].message.args[ - 0] == "n_neighbors must be greater than 0. Fit with 10 instead." + 0] == "n_neighbors must be greater than 0. Fit with 10 instead." def test_input_distonly(X_n120) -> None: @@ -236,8 +270,8 @@ def test_input_distonly(X_n120) -> None: assert len(record) == 1 # check that the message matches assert record[0].message.args[ - 0] == "A neighbor index matrix and distance matrix must both " \ - "be provided when not using raw input data." + 0] == "A neighbor index matrix and distance matrix must both " \ + "be provided when not using raw input data." 
def test_input_neighboronly(X_n120) -> None: @@ -260,7 +294,7 @@ def test_input_neighboronly(X_n120) -> None: assert len(record) == 1 # check that the message matches assert record[0].message.args[ - 0] == "Data or a distance matrix must be provided." + 0] == "Data or a distance matrix must be provided." def test_input_too_many(X_n120) -> None: @@ -284,8 +318,8 @@ def test_input_too_many(X_n120) -> None: assert len(record) == 1 # check that the message matches assert record[0].message.args[ - 0] == "Only one of the following may be provided: data or a " \ - "distance matrix (not both)." + 0] == "Only one of the following may be provided: data or a " \ + "distance matrix (not both)." def test_distance_neighbor_shape_mismatch(X_n120) -> None: @@ -318,8 +352,8 @@ def test_distance_neighbor_shape_mismatch(X_n120) -> None: assert len(record) == 1 # check that the message matches assert record[0].message.args[ - 0] == "The shape of the distance and neighbor " \ - "index matrices must match." + 0] == "The shape of the distance and neighbor " \ + "index matrices must match." def test_input_neighbor_mismatch(X_n120) -> None: @@ -345,10 +379,10 @@ def test_input_neighbor_mismatch(X_n120) -> None: assert len(record) == 1 # check that the message matches assert record[0].message.args[ - 0] == "The shape of the distance or " \ - "neighbor index matrix does not " \ - "match the number of neighbors " \ - "specified." + 0] == "The shape of the distance or " \ + "neighbor index matrix does not " \ + "match the number of neighbors " \ + "specified." def test_loop_dist_matrix(X_n120) -> None: @@ -509,13 +543,13 @@ def test_missing_values() -> None: assert len(record_b) == 1 # check that the message matches assert record_b[0].message.args[ - 0] == "Method does not support missing values in input data." + 0] == "Method does not support missing values in input data." 
def test_small_cluster_size(X_n140_outliers) -> None: """ - Test to ensure that the program exits when the specified number of neighbors - is larger than the smallest cluster size in the input data. + Test to ensure that the program exits when the specified number of + neighbors is larger than the smallest cluster size in the input data. :param X_n140_outliers: A pytest Fixture that generates 140 observations. :return: None """ @@ -541,10 +575,10 @@ def test_small_cluster_size(X_n140_outliers) -> None: assert len(record_b) == 1 # check that the message matches assert record_b[0].message.args[ - 0] == "Number of neighbors specified larger than smallest " \ - "cluster. Specify a number of neighbors smaller than " \ - "the smallest cluster size (observations in smallest " \ - "cluster minus one)." + 0] == "Number of neighbors specified larger than smallest " \ + "cluster. Specify a number of neighbors smaller than " \ + "the smallest cluster size (observations in smallest " \ + "cluster minus one)." def test_stream_fit(X_n140_outliers) -> None: @@ -634,8 +668,8 @@ def test_stream_cluster(X_n140_outliers) -> None: assert len(record) == 1 # check that the message matches assert record[0].message.args[ - 0] == "Stream approach does not support clustered data. " \ - "Automatically refit using single cluster of points." + 0] == "Stream approach does not support clustered data. " \ + "Automatically refit using single cluster of points." def test_stream_performance(X_n140_outliers) -> None: