From 8002b3e02e9b692c671c9b7015896088ea95d01e Mon Sep 17 00:00:00 2001 From: Serhii Koropets Date: Fri, 19 Feb 2021 18:41:57 +0200 Subject: [PATCH 1/7] GordoTimeseriesGenerator initial commit --- gordo/data_frame_mapper/__init__.py | 49 ++++ gordo/machine/model/models.py | 260 ++++++++++++++++-- gordo/machine/validators.py | 3 + gordo/serializer/__init__.py | 2 + gordo/serializer/from_definition.py | 33 ++- requirements/full_requirements.txt | 184 ++++++------- requirements/requirements.in | 2 + .../model/test_gordo_timeseries_generator.py | 165 +++++++++++ tests/gordo/machine/model/test_model.py | 3 +- 9 files changed, 577 insertions(+), 124 deletions(-) create mode 100644 gordo/data_frame_mapper/__init__.py create mode 100644 tests/gordo/machine/model/test_gordo_timeseries_generator.py diff --git a/gordo/data_frame_mapper/__init__.py b/gordo/data_frame_mapper/__init__.py new file mode 100644 index 000000000..c6b512840 --- /dev/null +++ b/gordo/data_frame_mapper/__init__.py @@ -0,0 +1,49 @@ +import logging +import sklearn_pandas + +from copy import copy +from sklearn.base import BaseEstimator +from typing import List, Union + +logger = logging.getLogger(__name__) + + +class DataFrameMapper(sklearn_pandas.DataFrameMapper): + _default_kwargs = {"df_out": True} + + def __init__( + self, + columns: List[Union[str, List[str]]], + transformers: List[BaseEstimator] = None, + **kwargs + ): + self.columns = columns + self.transformers = transformers + features = self._build_features(columns, transformers) + base_kwargs = copy(self._default_kwargs) + base_kwargs.update(kwargs) + super().__init__(features=features, **base_kwargs) + + @staticmethod + def _build_features( + columns: List[Union[str, List[str]]], transformers: List[BaseEstimator], + ): + features = [] + for column in columns: + features.append((column, transformers)) + return features + + def __getstate__(self): + state = super().__getstate__() + state["columns"] = self.columns + state["transformers"] = self.transformers + del state["features"] + return state + + def __setstate__(self, state): + features = self._build_features(state.get("columns"), state.get("transformers")) + state["features"] = features + super().__setstate__(state) + + +__all__ = ['DataFrameMapper'] diff --git a/gordo/machine/model/models.py b/gordo/machine/model/models.py index 117fec2c3..db21035c2 100644 --- a/gordo/machine/model/models.py +++ b/gordo/machine/model/models.py @@ -9,12 +9,15 @@ from abc import ABCMeta from copy import copy, deepcopy from importlib.util import find_spec +from dataclasses import dataclass +from copy import copy import h5py import tensorflow.keras.models from tensorflow.keras.models import load_model, save_model from tensorflow.keras.preprocessing.sequence import pad_sequences, TimeseriesGenerator from tensorflow.keras.wrappers.scikit_learn import KerasRegressor as BaseWrapper +from tensorflow.python.keras.utils import data_utils import numpy as np import pandas as pd import xarray as xr @@ -472,6 +475,7 @@ def __init__( kind: Union[Callable, str], lookback_window: int = 1, batch_size: int = 32, + timeseries_generator: Optional[Dict[str, Any]] = None, **kwargs, ) -> None: """ @@ -503,6 +507,7 @@ def __init__( kwargs["lookback_window"] = lookback_window kwargs["kind"] = kind kwargs["batch_size"] = batch_size + kwargs["timeseries_generator"] = timeseries_generator # fit_generator_params is a set of strings with the keyword arguments of # Keras fit_generator method (excluding "shuffle" as this will be hardcoded). 
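# --- Hedged usage sketch (editorial, not part of the patch): the new
# gordo.data_frame_mapper.DataFrameMapper above pairs each entry of `columns`
# with the same `transformers` list and defaults to df_out=True, so transforms
# come back as DataFrames. The tag names below are hypothetical; columns are
# given as one-element lists so the sklearn transformer receives 2-D input.
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from gordo.data_frame_mapper import DataFrameMapper

df = pd.DataFrame({"tag0": [1.0, 2.0, 3.0], "tag1": [10.0, 20.0, 30.0]})
mapper = DataFrameMapper(columns=[["tag0"], ["tag1"]], transformers=[MinMaxScaler()])
scaled = mapper.fit_transform(df)  # still a pandas DataFrame, courtesy of df_out=True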
@@ -533,6 +538,10 @@ def lookahead(self) -> int: """Steps ahead in y the model should target""" ... + @property + def timeseries_generator(self): + return self.kwargs.get("timeseries_generator", None) + def get_metadata(self): """ Add number of forecast steps to metadata @@ -580,10 +589,10 @@ def fit(self, X: np.ndarray, y: np.ndarray, **kwargs) -> "KerasLSTMForecast": """ - X = X.values if isinstance(X, pd.DataFrame) else X - y = y.values if isinstance(y, pd.DataFrame) else y - - X = self._validate_and_fix_size_of_X(X) + if not isinstance(X, pd.DataFrame): + X = self._validate_and_fix_size_of_X(X) + else: + pass # TODO # We call super.fit on a single sample (notice the batch_size=1) to initiate the # model using the scikit-learn wrapper. @@ -595,6 +604,7 @@ def fit(self, X: np.ndarray, y: np.ndarray, **kwargs) -> "KerasLSTMForecast": batch_size=1, lookback_window=self.lookback_window, lookahead=self.lookahead, + config=self.timeseries_generator, ) primer_x, primer_y = tsg[0] @@ -607,6 +617,7 @@ def fit(self, X: np.ndarray, y: np.ndarray, **kwargs) -> "KerasLSTMForecast": batch_size=self.batch_size, lookback_window=self.lookback_window, lookahead=self.lookahead, + config=self.timeseries_generator, ) gen_kwargs = { @@ -655,15 +666,18 @@ def predict(self, X: np.ndarray, **kwargs) -> np.ndarray: >>> model_transform.shape (2, 2) """ - X = X.values if isinstance(X, pd.DataFrame) else X + if not isinstance(X, pd.DataFrame): + X = self._validate_and_fix_size_of_X(X) + else: + pass # TODO - X = self._validate_and_fix_size_of_X(X) tsg = create_keras_timeseriesgenerator( X=X, y=X, batch_size=10000, lookback_window=self.lookback_window, lookahead=self.lookahead, + config=self.timeseries_generator, ) return self.model.predict_generator(tsg) @@ -715,13 +729,35 @@ def lookahead(self) -> int: return 0 +def pad_x_and_y( + X: np.ndarray, y: np.ndarray, lookahead: int +) -> Tuple[np.ndarray, np.ndarray]: + new_length = len(X) + 1 - lookahead + if lookahead == 1: + return X, y + elif lookahead >= 0: + pad_kw = dict(maxlen=new_length, dtype=X.dtype) + + if lookahead == 0: + X = pad_sequences([X], padding="post", **pad_kw)[0] + y = pad_sequences([y], padding="pre", **pad_kw)[0] + + elif lookahead > 1: + X = pad_sequences([X], padding="post", truncating="post", **pad_kw)[0] + y = pad_sequences([y], padding="pre", truncating="pre", **pad_kw)[0] + return X, y + else: + raise ValueError(f"Value of `lookahead` can not be negative, is {lookahead}") + + def create_keras_timeseriesgenerator( - X: np.ndarray, - y: Optional[np.ndarray], + X: Union[pd.DataFrame, np.ndarray], + y: Optional[Union[pd.DataFrame, np.ndarray]], batch_size: int, lookback_window: int, lookahead: int, -) -> tensorflow.keras.preprocessing.sequence.TimeseriesGenerator: + config: Optional[Dict[str, Any]] = None, +) -> TimeseriesGenerator: """ Provides a `keras.preprocessing.sequence.TimeseriesGenerator` for use with LSTM's, but with the added ability to specify the lookahead of the target in y. 
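# --- Hedged sketch (editorial, not part of the patch) of what the pad_x_and_y
# helper above computes; with new_length = len(X) + 1 - lookahead:
import numpy as np
from gordo.machine.model.models import pad_x_and_y

X = np.arange(4)       # [0, 1, 2, 3]
y = np.arange(10, 14)  # [10, 11, 12, 13]

# lookahead=1 is the native TimeseriesGenerator alignment: X and y unchanged.
# lookahead=0 pads X with a trailing zero and y with a leading zero so each
# target lines up with the current timestep:
#   pad_x_and_y(X, y, 0) -> ([0, 1, 2, 3, 0], [0, 10, 11, 12, 13])
# lookahead=2 truncates instead, dropping X's last element and y's first so
# targets lead the inputs by one extra step:
#   pad_x_and_y(X, y, 2) -> ([0, 1, 2], [11, 12, 13])
X2, y2 = pad_x_and_y(X, y, 2)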
@@ -773,27 +809,195 @@ def create_keras_timeseriesgenerator( >>> len(gen[0][0][0][0]) # n_features = 2 2 """ - new_length = len(X) + 1 - lookahead - kwargs: Dict[str, Any] = dict(length=lookback_window, batch_size=batch_size) - if lookahead == 1: - kwargs.update(dict(data=X, targets=y)) + return timeseries_generators.create_from_config( + config, + data=X, + targets=y, + length=lookback_window, + batch_size=batch_size, + lookahead=lookahead, + ) + + +class TimeseriesGeneratorTypes: + def __init__(self, default_type): + self.default_type = default_type + self._types = {} + + def create_from_config(self, config, **kwargs): + if config is None: + return self.default_type(**kwargs) + else: + if "type" not in config: + raise ValueError( + 'Unspecified "type" attribute for "timeseries_generator"' + ) + type_name = config["type"] + if type_name not in self._types: + raise ValueError( + f'Unknown type "{type_name}" for "timeseries_generator"' + ) + all_kwargs = copy(config) + all_kwargs.pop("type") + all_kwargs.update(kwargs) + return self._types[type_name](**all_kwargs) + + def __call__(self, type_name): + def wrap(cls): + if type_name in self._types: + raise ValueError( + f'TimeseriesGenerator type with name "{type_name}" already exists' + ) + self._types[type_name] = cls + return cls - elif lookahead >= 0: + return wrap - pad_kw = dict(maxlen=new_length, dtype=X.dtype) - if lookahead == 0: - kwargs["data"] = pad_sequences([X], padding="post", **pad_kw)[0] - kwargs["targets"] = pad_sequences([y], padding="pre", **pad_kw)[0] +class DefaultTimeseriesGenerator(TimeseriesGenerator): + def __init__( + self, + data: Union[pd.DataFrame, np.ndarray], + targets: Union[pd.DataFrame, np.ndarray], + lookahead: int = 1, + **kwargs, + ): + if isinstance(data, pd.DataFrame): + data = data.values + if isinstance(targets, pd.DataFrame): + targets = targets.values + data, targets = pad_x_and_y(data, targets, lookahead) + super().__init__(data=data, targets=targets, **kwargs) - elif lookahead > 1: - kwargs["data"] = pad_sequences( - [X], padding="post", truncating="post", **pad_kw - )[0] - kwargs["targets"] = pad_sequences( - [y], padding="pre", truncating="pre", **pad_kw - )[0] - else: - raise ValueError(f"Value of `lookahead` can not be negative, is {lookahead}") - return TimeseriesGenerator(**kwargs) +timeseries_generators = TimeseriesGeneratorTypes( + default_type=DefaultTimeseriesGenerator +) + + +@dataclass +class TimeseriesChunk: + start_ts: pd.Timestamp + end_ts: pd.Timestamp + size: int + + +@dataclass +class TimeseriesGeneratorContainer: + generator: TimeseriesGenerator + chunk: TimeseriesChunk + length: int + + +@timeseries_generators("GordoTimeseriesGenerator") +class GordoTimeseriesGenerator(data_utils.Sequence): + def __init__( + self, + data: Union[pd.DataFrame, np.ndarray], + targets: Union[pd.DataFrame, np.ndarray], + length: int, + batch_size: int = 128, + shuffle: bool = False, + step: Union[pd.Timedelta, str] = "10min", + lookahead: int = 1, + ): + if not isinstance(data, pd.DataFrame): + raise ValueError("Data have to be instance of pandas.DataFrame") + if not isinstance(targets, pd.DataFrame): + raise ValueError("Targets have to be instance of pandas.DataFrame") + if len(data) != len(targets): + raise ValueError( + "Data and targets have to be of same length. 
" + f"Data length is {len(data)}" + f" while target length is {len(targets)}" + ) + + if isinstance(step, str): + step = pd.to_timedelta(step) + self.step = step + self.consecutive_chunks = self.find_consecutive_chunks(data) + logger.debug( + "GordoTimeseriesGenerator with consecutive_chunks=%s", + self.consecutive_chunks, + ) + self.failed_chunks: List[TimeseriesChunk] = [] + self.generators_containers = self.create_generator_containers( + data, targets, length=length, batch_size=batch_size, shuffle=shuffle + ) + logger.debug( + "GordoTimeseriesGenerator with generators_containers=%s", + self.generators_containers, + ) + if not self.generators_containers: + raise ValueError( + "Seems like the time series are too small or in random order." + "Failed chunks: %s" % self.consecutive_chunks + ) + # TODO use lookahead + self.lookahead = lookahead + + def filter_chunks(self, indexes=None): + if indexes is not None: + self.generators_containers = [ + self.generators_containers[i] for i in indexes + ] + + def __len__(self): + return sum(container.length for container in self.generators_containers) + + def find_consecutive_chunks(self, df: pd.DataFrame) -> List[TimeseriesChunk]: + chunks = [] + prev_ts, start_ts, start_i = None, None, 0 + for i, dt in enumerate(df.index): + if prev_ts is None: + prev_ts = dt + start_ts = dt + else: + if dt - prev_ts == self.step: + prev_ts = dt + else: + chunks.append(TimeseriesChunk(start_ts, prev_ts, i - start_i)) + prev_ts, start_ts = None, None + start_i = i + if start_ts is not None: + chunks.append(TimeseriesChunk(start_ts, prev_ts, len(df.index) - start_i)) + return chunks + + def create_generator_containers( + self, + data: pd.DataFrame, + targets: pd.DataFrame, + length: int, + batch_size: int, + shuffle: bool, + ) -> List[TimeseriesGeneratorContainer]: + generator_containers = [] + for chunk in self.consecutive_chunks: + gen_data = data[chunk.start_ts : chunk.end_ts].values + gen_target = targets[chunk.start_ts : chunk.end_ts].values + try: + generator = TimeseriesGenerator( + gen_data, + gen_target, + length=length, + batch_size=batch_size, + shuffle=shuffle, + ) + except ValueError: + self.failed_chunks.append(chunk) + else: + length = len(generator) + generator_containers.append( + TimeseriesGeneratorContainer(generator, chunk, length) + ) + return generator_containers + + def __getitem__(self, index): + i = -1 + for container in self.generators_containers: + new_i = i + container.length + if index <= new_i: + gen_i = index - i - 1 + return container.generator[gen_i] + i = new_i + raise IndexError(index) diff --git a/gordo/machine/validators.py b/gordo/machine/validators.py index ef6d8a223..7ef248662 100644 --- a/gordo/machine/validators.py +++ b/gordo/machine/validators.py @@ -14,6 +14,8 @@ logger = logging.getLogger(__name__) +logger.debug("from_definition1=%s", from_definition) + class BaseDescriptor: """ @@ -85,6 +87,7 @@ class ValidModel(BaseDescriptor): def __set__(self, instance, value): if getattr(instance, "_strict", True): try: + logger.debug("from_definition=%s", from_definition) from_definition(value) except Exception as e: raise ValueError(f"Pipeline from definition failed: {e}") diff --git a/gordo/serializer/__init__.py b/gordo/serializer/__init__.py index 66285bf10..7e0dd823e 100644 --- a/gordo/serializer/__init__.py +++ b/gordo/serializer/__init__.py @@ -1,3 +1,5 @@ from .from_definition import from_definition, load_params_from_definition from .into_definition import into_definition, load_definition_from_params from .serializer import 
dump, dumps, load, loads, load_metadata + +__all__=['from_definition', 'into_definition', 'dump', 'dumps', 'load', 'loads', 'load_metadata'] diff --git a/gordo/serializer/from_definition.py b/gordo/serializer/from_definition.py index 7a6898c3d..59581ead6 100644 --- a/gordo/serializer/from_definition.py +++ b/gordo/serializer/from_definition.py @@ -4,11 +4,13 @@ import pydoc import copy import typing # noqa -from typing import Union, Dict, Any, Iterable +from typing import Union, Dict, Any, Iterable, Type, Optional from sklearn.pipeline import Pipeline, FeatureUnion from sklearn.base import BaseEstimator from tensorflow.keras.models import Sequential +from gordo.data_frame_mapper import DataFrameMapper + logger = logging.getLogger(__name__) @@ -66,7 +68,7 @@ def from_definition( def _build_branch( definition: Iterable[Union[str, Dict[Any, Any]]], - constructor_class=Union[Pipeline, None], + constructor_class: Optional[Type[Pipeline]] = None, ): """ Builds a branch of the tree and optionally constructs the class with the given @@ -177,6 +179,11 @@ def _build_step( f"Got {StepClass} but the supplied parameters" f"seem invalid: {params}" ) + + if issubclass(StepClass, DataFrameMapper): + params = _load_data_mapper_params(params) + + logger.debug("StepClass(%s)", params) return StepClass(**params) # If step is just a string, can initialize it without any params @@ -217,6 +224,16 @@ def _build_callbacks(definitions: list): return callbacks +def _load_data_mapper_params(params: dict): + if "transformers" in params: + classes = copy.deepcopy(params["transformers"]) + if not isinstance(classes, list): + raise TypeError('"transformers" should be a list') + logger.debug("transformers=%s", classes) + params["transformers"] = _build_branch(classes) + return params + + def _load_param_classes(params: dict): """ Inspect the params' values and determine if any can be loaded as a class.
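# --- Hedged sketch (editorial, not part of the patch) of the definition shape
# the new _load_data_mapper_params hook appears to target: a DataFrameMapper
# step whose "transformers" list is itself resolved by _build_branch. The keys
# below are an assumption inferred from the code above, not a documented API.
from gordo.serializer import from_definition

definition = {
    "sklearn.pipeline.Pipeline": {
        "steps": [
            {
                "gordo.data_frame_mapper.DataFrameMapper": {
                    "columns": [["tag0"], ["tag1"]],
                    "transformers": ["sklearn.preprocessing.MinMaxScaler"],
                }
            }
        ]
    }
}
pipeline = from_definition(definition)  # transformers arrive as instantiated estimators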
@@ -255,6 +272,7 @@ def _load_param_classes(params: dict): objects """ params = copy.copy(params) + logger.debug("_load_param_classes=%s", params) for key, value in params.items(): # If value is a simple string, try to load the model/class @@ -289,7 +307,16 @@ def _load_param_classes(params: dict): params[key] = from_definition(value) else: # Call this func again, incase there is nested occurances of this problem in these kwargs - kwargs = _load_param_classes(sub_params) + sub_params = value[list(value.keys())[0]] + + if issubclass(Model, DataFrameMapper): + kwargs = _load_data_mapper_params(sub_params) + logger.debug( + "_load_data_mapper_params(%s)=%s", sub_params, kwargs + ) + else: + kwargs = _load_param_classes(sub_params) + params[key] = Model(**kwargs) # type: ignore elif key == "callbacks" and isinstance(value, list): params[key] = _build_callbacks(value) diff --git a/requirements/full_requirements.txt b/requirements/full_requirements.txt index e311916a8..366cbd61d 100644 --- a/requirements/full_requirements.txt +++ b/requirements/full_requirements.txt @@ -4,147 +4,147 @@ # # pip-compile --output-file=full_requirements.txt mlflow_requirements.in postgres_requirements.in requirements.in # -absl-py==0.9.0 # via tensorboard, tensorflow -adal==1.2.2 # via azure-datalake-store, azureml-core, msrestazure -alembic==1.3.3 # via mlflow -aniso8601==8.0.0 # via flask-restplus +absl-py==0.11.0 # via tensorboard, tensorflow +adal==1.2.6 # via azure-datalake-store, azureml-core, msrestazure +alembic==1.4.1 # via mlflow +aniso8601==8.1.1 # via flask-restplus astor==0.8.1 # via tensorflow -attrs==19.3.0 # via jsonschema -azure-common==1.1.24 # via azure-graphrbac, azure-mgmt-authorization, azure-mgmt-containerregistry, azure-mgmt-keyvault, azure-mgmt-resource, azure-mgmt-storage, azureml-core -azure-core==1.8.1 # via azure-identity, azure-storage-blob, azure-storage-file-datalake +attrs==20.3.0 # via jsonschema +azure-common==1.1.26 # via azure-graphrbac, azure-mgmt-authorization, azure-mgmt-containerregistry, azure-mgmt-keyvault, azure-mgmt-resource, azure-mgmt-storage, azureml-core +azure-core==1.11.0 # via azure-identity, azure-storage-blob, azure-storage-file-datalake azure-datalake-store==0.0.51 # via gordo-dataset azure-graphrbac==0.61.1 # via azureml-core -azure-identity==1.4.0 # via -r requirements.in, gordo-dataset -azure-mgmt-authorization==0.60.0 # via azureml-core +azure-identity==1.4.1 # via -r requirements.in, gordo-dataset +azure-mgmt-authorization==0.61.0 # via azureml-core azure-mgmt-containerregistry==2.8.0 # via azureml-core -azure-mgmt-keyvault==2.0.0 # via azureml-core -azure-mgmt-resource==8.0.0 # via azureml-core -azure-mgmt-storage==7.1.0 # via azureml-core -azure-storage-blob==12.4.0 # via azure-storage-file-datalake -azure-storage-file-datalake==12.1.2 # via gordo-dataset -azureml-contrib-run==1.0.85 # via -r mlflow_requirements.in -azureml-core==1.0.85 # via azureml-mlflow -azureml-mlflow==1.0.85 # via azureml-contrib-run +azure-mgmt-keyvault==2.2.0 # via azureml-core +azure-mgmt-resource==12.0.0 # via azureml-core +azure-mgmt-storage==11.2.0 # via azureml-core +azure-storage-blob==12.7.1 # via azure-storage-file-datalake, mlflow +azure-storage-file-datalake==12.2.3 # via gordo-dataset +azureml-contrib-run==1.22.0 # via -r mlflow_requirements.in +azureml-core==1.22.0 # via azureml-mlflow +azureml-mlflow==1.22.0 # via azureml-contrib-run backports.tempfile==1.0 # via azureml-core backports.weakref==1.0.post1 # via backports.tempfile -cachetools==4.1.1 # via google-auth, 
gordo-dataset, gordo.client -catboost==0.20.2 # via -r requirements.in -cchardet==2.1.5 # via -r requirements.in -certifi==2019.11.28 # via msrest, requests -cffi==1.13.2 # via azure-datalake-store, cryptography -chardet==3.0.4 # via requests +cachetools==4.2.1 # via google-auth, gordo-dataset, gordo.client +catboost==0.24.4 # via -r requirements.in +cchardet==2.1.7 # via -r requirements.in +certifi==2020.12.5 # via msrest, requests +cffi==1.14.5 # via azure-datalake-store, cryptography +chardet==4.0.0 # via requests click==7.1.2 # via -r requirements.in, databricks-cli, flask, gordo.client, mlflow -cloudpickle==1.2.2 # via mlflow -configparser==4.0.2 # via databricks-cli +cloudpickle==1.6.0 # via mlflow contextlib2==0.6.0.post1 # via azureml-core -cryptography==3.3.1 # via adal, azure-identity, azure-storage-blob, azureml-core, gordo-dataset, pyjwt, pyopenssl, secretstorage +cryptography==3.4.6 # via adal, azure-identity, azure-storage-blob, azureml-core, gordo-dataset, msal, pyjwt, pyopenssl, secretstorage cycler==0.10.0 # via matplotlib -databricks-cli==0.9.1 # via mlflow -dataclasses-json==0.3.7 # via -r requirements.in +databricks-cli==0.14.1 # via mlflow +dataclasses-json==0.5.2 # via -r requirements.in dictdiffer==0.8.1 # via -r requirements.in -docker==4.1.0 # via azureml-core, mlflow +docker==4.4.3 # via azureml-core, mlflow entrypoints==0.3 # via mlflow flask-restplus==0.13.0 # via -r requirements.in -flask==1.1.1 # via -r requirements.in, flask-restplus, mlflow, prometheus-flask-exporter +flask==1.1.2 # via -r requirements.in, flask-restplus, mlflow, prometheus-flask-exporter gast==0.2.2 # via tensorflow -gitdb2==2.0.6 # via gitpython -gitpython==3.0.5 # via mlflow -google-auth-oauthlib==0.4.1 # via tensorboard -google-auth==1.10.1 # via google-auth-oauthlib, tensorboard -google-pasta==0.1.8 # via tensorflow -gordo-dataset==2.4.0 # via -r requirements.in, gordo.client +gitdb==4.0.5 # via gitpython +gitpython==3.1.13 # via mlflow +google-auth-oauthlib==0.4.2 # via tensorboard +google-auth==1.27.0 # via google-auth-oauthlib, tensorboard +google-pasta==0.2.0 # via tensorflow +gordo-dataset==2.4.1 # via -r requirements.in, gordo.client gordo.client==0.2.12 # via -r requirements.in -gorilla==0.3.0 # via mlflow -graphviz==0.13.2 # via catboost -grpcio==1.26.0 # via tensorboard, tensorflow +graphviz==0.16 # via catboost +grpcio==1.35.0 # via tensorboard, tensorflow gunicorn==20.0.4 # via -r requirements.in, mlflow h5py==2.10.0 # via -r requirements.in, keras-applications, tensorflow -idna==2.8 # via requests -importlib-metadata==1.4.0 # via jsonschema -influxdb==5.3.0 # via gordo-dataset, gordo.client +idna==2.10 # via requests +importlib-metadata==3.4.0 # via jsonpickle, jsonschema, markdown +influxdb==5.3.1 # via gordo-dataset, gordo.client isodate==0.6.0 # via msrest itsdangerous==1.1.0 # via flask jeepney==0.6.0 # via -r requirements.in, secretstorage -jinja2==2.10.3 # via -r requirements.in, flask -jmespath==0.9.4 # via azureml-core -joblib==0.14.1 # via scikit-learn -jsonpickle==1.2 # via azureml-core, azureml-mlflow +jinja2==2.11.3 # via -r requirements.in, flask +jmespath==0.10.0 # via azureml-core +joblib==1.0.1 # via scikit-learn +jsonpickle==2.0.0 # via azureml-core, azureml-mlflow jsonschema==3.2.0 # via flask-restplus keras-applications==1.0.8 # via tensorflow keras-preprocessing==1.1.0 # via tensorflow -kiwisolver==1.1.0 # via matplotlib -mako==1.1.1 # via alembic -markdown==3.1.1 # via tensorboard +kiwisolver==1.3.1 # via matplotlib +mako==1.1.4 # via alembic 
+markdown==3.3.3 # via tensorboard markupsafe==1.1.1 # via jinja2, mako marshmallow-enum==1.5.1 # via dataclasses-json -marshmallow==3.3.0 # via dataclasses-json, gordo-dataset, marshmallow-enum -matplotlib==3.1.2 # via catboost -mlflow==1.5.0 # via -r mlflow_requirements.in, azureml-mlflow -more-itertools==8.1.0 # via zipp +marshmallow==3.10.0 # via dataclasses-json, gordo-dataset, marshmallow-enum +matplotlib==3.3.4 # via catboost +mlflow==1.13.1 # via -r mlflow_requirements.in, azureml-mlflow msal-extensions==0.2.2 # via azure-identity -msal==1.5.0 # via azure-identity, msal-extensions -msgpack==0.6.1 # via influxdb -msrest==0.6.10 # via azure-graphrbac, azure-mgmt-authorization, azure-mgmt-containerregistry, azure-mgmt-keyvault, azure-mgmt-resource, azure-mgmt-storage, azure-storage-blob, azure-storage-file-datalake, azureml-core, msrestazure -msrestazure==0.6.2 # via azure-graphrbac, azure-mgmt-authorization, azure-mgmt-containerregistry, azure-mgmt-keyvault, azure-mgmt-resource, azure-mgmt-storage, azureml-core +msal==1.9.0 # via azure-identity, msal-extensions +msgpack==1.0.2 # via influxdb +msrest==0.6.21 # via azure-graphrbac, azure-mgmt-authorization, azure-mgmt-containerregistry, azure-mgmt-keyvault, azure-mgmt-resource, azure-mgmt-storage, azure-storage-blob, azure-storage-file-datalake, azureml-core, msrestazure +msrestazure==0.6.4 # via azure-graphrbac, azure-mgmt-authorization, azure-mgmt-containerregistry, azure-mgmt-keyvault, azure-mgmt-resource, azure-mgmt-storage, azureml-core mypy-extensions==0.4.3 # via typing-inspect ndg-httpsclient==0.5.1 # via azureml-core -numexpr==2.7.1 # via -r requirements.in, gordo-dataset -numpy==1.18.1 # via -r requirements.in, catboost, gordo.client, h5py, keras-applications, keras-preprocessing, matplotlib, mlflow, numexpr, opt-einsum, pandas, pyarrow, scikit-learn, scipy, tensorboard, tensorflow, xarray +numexpr==2.7.2 # via -r requirements.in, gordo-dataset +numpy==1.18.5 # via -r requirements.in, catboost, gordo.client, h5py, keras-applications, keras-preprocessing, matplotlib, mlflow, numexpr, opt-einsum, pandas, pyarrow, scikit-learn, scipy, sklearn-pandas, tensorboard, tensorflow, xarray oauthlib==3.1.0 # via requests-oauthlib -opt-einsum==3.1.0 # via tensorflow -packaging==20.7 # via -r requirements.in -pandas==1.1.4 # via -r requirements.in, catboost, gordo-dataset, gordo.client, mlflow, xarray -pathspec==0.7.0 # via azureml-core -peewee==3.13.1 # via -r postgres_requirements.in -plotly==4.4.1 # via catboost +opt-einsum==3.3.0 # via tensorflow +packaging==20.9 # via -r requirements.in +pandas==1.2.2 # via -r requirements.in, catboost, gordo-dataset, gordo.client, mlflow, sklearn-pandas, xarray +pathspec==0.8.1 # via azureml-core +peewee==3.14.1 # via -r postgres_requirements.in +pillow==8.1.0 # via matplotlib +plotly==4.14.3 # via catboost portalocker==1.7.1 # via msal-extensions prometheus-client==0.7.1 # via -r requirements.in, prometheus-flask-exporter -prometheus-flask-exporter==0.12.1 # via mlflow -protobuf==3.11.2 # via mlflow, tensorboard, tensorflow +prometheus-flask-exporter==0.18.1 # via mlflow +protobuf==3.14.0 # via mlflow, tensorboard, tensorflow psycopg2-binary==2.8.4 # via -r postgres_requirements.in pyarrow==0.17.1 # via gordo-dataset, gordo.client pyasn1-modules==0.2.8 # via google-auth pyasn1==0.4.8 # via ndg-httpsclient, pyasn1-modules, rsa -pycparser==2.19 # via cffi +pycparser==2.20 # via cffi pydantic==1.7.3 # via -r requirements.in, gordo.client -pyjwt[crypto]==1.7.1 # via adal, azureml-core, msal 
-pyopenssl==19.1.0 # via azureml-core, ndg-httpsclient -pyparsing==2.4.6 # via matplotlib, packaging -pyrsistent==0.15.7 # via jsonschema +pyjwt[crypto]==2.0.1 # via adal, azureml-core, msal +pyopenssl==20.0.1 # via azureml-core, ndg-httpsclient +pyparsing==2.4.7 # via matplotlib, packaging +pyrsistent==0.17.3 # via jsonschema python-dateutil==2.8.1 # via -r requirements.in, adal, alembic, azureml-core, influxdb, matplotlib, mlflow, pandas python-editor==1.0.4 # via alembic -pytz==2019.3 # via azureml-core, flask-restplus, influxdb, pandas +pytz==2021.1 # via azureml-core, flask-restplus, influxdb, pandas pyyaml==5.3.1 # via -r requirements.in, gordo-dataset, mlflow querystring-parser==1.2.4 # via mlflow requests-oauthlib==1.3.0 # via google-auth-oauthlib, msrest -requests==2.22.0 # via -r requirements.in, adal, azure-core, azure-datalake-store, azureml-core, databricks-cli, docker, gordo.client, influxdb, mlflow, msal, msrest, requests-oauthlib, tensorboard +requests==2.25.1 # via -r requirements.in, adal, azure-core, azure-datalake-store, azureml-core, databricks-cli, docker, gordo.client, influxdb, mlflow, msal, msrest, requests-oauthlib, tensorboard retrying==1.3.3 # via plotly -rsa==4.0 # via google-auth -ruamel.yaml==0.15.89 # via azureml-core -scikit-learn==0.23.2 # via -r requirements.in, gordo-dataset, gordo.client -scipy==1.4.1 # via catboost, scikit-learn -secretstorage==3.1.2 # via azureml-core -simplejson==3.17.2 # via -r requirements.in, gordo.client, mlflow -six==1.14.0 # via absl-py, azure-core, azure-identity, azureml-core, catboost, cryptography, cycler, databricks-cli, docker, flask-restplus, google-auth, google-pasta, grpcio, h5py, influxdb, isodate, jsonschema, keras-preprocessing, mlflow, plotly, protobuf, pyopenssl, pyrsistent, python-dateutil, querystring-parser, retrying, tensorboard, tensorflow, websocket-client -smmap2==2.0.5 # via gitdb2 -sqlalchemy==1.3.13 # via alembic, mlflow -sqlparse==0.3.0 # via mlflow +rsa==4.7.1 # via google-auth +ruamel.yaml.clib==0.2.2 # via ruamel.yaml +ruamel.yaml==0.16.12 # via azureml-core +scikit-learn==0.23.2 # via -r requirements.in, gordo-dataset, gordo.client, sklearn-pandas +scipy==1.6.1 # via catboost, scikit-learn, sklearn-pandas +secretstorage==3.3.1 # via azureml-core +simplejson==3.17.2 # via -r requirements.in, gordo.client +six==1.15.0 # via absl-py, azure-core, azure-identity, catboost, cycler, databricks-cli, docker, flask-restplus, google-auth, google-pasta, grpcio, h5py, influxdb, isodate, jsonschema, keras-preprocessing, mlflow, msrestazure, plotly, protobuf, pyopenssl, python-dateutil, querystring-parser, retrying, tensorboard, tensorflow, websocket-client +sklearn-pandas==1.8.0 # via -r requirements.in +smmap==3.0.5 # via gitdb +sqlalchemy==1.3.23 # via alembic, mlflow +sqlparse==0.4.1 # via mlflow stringcase==1.2.0 # via dataclasses-json -tabulate==0.8.6 # via databricks-cli -tensorboard==2.1.0 # via tensorflow +tabulate==0.8.8 # via databricks-cli +tensorboard==2.1.1 # via tensorflow tensorflow-estimator==2.1.0 # via tensorflow tensorflow==2.1.3 # via -r requirements.in termcolor==1.1.0 # via tensorflow threadpoolctl==2.1.0 # via scikit-learn -typing-extensions==3.7.4.1 # via -r requirements.in, gordo-dataset, typing-inspect -typing-inspect==0.5.0 # via dataclasses-json -urllib3==1.25.7 # via -r requirements.in, azureml-core, requests +typing-extensions==3.7.4.3 # via -r requirements.in, gordo-dataset, importlib-metadata, typing-inspect +typing-inspect==0.6.0 # via dataclasses-json +urllib3==1.26.3 # via -r 
requirements.in, azureml-core, requests websocket-client==0.57.0 # via docker werkzeug==0.16.1 # via -r requirements.in, flask, tensorboard -wheel==0.33.6 # via tensorboard, tensorflow -wrapt==1.11.2 # via gordo.client, tensorflow +wheel==0.36.2 # via tensorboard, tensorflow +wrapt==1.12.1 # via -r requirements.in, gordo.client, tensorflow xarray==0.16.2 # via gordo-dataset -zipp==2.0.0 # via importlib-metadata +zipp==3.4.0 # via importlib-metadata # The following packages are considered to be unsafe in a requirements file: # setuptools diff --git a/requirements/requirements.in b/requirements/requirements.in index 8229fcdd2..5ae6dd826 100644 --- a/requirements/requirements.in +++ b/requirements/requirements.in @@ -29,3 +29,5 @@ jeepney>=0.6 packaging~=20.7 pydantic~=1.7.3 gordo.client~=0.2.12 +wrapt~=1.11 +sklearn-pandas~=1.8.0 diff --git a/tests/gordo/machine/model/test_gordo_timeseries_generator.py b/tests/gordo/machine/model/test_gordo_timeseries_generator.py new file mode 100644 index 000000000..efd32ac64 --- /dev/null +++ b/tests/gordo/machine/model/test_gordo_timeseries_generator.py @@ -0,0 +1,165 @@ +import pytest + +import pandas as pd +from itertools import chain +from random import randrange +from itertools import count + +from gordo.machine.model.models import GordoTimeseriesGenerator, TimeseriesChunk + + +def get_test_datetimeindex(time_intervals, freq=None): + if freq is None: + freq = "H" + dti_iters = (pd.date_range(d, periods=p, freq=freq) for d, p in time_intervals) + return pd.DatetimeIndex(list(chain(*dti_iters))) + + +def random_gen(min_value=80, max_value=100): + def generate(values_count): + for v in range(values_count): + yield randrange(min_value, max_value) + + return generate + + +def range_gen(): + g = count() + + def generate(values_count): + ret_value = next(g) + for v in range(values_count): + yield ret_value + + return generate + + +def get_test_df(time_intervals, generator=None, freq=None, tags_count=3): + if generator is None: + generator = random_gen() + dti = get_test_datetimeindex(time_intervals, freq) + tag_names = ["tag%d" % v for v in range(tags_count)] + data = {k: [] for k in tag_names} + generate_count = len(dti) + for _ in range(generate_count): + for tag_name, value in zip(tag_names, generator(tags_count)): + data[tag_name].append(value) + return pd.DataFrame(data, index=dti).sort_index() + + +def test_find_consecutive_chunks(): + test1_time_intervals = ( + ("2018-01-01", 8), + ("2018-01-02", 45), + ("2018-01-04", 10), + ("2018-01-05", 30), + ("2018-02-03", 20), + ) + test1_df = get_test_df(test1_time_intervals) + gen = GordoTimeseriesGenerator(test1_df, test1_df, length=5, step="60min") + expected_chunks = [ + TimeseriesChunk( + start_ts=pd.Timestamp("2018-01-01 00:00:00"), + end_ts=pd.Timestamp("2018-01-01 07:00:00"), + size=8, + ), + TimeseriesChunk( + start_ts=pd.Timestamp("2018-01-02 01:00:00"), + end_ts=pd.Timestamp("2018-01-03 20:00:00"), + size=45, + ), + TimeseriesChunk( + start_ts=pd.Timestamp("2018-01-04 01:00:00"), + end_ts=pd.Timestamp("2018-01-04 09:00:00"), + size=10, + ), + TimeseriesChunk( + start_ts=pd.Timestamp("2018-01-05 01:00:00"), + end_ts=pd.Timestamp("2018-01-06 05:00:00"), + size=30, + ), + TimeseriesChunk( + start_ts=pd.Timestamp("2018-02-03 01:00:00"), + end_ts=pd.Timestamp("2018-02-03 19:00:00"), + size=20, + ), + ] + assert len(gen.consecutive_chunks) == len(expected_chunks) + for chunk, expected_chunk in zip(gen.consecutive_chunks, expected_chunks): + assert chunk == expected_chunk + + +def 
test_create_generator_containers(): + test1_time_intervals = ( + ("2018-01-01", 4), + ("2018-01-02", 35), + ("2018-01-04", 10), + ) + test1_df = get_test_df(test1_time_intervals) + gen = GordoTimeseriesGenerator(test1_df, test1_df, length=5, step="60min") + expected_generator_containers = [ + { + "chunk": TimeseriesChunk( + start_ts=pd.Timestamp("2018-01-02 01:00:00"), + end_ts=pd.Timestamp("2018-01-03 10:00:00"), + size=35, + ), + "length": 1, + }, + { + "chunk": TimeseriesChunk( + start_ts=pd.Timestamp("2018-01-04 01:00:00"), + end_ts=pd.Timestamp("2018-01-04 09:00:00"), + size=10, + ), + "length": 1, + }, + ] + assert len(gen.generators_containers) == 2 + for i, generator_container in enumerate(gen.generators_containers): + for k, v in expected_generator_containers[i].items(): + assert getattr(generator_container, k) == v, "%s.%s != %s" % ( + generator_container, + k, + v, + ) + expected_failed_chunk = TimeseriesChunk( + start_ts=pd.Timestamp("2018-01-01 00:00:00"), + end_ts=pd.Timestamp("2018-01-01 03:00:00"), + size=4, + ) + assert len(gen.failed_chunks) == 1 + assert gen.failed_chunks[0] == expected_failed_chunk + + +def test_timeseries_generator(): + test1_time_intervals = ( + ("2018-01-02", 15), + ("2018-01-04", 10), + ) + test1_df = get_test_df(test1_time_intervals, generator=range_gen(), tags_count=1) + gen = GordoTimeseriesGenerator( + test1_df, test1_df, length=5, batch_size=3, step="60min" + ) + assert len(gen.generators_containers) == 2 + assert len(gen) == 6 + x, y = gen[0] + expect_x = [ + [[0], [1], [2], [3], [4]], + [[1], [2], [3], [4], [5]], + [[2], [3], [4], [5], [6]], + ] + expect_y = [[5], [6], [7]] + assert x.tolist() == expect_x + assert y.tolist() == expect_y + + +def test_too_short_timeseries_length(): + test1_time_intervals = ( + ("2018-01-01", 4), + ("2018-01-02", 6), + ("2018-01-04", 8), + ) + test1_df = get_test_df(test1_time_intervals) + with pytest.raises(ValueError): + GordoTimeseriesGenerator(test1_df, test1_df, length=10, step="60min") diff --git a/tests/gordo/machine/model/test_model.py b/tests/gordo/machine/model/test_model.py index c2d0fdadf..f8e2bd029 100644 --- a/tests/gordo/machine/model/test_model.py +++ b/tests/gordo/machine/model/test_model.py @@ -337,7 +337,6 @@ def test_lstmae_predict_output(): out = model.predict(xTest) assert out.shape == (2, 3) - def test_keras_autoencoder_fits_callbacks(): model = KerasAutoEncoder( kind="feedforward_hourglass", @@ -410,3 +409,5 @@ def test_for_wrong_kind_import(): X, y = np.random.rand(10, 10), np.random.rand(10, 10) with pytest.raises(ValueError): model.fit(X, y) + +# TODO test with GordoTimeseriesGenerator From 634458c978cecc96b577c3b15a95670cce80bd3d Mon Sep 17 00:00:00 2001 From: Serhii Koropets Date: Fri, 19 Feb 2021 18:58:38 +0200 Subject: [PATCH 2/7] Fix NameError: name List is not defined --- gordo/machine/model/models.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gordo/machine/model/models.py b/gordo/machine/model/models.py index db21035c2..ad6922c48 100644 --- a/gordo/machine/model/models.py +++ b/gordo/machine/model/models.py @@ -5,7 +5,7 @@ import io import importlib from pprint import pformat -from typing import Union, Callable, Dict, Any, Optional, Tuple +from typing import Union, Callable, Dict, Any, Optional, Tuple, List from abc import ABCMeta from copy import copy, deepcopy from importlib.util import find_spec From 3c75789088d9017d4a273591cfd3826f446ca452 Mon Sep 17 00:00:00 2001 From: Serhii Koropets Date: Fri, 19 Feb 2021 20:25:11 +0200 Subject: [PATCH 3/7] 
Downgrade for pandas --- requirements/full_requirements.txt | 2 +- requirements/requirements.in | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/requirements/full_requirements.txt b/requirements/full_requirements.txt index 366cbd61d..e4551e722 100644 --- a/requirements/full_requirements.txt +++ b/requirements/full_requirements.txt @@ -90,7 +90,7 @@ numpy==1.18.5 # via -r requirements.in, catboost, gordo.client, h5py oauthlib==3.1.0 # via requests-oauthlib opt-einsum==3.3.0 # via tensorflow packaging==20.9 # via -r requirements.in -pandas==1.2.2 # via -r requirements.in, catboost, gordo-dataset, gordo.client, mlflow, sklearn-pandas, xarray +pandas==1.1.5 # via -r requirements.in, catboost, gordo-dataset, gordo.client, mlflow, sklearn-pandas, xarray pathspec==0.8.1 # via azureml-core peewee==3.14.1 # via -r postgres_requirements.in pillow==8.1.0 # via matplotlib diff --git a/requirements/requirements.in b/requirements/requirements.in index 5ae6dd826..7c6f4654c 100644 --- a/requirements/requirements.in +++ b/requirements/requirements.in @@ -6,7 +6,7 @@ gunicorn~=20.0 h5py~=2.8 jinja2~=2.10 numpy~=1.18 -pandas~=1.0 +pandas~=1.1.0 numexpr~=2.7 python-dateutil~=2.8 pyyaml~=5.3 From ec629e027188afcd6ba39c361a0e2290a5fc5084 Mon Sep 17 00:00:00 2001 From: Serhii Koropets Date: Wed, 24 Feb 2021 11:39:44 +0200 Subject: [PATCH 4/7] Black reformatting --- gordo/data_frame_mapper/__init__.py | 12 ++++++------ gordo/serializer/__init__.py | 10 +++++++++- .../model/test_gordo_timeseries_generator.py | 17 +++-------------- tests/gordo/machine/model/test_model.py | 2 ++ 4 files changed, 20 insertions(+), 21 deletions(-) diff --git a/gordo/data_frame_mapper/__init__.py b/gordo/data_frame_mapper/__init__.py index c6b512840..72895df87 100644 --- a/gordo/data_frame_mapper/__init__.py +++ b/gordo/data_frame_mapper/__init__.py @@ -12,10 +12,10 @@ class DataFrameMapper(sklearn_pandas.DataFrameMapper): _default_kwargs = {"df_out": True} def __init__( - self, - columns: List[Union[str, List[str]]], - transformers: List[BaseEstimator] = None, - **kwargs + self, + columns: List[Union[str, List[str]]], + transformers: List[BaseEstimator] = None, + **kwargs ): self.columns = columns self.transformers = transformers @@ -26,7 +26,7 @@ def __init__( @staticmethod def _build_features( - columns: List[Union[str, List[str]]], transformers: List[BaseEstimator], + columns: List[Union[str, List[str]]], transformers: List[BaseEstimator] ): features = [] for column in columns: @@ -46,4 +46,4 @@ def __setstate__(self, state): super().__setstate__(state) -__all__ = ['DataFrameMapper'] +__all__ = ["DataFrameMapper"] diff --git a/gordo/serializer/__init__.py b/gordo/serializer/__init__.py index 7e0dd823e..3402d267e 100644 --- a/gordo/serializer/__init__.py +++ b/gordo/serializer/__init__.py @@ -2,4 +2,12 @@ from .into_definition import into_definition, load_definition_from_params from .serializer import dump, dumps, load, loads, load_metadata -__all__=['from_definition', 'into_definition', 'dump', 'dumps', 'load', 'loads', 'load_metadata'] +__all__ = [ + "from_definition", + "into_definition", + "dump", + "dumps", + "load", + "loads", + "load_metadata", +] diff --git a/tests/gordo/machine/model/test_gordo_timeseries_generator.py b/tests/gordo/machine/model/test_gordo_timeseries_generator.py index efd32ac64..4a23bc7f5 100644 --- a/tests/gordo/machine/model/test_gordo_timeseries_generator.py +++ b/tests/gordo/machine/model/test_gordo_timeseries_generator.py @@ -90,11 +90,7 @@ def
test_create_generator_containers(): - test1_time_intervals = ( - ("2018-01-01", 4), - ("2018-01-02", 35), - ("2018-01-04", 10), - ) + test1_time_intervals = (("2018-01-01", 4), ("2018-01-02", 35), ("2018-01-04", 10)) test1_df = get_test_df(test1_time_intervals) gen = GordoTimeseriesGenerator(test1_df, test1_df, length=5, step="60min") expected_generator_containers = [ { "chunk": TimeseriesChunk( @@ -133,10 +129,7 @@ def test_create_generator_containers(): def test_timeseries_generator(): - test1_time_intervals = ( - ("2018-01-02", 15), - ("2018-01-04", 10), - ) + test1_time_intervals = (("2018-01-02", 15), ("2018-01-04", 10)) test1_df = get_test_df(test1_time_intervals, generator=range_gen(), tags_count=1) gen = GordoTimeseriesGenerator( test1_df, test1_df, length=5, batch_size=3, step="60min" @@ -155,11 +148,7 @@ def test_timeseries_generator(): def test_too_short_timeseries_length(): - test1_time_intervals = ( - ("2018-01-01", 4), - ("2018-01-02", 6), - ("2018-01-04", 8), - ) + test1_time_intervals = (("2018-01-01", 4), ("2018-01-02", 6), ("2018-01-04", 8)) test1_df = get_test_df(test1_time_intervals) with pytest.raises(ValueError): GordoTimeseriesGenerator(test1_df, test1_df, length=10, step="60min") diff --git a/tests/gordo/machine/model/test_model.py b/tests/gordo/machine/model/test_model.py index f8e2bd029..8208a1c00 100644 --- a/tests/gordo/machine/model/test_model.py +++ b/tests/gordo/machine/model/test_model.py @@ -337,6 +337,7 @@ def test_lstmae_predict_output(): out = model.predict(xTest) assert out.shape == (2, 3) + def test_keras_autoencoder_fits_callbacks(): model = KerasAutoEncoder( kind="feedforward_hourglass", @@ -410,4 +411,5 @@ def test_for_wrong_kind_import(): with pytest.raises(ValueError): model.fit(X, y) + # TODO test with GordoTimeseriesGenerator From b2204a5df4d2b5fa2986d73d7edb9e4159c5261e Mon Sep 17 00:00:00 2001 From: Serhii Koropets Date: Wed, 24 Feb 2021 11:47:15 +0200 Subject: [PATCH 5/7] Fixing mypy complaints --- gordo/data_frame_mapper/__init__.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/gordo/data_frame_mapper/__init__.py b/gordo/data_frame_mapper/__init__.py index 72895df87..7b086cc71 100644 --- a/gordo/data_frame_mapper/__init__.py +++ b/gordo/data_frame_mapper/__init__.py @@ -3,7 +3,7 @@ import logging import sklearn_pandas from copy import copy from sklearn.base import BaseEstimator -from typing import List, Union +from typing import List, Union, Optional logger = logging.getLogger(__name__) @@ -14,7 +14,7 @@ class DataFrameMapper(sklearn_pandas.DataFrameMapper): def __init__( self, columns: List[Union[str, List[str]]], - transformers: List[BaseEstimator] = None, + transformers: Optional[List[BaseEstimator]] = None, **kwargs ): self.columns = columns @@ -26,7 +26,7 @@ def __init__( @staticmethod def _build_features( - columns: List[Union[str, List[str]]], transformers: List[BaseEstimator] + columns: List[Union[str, List[str]]], transformers: Optional[List[BaseEstimator]] ): features = [] for column in columns: From ba26e3849378f374e0086ba1d9c6208fa41ca7aa Mon Sep 17 00:00:00 2001 From: Serhii Koropets Date: Wed, 24 Feb 2021 11:53:31 +0200 Subject: [PATCH 6/7] Black reformatting --- gordo/data_frame_mapper/__init__.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/gordo/data_frame_mapper/__init__.py b/gordo/data_frame_mapper/__init__.py index 7b086cc71..18278d32c 100644 --- a/gordo/data_frame_mapper/__init__.py +++ b/gordo/data_frame_mapper/__init__.py @@ -26,7 +26,8 @@ def __init__( @staticmethod def _build_features( - columns: List[Union[str, List[str]]],
transformers: Optional[List[BaseEstimator]] + columns: List[Union[str, List[str]]], + transformers: Optional[List[BaseEstimator]], ): features = [] for column in columns: From 82a0941e0df67e807ea803b8e32fec244129ff52 Mon Sep 17 00:00:00 2001 From: Serhii Koropets Date: Wed, 24 Feb 2021 16:03:00 +0200 Subject: [PATCH 7/7] Fixing unused copy import --- gordo/machine/model/models.py | 1 - 1 file changed, 1 deletion(-) diff --git a/gordo/machine/model/models.py b/gordo/machine/model/models.py index ad6922c48..57904e10c 100644 --- a/gordo/machine/model/models.py +++ b/gordo/machine/model/models.py @@ -10,7 +10,6 @@ from copy import copy, deepcopy from importlib.util import find_spec from dataclasses import dataclass -from copy import copy import h5py import tensorflow.keras.models
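Taken together, these patches let a model definition swap the stock Keras TimeseriesGenerator for the gap-aware GordoTimeseriesGenerator through the new timeseries_generator option. Below is a minimal, hedged sketch of that wiring; the "lstm_autoencoder" kind and the 10-minute sample step are illustrative assumptions, not taken from the patch.

from gordo.machine.model.models import KerasLSTMForecast

# "type" picks the class registered via @timeseries_generators(...); the
# remaining keys (here "step") are forwarded as keyword arguments, so training
# only uses runs of rows whose timestamps are spaced exactly 10 minutes apart.
model = KerasLSTMForecast(
    kind="lstm_autoencoder",
    lookback_window=5,
    batch_size=128,
    timeseries_generator={"type": "GordoTimeseriesGenerator", "step": "10min"},
)
# With this generator, X and y must be DataFrames carrying a DatetimeIndex;
# GordoTimeseriesGenerator raises ValueError for plain ndarrays.
# model.fit(X, y)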