diff --git a/.domino/compiled_metadata.json b/.domino/compiled_metadata.json index 397630c..690f8cc 100644 --- a/.domino/compiled_metadata.json +++ b/.domino/compiled_metadata.json @@ -1,110 +1,279 @@ { - "ExampleSimplePiece": { - "name": "ExampleSimplePiece", + "ProphetTrainModelPiece": { + "name": "ProphetTrainModelPiece", "dependency": { "dockerfile": null, "requirements_file": "requirements_0.txt" }, "tags": [ - "Example" + "Prophet" ], "style": { - "node_label": "Simple Piece", + "node_label": "Prophet Train", "node_type": "default", "node_style": { "backgroundColor": "#ebebeb" }, "useIcon": true, - "icon_class_name": "fas fa-database", + "icon_class_name": "icon-park-outline:robot-one", "iconStyle": { "cursor": "pointer" } }, - "description": "This is an example of a simple Domino Piece", + "description": "Piece to train a prophet model", "container_resources": { "requests": { - "cpu": "100m", - "memory": "128Mi" + "cpu": 100, + "memory": 128 }, "limits": { - "cpu": "500m", - "memory": "512Mi" + "cpu": 500, + "memory": 512 } }, "input_schema": { - "title": "InputModel", - "type": "object", + "$defs": { + "GrowthTrend": { + "enum": [ + "linear", + "logistic", + "flat" + ], + "title": "GrowthTrend", + "type": "string" + }, + "SeasonalityMode": { + "enum": [ + "additive", + "multiplicative" + ], + "title": "SeasonalityMode", + "type": "string" + } + }, "properties": { - "distribution_name": { - "description": "Name of the distribution to sample from", + "input_data_file": { + "description": "Path to the input data file. Accepted formats: `.csv`, `.json`. Should use the following format: `ds` (datetime), `y` (target).", + "title": "Input Data File", + "type": "string" + }, + "growth_trend": { "allOf": [ { - "$ref": "#/definitions/DistributionType" + "$ref": "#/$defs/GrowthTrend" } - ] + ], + "default": "linear", + "description": "The growth trend of the data. Options are `linear`, `logistic` and `flat`. Default is `linear`." 
}, - "distribution_mean": { - "title": "Distribution Mean", - "description": "Distribution mean", - "type": "number" + "changepoints": { + "anyOf": [ + { + "items": { + "format": "date", + "type": "string" + }, + "type": "array" + }, + { + "type": "null" + } + ], + "default": null, + "description": "List of dates at which to include potential changepoints. If not specified, potential changepoints are selected automatically.", + "title": "Changepoints" }, - "distribution_sd": { - "title": "Distribution Sd", - "description": "Distribution standard deviation", - "default": 1.0, - "exclusiveMinimum": 0.0, - "type": "number" + "n_changepoints": { + "default": 25, + "description": " Number of potential changepoints to include. Not used if input `changepoints` is supplied.", + "maximum": 1000, + "minimum": 0, + "title": "N Changepoints", + "type": "integer" + }, + "seasonality_mode": { + "allOf": [ + { + "$ref": "#/$defs/SeasonalityMode" + } + ], + "default": "additive", + "description": "The seasonality mode of the data. Options are `additive` and `multiplicative`. Default is `additive`." 
} }, "required": [ - "distribution_name", - "distribution_mean" + "input_data_file" ], - "definitions": { - "DistributionType": { - "title": "DistributionType", - "description": "An enumeration.", - "enum": [ - "gaussian", - "poisson" - ], + "title": "InputModel", + "type": "object" + }, + "output_schema": { + "properties": { + "prophet_model_file_path": { + "description": "Path to the file containing the trained model.", + "title": "Prophet model path", "type": "string" } + }, + "required": [ + "prophet_model_file_path" + ], + "title": "OutputModel", + "type": "object" + }, + "secrets_schema": null, + "source_url": "https://github.com/Tauffer-Consulting/timeseries_domino_pieces/tree/main/pieces/ProphetTrainModelPiece" + }, + "GetYahooFinanceDataPiece": { + "name": "GetYahooFinanceDataPiece", + "dependency": { + "dockerfile": null, + "requirements_file": "requirements_0.txt" + }, + "tags": [ + "Example" + ], + "style": { + "node_label": "Get Yahoo Finance Data", + "node_type": "default", + "node_style": { + "backgroundColor": "#ebebeb" + }, + "useIcon": true, + "icon_class_name": "fa-solid:database", + "iconStyle": { + "cursor": "pointer" } }, + "description": "This piece gets data from Yahoo Finance.", + "container_resources": { + "requests": { + "cpu": 100, + "memory": 128 + }, + "limits": { + "cpu": 500, + "memory": 512 + } + }, + "input_schema": { + "properties": { + "ticker": { + "description": "Ticker of the stock to get data from.", + "title": "Ticker", + "type": "string" + }, + "start_date": { + "description": "Start date of the data to get.", + "format": "date", + "title": "Start Date", + "type": "string" + }, + "end_date": { + "description": "End date of the data to get.", + "format": "date", + "title": "End Date", + "type": "string" + } + }, + "required": [ + "ticker", + "start_date", + "end_date" + ], + "title": "InputModel", + "type": "object" + }, "output_schema": { + "properties": { + "data_path": { + "description": "Path to the file containing the 
downloaded data.", + "title": "Data Path", + "type": "string" + } + }, + "required": [ + "data_path" + ], "title": "OutputModel", - "type": "object", + "type": "object" + }, + "secrets_schema": null, + "source_url": "https://github.com/Tauffer-Consulting/timeseries_domino_pieces/tree/main/pieces/GetYahooFinanceDataPiece" + }, + "ProphetPredictPiece": { + "name": "ProphetPredictPiece", + "dependency": { + "dockerfile": null, + "requirements_file": "requirements_0.txt" + }, + "tags": [ + "Prophet" + ], + "style": { + "node_label": "Prophet Predict", + "node_type": "default", + "node_style": { + "backgroundColor": "#ebebeb" + }, + "useIcon": true, + "icon_class_name": "icon-park-outline:robot-one", + "iconStyle": { + "cursor": "pointer" + } + }, + "description": "Piece to predict using Prophet", + "container_resources": { + "requests": { + "cpu": 100, + "memory": 128 + }, + "limits": { + "cpu": 500, + "memory": 512 + } + }, + "input_schema": { "properties": { - "message": { - "title": "Message", - "description": "Output message to log", + "prophet_model_path": { + "description": "Path to the file containing the trained model.", + "title": "Prophet Model Path", "type": "string" }, - "sample_result": { - "title": "Sample Result", - "description": "The result of this Piece's processing", - "type": "string" + "periods": { + "description": "Number of periods to forecast.", + "title": "Periods", + "type": "integer" } }, "required": [ - "message", - "sample_result" - ] + "prophet_model_path", + "periods" + ], + "title": "InputModel", + "type": "object" }, - "secrets_schema": { - "title": "SecretsModel", - "type": "object", + "output_schema": { "properties": { - "EXAMPLE_OPERATOR_SECRET_1": { - "title": "Example Operator Secret 1", - "description": "A secret necessary to run this Piece", + "forecast_data_path": { + "description": "Path to the file containing the forecast data.", + "title": "Forecast Data Path", + "type": "string" + }, + "forecast_figure_path": { +
"description": "Path to the file containing the results figure.", + "title": "Forecast Figure Path", "type": "string" } }, "required": [ - "EXAMPLE_OPERATOR_SECRET_1" - ] - } + "forecast_data_path", + "forecast_figure_path" + ], + "title": "OutputModel", + "type": "object" + }, + "secrets_schema": null, + "source_url": "https://github.com/Tauffer-Consulting/timeseries_domino_pieces/tree/main/pieces/ProphetPredictPiece" } } \ No newline at end of file diff --git a/.domino/dependencies_map.json b/.domino/dependencies_map.json index 493dfb9..2f3484c 100644 --- a/.domino/dependencies_map.json +++ b/.domino/dependencies_map.json @@ -5,11 +5,11 @@ "requirements_file": "requirements_0.txt" }, "pieces": [ - "ExampleSimplePiece" + "ProphetTrainModelPiece", + "GetYahooFinanceDataPiece", + "ProphetPredictPiece" ], - "secrets": [ - "EXAMPLE_OPERATOR_SECRET_1" - ], - "source_image": "ghcr.io/enter-your-registry-name-here/enter-your-repository-name-here:0.1.0-group0" + "secrets": [], + "source_image": "ghcr.io/tauffer-consulting/timeseries_domino_pieces:development-group0" } } \ No newline at end of file diff --git a/config.toml b/config.toml index b210a16..97b512b 100644 --- a/config.toml +++ b/config.toml @@ -1,11 +1,11 @@ [repository] # The name of the github organization / person owner, e.g. tauffer-consulting. 
# Must be in lower-case letters -REGISTRY_NAME = "enter-your-registry-name-here" +REGISTRY_NAME = "tauffer-consulting" # The name of this Pieces repository -REPOSITORY_NAME = "enter-your-repository-name-here" -REPOSITORY_LABEL = "enter-your-repository-label-here" +REPOSITORY_NAME = "timeseries_domino_pieces" +REPOSITORY_LABEL = "Time Series Domino Pieces" # The version of this Pieces release # Attention: changing this will create a new release diff --git a/dependencies/Dockerfile_1 b/dependencies/Dockerfile_1 deleted file mode 100644 index 0ffb4e8..0000000 --- a/dependencies/Dockerfile_1 +++ /dev/null @@ -1,13 +0,0 @@ -FROM taufferconsulting/domino-base-piece:latest - -# Install specific requirements to run OpenCV -RUN apt-get update -RUN apt-get install ffmpeg libsm6 libxext6 -y - -# Need to copy pieces source code -COPY config.toml domino/pieces_repository/ -COPY pieces domino/pieces_repository/pieces -COPY .domino domino/pieces_repository/.domino - -# Install specific dependencies in domino_env virtual environment -RUN pip install --no-cache-dir opencv-python scipy diff --git a/dependencies/requirements_0.txt b/dependencies/requirements_0.txt index 1faa2fd..03a4e09 100644 --- a/dependencies/requirements_0.txt +++ b/dependencies/requirements_0.txt @@ -1 +1,4 @@ -numpy==1.23.5 \ No newline at end of file +prophet==1.1.5 +pandas==2.1.3 +plotly==5.18.0 +yfinance==0.2.38 \ No newline at end of file diff --git a/pieces/ExampleComplexPiece/metadata.json b/pieces/ExampleComplexPiece/metadata.json deleted file mode 100644 index f50d336..0000000 --- a/pieces/ExampleComplexPiece/metadata.json +++ /dev/null @@ -1,24 +0,0 @@ -{ - "name": "ExampleComplexPiece", - "description": "This is an example of a complex Domino Piece", - "dependency": { - "dockerfile": "Dockerfile_1" - }, - "container_resources": { - "requests": { - "cpu": "100m", - "memory": "128Mi" - }, - "limits": { - "cpu": "500m", - "memory": "512Mi" - } - }, - "tags": [ - "Example" - ], - "style": { - 
"node_label": "Complex Piece", - "icon_class_name": "fa-solid:database" - } -} \ No newline at end of file diff --git a/pieces/ExampleComplexPiece/models.py b/pieces/ExampleComplexPiece/models.py deleted file mode 100644 index a59fdc2..0000000 --- a/pieces/ExampleComplexPiece/models.py +++ /dev/null @@ -1,23 +0,0 @@ -from pydantic import BaseModel, Field -from enum import Enum - - -class InputModel(BaseModel): - arg1: str = Field( - description="Distribution mean" - ) - - -class OutputModel(BaseModel): - message: str = Field( - description="Output message to log" - ) - result: str = Field( - description="The result of this Piece's processing" - ) - - -class SecretsModel(BaseModel): - EXAMPLE_OPERATOR_SECRET_2: str = Field( - description="A secret necessary to run this Piece" - ) \ No newline at end of file diff --git a/pieces/ExampleComplexPiece/piece.py b/pieces/ExampleComplexPiece/piece.py deleted file mode 100644 index 2c88f87..0000000 --- a/pieces/ExampleComplexPiece/piece.py +++ /dev/null @@ -1,35 +0,0 @@ -from domino.base_piece import BasePiece -from .models import InputModel, OutputModel, SecretsModel -import os - - -class ExampleComplexPiece(BasePiece): - """ - This Piece serves as a more complex example, using Dockerfile as dependency, from where you can start writing your own Piece. 
- Remember to also change all other required files accordingly: - - piece.py (this file) - - models.py - - metadata.json - - requirements.txt or Dockerfile if needed - """ - - def piece_function(self, input_data: InputModel, secrets_data: SecretsModel): - - # Input arguments are retrieved from the Input model object - arg1 = input_data.arg1 - - # If this Piece needs to use a Secret value, it can retrieve it from Secrets Model object using secrets_data argument - piece_secret = secrets_data.EXAMPLE_OPERATOR_SECRET_2 - - # Basic logging is already implemented in the BasePiece class - self.logger.info("Starting piece process...") - - # Here we add the Piece function logic - message = "" - result = "" - - # Finally, results should return as an Output model - return OutputModel( - message=message, - result=result - ) \ No newline at end of file diff --git a/pieces/ExampleSimplePiece/models.py b/pieces/ExampleSimplePiece/models.py deleted file mode 100644 index 29e5743..0000000 --- a/pieces/ExampleSimplePiece/models.py +++ /dev/null @@ -1,36 +0,0 @@ -from pydantic import BaseModel, Field -from enum import Enum - - -class DistributionType(str, Enum): - gaussian = "gaussian" - poisson = "poisson" - - -class InputModel(BaseModel): - distribution_name: DistributionType = Field( - description="Name of the distribution to sample from" - ) - distribution_mean: float = Field( - description="Distribution mean" - ) - distribution_sd: float = Field( - default=1., - gt=0., - description="Distribution standard deviation" - ) - - -class OutputModel(BaseModel): - message: str = Field( - description="Output message to log" - ) - sample_result: str = Field( - description="The result of this Piece's processing" - ) - - -class SecretsModel(BaseModel): - EXAMPLE_OPERATOR_SECRET_1: str = Field( - description="A secret necessary to run this Piece" - ) \ No newline at end of file diff --git a/pieces/ExampleSimplePiece/piece.py b/pieces/ExampleSimplePiece/piece.py deleted file mode 100644 index 
2257760..0000000 --- a/pieces/ExampleSimplePiece/piece.py +++ /dev/null @@ -1,51 +0,0 @@ -from domino.base_piece import BasePiece -from .models import InputModel, OutputModel, SecretsModel -import os - -import numpy as np - - -class ExampleSimplePiece(BasePiece): - """ - This Piece serves as a simple example, from where you can start writing your own Piece. - Remember to also change all other required files accordingly: - - piece.py (this file) - - models.py - - metadata.json - - requirements.txt or Dockerfile if needed - """ - - def piece_function(self, input_data: InputModel, secrets_data: SecretsModel): - - # Input arguments are retrieved from the Input model object - distribution_name = input_data.distribution_name - distribution_mean = input_data.distribution_mean - distribution_sd = input_data.distribution_sd - - # If this Piece needs to use a Secret value, it can retrieve it from Secrets Model object using secrets_data argument - piece_secret = secrets_data.EXAMPLE_OPERATOR_SECRET_1 - - # Basic logging is already implemented in the BasePiece class - self.logger.info("Starting sampling process...") - - # Here we add the Piece function logic - message = "" - if distribution_name == "gaussian": - sample_result = np.random.normal(distribution_mean, distribution_sd) - - elif distribution_name == "poisson": - if distribution_mean < 0: - distribution_mean = abs(distribution_mean) - message += "\n" - message += "Poisson distributions only accept positive mean values. Applying abs() to the value received." - sample_result = np.random.poisson(distribution_mean) - - self.logger.info(f"Sampled from a gaussian distribution with mean={distribution_mean} and sd={distribution_sd}") - message += "\n" - message += "Sampling operation was successful!" 
- - # Finally, results should return as an Output model - return OutputModel( - message=message, - sample_result=sample_result - ) \ No newline at end of file diff --git a/pieces/ExampleSimplePiece/test_example_simple_piece.py b/pieces/ExampleSimplePiece/test_example_simple_piece.py deleted file mode 100644 index 168a611..0000000 --- a/pieces/ExampleSimplePiece/test_example_simple_piece.py +++ /dev/null @@ -1,15 +0,0 @@ -from domino.testing import piece_dry_run - -def test_example_simple_piece(): - input_data = dict( - distribution_name="gaussian", - distribution_mean=0., - distribution_sd=1. - ) - output_data = piece_dry_run( - "ExampleSimplePiece", - input_data - ) - - assert output_data["message"] is not None - assert output_data["sample_result"] is not None \ No newline at end of file diff --git a/pieces/ExampleSimplePiece/metadata.json b/pieces/GetYahooFinanceDataPiece/metadata.json similarity index 53% rename from pieces/ExampleSimplePiece/metadata.json rename to pieces/GetYahooFinanceDataPiece/metadata.json index 1bad3ce..e1bdd23 100644 --- a/pieces/ExampleSimplePiece/metadata.json +++ b/pieces/GetYahooFinanceDataPiece/metadata.json @@ -1,24 +1,24 @@ { - "name": "ExampleSimplePiece", - "description": "This is an example of a simple Domino Piece", + "name": "GetYahooFinanceDataPiece", + "description": "This piece gets data from Yahoo Finance.", "dependency": { "requirements_file": "requirements_0.txt" }, "container_resources": { "requests": { - "cpu": "100m", - "memory": "128Mi" + "cpu": 100, + "memory": 128 }, "limits": { - "cpu": "500m", - "memory": "512Mi" + "cpu": 500, + "memory": 512 } }, "tags": [ "Example" ], "style": { - "node_label": "Simple Piece", + "node_label": "Get Yahoo Finance Data", "icon_class_name": "fa-solid:database" } } \ No newline at end of file diff --git a/pieces/GetYahooFinanceDataPiece/models.py b/pieces/GetYahooFinanceDataPiece/models.py new file mode 100644 index 0000000..9e0b905 --- /dev/null +++ 
b/pieces/GetYahooFinanceDataPiece/models.py @@ -0,0 +1,20 @@ +from pydantic import BaseModel, Field +from datetime import date + + + +class InputModel(BaseModel): + ticker: str = Field( + description="Ticker of the stock to get data from." + ) # TODO change to ENUM ? + start_date: date = Field( + description="Start date of the data to get." + ) + end_date: date = Field( + description="End date of the data to get." + ) + +class OutputModel(BaseModel): + data_path: str = Field( + description="Path to the file containing the downloaded data." + ) diff --git a/pieces/GetYahooFinanceDataPiece/piece.py b/pieces/GetYahooFinanceDataPiece/piece.py new file mode 100644 index 0000000..862b54d --- /dev/null +++ b/pieces/GetYahooFinanceDataPiece/piece.py @@ -0,0 +1,24 @@ +from domino.base_piece import BasePiece +from .models import InputModel, OutputModel +import yfinance as yf +from pathlib import Path + + +class GetYahooFinanceDataPiece(BasePiece): + """ + This Piece downloads data for a ticker from Yahoo Finance and saves it as a CSV file.
+ """ + def piece_function(self, input_data: InputModel): + ticker = input_data.ticker + start_date = input_data.start_date + end_date = input_data.end_date + + df = yf.download(ticker, start=start_date, end=end_date) + df.reset_index(inplace=True) + + df_path = Path(self.results_path) / f"{ticker}_data.csv" + df.to_csv(df_path, index=False) + + return OutputModel( + data_path=str(df_path) + ) \ No newline at end of file diff --git a/pieces/ProphetPredictPiece/metadata.json b/pieces/ProphetPredictPiece/metadata.json new file mode 100644 index 0000000..132a362 --- /dev/null +++ b/pieces/ProphetPredictPiece/metadata.json @@ -0,0 +1,24 @@ +{ + "name": "ProphetPredictPiece", + "description": "Piece to predict using Prophet", + "dependency": { + "requirements_file": "requirements_0.txt" + }, + "container_resources": { + "requests": { + "cpu": 100, + "memory": 128 + }, + "limits": { + "cpu": 500, + "memory": 512 + } + }, + "tags": [ + "Prophet" + ], + "style": { + "node_label": "Prophet Predict", + "icon_class_name": "icon-park-outline:robot-one" + } +} \ No newline at end of file diff --git a/pieces/ProphetPredictPiece/models.py b/pieces/ProphetPredictPiece/models.py new file mode 100644 index 0000000..bf84de0 --- /dev/null +++ b/pieces/ProphetPredictPiece/models.py @@ -0,0 +1,26 @@ +from pydantic import BaseModel, Field +from enum import Enum +from typing import List +from datetime import date + + +class InputModel(BaseModel): + prophet_model_path: str = Field( + title="Prophet Model Path", + description="Path to the file containing the trained model." + ) + periods: int = Field( + title="Periods", + description="Number of periods to forecast." + ) + + +class OutputModel(BaseModel): + forecast_data_path: str = Field( + title="Forecast Data Path", + description="Path to the file containing the forecast data." + ) + forecast_figure_path: str = Field( + title="Forecast Figure Path", + description="Path to the file containing the results figure." 
+ ) diff --git a/pieces/ProphetPredictPiece/piece.py b/pieces/ProphetPredictPiece/piece.py new file mode 100644 index 0000000..6aa4b23 --- /dev/null +++ b/pieces/ProphetPredictPiece/piece.py @@ -0,0 +1,40 @@ +from domino.base_piece import BasePiece +from .models import InputModel, OutputModel +import pandas as pd +from prophet import Prophet +import pickle +from pathlib import Path +from prophet.plot import plot_plotly, plot_components_plotly + + + +class ProphetPredictPiece(BasePiece): + """ + This Piece uses a trained Prophet model to make predictions on new data. + """ + def piece_function(self, input_data: InputModel): + + with open(input_data.prophet_model_path, "rb") as f: + model = pickle.load(f) + + future = model.make_future_dataframe(periods=input_data.periods) + forecast = model.predict(future) + + self.results_path = Path(self.results_path) + + forecast_data_path = self.results_path / "forecast_data.csv" + forecast.to_csv(forecast_data_path, index=False) + + forecast_figure_path = self.results_path / "forecast_figure.json" + forecast_figure = plot_plotly(model, forecast) + + forecast_figure.write_json(str(forecast_figure_path)) + self.display_result = { + "file_type": "plotly_json", + "file_path": str(forecast_figure_path) + } + + return OutputModel( + forecast_data_path=str(forecast_data_path), + forecast_figure_path=str(forecast_figure_path), + ) \ No newline at end of file diff --git a/pieces/ProphetPredictPiece/test_prophet_predict_piece.py b/pieces/ProphetPredictPiece/test_prophet_predict_piece.py new file mode 100644 index 0000000..af4c3dd --- /dev/null +++ b/pieces/ProphetPredictPiece/test_prophet_predict_piece.py @@ -0,0 +1,4 @@ +from domino.testing import piece_dry_run + +def test_prophet_predict_piece(): + ... 
\ No newline at end of file diff --git a/pieces/ProphetTrainModelPiece/metadata.json b/pieces/ProphetTrainModelPiece/metadata.json new file mode 100644 index 0000000..ba816ca --- /dev/null +++ b/pieces/ProphetTrainModelPiece/metadata.json @@ -0,0 +1,24 @@ +{ + "name": "ProphetTrainModelPiece", + "description": "Piece to train a prophet model", + "dependency": { + "requirements_file": "requirements_0.txt" + }, + "container_resources": { + "requests": { + "cpu": 100, + "memory": 128 + }, + "limits": { + "cpu": 500, + "memory": 512 + } + }, + "tags": [ + "Prophet" + ], + "style": { + "node_label": "Prophet Train", + "icon_class_name": "icon-park-outline:robot-one" + } +} \ No newline at end of file diff --git a/pieces/ProphetTrainModelPiece/models.py b/pieces/ProphetTrainModelPiece/models.py new file mode 100644 index 0000000..e84a0f2 --- /dev/null +++ b/pieces/ProphetTrainModelPiece/models.py @@ -0,0 +1,50 @@ +from pydantic import BaseModel, Field +from enum import Enum +from typing import List, Optional +from datetime import date + + +class GrowthTrend(str, Enum): + linear = "linear" + logistic = "logistic" + flat = "flat" + + +class SeasonalityMode(str, Enum): + additive = "additive" + multiplicative = "multiplicative" + + +class InputModel(BaseModel): + input_data_file: str = Field( + title="Input Data File", + description="Path to the input data file. Accepted formats: `.csv`, `.json`. Should use the following format: `ds` (datetime), `y` (target).", + ) + growth_trend: GrowthTrend = Field( + default=GrowthTrend.linear, + description="The growth trend of the data. Options are `linear`, `logistic` and `flat`. Default is `linear`." + ) + changepoints: Optional[List[date]] = Field( + default=None, + description="List of dates at which to include potential changepoints. If not specified, potential changepoints are selected automatically." + ) + n_changepoints: int = Field( + default=25, + ge=0, + le=1000, + description=" Number of potential changepoints to include. 
Not used if input `changepoints` is supplied." + ) + seasonality_mode: SeasonalityMode = Field( + default=SeasonalityMode.additive, + description="The seasonality mode of the data. Options are `additive` and `multiplicative`. Default is `additive`." + ) + + +class OutputModel(BaseModel): + prophet_model_file_path: str = Field( + title='Prophet model path', + description="Path to the file containing the trained model." + ) + # results_figure_file_path: str = Field( + # description="Path to the file containing the results figure." + # ) diff --git a/pieces/ProphetTrainModelPiece/piece.py b/pieces/ProphetTrainModelPiece/piece.py new file mode 100644 index 0000000..c08efea --- /dev/null +++ b/pieces/ProphetTrainModelPiece/piece.py @@ -0,0 +1,38 @@ +from domino.base_piece import BasePiece +from .models import InputModel, OutputModel +import pandas as pd +from prophet import Prophet +import pickle +from pathlib import Path + +class ProphetTrainModelPiece(BasePiece): + """ + This Piece trains a Prophet model using the data provided in the input file. + """ + def piece_function(self, input_data: InputModel): + + # Load data + input_data_file = input_data.input_data_file + if input_data_file.endswith('.csv'): + df = pd.read_csv(input_data_file) + elif input_data_file.endswith('.json'): + df = pd.read_json(input_data_file) + else: + raise ValueError("File format not supported. 
Please pass a CSV or JSON file.") + + model = Prophet( + seasonality_mode=input_data.seasonality_mode, + growth=input_data.growth_trend, + changepoints=input_data.changepoints, + n_changepoints=input_data.n_changepoints + ) + model.fit(df) + + # Serialize model + model_file_path = Path(self.results_path) / "prophet_model.json" + with open(str(model_file_path), "wb") as f: + pickle.dump(model, f) + + return OutputModel( + prophet_model_file_path=str(model_file_path), + ) \ No newline at end of file diff --git a/pieces/ProphetTrainModelPiece/test_prophet_train_model_piece.py b/pieces/ProphetTrainModelPiece/test_prophet_train_model_piece.py new file mode 100644 index 0000000..bf0946c --- /dev/null +++ b/pieces/ProphetTrainModelPiece/test_prophet_train_model_piece.py @@ -0,0 +1,4 @@ +from domino.testing import piece_dry_run + +def test_prophet_train_model_piece(): + ... \ No newline at end of file