From c4ca4754196bb3d4a5e9ab979ad19089df6f8f00 Mon Sep 17 00:00:00 2001
From: Egor Baturin
Date: Wed, 6 Nov 2024 15:07:20 +0300
Subject: [PATCH] add test for 2 segments

---
 .../test_mean_encoder_transform.py            | 33 ++++++++++++++++---
 1 file changed, 28 insertions(+), 5 deletions(-)

diff --git a/tests/test_transforms/test_encoders/test_mean_encoder_transform.py b/tests/test_transforms/test_encoders/test_mean_encoder_transform.py
index a8a5aee88..973bcae05 100644
--- a/tests/test_transforms/test_encoders/test_mean_encoder_transform.py
+++ b/tests/test_transforms/test_encoders/test_mean_encoder_transform.py
@@ -177,12 +177,17 @@ def expected_mean_segment_encoder_ts() -> TSDataset:
 
 @pytest.fixture
 def multiple_nan_target_two_segments_ts() -> TSDataset:
+    """Fixture with two segments whose targets contain multiple NaN values.
+
+    * For `regressor="A"` the NaN-target timestamps come before the first non-NaN target value
+    * For `regressor="B"` the NaN-target timestamps come after the first non-NaN target value
+    """
     df = generate_ar_df(start_time="2001-01-01", periods=6, n_segments=2)
-    df["target"] = [np.NaN, 2, 3, 4, np.NaN, 5] + [np.NaN, 7, 8, 9, 10, 11]
+    df["target"] = [np.NaN, 2, np.NaN, 4, np.NaN, 5] + [np.NaN, 7, np.NaN, np.NaN, 10, 11]
 
-    df_exog = generate_ar_df(start_time="2001-01-01", periods=8, n_segments=2)
+    df_exog = generate_ar_df(start_time="2001-01-01", periods=7, n_segments=2)
     df_exog.rename(columns={"target": "regressor"}, inplace=True)
-    df_exog["regressor"] = ["A", "B", np.NaN, "A", pd.NA, "B", "C", "A"] + ["A", "B", "A", "A", "A", np.NaN, "A", "C"]
+    df_exog["regressor"] = ["A", "B", "A", "A", "B", "B", "A"] + ["A", "B", "A", "B", "A", "B", "A"]
 
     ts = TSDataset(df, df_exog=df_exog, freq="D", known_future="all")
 
@@ -192,8 +197,8 @@ def multiple_nan_target_two_segments_ts() -> TSDataset:
 @pytest.fixture
 def expected_multiple_nan_target_two_segments_ts() -> TSDataset:
     df = generate_ar_df(start_time="2001-01-01", periods=6, n_segments=2)
-    df.rename(columns={"target": "mean_encoded_regressor"}, inplace=True)
-    df["mean_encoded_regressor"] = [np.NaN, np.NaN, np.NaN, 8, 3, 4.5] + [np.NaN, np.NaN, np.NaN, 8, 7, 3]
+    df.rename(columns={"target": "regressor_mean"}, inplace=True)
+    df["regressor_mean"] = [np.NaN, np.NaN, np.NaN, np.NaN, 4.5, 4.5] + [np.NaN, np.NaN, np.NaN, 4.5, 4, 4.5]
 
     ts = TSDataset(df=df, freq="D")
 
@@ -423,6 +428,24 @@ def test_multiple_nan_target_category_ts(multiple_nan_target_category_ts, expect
     )
 
 
+def test_multiple_nan_target_two_segments_ts(
+    multiple_nan_target_two_segments_ts, expected_multiple_nan_target_two_segments_ts
+):
+    mean_encoder = MeanEncoderTransform(
+        in_column="regressor",
+        mode="macro",
+        handle_missing="category",
+        smoothing=0,
+        out_column="regressor_mean",
+    )
+    mean_encoder.fit_transform(multiple_nan_target_two_segments_ts)
+    assert_frame_equal(
+        multiple_nan_target_two_segments_ts.df.loc[:, pd.IndexSlice[:, "regressor_mean"]],
+        expected_multiple_nan_target_two_segments_ts.df,
+        atol=0.01,
+    )
+
+
 def test_save_load(category_ts):
     mean_encoder = MeanEncoderTransform(in_column="regressor", out_column="mean_encoded_regressor")
     assert_transformation_equals_loaded_original(transform=mean_encoder, ts=category_ts)