Skip to content

Commit

Permalink
add test for 2 segments
Browse files Browse the repository at this point in the history
  • Loading branch information
Egor Baturin committed Nov 6, 2024
1 parent 1cac8f5 commit c4ca475
Showing 1 changed file with 28 additions and 5 deletions.
33 changes: 28 additions & 5 deletions tests/test_transforms/test_encoders/test_mean_encoder_transform.py
Original file line number Diff line number Diff line change
Expand Up @@ -177,12 +177,17 @@ def expected_mean_segment_encoder_ts() -> TSDataset:

@pytest.fixture
def multiple_nan_target_two_segments_ts() -> TSDataset:
"""Fixture with two segments having multiple NaN targets:
* For `regressor="A"` set of NaN timestamp goes before first notna value
* For `regressor="B"` set of NaN timestamp goes after first notna value
"""
df = generate_ar_df(start_time="2001-01-01", periods=6, n_segments=2)
df["target"] = [np.NaN, 2, 3, 4, np.NaN, 5] + [np.NaN, 7, 8, 9, 10, 11]
df["target"] = [np.NaN, 2, np.NaN, 4, np.NaN, 5] + [np.NaN, 7, np.NaN, np.NaN, 10, 11]

df_exog = generate_ar_df(start_time="2001-01-01", periods=8, n_segments=2)
df_exog = generate_ar_df(start_time="2001-01-01", periods=7, n_segments=2)
df_exog.rename(columns={"target": "regressor"}, inplace=True)
df_exog["regressor"] = ["A", "B", np.NaN, "A", pd.NA, "B", "C", "A"] + ["A", "B", "A", "A", "A", np.NaN, "A", "C"]
df_exog["regressor"] = ["A", "B", "A", "A", "B", "B", "A"] + ["A", "B", "A", "B", "A", "B", "A"]

ts = TSDataset(df, df_exog=df_exog, freq="D", known_future="all")

Expand All @@ -192,8 +197,8 @@ def multiple_nan_target_two_segments_ts() -> TSDataset:
@pytest.fixture
def expected_multiple_nan_target_two_segments_ts() -> TSDataset:
df = generate_ar_df(start_time="2001-01-01", periods=6, n_segments=2)
df.rename(columns={"target": "mean_encoded_regressor"}, inplace=True)
df["mean_encoded_regressor"] = [np.NaN, np.NaN, np.NaN, 8, 3, 4.5] + [np.NaN, np.NaN, np.NaN, 8, 7, 3]
df.rename(columns={"target": "regressor_mean"}, inplace=True)
df["regressor_mean"] = [np.NaN, np.NaN, np.NaN, np.NaN, 4.5, 4.5] + [np.NaN, np.NaN, np.NaN, 4.5, 4, 4.5]

ts = TSDataset(df=df, freq="D")

Expand Down Expand Up @@ -423,6 +428,24 @@ def test_multiple_nan_target_category_ts(multiple_nan_target_category_ts, expect
)


def test_multiple_nan_target_two_segments_ts(
    multiple_nan_target_two_segments_ts, expected_multiple_nan_target_two_segments_ts
):
    """Check macro-mode mean encoding against the expected two-segment frame.

    The transform is fitted and applied to the input fixture with
    ``handle_missing="category"`` and no smoothing; the resulting
    ``regressor_mean`` columns must match the expected fixture's frame
    within an absolute tolerance of 0.01.
    """
    encoder_params = dict(
        in_column="regressor",
        mode="macro",
        handle_missing="category",
        smoothing=0,
        out_column="regressor_mean",
    )
    transform = MeanEncoderTransform(**encoder_params)
    transform.fit_transform(multiple_nan_target_two_segments_ts)

    # Keep only the encoded feature under every first-level column label
    # (presumably the segments), so the frame lines up with the expected one.
    encoded_columns = pd.IndexSlice[:, "regressor_mean"]
    actual_df = multiple_nan_target_two_segments_ts.df.loc[:, encoded_columns]
    expected_df = expected_multiple_nan_target_two_segments_ts.df

    assert_frame_equal(actual_df, expected_df, atol=0.01)


def test_save_load(category_ts):
mean_encoder = MeanEncoderTransform(in_column="regressor", out_column="mean_encoded_regressor")
assert_transformation_equals_loaded_original(transform=mean_encoder, ts=category_ts)
Expand Down

0 comments on commit c4ca475

Please sign in to comment.