Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Test SelectKBest + StandardScaler pipeline #1156

Merged
merged 5 commits into from
Jan 24, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions skl2onnx/shape_calculators/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
from . import cross_decomposition
from . import dict_vectorizer
from . import ensemble_shapes
from . import feature_selection
from . import feature_hasher
from . import flatten
from . import function_transformer
Expand Down Expand Up @@ -63,6 +64,7 @@
dict_vectorizer,
ensemble_shapes,
feature_hasher,
feature_selection,
flatten,
function_transformer,
gaussian_process,
Expand Down
10 changes: 0 additions & 10 deletions skl2onnx/shape_calculators/concat.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,13 +93,3 @@ def more_generic(t1, t2):


register_shape_calculator("SklearnConcat", calculate_sklearn_concat)
register_shape_calculator("SklearnGenericUnivariateSelect", calculate_sklearn_concat)
register_shape_calculator("SklearnRFE", calculate_sklearn_concat)
register_shape_calculator("SklearnRFECV", calculate_sklearn_concat)
register_shape_calculator("SklearnSelectFdr", calculate_sklearn_concat)
register_shape_calculator("SklearnSelectFpr", calculate_sklearn_concat)
register_shape_calculator("SklearnSelectFromModel", calculate_sklearn_concat)
register_shape_calculator("SklearnSelectFwe", calculate_sklearn_concat)
register_shape_calculator("SklearnSelectKBest", calculate_sklearn_concat)
register_shape_calculator("SklearnSelectPercentile", calculate_sklearn_concat)
register_shape_calculator("SklearnVarianceThreshold", calculate_sklearn_concat)
25 changes: 25 additions & 0 deletions skl2onnx/shape_calculators/feature_selection.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
# SPDX-License-Identifier: Apache-2.0


from ..common._registration import register_shape_calculator
from ..common.utils import check_input_and_output_numbers


def calculate_sklearn_select(operator):
    """Set the output tensor type of a feature-selection operator.

    The single output reuses the tensor class of the first input and gets
    the shape ``[N, C]``, where ``N`` is the first dimension of that input
    and ``C`` is the sum of the mask returned by the fitted selector's
    ``get_support()``.

    :param operator: object exposing ``inputs``, ``outputs`` and the fitted
        scikit-learn selector as ``raw_operator``
    """
    check_input_and_output_numbers(operator, output_count_range=1)
    first_input = operator.inputs[0]
    n_rows = first_input.get_first_dimension()
    # get_support() is expected to return a NumPy mask, so its sum is a NumPy
    # scalar; store a native int so the declared shape only holds plain
    # Python values.
    n_selected = int(operator.raw_operator.get_support().sum())
    operator.outputs[0].type = first_input.type.__class__([n_rows, n_selected])


# Every selector alias below shares the same shape calculator.
_SELECTOR_ALIASES = (
    "SklearnGenericUnivariateSelect",
    "SklearnRFE",
    "SklearnRFECV",
    "SklearnSelectFdr",
    "SklearnSelectFpr",
    "SklearnSelectFromModel",
    "SklearnSelectFwe",
    "SklearnSelectKBest",
    "SklearnSelectPercentile",
    "SklearnVarianceThreshold",
)

for _selector_alias in _SELECTOR_ALIASES:
    register_shape_calculator(_selector_alias, calculate_sklearn_select)
42 changes: 42 additions & 0 deletions tests/test_sklearn_feature_selection_converters.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,9 @@
SelectPercentile,
VarianceThreshold,
)
from sklearn.pipeline import make_pipeline
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVR
from skl2onnx import convert_sklearn
from skl2onnx.common.data_types import Int64TensorType, FloatTensorType
Expand All @@ -30,11 +33,33 @@
class TestSklearnFeatureSelectionConverters(unittest.TestCase):
def test_generic_univariate_select_int(self):
    """Convert a default GenericUnivariateSelect fitted on int64 data."""
    model = GenericUnivariateSelect()

    X = np.array(
        [[1, 2, 3, 1], [0, 3, 1, 4], [3, 5, 6, 1], [1, 2, 1, 5]], dtype=np.int64
    )
    y = np.array([0, 1, 0, 1])
    model.fit(X, y)

    model_onnx = convert_sklearn(
        model,
        "generic univariate select",
        [("input", Int64TensorType([None, X.shape[1]]))],
        target_opset=TARGET_OPSET,
    )
    # assertIsNotNone gives an informative failure message, unlike
    # assertTrue(x is not None) (reported by CodeQL as an imprecise assert).
    self.assertIsNotNone(model_onnx)
    dump_data_and_model(
        X, model, model_onnx, basename="SklearnGenericUnivariateSelect"
    )

def test_generic_univariate_select_kbest_int(self):
model = GenericUnivariateSelect(mode="k_best", param=2)

X = np.array(
[[1, 2, 3, 1], [0, 3, 1, 4], [3, 5, 6, 1], [1, 2, 1, 5]], dtype=np.int64
)
y = np.array([0, 1, 0, 1])
model.fit(X, y)

model_onnx = convert_sklearn(
model,
"generic univariate select",
Expand Down Expand Up @@ -332,7 +357,7 @@
y = np.array([0, 1, 0, 1])
model.fit(X, y)
model_onnx = convert_sklearn(
model,
"select k best",
[("input", FloatTensorType([None, X.shape[1]]))],
target_opset=TARGET_OPSET,
Expand All @@ -340,6 +365,23 @@
self.assertTrue(model_onnx is not None)
dump_data_and_model(X, model, model_onnx, basename="SklearnSelectKBest")

def test_select_k_best_scaler_logistic_regression_pipeline_float(self):
    """Convert a SelectKBest -> StandardScaler -> LogisticRegression pipeline."""
    model = make_pipeline(SelectKBest(k=3), StandardScaler(), LogisticRegression())

    X = np.array(
        [[1, 2, 3, 1], [0, 3, 1, 4], [3, 5, 6, 1], [1, 2, 1, 5]], dtype=np.float32
    )
    y = np.array([0, 1, 0, 1])
    model.fit(X, y)
    model_onnx = convert_sklearn(
        model,
        "select k best",
        [("input", FloatTensorType([None, X.shape[1]]))],
        target_opset=TARGET_OPSET,
    )
    # assertIsNotNone gives an informative failure message, unlike
    # assertTrue(x is not None) (reported by CodeQL as an imprecise assert).
    self.assertIsNotNone(model_onnx)
    # Use a basename distinct from the plain SelectKBest test so the two
    # tests do not dump their artifacts under the same name.
    dump_data_and_model(
        X,
        model,
        model_onnx,
        basename="SklearnSelectKBestScalerLogisticRegression",
    )

def test_select_percentile_float(self):
model = SelectPercentile()
X = np.array(
Expand Down
Loading