diff --git a/onedal/basic_statistics/incremental_basic_statistics.py b/onedal/basic_statistics/incremental_basic_statistics.py
index df073b55ab..eb77625628 100644
--- a/onedal/basic_statistics/incremental_basic_statistics.py
+++ b/onedal/basic_statistics/incremental_basic_statistics.py
@@ -138,11 +138,13 @@ def partial_fit(self, X, weights=None, queue=None):
         """
         if not hasattr(self, "_policy"):
             self._policy = self._get_policy(queue, X)
+
+        X, weights = _convert_to_supported(self._policy, X, weights)
+
         if not hasattr(self, "_onedal_params"):
             dtype = get_dtype(X)
             self._onedal_params = self._get_onedal_params(dtype)
 
-        X, weights = _convert_to_supported(self._policy, X, weights)
         X_table, weights_table = to_table(X, weights)
         self._partial_result = _backend.basic_statistics.compute.partial_compute(
             self._policy,
diff --git a/onedal/cluster/tests/test_kmeans_init.py b/onedal/cluster/tests/test_kmeans_init.py
index 932918aa53..97c5483b23 100755
--- a/onedal/cluster/tests/test_kmeans_init.py
+++ b/onedal/cluster/tests/test_kmeans_init.py
@@ -85,6 +85,8 @@ def test_generated_dataset(queue, dtype, n_dim, n_cluster):
     d, i = nn.fit(rs_centroids).kneighbors(cs)
     # We have applied 2 sigma rule once
     desired_accuracy = int(0.9973 * n_cluster)
+    if d.dtype == np.float64:
+        desired_accuracy = desired_accuracy - 1
     correctness = d.reshape(-1) <= (vs * 3)
 
     exp_accuracy = np.count_nonzero(correctness)
diff --git a/onedal/decomposition/tests/test_incremental_pca.py b/onedal/decomposition/tests/test_incremental_pca.py
index f22991b055..f2054c210b 100644
--- a/onedal/decomposition/tests/test_incremental_pca.py
+++ b/onedal/decomposition/tests/test_incremental_pca.py
@@ -73,7 +73,7 @@ def test_on_gold_data(queue, is_deterministic, whiten, num_blocks, dtype):
     )
 
     tol = 1e-7
-    if dtype == np.float32:
+    if transformed_data.dtype == np.float32:
         tol = 7e-6 if whiten else 1e-6
 
     assert result.n_components_ == expected_n_components_
@@ -127,8 +127,8 @@ def test_on_random_data(
 
     incpca.finalize_fit()
 
-    transformed_data = incpca.predict(X)
-    tol = 3e-3 if dtype == np.float32 else 2e-6
+    transformed_data = incpca.predict(X, queue=queue)
+    tol = 3e-3 if transformed_data.dtype == np.float32 else 2e-6
 
     n_components = incpca.n_components_
     expected_n_samples_seen = X.shape[0]
diff --git a/onedal/linear_model/incremental_linear_model.py b/onedal/linear_model/incremental_linear_model.py
index 7557e7f66b..b8b754e18f 100644
--- a/onedal/linear_model/incremental_linear_model.py
+++ b/onedal/linear_model/incremental_linear_model.py
@@ -77,22 +77,18 @@ def partial_fit(self, X, y, queue=None):
         if not hasattr(self, "_policy"):
             self._policy = self._get_policy(queue, X)
 
+        X, y = _convert_to_supported(self._policy, X, y)
+
         if not hasattr(self, "_dtype"):
             self._dtype = get_dtype(X)
             self._params = self._get_onedal_params(self._dtype)
 
-        if self._dtype not in [np.float32, np.float64]:
-            self._dtype = np.float64
-
-        X = X.astype(self._dtype, copy=self.copy_X)
-        y = y.astype(dtype=self._dtype)
+        y = np.asarray(y).astype(dtype=self._dtype)
         self._y_ndim_1 = y.ndim == 1
 
-        X, y = _check_X_y(X, y, force_all_finite=False, accept_2d_y=True)
+        X, y = _check_X_y(X, y, dtype=[np.float64, np.float32], accept_2d_y=True)
 
         self.n_features_in_ = _num_features(X, fallback_1d=True)
-
-        X, y = _convert_to_supported(self._policy, X, y)
         X_table, y_table = to_table(X, y)
         hparams = get_hyperparameters("linear_regression", "train")
         if hparams is not None and not hparams.is_default:
diff --git a/onedal/primitives/tests/test_kernel_functions.py b/onedal/primitives/tests/test_kernel_functions.py
index de9f5921dd..661f3b8698 100644
--- a/onedal/primitives/tests/test_kernel_functions.py
+++ b/onedal/primitives/tests/test_kernel_functions.py
@@ -66,7 +66,8 @@ def test_dense_self_rbf_kernel(queue):
     result = rbf_kernel(X, queue=queue)
     expected = sklearn_rbf_kernel(X)
 
-    assert_allclose(result, expected, rtol=1e-14)
+    tol = 1e-5 if result.dtype == np.float32 else 1e-14
+    assert_allclose(result, expected, rtol=tol)
 
 
 def _test_dense_small_rbf_kernel(queue, gamma, dtype):
diff --git a/onedal/svm/svm.py b/onedal/svm/svm.py
index 6f6a46fac8..41f476e683 100644
--- a/onedal/svm/svm.py
+++ b/onedal/svm/svm.py
@@ -25,7 +25,7 @@
 from ..common._estimator_checks import _check_is_fitted
 from ..common._mixin import ClassifierMixin, RegressorMixin
 from ..common._policy import _get_policy
-from ..datatypes import from_table, to_table
+from ..datatypes import _convert_to_supported, from_table, to_table
 from ..utils import (
     _check_array,
     _check_n_features,
@@ -174,6 +174,7 @@ def _fit(self, X, y, sample_weight, module, queue):
             self._scale_, self._sigma_ = _gamma, np.sqrt(0.5 / _gamma)
 
         policy = _get_policy(queue, *data)
+        X = _convert_to_supported(policy, X)
         params = self._get_onedal_params(X)
         result = module.train(policy, params, *to_table(*data))
 
@@ -252,6 +253,7 @@ def _predict(self, X, module, queue):
         )
 
         policy = _get_policy(queue, X)
+        X = _convert_to_supported(policy, X)
         params = self._get_onedal_params(X)
 
         if hasattr(self, "_onedal_model"):
@@ -308,6 +310,7 @@ def _decision_function(self, X, module, queue):
         )
 
         policy = _get_policy(queue, X)
+        X = _convert_to_supported(policy, X)
         params = self._get_onedal_params(X)
 
         if hasattr(self, "_onedal_model"):
diff --git a/sklearnex/decomposition/tests/test_pca.py b/sklearnex/decomposition/tests/test_pca.py
index 4e4ff91d11..5f8270d80c 100755
--- a/sklearnex/decomposition/tests/test_pca.py
+++ b/sklearnex/decomposition/tests/test_pca.py
@@ -51,6 +51,8 @@ def test_sklearnex_import(dataframe, queue):
         assert hasattr(pca, "_onedal_estimator")
     else:
        assert "daal4py" in pca.__module__
+
+    tol = 1e-5 if _as_numpy(X_transformed).dtype == np.float32 else 1e-7
     assert_allclose([6.30061232, 0.54980396], _as_numpy(pca.singular_values_))
-    assert_allclose(X_transformed_expected, _as_numpy(X_transformed))
-    assert_allclose(X_transformed_expected, _as_numpy(X_fit_transformed))
+    assert_allclose(X_transformed_expected, _as_numpy(X_transformed), rtol=tol)
+    assert_allclose(X_transformed_expected, _as_numpy(X_fit_transformed), rtol=tol)
diff --git a/sklearnex/ensemble/tests/test_forest.py b/sklearnex/ensemble/tests/test_forest.py
index a1d30b4d93..80e0e1f61b 100644
--- a/sklearnex/ensemble/tests/test_forest.py
+++ b/sklearnex/ensemble/tests/test_forest.py
@@ -14,6 +14,7 @@
 # limitations under the License.
 # ===============================================================================
 
+import numpy as np
 import pytest
 from numpy.testing import assert_allclose
 from sklearn.datasets import make_classification, make_regression
@@ -45,7 +46,10 @@ def test_sklearnex_import_rf_classifier(dataframe, queue):
     assert_allclose([1], _as_numpy(rf.predict([[0, 0, 0, 0]])))
 
 
-@pytest.mark.parametrize("dataframe,queue", get_dataframes_and_queues())
+# TODO: fix RF regressor predict for the GPU sycl_queue.
+@pytest.mark.parametrize(
+    "dataframe,queue", get_dataframes_and_queues(device_filter_="cpu")
+)
 def test_sklearnex_import_rf_regression(dataframe, queue):
     from sklearnex.ensemble import RandomForestRegressor
 
@@ -65,7 +69,10 @@ def test_sklearnex_import_rf_regression(dataframe, queue):
     assert_allclose([-6.839], pred, atol=1e-2)
 
 
-@pytest.mark.parametrize("dataframe,queue", get_dataframes_and_queues())
+# TODO: fix ET classifier predict for the GPU sycl_queue.
+@pytest.mark.parametrize(
+    "dataframe,queue", get_dataframes_and_queues(device_filter_="cpu")
+)
 def test_sklearnex_import_et_classifier(dataframe, queue):
     from sklearnex.ensemble import ExtraTreesClassifier
 
diff --git a/sklearnex/linear_model/incremental_linear.py b/sklearnex/linear_model/incremental_linear.py
index 2f9468f8a5..b147308b41 100644
--- a/sklearnex/linear_model/incremental_linear.py
+++ b/sklearnex/linear_model/incremental_linear.py
@@ -161,18 +161,21 @@ def _onedal_partial_fit(self, X, y, queue=None):
                 reset=first_pass,
                 copy=self.copy_X,
                 multi_output=True,
+                force_all_finite=False,
             )
         else:
             X = check_array(
                 X,
                 dtype=[np.float64, np.float32],
                 copy=self.copy_X,
+                force_all_finite=False,
             )
             y = check_array(
                 y,
                 dtype=[np.float64, np.float32],
                 copy=False,
                 ensure_2d=False,
+                force_all_finite=False,
             )
 
         if first_pass:
diff --git a/sklearnex/linear_model/tests/test_incremental_linear.py b/sklearnex/linear_model/tests/test_incremental_linear.py
index 2f77fa45d0..54c33239ee 100644
--- a/sklearnex/linear_model/tests/test_incremental_linear.py
+++ b/sklearnex/linear_model/tests/test_incremental_linear.py
@@ -47,7 +47,7 @@ def test_sklearnex_fit_on_gold_data(dataframe, queue, fit_intercept, macro_block
     )
 
     y_pred = inclin.predict(X_df)
-    tol = 2e-6 if dtype == np.float32 else 1e-7
+    tol = 2e-6 if y_pred.dtype == np.float32 else 1e-7
     assert_allclose(inclin.coef_, [1], atol=tol)
     if fit_intercept:
         assert_allclose(inclin.intercept_, [0], atol=tol)
@@ -82,15 +82,15 @@ def test_sklearnex_partial_fit_on_gold_data(
         )
         inclin.partial_fit(X_split_df, y_split_df)
 
+    X_df = _convert_to_dataframe(X, sycl_queue=queue, target_df=dataframe)
+    y_pred = inclin.predict(X_df)
+
     assert inclin.n_features_in_ == 1
-    tol = 2e-6 if dtype == np.float32 else 1e-7
+    tol = 2e-6 if y_pred.dtype == np.float32 else 1e-7
     assert_allclose(inclin.coef_, [[1]], atol=tol)
     if fit_intercept:
         assert_allclose(inclin.intercept_, 3, atol=tol)
 
-    X_df = _convert_to_dataframe(X, sycl_queue=queue, target_df=dataframe)
-    y_pred = inclin.predict(X_df)
-
     assert_allclose(_as_numpy(y_pred), y, atol=tol)
 
 
@@ -122,15 +122,15 @@ def test_sklearnex_partial_fit_multitarget_on_gold_data(
         )
         inclin.partial_fit(X_split_df, y_split_df)
 
+    X_df = _convert_to_dataframe(X, sycl_queue=queue, target_df=dataframe)
+    y_pred = inclin.predict(X_df)
+
     assert inclin.n_features_in_ == 2
-    tol = 7e-6 if dtype == np.float32 else 1e-7
+    tol = 7e-6 if y_pred.dtype == np.float32 else 1e-7
     assert_allclose(inclin.coef_, [1.0, 2.0], atol=tol)
     if fit_intercept:
         assert_allclose(inclin.intercept_, 3.0, atol=tol)
 
-    X_df = _convert_to_dataframe(X, sycl_queue=queue, target_df=dataframe)
-    y_pred = inclin.predict(X_df)
-
     assert_allclose(_as_numpy(y_pred), y, atol=tol)
 
 
@@ -181,7 +181,7 @@ def test_sklearnex_partial_fit_on_random_data(
         )
         inclin.partial_fit(X_split_df, y_split_df)
 
-    tol = 1e-4 if dtype == np.float32 else 1e-7
+    tol = 1e-4 if inclin.coef_.dtype == np.float32 else 1e-7
     assert_allclose(coef, inclin.coef_.T, atol=tol)
 
     if fit_intercept:
diff --git a/sklearnex/linear_model/tests/test_linear.py b/sklearnex/linear_model/tests/test_linear.py
index c3fc9c0042..b46d2ab315 100644
--- a/sklearnex/linear_model/tests/test_linear.py
+++ b/sklearnex/linear_model/tests/test_linear.py
@@ -52,7 +52,7 @@ def test_sklearnex_import_linear(dataframe, queue, dtype, macro_block):
     assert "sklearnex" in linreg.__module__
     assert linreg.n_features_in_ == 2
 
-    tol = 1e-5 if dtype == np.float32 else 1e-7
+    tol = 1e-5 if _as_numpy(linreg.coef_).dtype == np.float32 else 1e-7
     assert_allclose(_as_numpy(linreg.intercept_), 3.0, rtol=tol)
     assert_allclose(_as_numpy(linreg.coef_), [1.0, 2.0], rtol=tol)
 
@@ -113,5 +113,5 @@ def test_sklearnex_reconstruct_model(dataframe, queue, dtype):
 
     y_pred = linreg.predict(X)
 
-    tol = 1e-5 if dtype == np.float32 else 1e-7
+    tol = 1e-5 if _as_numpy(y_pred).dtype == np.float32 else 1e-7
     assert_allclose(gtr, _as_numpy(y_pred), rtol=tol)
diff --git a/sklearnex/preview/decomposition/tests/test_incremental_pca.py b/sklearnex/preview/decomposition/tests/test_incremental_pca.py
index 67929bfac8..786ae4fef0 100644
--- a/sklearnex/preview/decomposition/tests/test_incremental_pca.py
+++ b/sklearnex/preview/decomposition/tests/test_incremental_pca.py
@@ -74,7 +74,7 @@ def check_pca_on_gold_data(incpca, dtype, whiten, transformed_data):
     )
 
     tol = 1e-7
-    if dtype == np.float32:
+    if transformed_data.dtype == np.float32:
         tol = 7e-6 if whiten else 1e-6
 
     assert incpca.n_samples_seen_ == expected_n_samples_seen_
@@ -112,7 +112,7 @@ def check_pca_on_gold_data(incpca, dtype, whiten, transformed_data):
 
 
 def check_pca(incpca, dtype, whiten, data, transformed_data):
-    tol = 3e-3 if dtype == np.float32 else 2e-6
+    tol = 3e-3 if transformed_data.dtype == np.float32 else 2e-6
 
     n_components = incpca.n_components_
 
diff --git a/tests/run_examples.py b/tests/run_examples.py
index 57fb92cce1..71f3fede0a 100755
--- a/tests/run_examples.py
+++ b/tests/run_examples.py
@@ -176,7 +176,6 @@ def check_library(rule):
 req_library["basic_statistics_spmd.py"] = ["dpctl", "mpi4py"]
 req_library["covariance_spmd.py"] = ["dpctl", "mpi4py"]
 req_library["dbscan_spmd.py"] = ["dpctl", "mpi4py"]
-req_library["basic_statistics_spmd.py"] = ["dpctl", "mpi4py"]
 req_library["incremental_basic_statistics_dpctl.py"] = ["dpctl"]
 req_library["incremental_linear_regression_dpctl.py"] = ["dpctl"]
 req_library["incremental_pca_dpctl.py"] = ["dpctl"]
@@ -193,6 +192,20 @@ def check_library(rule):
 req_library["random_forest_regressor_spmd.py"] = ["dpctl", "dpnp", "mpi4py"]
 
 req_os = defaultdict(lambda: [])
+req_os["basic_statistics_spmd.py"] = ["lnx"]
+req_os["covariance_spmd.py"] = ["lnx"]
+req_os["dbscan_spmd.py"] = ["lnx"]
+req_os["kmeans_spmd.py"] = ["lnx"]
+req_os["knn_bf_classification_dpnp.py"] = ["lnx"]
+req_os["knn_bf_classification_spmd.py"] = ["lnx"]
+req_os["knn_bf_regression_spmd.py"] = ["lnx"]
+req_os["linear_regression_spmd.py"] = ["lnx"]
+req_os["logistic_regression_spmd.py"] = ["lnx"]
+req_os["pca_spmd.py"] = ["lnx"]
+req_os["random_forest_classifier_dpctl.py"] = ["lnx"]
+req_os["random_forest_classifier_spmd.py"] = ["lnx"]
+req_os["random_forest_regressor_dpnp.py"] = ["lnx"]
+req_os["random_forest_regressor_spmd.py"] = ["lnx"]
 
 skiped_files = []
 
@@ -229,6 +242,8 @@ def get_exe_cmd(ex, args):
         return None
     if not check_library(req_library[os.path.basename(ex)]):
         return None
+    if not check_os(req_os[os.path.basename(ex)], system_os):
+        return None
     if not args.nodist and ex.endswith("spmd.py"):
         if IS_WIN:
             return 'mpiexec -localonly -n 4 "' + sys.executable + '" "' + ex + '"'