CI: Initial additions for fp32 and windows GPU test support #1778

Merged Jun 10, 2024 (29 commits)

Commits:
767fa2d debug (ethanglaser, Mar 25, 2024)
1c71877 brute force moments cpp (ethanglaser, Mar 26, 2024)
d473b54 remove debug (ethanglaser, Mar 26, 2024)
b16f232 debug (ethanglaser, Mar 26, 2024)
768204d oops (ethanglaser, Mar 26, 2024)
e515416 remove debug (ethanglaser, Mar 26, 2024)
e0e7977 require lnx for spmd examples (ethanglaser, Mar 26, 2024)
079951a tolerance updates (ethanglaser, Mar 26, 2024)
bd702f2 minor threshold revisions (ethanglaser, Mar 27, 2024)
d65a4e0 trying PCA fix (ethanglaser, Mar 27, 2024)
bc802a9 revert last check (ethanglaser, Mar 27, 2024)
faf5574 Merge branch 'main' into dev/eglaser-fp32-support (ethanglaser, Apr 29, 2024)
cc4cc21 address current tolerance/fp64 fails (ethanglaser, Apr 29, 2024)
cd98477 lint (ethanglaser, Apr 29, 2024)
bfbf572 additional small fixes (ethanglaser, Apr 30, 2024)
0d33ff4 minor inclinreg y dtype (ethanglaser, Apr 30, 2024)
23cf7b0 forest test skips (ethanglaser, Apr 30, 2024)
4650e1f skip windows gpu logreg (ethanglaser, Apr 30, 2024)
68cdfd2 logreg and forest adjustments (ethanglaser, Apr 30, 2024)
a3fe427 et regressor gpu skip (ethanglaser, Apr 30, 2024)
3b152a3 lint (ethanglaser, May 1, 2024)
8764370 Update onedal/cluster/tests/test_kmeans_init.py (ethanglaser, May 8, 2024)
47e0f17 remove multiple assert_all_finite calls (ethanglaser, May 21, 2024)
5f8c23f Merge branch 'intel:main' into dev/eglaser-fp32-support (ethanglaser, May 23, 2024)
3030961 Merge branch 'intel:main' into dev/eglaser-fp32-support (ethanglaser, Jun 6, 2024)
aa810ec removing logreg skips due to resolution (ethanglaser, Jun 6, 2024)
59c6e26 add convert_to_supported for svm (ethanglaser, Jun 6, 2024)
769f37b pca dtype derived from results (ethanglaser, Jun 6, 2024)
c4e8912 add forgotten queue (ethanglaser, Jun 6, 2024)
onedal/basic_statistics/incremental_basic_statistics.py (4 changes: 3 additions & 1 deletion)

@@ -138,11 +138,13 @@ def partial_fit(self, X, weights=None, queue=None):
"""
if not hasattr(self, "_policy"):
self._policy = self._get_policy(queue, X)

X, weights = _convert_to_supported(self._policy, X, weights)

if not hasattr(self, "_onedal_params"):
dtype = get_dtype(X)
self._onedal_params = self._get_onedal_params(dtype)

X, weights = _convert_to_supported(self._policy, X, weights)
X_table, weights_table = to_table(X, weights)
self._partial_result = _backend.basic_statistics.compute.partial_compute(
self._policy,
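Note on the ordering change above: on devices without native fp64 support, `_convert_to_supported` may downcast the input, so it must run before `get_dtype` feeds `_get_onedal_params`; otherwise the params describe a dtype the backend never receives. A minimal sketch of the failure mode, with a hypothetical policy object standing in for the real oneDAL one:

```python
import numpy as np

class FakeGpuPolicy:
    """Hypothetical stand-in for a device policy lacking fp64 support."""
    supports_fp64 = False

def convert_to_supported_sketch(policy, X):
    # Rough model of _convert_to_supported: downcast when fp64 is unsupported.
    return X if policy.supports_fp64 else X.astype(np.float32)

X = np.ones((4, 2), dtype=np.float64)
policy = FakeGpuPolicy()

# Old order: dtype is read before conversion, so the params say float64
# while the backend actually receives float32 tables.
dtype_before = X.dtype                      # float64
X = convert_to_supported_sketch(policy, X)  # now float32
assert dtype_before == np.float64 and X.dtype == np.float32

# New order (as in the diff): convert first, then read the dtype,
# so the params match the data handed to the backend.
```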
onedal/cluster/tests/test_kmeans_init.py (2 changes: 2 additions & 0 deletions)

@@ -85,6 +85,8 @@ def test_generated_dataset(queue, dtype, n_dim, n_cluster):
    d, i = nn.fit(rs_centroids).kneighbors(cs)
    # We have applied 2 sigma rule once
    desired_accuracy = int(0.9973 * n_cluster)
+   if d.dtype == np.float64:
+       desired_accuracy = desired_accuracy - 1
md-shafiul-alam (Contributor) commented on Jun 10, 2024:

What was the logic behind the desired accuracy -1?

ethanglaser (Contributor, author) replied:

It matches a minor threshold change added to test_kmeans (https://github.com/intel/scikit-learn-intelex/blob/main/onedal/cluster/tests/test_kmeans.py#L87). Not sure exactly how the desired_accuracy was set, but it seems the threshold doesn't necessarily match up with actual performance.

md-shafiul-alam (Contributor) replied on Jun 10, 2024:

I see. The kmeans++ init issue may get fixed with the changes proposed in uxlfoundation/oneDAL#2796; the onedal code uses 1 trial at the moment.

    correctness = d.reshape(-1) <= (vs * 3)
    exp_accuracy = np.count_nonzero(correctness)

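For context on the thread above: the test counts nearest-centroid distances within three sigma and compares that count against a 0.9973 coverage target (the three-sigma rule), so `desired_accuracy - 1` simply allows one extra outlier. A self-contained sketch of the check, with synthetic distances standing in for the test's data:

```python
import numpy as np

rng = np.random.default_rng(0)
n_cluster, vs = 512, 1.0  # vs stands in for the per-cluster deviation scale

# Synthetic stand-in for d: absolute normal deviations around centroids.
d = np.abs(rng.normal(scale=vs, size=n_cluster))

desired_accuracy = int(0.9973 * n_cluster)  # ~99.73% expected within 3 sigma
if d.dtype == np.float64:
    desired_accuracy = desired_accuracy - 1  # the relaxation added in this PR

correctness = d.reshape(-1) <= (vs * 3)
exp_accuracy = np.count_nonzero(correctness)
print(exp_accuracy, "of", n_cluster, "within 3 sigma; target:", desired_accuracy)
```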
onedal/decomposition/tests/test_incremental_pca.py (6 changes: 3 additions & 3 deletions)

@@ -73,7 +73,7 @@ def test_on_gold_data(queue, is_deterministic, whiten, num_blocks, dtype):
    )

    tol = 1e-7
-   if dtype == np.float32:
+   if transformed_data.dtype == np.float32:
        tol = 7e-6 if whiten else 1e-6

    assert result.n_components_ == expected_n_components_
@@ -127,8 +127,8 @@ def test_on_random_data(

    incpca.finalize_fit()

-   transformed_data = incpca.predict(X)
-   tol = 3e-3 if dtype == np.float32 else 2e-6
+   transformed_data = incpca.predict(X, queue=queue)
+   tol = 3e-3 if transformed_data.dtype == np.float32 else 2e-6

    n_components = incpca.n_components_
    expected_n_samples_seen = X.shape[0]
onedal/linear_model/incremental_linear_model.py (12 changes: 4 additions & 8 deletions)

@@ -77,22 +77,18 @@ def partial_fit(self, X, y, queue=None):
if not hasattr(self, "_policy"):
self._policy = self._get_policy(queue, X)

X, y = _convert_to_supported(self._policy, X, y)

if not hasattr(self, "_dtype"):
self._dtype = get_dtype(X)
self._params = self._get_onedal_params(self._dtype)

if self._dtype not in [np.float32, np.float64]:
self._dtype = np.float64

X = X.astype(self._dtype, copy=self.copy_X)
y = y.astype(dtype=self._dtype)
y = np.asarray(y).astype(dtype=self._dtype)
self._y_ndim_1 = y.ndim == 1

X, y = _check_X_y(X, y, force_all_finite=False, accept_2d_y=True)
X, y = _check_X_y(X, y, dtype=[np.float64, np.float32], accept_2d_y=True)

self.n_features_in_ = _num_features(X, fallback_1d=True)

X, y = _convert_to_supported(self._policy, X, y)
X_table, y_table = to_table(X, y)
hparams = get_hyperparameters("linear_regression", "train")
if hparams is not None and not hparams.is_default:
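One detail in this hunk: `np.asarray(y).astype(...)` replaces `y.astype(...)`, which only works when `y` is already an ndarray; wrapping with `np.asarray` also accepts plain Python sequences (a plausible motivation, matching the 'minor inclinreg y dtype' commit). A quick illustration:

```python
import numpy as np

y = [1.0, 2.0, 3.0]           # a plain list, as a caller might pass it
# y.astype(np.float32)        # AttributeError: lists have no .astype
y = np.asarray(y).astype(dtype=np.float32)
print(y.dtype)                # float32, for list and ndarray inputs alike
```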
onedal/primitives/tests/test_kernel_functions.py (3 changes: 2 additions & 1 deletion)

@@ -66,7 +66,8 @@ def test_dense_self_rbf_kernel(queue):
    result = rbf_kernel(X, queue=queue)
    expected = sklearn_rbf_kernel(X)

-   assert_allclose(result, expected, rtol=1e-14)
+   tol = 1e-5 if result.dtype == np.float32 else 1e-14
Contributor comment:

Yikes, a 10^9 change in performance. Would this also warrant an investigation?

ethanglaser (Contributor, author) replied:

I am not sure why 1e-14 was used here specifically; generally we don't go stricter than 1e-7. For rtol, 1e-5 should be acceptable, as this is a typical threshold used in other fp32 testing.

+   assert_allclose(result, expected, rtol=tol)


def _test_dense_small_rbf_kernel(queue, gamma, dtype):
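The tolerance discussion above is ultimately about machine precision: float32 carries roughly 7 significant decimal digits (eps ~1.19e-7), so rtol=1e-14 can only ever hold for float64 results. A small demonstration of the dtype-dependent tolerance pattern used throughout this PR:

```python
import numpy as np
from numpy.testing import assert_allclose

print(np.finfo(np.float32).eps)   # ~1.19e-07
print(np.finfo(np.float64).eps)   # ~2.22e-16

expected = np.exp(-np.linspace(0.0, 1.0, 5))   # float64 reference values
result = expected.astype(np.float32)           # simulate an fp32 computation

tol = 1e-5 if result.dtype == np.float32 else 1e-14
assert_allclose(result, expected, rtol=tol)    # passes with rtol=1e-5
# assert_allclose(result, expected, rtol=1e-14)  # would fail for float32
```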
onedal/svm/svm.py (5 changes: 4 additions & 1 deletion)

@@ -25,7 +25,7 @@
from ..common._estimator_checks import _check_is_fitted
from ..common._mixin import ClassifierMixin, RegressorMixin
from ..common._policy import _get_policy
-from ..datatypes import from_table, to_table
+from ..datatypes import _convert_to_supported, from_table, to_table
from ..utils import (
_check_array,
_check_n_features,
@@ -174,6 +174,7 @@ def _fit(self, X, y, sample_weight, module, queue):
        self._scale_, self._sigma_ = _gamma, np.sqrt(0.5 / _gamma)

        policy = _get_policy(queue, *data)
+       X = _convert_to_supported(policy, X)
        params = self._get_onedal_params(X)
        result = module.train(policy, params, *to_table(*data))

@@ -252,6 +253,7 @@ def _predict(self, X, module, queue):
        )

        policy = _get_policy(queue, X)
+       X = _convert_to_supported(policy, X)
        params = self._get_onedal_params(X)

if hasattr(self, "_onedal_model"):
@@ -308,6 +310,7 @@ def _decision_function(self, X, module, queue):
        )

        policy = _get_policy(queue, X)
+       X = _convert_to_supported(policy, X)
        params = self._get_onedal_params(X)

if hasattr(self, "_onedal_model"):
sklearnex/decomposition/tests/test_pca.py (6 changes: 4 additions & 2 deletions)

@@ -51,6 +51,8 @@ def test_sklearnex_import(dataframe, queue):
        assert hasattr(pca, "_onedal_estimator")
    else:
        assert "daal4py" in pca.__module__
+
+   tol = 1e-5 if _as_numpy(X_transformed).dtype == np.float32 else 1e-7
    assert_allclose([6.30061232, 0.54980396], _as_numpy(pca.singular_values_))
-   assert_allclose(X_transformed_expected, _as_numpy(X_transformed))
-   assert_allclose(X_transformed_expected, _as_numpy(X_fit_transformed))
+   assert_allclose(X_transformed_expected, _as_numpy(X_transformed), rtol=tol)
+   assert_allclose(X_transformed_expected, _as_numpy(X_fit_transformed), rtol=tol)
sklearnex/ensemble/tests/test_forest.py (11 changes: 9 additions & 2 deletions)

@@ -14,6 +14,7 @@
# limitations under the License.
# ===============================================================================

+import numpy as np
import pytest
from numpy.testing import assert_allclose
from sklearn.datasets import make_classification, make_regression
@@ -45,7 +46,10 @@
assert_allclose([1], _as_numpy(rf.predict([[0, 0, 0, 0]])))


@pytest.mark.parametrize("dataframe,queue", get_dataframes_and_queues())
# TODO: fix RF regressor predict for the GPU sycl_queue.

Check notice on line 49 in sklearnex/ensemble/tests/test_forest.py

View check run for this annotation

codefactor.io / CodeFactor

sklearnex/ensemble/tests/test_forest.py#L49

unresolved comment '# TODO: fix RF regressor predict for the GPU sycl_queue.' (C100)
@pytest.mark.parametrize(
"dataframe,queue", get_dataframes_and_queues(device_filter_="cpu")
)
def test_sklearnex_import_rf_regression(dataframe, queue):
from sklearnex.ensemble import RandomForestRegressor

@@ -63,9 +67,12 @@
assert_allclose([-6.971], pred, atol=1e-2)
else:
assert_allclose([-6.839], pred, atol=1e-2)

[CodeFactor notice, sklearnex/ensemble/tests/test_forest.py line 70: unresolved comment '# TODO: fix ET classifier predict for the GPU sycl_queue.' (C100)]

@pytest.mark.parametrize("dataframe,queue", get_dataframes_and_queues())
# TODO: fix ET classifier predict for the GPU sycl_queue.
@pytest.mark.parametrize(
"dataframe,queue", get_dataframes_and_queues(device_filter_="cpu")
)
def test_sklearnex_import_et_classifier(dataframe, queue):
from sklearnex.ensemble import ExtraTreesClassifier

@@ -81,7 +88,7 @@
y = _convert_to_dataframe(y, sycl_queue=queue, target_df=dataframe)
# For the 2023.2 release, random_state is not supported
# defaults to seed=777, although it is set to 0
rf = ExtraTreesClassifier(max_depth=2, random_state=0).fit(X, y)

[CodeFactor notice, sklearnex/ensemble/tests/test_forest.py line 91: unresolved comment '# TODO: fix ET regressor predict for the GPU sycl_queue.' (C100)]
assert "sklearnex" in rf.__module__
assert_allclose([1], _as_numpy(rf.predict([[0, 0, 0, 0]])))

sklearnex/linear_model/incremental_linear.py (3 changes: 3 additions & 0 deletions)

@@ -161,18 +161,21 @@ def _onedal_partial_fit(self, X, y, queue=None):
                reset=first_pass,
                copy=self.copy_X,
                multi_output=True,
+               force_all_finite=False,
            )
        else:
            X = check_array(
                X,
                dtype=[np.float64, np.float32],
                copy=self.copy_X,
+               force_all_finite=False,
            )
            y = check_array(
                y,
                dtype=[np.float64, np.float32],
                copy=False,
                ensure_2d=False,
+               force_all_finite=False,
            )

if first_pass:
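Context for the `force_all_finite=False` additions: one of this PR's commits removes repeated `assert_all_finite` calls, and relaxing sklearn's validation here avoids re-checking finiteness on every incremental call. A sketch of what the flag changes, using sklearn's public `check_array`:

```python
import numpy as np
from sklearn.utils import check_array

X = np.array([[1.0, np.nan], [2.0, 3.0]])

check_array(X, force_all_finite=False)   # accepted: finiteness not enforced
try:
    check_array(X)                       # default force_all_finite=True
except ValueError as err:
    print(err)                           # reports that the input contains NaN
```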
sklearnex/linear_model/tests/test_incremental_linear.py (20 changes: 10 additions & 10 deletions)

@@ -47,7 +47,7 @@ def test_sklearnex_fit_on_gold_data(dataframe, queue, fit_intercept, macro_block

    y_pred = inclin.predict(X_df)

-   tol = 2e-6 if dtype == np.float32 else 1e-7
+   tol = 2e-6 if y_pred.dtype == np.float32 else 1e-7
    assert_allclose(inclin.coef_, [1], atol=tol)
    if fit_intercept:
        assert_allclose(inclin.intercept_, [0], atol=tol)
@@ -82,15 +82,15 @@ def test_sklearnex_partial_fit_on_gold_data(
    )
    inclin.partial_fit(X_split_df, y_split_df)

+   X_df = _convert_to_dataframe(X, sycl_queue=queue, target_df=dataframe)
+   y_pred = inclin.predict(X_df)
+
    assert inclin.n_features_in_ == 1
-   tol = 2e-6 if dtype == np.float32 else 1e-7
+   tol = 2e-6 if y_pred.dtype == np.float32 else 1e-7
    assert_allclose(inclin.coef_, [[1]], atol=tol)
    if fit_intercept:
        assert_allclose(inclin.intercept_, 3, atol=tol)

-   X_df = _convert_to_dataframe(X, sycl_queue=queue, target_df=dataframe)
-   y_pred = inclin.predict(X_df)
-
    assert_allclose(_as_numpy(y_pred), y, atol=tol)


@@ -122,15 +122,15 @@ def test_sklearnex_partial_fit_multitarget_on_gold_data(
    )
    inclin.partial_fit(X_split_df, y_split_df)

+   X_df = _convert_to_dataframe(X, sycl_queue=queue, target_df=dataframe)
+   y_pred = inclin.predict(X_df)
+
    assert inclin.n_features_in_ == 2
-   tol = 7e-6 if dtype == np.float32 else 1e-7
+   tol = 7e-6 if y_pred.dtype == np.float32 else 1e-7
    assert_allclose(inclin.coef_, [1.0, 2.0], atol=tol)
    if fit_intercept:
        assert_allclose(inclin.intercept_, 3.0, atol=tol)

-   X_df = _convert_to_dataframe(X, sycl_queue=queue, target_df=dataframe)
-   y_pred = inclin.predict(X_df)
-
    assert_allclose(_as_numpy(y_pred), y, atol=tol)


@@ -181,7 +181,7 @@ def test_sklearnex_partial_fit_on_random_data(
    )
    inclin.partial_fit(X_split_df, y_split_df)

-   tol = 1e-4 if dtype == np.float32 else 1e-7
+   tol = 1e-4 if inclin.coef_.dtype == np.float32 else 1e-7
    assert_allclose(coef, inclin.coef_.T, atol=tol)

    if fit_intercept:
sklearnex/linear_model/tests/test_linear.py (4 changes: 2 additions & 2 deletions)

@@ -52,7 +52,7 @@ def test_sklearnex_import_linear(dataframe, queue, dtype, macro_block):
assert "sklearnex" in linreg.__module__
assert linreg.n_features_in_ == 2

tol = 1e-5 if dtype == np.float32 else 1e-7
tol = 1e-5 if _as_numpy(linreg.coef_).dtype == np.float32 else 1e-7
assert_allclose(_as_numpy(linreg.intercept_), 3.0, rtol=tol)
assert_allclose(_as_numpy(linreg.coef_), [1.0, 2.0], rtol=tol)

@@ -113,5 +113,5 @@ def test_sklearnex_reconstruct_model(dataframe, queue, dtype):

    y_pred = linreg.predict(X)

-   tol = 1e-5 if dtype == np.float32 else 1e-7
+   tol = 1e-5 if _as_numpy(y_pred).dtype == np.float32 else 1e-7
    assert_allclose(gtr, _as_numpy(y_pred), rtol=tol)
(file header not captured for this diff)

@@ -74,7 +74,7 @@ def check_pca_on_gold_data(incpca, dtype, whiten, transformed_data):
    )

    tol = 1e-7
-   if dtype == np.float32:
+   if transformed_data.dtype == np.float32:
        tol = 7e-6 if whiten else 1e-6

    assert incpca.n_samples_seen_ == expected_n_samples_seen_
@@ -112,7 +112,7 @@ def check_pca_on_gold_data(incpca, dtype, whiten, transformed_data):


def check_pca(incpca, dtype, whiten, data, transformed_data):
-   tol = 3e-3 if dtype == np.float32 else 2e-6
+   tol = 3e-3 if transformed_data.dtype == np.float32 else 2e-6

    n_components = incpca.n_components_

tests/run_examples.py (17 changes: 16 additions & 1 deletion)

@@ -176,7 +176,6 @@ def check_library(rule):
req_library["basic_statistics_spmd.py"] = ["dpctl", "mpi4py"]
req_library["covariance_spmd.py"] = ["dpctl", "mpi4py"]
req_library["dbscan_spmd.py"] = ["dpctl", "mpi4py"]
req_library["basic_statistics_spmd.py"] = ["dpctl", "mpi4py"]
req_library["incremental_basic_statistics_dpctl.py"] = ["dpctl"]
req_library["incremental_linear_regression_dpctl.py"] = ["dpctl"]
req_library["incremental_pca_dpctl.py"] = ["dpctl"]
@@ -193,6 +192,20 @@ def check_library(rule):
req_library["random_forest_regressor_spmd.py"] = ["dpctl", "dpnp", "mpi4py"]

req_os = defaultdict(lambda: [])
req_os["basic_statistics_spmd.py"] = ["lnx"]
req_os["covariance_spmd.py"] = ["lnx"]
req_os["dbscan_spmd.py"] = ["lnx"]
req_os["kmeans_spmd.py"] = ["lnx"]
req_os["knn_bf_classification_dpnp.py"] = ["lnx"]
req_os["knn_bf_classification_spmd.py"] = ["lnx"]
req_os["knn_bf_regression_spmd.py"] = ["lnx"]
req_os["linear_regression_spmd.py"] = ["lnx"]
req_os["logistic_regression_spmd.py"] = ["lnx"]
req_os["pca_spmd.py"] = ["lnx"]
req_os["random_forest_classifier_dpctl.py"] = ["lnx"]
req_os["random_forest_classifier_spmd.py"] = ["lnx"]
req_os["random_forest_regressor_dpnp.py"] = ["lnx"]
req_os["random_forest_regressor_spmd.py"] = ["lnx"]

skiped_files = []

@@ -229,6 +242,8 @@ def get_exe_cmd(ex, args):
        return None
    if not check_library(req_library[os.path.basename(ex)]):
        return None
+   if not check_os(req_os[os.path.basename(ex)], system_os):
+       return None
    if not args.nodist and ex.endswith("spmd.py"):
        if IS_WIN:
            return 'mpiexec -localonly -n 4 "' + sys.executable + '" "' + ex + '"'
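`check_os` itself is defined outside this hunk, so its exact implementation isn't shown; given the call site and the `req_os` defaultdict above, a plausible shape is:

```python
# Hypothetical sketch only: an empty rule (the defaultdict default) means the
# example runs on any OS; otherwise the current OS token must be listed.
def check_os(rule, system_os):
    return (not rule) or (system_os in rule)

print(check_os([], "win"))       # True: no OS restriction
print(check_os(["lnx"], "win"))  # False: spmd examples restricted to Linux
```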