CI: Initial additions for fp32 and windows GPU test support (#1778)
* debug

* brute force moments cpp

* remove debug

* debug

* oops

* remove debug

* require lnx for spmd examples

* tolerance updates

* minor threshold revisions

* trying PCA fix

* revert last check

* address current tolerance/fp64 fails

* lint

* additional small fixes

* minor inclinreg y dtype

* forest test skips

* skip windows gpu logreg

* logreg and forest adjustments

* et regressor gpu skip

* lint

* Update onedal/cluster/tests/test_kmeans_init.py

Co-authored-by: Samir Nasibli <[email protected]>

* remove multiple assert_all_finite calls

* removing logreg skips due to resolution

* add convert_to_supported for svm

* pca dtype derived from results

* add forgotten queue

---------

Co-authored-by: Samir Nasibli <[email protected]>
ethanglaser and samir-nasibli authored Jun 10, 2024
1 parent 9644eb4 commit 563c65a
Showing 13 changed files with 64 additions and 33 deletions.
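A pattern repeated throughout the test changes below: comparison tolerances are now derived from the dtype of the computed result rather than from the dtype the test requested, since a GPU queue without fp64 support computes and returns fp32. A minimal sketch of the pattern, with dtype_tol as a hypothetical helper that is not part of the repository:

    import numpy as np
    from numpy.testing import assert_allclose

    def dtype_tol(result, tol32=1e-5, tol64=1e-7):
        # Use the looser bound when the result actually came back as fp32,
        # regardless of the dtype the caller originally asked for.
        return tol32 if np.asarray(result).dtype == np.float32 else tol64

    expected = np.array([1.0, 2.0, 3.0])
    result = expected.astype(np.float32) + 3e-6  # simulate fp32 round-off
    assert_allclose(result, expected, rtol=dtype_tol(result))  # passes at 1e-5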
4 changes: 3 additions & 1 deletion onedal/basic_statistics/incremental_basic_statistics.py
@@ -138,11 +138,13 @@ def partial_fit(self, X, weights=None, queue=None):
         """
         if not hasattr(self, "_policy"):
             self._policy = self._get_policy(queue, X)
+
+        X, weights = _convert_to_supported(self._policy, X, weights)
+
         if not hasattr(self, "_onedal_params"):
             dtype = get_dtype(X)
             self._onedal_params = self._get_onedal_params(dtype)
 
-        X, weights = _convert_to_supported(self._policy, X, weights)
         X_table, weights_table = to_table(X, weights)
         self._partial_result = _backend.basic_statistics.compute.partial_compute(
             self._policy,
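The reorder above matters because _convert_to_supported may downcast fp64 input when the device cannot compute in double precision; reading the dtype before conversion would build oneDAL parameters for a precision the data no longer has. A standalone sketch of the hazard, with convert_to_supported as a hypothetical stand-in for the real helper:

    import numpy as np

    def convert_to_supported(x, device_supports_fp64):
        # Stand-in for onedal's _convert_to_supported: downcast to fp32
        # when the device lacks fp64 support.
        return x if device_supports_fp64 else x.astype(np.float32)

    X = np.ones((4, 2), dtype=np.float64)
    dtype_before = X.dtype                            # fp64, the old code's view
    X = convert_to_supported(X, device_supports_fp64=False)
    assert dtype_before != X.dtype                    # the mismatch this commit removes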
2 changes: 2 additions & 0 deletions onedal/cluster/tests/test_kmeans_init.py
@@ -85,6 +85,8 @@ def test_generated_dataset(queue, dtype, n_dim, n_cluster):
     d, i = nn.fit(rs_centroids).kneighbors(cs)
     # We have applied 2 sigma rule once
     desired_accuracy = int(0.9973 * n_cluster)
+    if d.dtype == np.float64:
+        desired_accuracy = desired_accuracy - 1
     correctness = d.reshape(-1) <= (vs * 3)
     exp_accuracy = np.count_nonzero(correctness)
 
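For reference, 0.9973 is the fraction of a normal distribution lying within three standard deviations, so the test requires a nearest-neighbor match for almost every generated centroid; the new branch permits one extra miss when the distances come back as fp64. Worked through with an illustrative cluster count (not a value taken from the test):

    import numpy as np

    n_cluster = 32                              # illustrative value
    desired_accuracy = int(0.9973 * n_cluster)  # three-sigma mass -> 31
    d = np.zeros(n_cluster, dtype=np.float64)   # stand-in for the distances
    if d.dtype == np.float64:
        desired_accuracy -= 1                   # relaxed bound -> 30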
6 changes: 3 additions & 3 deletions onedal/decomposition/tests/test_incremental_pca.py
@@ -73,7 +73,7 @@ def test_on_gold_data(queue, is_deterministic, whiten, num_blocks, dtype):
     )
 
     tol = 1e-7
-    if dtype == np.float32:
+    if transformed_data.dtype == np.float32:
         tol = 7e-6 if whiten else 1e-6
 
     assert result.n_components_ == expected_n_components_
@@ -127,8 +127,8 @@ def test_on_random_data(
 
     incpca.finalize_fit()
 
-    transformed_data = incpca.predict(X)
-    tol = 3e-3 if dtype == np.float32 else 2e-6
+    transformed_data = incpca.predict(X, queue=queue)
+    tol = 3e-3 if transformed_data.dtype == np.float32 else 2e-6
 
     n_components = incpca.n_components_
     expected_n_samples_seen = X.shape[0]
12 changes: 4 additions & 8 deletions onedal/linear_model/incremental_linear_model.py
@@ -77,22 +77,18 @@ def partial_fit(self, X, y, queue=None):
         if not hasattr(self, "_policy"):
             self._policy = self._get_policy(queue, X)
 
-        X, y = _convert_to_supported(self._policy, X, y)
-
         if not hasattr(self, "_dtype"):
             self._dtype = get_dtype(X)
             self._params = self._get_onedal_params(self._dtype)
 
-        if self._dtype not in [np.float32, np.float64]:
-            self._dtype = np.float64
-
-        X = X.astype(self._dtype, copy=self.copy_X)
-        y = y.astype(dtype=self._dtype)
+        y = np.asarray(y).astype(dtype=self._dtype)
         self._y_ndim_1 = y.ndim == 1
 
-        X, y = _check_X_y(X, y, force_all_finite=False, accept_2d_y=True)
+        X, y = _check_X_y(X, y, dtype=[np.float64, np.float32], accept_2d_y=True)
 
         self.n_features_in_ = _num_features(X, fallback_1d=True)
 
+        X, y = _convert_to_supported(self._policy, X, y)
         X_table, y_table = to_table(X, y)
         hparams = get_hyperparameters("linear_regression", "train")
         if hparams is not None and not hparams.is_default:
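Two details in this hunk are easy to miss: y passes through np.asarray before astype so that plain Python sequences work, and dtype enforcement moves into _check_X_y instead of ad-hoc casts. A standalone sketch of the target coercion, with coerce_targets as a hypothetical name:

    import numpy as np

    def coerce_targets(y, dtype=np.float64):
        # np.asarray first: lists and tuples have no .astype of their own
        return np.asarray(y).astype(dtype=dtype)

    print(coerce_targets([1, 2, 3]).dtype)               # float64
    print(coerce_targets((1.5, 2.5), np.float32).dtype)  # float32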
3 changes: 2 additions & 1 deletion onedal/primitives/tests/test_kernel_functions.py
@@ -66,7 +66,8 @@ def test_dense_self_rbf_kernel(queue):
     result = rbf_kernel(X, queue=queue)
     expected = sklearn_rbf_kernel(X)
 
-    assert_allclose(result, expected, rtol=1e-14)
+    tol = 1e-5 if result.dtype == np.float32 else 1e-14
+    assert_allclose(result, expected, rtol=tol)
 
 
 def _test_dense_small_rbf_kernel(queue, gamma, dtype):
5 changes: 4 additions & 1 deletion onedal/svm/svm.py
@@ -25,7 +25,7 @@
 from ..common._estimator_checks import _check_is_fitted
 from ..common._mixin import ClassifierMixin, RegressorMixin
 from ..common._policy import _get_policy
-from ..datatypes import from_table, to_table
+from ..datatypes import _convert_to_supported, from_table, to_table
 from ..utils import (
     _check_array,
     _check_n_features,
@@ -174,6 +174,7 @@ def _fit(self, X, y, sample_weight, module, queue):
             self._scale_, self._sigma_ = _gamma, np.sqrt(0.5 / _gamma)
 
         policy = _get_policy(queue, *data)
+        X = _convert_to_supported(policy, X)
         params = self._get_onedal_params(X)
         result = module.train(policy, params, *to_table(*data))
 
@@ -252,6 +253,7 @@ def _predict(self, X, module, queue):
             )
 
         policy = _get_policy(queue, X)
+        X = _convert_to_supported(policy, X)
         params = self._get_onedal_params(X)
 
         if hasattr(self, "_onedal_model"):
@@ -308,6 +310,7 @@ def _decision_function(self, X, module, queue):
             )
 
         policy = _get_policy(queue, X)
+        X = _convert_to_supported(policy, X)
         params = self._get_onedal_params(X)
 
         if hasattr(self, "_onedal_model"):
6 changes: 4 additions & 2 deletions sklearnex/decomposition/tests/test_pca.py
@@ -51,6 +51,8 @@ def test_sklearnex_import(dataframe, queue):
         assert hasattr(pca, "_onedal_estimator")
     else:
         assert "daal4py" in pca.__module__
+
+    tol = 1e-5 if _as_numpy(X_transformed).dtype == np.float32 else 1e-7
     assert_allclose([6.30061232, 0.54980396], _as_numpy(pca.singular_values_))
-    assert_allclose(X_transformed_expected, _as_numpy(X_transformed))
-    assert_allclose(X_transformed_expected, _as_numpy(X_fit_transformed))
+    assert_allclose(X_transformed_expected, _as_numpy(X_transformed), rtol=tol)
+    assert_allclose(X_transformed_expected, _as_numpy(X_fit_transformed), rtol=tol)
11 changes: 9 additions & 2 deletions sklearnex/ensemble/tests/test_forest.py
@@ -14,6 +14,7 @@
 # limitations under the License.
 # ===============================================================================
 
+import numpy as np
 import pytest
 from numpy.testing import assert_allclose
 from sklearn.datasets import make_classification, make_regression
@@ -45,7 +46,10 @@ def test_sklearnex_import_rf_classifier(dataframe, queue):
     assert_allclose([1], _as_numpy(rf.predict([[0, 0, 0, 0]])))
 
 
-@pytest.mark.parametrize("dataframe,queue", get_dataframes_and_queues())
+# TODO: fix RF regressor predict for the GPU sycl_queue.
+@pytest.mark.parametrize(
+    "dataframe,queue", get_dataframes_and_queues(device_filter_="cpu")
+)
 def test_sklearnex_import_rf_regression(dataframe, queue):
     from sklearnex.ensemble import RandomForestRegressor
 
@@ -65,7 +69,10 @@ def test_sklearnex_import_rf_regression(dataframe, queue):
     assert_allclose([-6.839], pred, atol=1e-2)
 
 
-@pytest.mark.parametrize("dataframe,queue", get_dataframes_and_queues())
+# TODO: fix ET classifier predict for the GPU sycl_queue.
+@pytest.mark.parametrize(
+    "dataframe,queue", get_dataframes_and_queues(device_filter_="cpu")
+)
 def test_sklearnex_import_et_classifier(dataframe, queue):
     from sklearnex.ensemble import ExtraTreesClassifier
 
3 changes: 3 additions & 0 deletions sklearnex/linear_model/incremental_linear.py
@@ -167,18 +167,21 @@ def _onedal_partial_fit(self, X, y, queue=None):
                 reset=first_pass,
                 copy=self.copy_X,
                 multi_output=True,
+                force_all_finite=False,
             )
         else:
             X = check_array(
                 X,
                 dtype=[np.float64, np.float32],
                 copy=self.copy_X,
+                force_all_finite=False,
             )
             y = check_array(
                 y,
                 dtype=[np.float64, np.float32],
                 copy=False,
                 ensure_2d=False,
+                force_all_finite=False,
             )
 
         if first_pass:
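Passing force_all_finite=False tells scikit-learn's validators to skip the NaN/inf scan at this layer, which pairs with the commit note about removing repeated assert_all_finite calls: finiteness is not re-checked on every partial_fit batch. A quick illustration using scikit-learn's public check_array:

    import numpy as np
    from sklearn.utils import check_array

    X = np.array([[1.0], [np.nan]])
    check_array(X, force_all_finite=False)  # accepted: finiteness not enforced here
    # check_array(X)                        # the default scan would raise on the NaN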
20 changes: 10 additions & 10 deletions sklearnex/linear_model/tests/test_incremental_linear.py
@@ -47,7 +47,7 @@ def test_sklearnex_fit_on_gold_data(dataframe, queue, fit_intercept, macro_block
 
     y_pred = inclin.predict(X_df)
 
-    tol = 2e-6 if dtype == np.float32 else 1e-7
+    tol = 2e-6 if y_pred.dtype == np.float32 else 1e-7
     assert_allclose(inclin.coef_, [1], atol=tol)
     if fit_intercept:
         assert_allclose(inclin.intercept_, [0], atol=tol)
@@ -82,15 +82,15 @@ def test_sklearnex_partial_fit_on_gold_data(
     )
     inclin.partial_fit(X_split_df, y_split_df)
 
+    X_df = _convert_to_dataframe(X, sycl_queue=queue, target_df=dataframe)
+    y_pred = inclin.predict(X_df)
+
     assert inclin.n_features_in_ == 1
-    tol = 2e-6 if dtype == np.float32 else 1e-7
+    tol = 2e-6 if y_pred.dtype == np.float32 else 1e-7
     assert_allclose(inclin.coef_, [[1]], atol=tol)
     if fit_intercept:
         assert_allclose(inclin.intercept_, 3, atol=tol)
 
-    X_df = _convert_to_dataframe(X, sycl_queue=queue, target_df=dataframe)
-    y_pred = inclin.predict(X_df)
-
     assert_allclose(_as_numpy(y_pred), y, atol=tol)
 
 
@@ -122,15 +122,15 @@ def test_sklearnex_partial_fit_multitarget_on_gold_data(
     )
     inclin.partial_fit(X_split_df, y_split_df)
 
+    X_df = _convert_to_dataframe(X, sycl_queue=queue, target_df=dataframe)
+    y_pred = inclin.predict(X_df)
+
     assert inclin.n_features_in_ == 2
-    tol = 7e-6 if dtype == np.float32 else 1e-7
+    tol = 7e-6 if y_pred.dtype == np.float32 else 1e-7
     assert_allclose(inclin.coef_, [1.0, 2.0], atol=tol)
     if fit_intercept:
         assert_allclose(inclin.intercept_, 3.0, atol=tol)
 
-    X_df = _convert_to_dataframe(X, sycl_queue=queue, target_df=dataframe)
-    y_pred = inclin.predict(X_df)
-
     assert_allclose(_as_numpy(y_pred), y, atol=tol)
 
 
@@ -181,7 +181,7 @@ def test_sklearnex_partial_fit_on_random_data(
     )
     inclin.partial_fit(X_split_df, y_split_df)
 
-    tol = 1e-4 if dtype == np.float32 else 1e-7
+    tol = 1e-4 if inclin.coef_.dtype == np.float32 else 1e-7
     assert_allclose(coef, inclin.coef_.T, atol=tol)
 
     if fit_intercept:
4 changes: 2 additions & 2 deletions sklearnex/linear_model/tests/test_linear.py
@@ -52,7 +52,7 @@ def test_sklearnex_import_linear(dataframe, queue, dtype, macro_block):
     assert "sklearnex" in linreg.__module__
     assert linreg.n_features_in_ == 2
 
-    tol = 1e-5 if dtype == np.float32 else 1e-7
+    tol = 1e-5 if _as_numpy(linreg.coef_).dtype == np.float32 else 1e-7
     assert_allclose(_as_numpy(linreg.intercept_), 3.0, rtol=tol)
     assert_allclose(_as_numpy(linreg.coef_), [1.0, 2.0], rtol=tol)
 
@@ -113,5 +113,5 @@ def test_sklearnex_reconstruct_model(dataframe, queue, dtype):
 
     y_pred = linreg.predict(X)
 
-    tol = 1e-5 if dtype == np.float32 else 1e-7
+    tol = 1e-5 if _as_numpy(y_pred).dtype == np.float32 else 1e-7
     assert_allclose(gtr, _as_numpy(y_pred), rtol=tol)
4 changes: 2 additions & 2 deletions sklearnex/preview/decomposition/tests/test_incremental_pca.py
@@ -74,7 +74,7 @@ def check_pca_on_gold_data(incpca, dtype, whiten, transformed_data):
     )
 
     tol = 1e-7
-    if dtype == np.float32:
+    if transformed_data.dtype == np.float32:
         tol = 7e-6 if whiten else 1e-6
 
     assert incpca.n_samples_seen_ == expected_n_samples_seen_
@@ -112,7 +112,7 @@ def check_pca_on_gold_data(incpca, dtype, whiten, transformed_data):
 
 
 def check_pca(incpca, dtype, whiten, data, transformed_data):
-    tol = 3e-3 if dtype == np.float32 else 2e-6
+    tol = 3e-3 if transformed_data.dtype == np.float32 else 2e-6
 
     n_components = incpca.n_components_
 
17 changes: 16 additions & 1 deletion tests/run_examples.py
@@ -176,7 +176,6 @@ def check_library(rule):
 req_library["basic_statistics_spmd.py"] = ["dpctl", "mpi4py"]
 req_library["covariance_spmd.py"] = ["dpctl", "mpi4py"]
 req_library["dbscan_spmd.py"] = ["dpctl", "mpi4py"]
-req_library["basic_statistics_spmd.py"] = ["dpctl", "mpi4py"]
 req_library["incremental_basic_statistics_dpctl.py"] = ["dpctl"]
 req_library["incremental_linear_regression_dpctl.py"] = ["dpctl"]
 req_library["incremental_pca_dpctl.py"] = ["dpctl"]
@@ -193,6 +192,20 @@ def check_library(rule):
 req_library["random_forest_regressor_spmd.py"] = ["dpctl", "dpnp", "mpi4py"]
 
 req_os = defaultdict(lambda: [])
+req_os["basic_statistics_spmd.py"] = ["lnx"]
+req_os["covariance_spmd.py"] = ["lnx"]
+req_os["dbscan_spmd.py"] = ["lnx"]
+req_os["kmeans_spmd.py"] = ["lnx"]
+req_os["knn_bf_classification_dpnp.py"] = ["lnx"]
+req_os["knn_bf_classification_spmd.py"] = ["lnx"]
+req_os["knn_bf_regression_spmd.py"] = ["lnx"]
+req_os["linear_regression_spmd.py"] = ["lnx"]
+req_os["logistic_regression_spmd.py"] = ["lnx"]
+req_os["pca_spmd.py"] = ["lnx"]
+req_os["random_forest_classifier_dpctl.py"] = ["lnx"]
+req_os["random_forest_classifier_spmd.py"] = ["lnx"]
+req_os["random_forest_regressor_dpnp.py"] = ["lnx"]
+req_os["random_forest_regressor_spmd.py"] = ["lnx"]
 
 skiped_files = []
 
@@ -229,6 +242,8 @@ def get_exe_cmd(ex, args):
         return None
     if not check_library(req_library[os.path.basename(ex)]):
         return None
+    if not check_os(req_os[os.path.basename(ex)], system_os):
+        return None
     if not args.nodist and ex.endswith("spmd.py"):
         if IS_WIN:
             return 'mpiexec -localonly -n 4 "' + sys.executable + '" "' + ex + '"'
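get_exe_cmd now consults req_os the same way it already consulted req_library. The check_os helper itself is outside this diff; a hypothetical implementation consistent with the call site above (the real one in run_examples.py may differ):

    def check_os(rule, system_os):
        # An empty rule means the example runs everywhere; otherwise the
        # current OS token (e.g. "lnx", "win", "mac") must be listed.
        return not rule or system_os in rule

    assert check_os([], "win")           # unrestricted examples still run
    assert check_os(["lnx"], "lnx")      # SPMD examples allowed on Linux
    assert not check_os(["lnx"], "win")  # and skipped on Windows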
