Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

KMeans OOP #1770

Merged
merged 150 commits into main from dev/msa-kmeans-oop
Sep 6, 2024
Merged
Show file tree
Hide file tree
Changes from 17 commits
Commits
Show all changes
150 commits
Select commit Hold shift + click to select a range
325033a
kmeans oop init commit
md-shafiul-alam Mar 22, 2024
4fbc312
reformat
md-shafiul-alam Mar 22, 2024
06248fe
reformat
md-shafiul-alam Mar 22, 2024
a14ddbe
experimental
md-shafiul-alam Mar 22, 2024
03f85a9
address ci failures
md-shafiul-alam Mar 22, 2024
bf8c75f
deselected tests
md-shafiul-alam Mar 22, 2024
0866e1a
merge main
md-shafiul-alam Mar 26, 2024
c20c1f4
will be reverted
md-shafiul-alam Mar 26, 2024
ad99db4
enable deslected tests
md-shafiul-alam Mar 26, 2024
8b46e06
include elkan
md-shafiul-alam Mar 27, 2024
be47623
address CI failure
md-shafiul-alam Mar 27, 2024
0083124
address ci failures
md-shafiul-alam Mar 27, 2024
2e113fc
enable all deselected tests
md-shafiul-alam Mar 27, 2024
72f77a1
deselected tests
md-shafiul-alam Mar 28, 2024
2c14d8c
compiler update
md-shafiul-alam Mar 28, 2024
305dc08
init signature
md-shafiul-alam Mar 28, 2024
8b3571f
deselected tests
md-shafiul-alam Mar 29, 2024
356e937
Merge branch 'intel:main' into dev/msa-kmeans-oop
md-shafiul-alam Apr 1, 2024
64e6315
format
md-shafiul-alam Apr 3, 2024
366c218
Merge branch 'main' into dev/msa-kmeans-oop
md-shafiul-alam Apr 4, 2024
764b9d8
add sparsity support
md-shafiul-alam Apr 22, 2024
b2b2964
lint
md-shafiul-alam Apr 22, 2024
44b055b
minor fix
md-shafiul-alam Apr 23, 2024
c689503
callable init
md-shafiul-alam Apr 23, 2024
99336d4
lint
md-shafiul-alam Apr 23, 2024
6041a86
Merge branch 'intel:main' into dev/msa-kmeans-oop
md-shafiul-alam Apr 23, 2024
bdd9e95
table fix
md-shafiul-alam Apr 23, 2024
53ac098
minor
md-shafiul-alam Apr 23, 2024
66a02dd
minor
md-shafiul-alam Apr 23, 2024
9c5580a
rename attribute
md-shafiul-alam Apr 23, 2024
6aee2f7
test, revert later
md-shafiul-alam Apr 23, 2024
e6a01c6
minor
md-shafiul-alam Apr 24, 2024
ef2b6a1
add sparsity
md-shafiul-alam Apr 24, 2024
e2c7c31
lint
md-shafiul-alam Apr 24, 2024
52d159b
replace basic stat with numpy
md-shafiul-alam Apr 26, 2024
845b8c6
remove skip
md-shafiul-alam Apr 26, 2024
1044fad
CI fixes
md-shafiul-alam Apr 26, 2024
55a2df8
CI fixes
md-shafiul-alam Apr 26, 2024
ace4b3a
resolve merge conflict
md-shafiul-alam Apr 26, 2024
64f4d30
lint
md-shafiul-alam Apr 26, 2024
b95f784
minor
md-shafiul-alam Apr 26, 2024
fa1f704
fix sample_weight
md-shafiul-alam Apr 26, 2024
919d5a0
pandas dtype
md-shafiul-alam Apr 27, 2024
ca18b84
lint
md-shafiul-alam Apr 27, 2024
6c12e38
remove deselected tests
md-shafiul-alam Apr 27, 2024
91288f6
use numpy variance
md-shafiul-alam May 3, 2024
b51e6bd
test sparse offset
md-shafiul-alam May 17, 2024
3e8f92f
merge main
md-shafiul-alam May 17, 2024
ffbf7aa
revert b51e6bd0d9
md-shafiul-alam May 22, 2024
da7e612
remove basic_statistics changes
md-shafiul-alam May 22, 2024
9ea8b2b
remove comments
md-shafiul-alam May 22, 2024
9c7c3d0
minor
md-shafiul-alam May 28, 2024
f0748d1
update
md-shafiul-alam Jun 7, 2024
cbe4374
Merge branch 'main' into dev/msa-kmeans-oop
md-shafiul-alam Jun 7, 2024
8a716eb
update
md-shafiul-alam Jun 7, 2024
cf0535d
add result option
md-shafiul-alam Jun 11, 2024
5e456e0
Merge branch 'main' into dev/msa-kmeans-oop
md-shafiul-alam Jun 11, 2024
5e4defe
refactor for csr
md-shafiul-alam Jun 11, 2024
b377cde
lint
md-shafiul-alam Jun 11, 2024
5a9b13b
refactor and ci
md-shafiul-alam Jun 11, 2024
860663d
add version check for oneDAL
md-shafiul-alam Jun 11, 2024
8fb53d4
update
md-shafiul-alam Jun 18, 2024
5d9c933
merge main
md-shafiul-alam Jun 18, 2024
ba7aa6b
fix for CI
md-shafiul-alam Jun 18, 2024
77f91c5
ci fix
md-shafiul-alam Jun 18, 2024
55ff15c
minor
md-shafiul-alam Jun 19, 2024
3824279
merge main
md-shafiul-alam Jun 19, 2024
0e5e52b
some fixes
md-shafiul-alam Jun 20, 2024
e0a3b6e
ci fixes
md-shafiul-alam Jun 20, 2024
cc1a9df
lint
md-shafiul-alam Jun 20, 2024
48e869e
add version checks
md-shafiul-alam Jun 20, 2024
51e2420
csr condition for policy
md-shafiul-alam Jun 24, 2024
f7c8a4f
version check for stability check
md-shafiul-alam Jun 24, 2024
b5b2a94
update test
md-shafiul-alam Jun 24, 2024
e561c15
floating methods
md-shafiul-alam Jun 24, 2024
4dec273
minor
md-shafiul-alam Jun 24, 2024
d194913
ci fixes
md-shafiul-alam Jun 24, 2024
3bbeb80
Merge branch 'main' into dev/msa-kmeans-oop
md-shafiul-alam Jun 24, 2024
763699d
minor
md-shafiul-alam Jun 24, 2024
c46a707
address review
md-shafiul-alam Jun 25, 2024
b085804
address review
md-shafiul-alam Jun 25, 2024
ae66a9e
minor
md-shafiul-alam Jun 25, 2024
0020d1b
update comments
md-shafiul-alam Jun 25, 2024
ae77388
refactor
md-shafiul-alam Jun 25, 2024
698adac
ci
Jun 25, 2024
7f1114c
address ci
md-shafiul-alam Jun 26, 2024
cdb6b9e
merge main
md-shafiul-alam Jul 9, 2024
2850a85
update test
md-shafiul-alam Jul 9, 2024
db40680
version check
md-shafiul-alam Jul 9, 2024
ad38abd
lint
md-shafiul-alam Jul 9, 2024
fb6f3fe
Merge branch 'intel:main' into dev/msa-kmeans-oop
md-shafiul-alam Jul 12, 2024
eea103b
minor fix
md-shafiul-alam Jul 12, 2024
d84d1c8
lint
md-shafiul-alam Jul 12, 2024
235aa13
basic stat fix
md-shafiul-alam Jul 12, 2024
a323894
score
md-shafiul-alam Jul 15, 2024
62639cd
minor
md-shafiul-alam Jul 15, 2024
979ced6
ci fix + refactor
md-shafiul-alam Jul 15, 2024
eb72712
more fixes
md-shafiul-alam Jul 15, 2024
dd552ff
not a table
md-shafiul-alam Jul 15, 2024
83f28ca
minor
md-shafiul-alam Jul 15, 2024
47693a4
sample weight
md-shafiul-alam Jul 15, 2024
945c93d
import
md-shafiul-alam Jul 15, 2024
73658c8
merge main
md-shafiul-alam Jul 22, 2024
7f29bfe
merge main
md-shafiul-alam Aug 5, 2024
c457e50
preview remove
md-shafiul-alam Aug 5, 2024
d231333
SPMD fix
md-shafiul-alam Aug 6, 2024
354446b
SPMD fix
md-shafiul-alam Aug 6, 2024
02e49f5
SPMD fix
md-shafiul-alam Aug 6, 2024
7e099c4
refactor
md-shafiul-alam Aug 6, 2024
e820c0f
deselect
Aug 6, 2024
c0cab69
deselect refactor
md-shafiul-alam Aug 7, 2024
e764442
deselect update
md-shafiul-alam Aug 7, 2024
1fd3c63
deselect update
md-shafiul-alam Aug 7, 2024
2a7f88b
deselect update
md-shafiul-alam Aug 7, 2024
83e3a08
deselect
md-shafiul-alam Aug 7, 2024
ce200f5
Merge branch 'intel:main' into dev/msa-kmeans-oop
md-shafiul-alam Aug 7, 2024
772c904
reverting to previous
md-shafiul-alam Aug 29, 2024
21669cd
merge main
md-shafiul-alam Aug 29, 2024
1f64114
update daal version
md-shafiul-alam Aug 29, 2024
596909a
refactor deselected tests
md-shafiul-alam Aug 29, 2024
39d1888
update daal check
md-shafiul-alam Aug 29, 2024
40ff610
Merge branch 'main' into dev/msa-kmeans-oop
md-shafiul-alam Aug 30, 2024
c3f783b
address comments
md-shafiul-alam Aug 30, 2024
d80d042
address comments
md-shafiul-alam Aug 30, 2024
1c5d4db
test fix
md-shafiul-alam Aug 30, 2024
499521a
address comments
md-shafiul-alam Sep 2, 2024
3d36e8e
minor
md-shafiul-alam Sep 2, 2024
bca7518
refactor
md-shafiul-alam Sep 3, 2024
f649cb2
refactor
md-shafiul-alam Sep 3, 2024
7d160eb
Merge branch 'main' into dev/msa-kmeans-oop
md-shafiul-alam Sep 3, 2024
2c4fc1b
refactor
md-shafiul-alam Sep 3, 2024
20df2c2
ci fix
md-shafiul-alam Sep 3, 2024
a6cb0ee
ci fix
md-shafiul-alam Sep 3, 2024
2cd54f2
minor
md-shafiul-alam Sep 3, 2024
28ccee9
update checks
md-shafiul-alam Sep 3, 2024
6f336ca
import
md-shafiul-alam Sep 3, 2024
ebec4c9
fix import
md-shafiul-alam Sep 4, 2024
e550860
refactor
md-shafiul-alam Sep 4, 2024
247548c
update test
md-shafiul-alam Sep 5, 2024
b19c019
update test
md-shafiul-alam Sep 5, 2024
a295198
ci fixes
md-shafiul-alam Sep 5, 2024
7403493
lint
md-shafiul-alam Sep 5, 2024
848b9db
minor
md-shafiul-alam Sep 5, 2024
c77f597
minor
md-shafiul-alam Sep 5, 2024
5fddeda
ci fix
md-shafiul-alam Sep 6, 2024
365a766
fix ci
md-shafiul-alam Sep 6, 2024
6542ec0
fix ci
md-shafiul-alam Sep 6, 2024
b598059
fix ci
md-shafiul-alam Sep 6, 2024
f6be8a8
Merge branch 'main' into dev/msa-kmeans-oop
md-shafiul-alam Sep 6, 2024
2585040
fix ci
md-shafiul-alam Sep 6, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .ci/pipeline/build-and-test-lnx.yml
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ steps:
displayName: "System info"
- script: |
conda update -y -q conda
conda create -q -y -n CB -c conda-forge -c intel python=$(PYTHON_VERSION) intel::dal-devel mpich pyyaml "dpcpp-cpp-rt=2024.0.2"
conda create -q -y -n CB -c conda-forge -c intel python=$(PYTHON_VERSION) intel::dal-devel mpich pyyaml "dpcpp-cpp-rt=2024.1.0"
displayName: "Conda create"
- script: |
. /usr/share/miniconda/etc/profile.d/conda.sh
Expand Down
2 changes: 1 addition & 1 deletion .ci/scripts/install_dpcpp.sh
Original file line number Diff line number Diff line change
Expand Up @@ -21,5 +21,5 @@ rm GPG-PUB-KEY-INTEL-SW-PRODUCTS-2023.PUB
echo "deb https://apt.repos.intel.com/oneapi all main" | sudo tee /etc/apt/sources.list.d/oneAPI.list
sudo add-apt-repository -y "deb https://apt.repos.intel.com/oneapi all main"
sudo apt-get update
sudo apt-get install -y intel-dpcpp-cpp-compiler-2024.0
sudo apt-get install -y intel-dpcpp-cpp-compiler-2024.1
sudo bash -c 'echo libintelocl.so > /etc/OpenCL/vendors/intel-cpu.icd'
58 changes: 24 additions & 34 deletions deselected_tests.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -167,7 +167,7 @@ deselected_tests:

# test_non_uniform_strategies fails due to differences in handling of vacuous clusters after update
# See https://github.com/IntelPython/daal4py/issues/69
- cluster/tests/test_k_means.py::test_relocated_clusters >=0.23,<0.24
# - cluster/tests/test_k_means.py::test_relocated_clusters >=0.23,<0.24
- cluster/tests/test_k_means.py::test_kmeans_relocated_clusters >=0.24

# In scikit-learn, these algorithms are not included in this test. However, scikit-learn-intelex
Expand All @@ -178,6 +178,11 @@ deselected_tests:
- cluster/tests/test_k_means.py::test_kmeans_convergence >=0.23
- cluster/tests/test_k_means.py::test_kmeans_verbose >=0.23

# Sparse Support required
- cluster/tests/test_k_means.py::test_predict_sample_weight_deprecation_warning[KMeans] >=1.3
- cluster/tests/test_k_means.py::test_unit_weights_vs_no_weights[KMeans-dense] <1.2
- cluster/tests/test_k_means.py::test_unit_weights_vs_no_weights[42-KMeans-dense] >=1.2

# The Newton-CG solver solution computed in float32 disagrees with that of float64 by a small
# margin above the test threshold, see https://github.com/scikit-learn/scikit-learn/pull/13645
- linear_model/tests/test_logistic.py::test_dtype_match
Expand Down Expand Up @@ -240,7 +245,7 @@ deselected_tests:
- inspection/tests/test_permutation_importance.py::test_permutation_importance_sample_weight >=0.24

# Patched and unpatched kmeans set same values to different clusters. Need to investigate.
- preprocessing/tests/test_discretization.py::test_nonuniform_strategies[kmeans-expected_2bins1-expected_3bins1-expected_5bins1] >=0.24
# - preprocessing/tests/test_discretization.py::test_nonuniform_strategies[kmeans-expected_2bins1-expected_3bins1-expected_5bins1] >=0.24

# OOB scores in scikit-learn and oneDAL are different because of different random number generators
- ensemble/tests/test_forest.py::test_forest_classifier_oob[X1-y1-0.65-array-ExtraTreesClassifier]
Expand Down Expand Up @@ -346,12 +351,12 @@ deselected_tests:
- tests/test_multioutput.py::test_classifier_chain_fit_and_predict_with_sparse_data >=1.4

# New failing sklearn1.4.1 tests for kmeans associated with incorrect n_iter_ values in daal4py
- cluster/tests/test_k_means.py::test_relocating_with_duplicates[lloyd-dense] >=1.4
- cluster/tests/test_k_means.py::test_relocating_with_duplicates[lloyd-sparse_matrix] >=1.4
- cluster/tests/test_k_means.py::test_relocating_with_duplicates[lloyd-sparse_array] >=1.4
- cluster/tests/test_k_means.py::test_relocating_with_duplicates[elkan-dense] >=1.4
- cluster/tests/test_k_means.py::test_relocating_with_duplicates[elkan-sparse_matrix] >=1.4
- cluster/tests/test_k_means.py::test_relocating_with_duplicates[elkan-sparse_array] >=1.4
# - cluster/tests/test_k_means.py::test_relocating_with_duplicates[lloyd-dense] >=1.4
# - cluster/tests/test_k_means.py::test_relocating_with_duplicates[lloyd-sparse_matrix] >=1.4
# - cluster/tests/test_k_means.py::test_relocating_with_duplicates[lloyd-sparse_array] >=1.4
# - cluster/tests/test_k_means.py::test_relocating_with_duplicates[elkan-dense] >=1.4
# - cluster/tests/test_k_means.py::test_relocating_with_duplicates[elkan-sparse_matrix] >=1.4
# - cluster/tests/test_k_means.py::test_relocating_with_duplicates[elkan-sparse_array] >=1.4


# --------------------------------------------------------
Expand Down Expand Up @@ -434,8 +439,8 @@ gpu:

# Fails
- cluster/tests/test_dbscan.py::test_weighted_dbscan
- cluster/tests/test_k_means.py::test_k_means_fit_predict
- cluster/tests/test_k_means.py::test_predict
# - cluster/tests/test_k_means.py::test_k_means_fit_predict
# - cluster/tests/test_k_means.py::test_predict

- ensemble/tests/test_bagging.py::test_gridsearch
- ensemble/tests/test_bagging.py::test_estimators_samples
Expand Down Expand Up @@ -567,8 +572,6 @@ gpu:
- tests/test_common.py::test_estimators[GaussianMixture()-check_fit_idempotent]
- tests/test_common.py::test_estimators[GaussianMixture()-check_n_features_in]
- tests/test_common.py::test_estimators[GaussianMixture()-check_fit2d_predict1d]
- tests/test_common.py::test_estimators[KMeans()-check_clustering]
- tests/test_common.py::test_estimators[KMeans()-check_clustering(readonly_memmap=True)]
- tests/test_common.py::test_estimators[RandomForestClassifier()-check_class_weight_classifiers]
- tests/test_common.py::test_estimators[SVC()-check_sample_weights_pandas_series]
- tests/test_common.py::test_estimators[SVC()-check_sample_weights_not_an_array]
Expand Down Expand Up @@ -603,7 +606,6 @@ gpu:
- tests/test_multiclass.py::test_ovr_coef_
- tests/test_multiclass.py::test_ovr_deprecated_coef_intercept
- tests/test_multiclass.py::test_pairwise_cross_val_score

- tests/test_multioutput.py::test_multiclass_multioutput_estimator_predict_proba
- tests/test_multioutput.py::test_classifier_chain_fit_and_predict_with_sparse_data

Expand All @@ -617,21 +619,15 @@ gpu:
- manifold/tests/test_t_sne.py::test_n_iter_without_progress

# KMeans based (unsupported for GPU)
- cluster/tests/test_k_means.py
- tests/test_common.py::test_estimators[KMeans()
- tests/test_common.py::test_estimators[BayesianGaussianMixture()-check_fit_check_is_fitted]
- tests/test_common.py::test_estimators[GaussianMixture()-check_fit_check_is_fitted]
- tests/test_common.py::test_check_n_features_in_after_fitting[BayesianGaussianMixture()]
- tests/test_common.py::test_check_n_features_in_after_fitting[GaussianMixture()]
- tests/test_common.py::test_check_n_features_in_after_fitting[KMeans()]
- tests/test_common.py::test_set_output_transform[KMeans()]
- tests/test_common.py::test_set_output_transform_pandas[KMeans()]
- tests/test_common.py::test_global_output_transform_pandas[KMeans()]
- mixture/tests/test_gaussian_mixture.py
- model_selection/tests/test_validation.py::test_cross_val_predict
- metrics/tests/test_score_objects.py::test_supervised_cluster_scorers
- tests/test_pipeline.py::test_fit_predict_on_pipeline
- tests/test_discriminant_analysis.py::test_lda_predict
# - tests/test_common.py::test_estimators[BayesianGaussianMixture()-check_fit_check_is_fitted]
# - tests/test_common.py::test_estimators[GaussianMixture()-check_fit_check_is_fitted]
# - tests/test_common.py::test_check_n_features_in_after_fitting[BayesianGaussianMixture()]
# - tests/test_common.py::test_check_n_features_in_after_fitting[GaussianMixture()]
# - mixture/tests/test_gaussian_mixture.py
# - model_selection/tests/test_validation.py::test_cross_val_predict
# - metrics/tests/test_score_objects.py::test_supervised_cluster_scorers
# - tests/test_pipeline.py::test_fit_predict_on_pipeline
# - tests/test_discriminant_analysis.py::test_lda_predict
# Other device issues
- tests/test_metaestimators.py::test_meta_estimators_delegate_data_validation[StackingClassifier]
- tests/test_multiclass.py::test_ovr_always_present
Expand Down Expand Up @@ -1105,9 +1101,3 @@ gpu:
- tests/test_common.py::test_estimators[DBSCAN()-check_fit2d_predict1d]
- tests/test_common.py::test_check_n_features_in_after_fitting[DBSCAN()]
- tests/test_common.py::test_check_n_features_in_after_fitting[SVC()]

preview:
- cluster/tests/test_k_means.py::test_kmeans_elkan_results
- cluster/tests/test_k_means.py::test_unit_weights_vs_no_weights[KMeans-dense] <1.2
- cluster/tests/test_k_means.py::test_unit_weights_vs_no_weights[42-KMeans-dense] >=1.2
- cluster/tests/test_k_means.py::test_predict_sample_weight_deprecation_warning[KMeans] >=1.3
7 changes: 6 additions & 1 deletion onedal/cluster/kmeans.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,12 @@ struct params2desc {
desc.set_cluster_count( params["cluster_count"].cast<std::int64_t>() );
desc.set_accuracy_threshold( params["accuracy_threshold"].cast<Float>() );
desc.set_max_iteration_count( params["max_iteration_count"].cast<std::int64_t>() );

#if defined(ONEDAL_VERSION) && ONEDAL_VERSION >= 20240200
auto result_options = params["result_options"].cast<std::string>();
if (result_options == "computeAssignments"){
desc.set_result_options(result_options::compute_assignments);
}
#endif // defined(ONEDAL_VERSION) && ONEDAL_VERSION >= 20240200
return desc;
}
};
Expand Down
5 changes: 3 additions & 2 deletions onedal/cluster/kmeans.py
Original file line number Diff line number Diff line change
Expand Up @@ -142,7 +142,7 @@ def _check_params_vs_input(
self._n_init = 1
assert self.algorithm == "lloyd"

def _get_onedal_params(self, dtype=np.float32):
def _get_onedal_params(self, dtype=np.float32, result_options=None):
thr = self._tol if hasattr(self, "_tol") else self.tol
return {
"fptype": "float" if dtype == np.float32 else "double",
Expand All @@ -151,6 +151,7 @@ def _get_onedal_params(self, dtype=np.float32):
"max_iteration_count": self.max_iter,
"cluster_count": self.n_clusters,
"accuracy_threshold": thr,
"result_options": "" if result_options is None else result_options,
}

def _get_params_and_input(self, X, policy):
Expand Down Expand Up @@ -340,7 +341,7 @@ def _set_cluster_centers(self, cluster_centers):
cluster_centers_ = property(_get_cluster_centers, _set_cluster_centers)

def _predict_raw(self, X_table, module, policy, dtype=np.float32):
params = self._get_onedal_params(dtype)
params = self._get_onedal_params(dtype, result_options="computeAssignments")

result = module.infer(policy, params, self.model_, X_table)

Expand Down
1 change: 0 additions & 1 deletion setup_sklearnex.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,6 @@
"sklearnex.neighbors",
"sklearnex.preview",
"sklearnex.preview.covariance",
"sklearnex.preview.cluster",
"sklearnex.svm",
"sklearnex.utils",
]
Expand Down
Loading