diff --git a/onedal/datatypes/_data_conversion.py b/onedal/datatypes/_data_conversion.py index 0d91bcdfb0..8f2989eeb0 100644 --- a/onedal/datatypes/_data_conversion.py +++ b/onedal/datatypes/_data_conversion.py @@ -35,12 +35,12 @@ def _convert_one_to_table(arg): def to_table(*args): """Create oneDAL tables from scalars and/or arrays. - Note: this implementation can be used with contiguous scipy.sparse, numpy - ndarrays, DPCTL/DPNP usm_ndarrays and scalars. Tables will use pointers to the - original array data. Scalars will be copies. Arrays may be modified in- - place by oneDAL during computation. This works for data located on CPU and - SYCL-enabled Intel GPUs. Each array may only be of a single datatype (i.e. - each must be homogeneous). + Note: this implementation can be used with scipy.sparse, numpy ndarrays, + DPCTL/DPNP usm_ndarrays and scalars. Tables will use pointers to the + original array data. Scalars and non-contiguous arrays will be copies. + Arrays may be modified in-place by oneDAL during computation. This works + for data located on CPU and SYCL-enabled Intel GPUs. Each array may only + be of a single datatype (i.e. each must be homogeneous). 
Parameters ---------- diff --git a/onedal/datatypes/data_conversion.cpp b/onedal/datatypes/data_conversion.cpp index ad9832da8b..54b66d593b 100644 --- a/onedal/datatypes/data_conversion.cpp +++ b/onedal/datatypes/data_conversion.cpp @@ -155,18 +155,26 @@ dal::table convert_to_table(PyObject *obj) { } if (is_array(obj)) { PyArrayObject *ary = reinterpret_cast<PyArrayObject *>(obj); - if (array_is_behaved_C(ary) || array_is_behaved_F(ary)) { + if (!PyArray_ISCARRAY_RO(ary) && !PyArray_ISFARRAY_RO(ary)) { + // NOTE: this will make a C-contiguous deep copy of the data + // this is expected to be a special case + ary = PyArray_GETCONTIGUOUS(ary); + if (ary) { + res = convert_to_table(reinterpret_cast<PyObject *>(ary)); + Py_DECREF(ary); + return res; + } + else { + throw std::invalid_argument( + "[convert_to_table] Numpy input could not be converted into onedal table."); + } + } #define MAKE_HOMOGEN_TABLE(CType) res = convert_to_homogen_impl<CType>(ary); - SET_NPY_FEATURE(array_type(ary), - array_type_sizeof(ary), - MAKE_HOMOGEN_TABLE, - throw std::invalid_argument("Found unsupported array type")); + SET_NPY_FEATURE(array_type(ary), + array_type_sizeof(ary), + MAKE_HOMOGEN_TABLE, + throw std::invalid_argument("Found unsupported array type")); #undef MAKE_HOMOGEN_TABLE - } - else { - throw std::invalid_argument( - "[convert_to_table] Numpy input Could not convert Python object to onedal table."); - } } else if (strcmp(Py_TYPE(obj)->tp_name, "csr_matrix") == 0 || strcmp(Py_TYPE(obj)->tp_name, "csr_array") == 0) { PyObject *py_data = PyObject_GetAttrString(obj, "data"); diff --git a/onedal/datatypes/data_conversion_sua_iface.cpp b/onedal/datatypes/data_conversion_sua_iface.cpp index 06b492e7b5..673bdb6c63 100644 --- a/onedal/datatypes/data_conversion_sua_iface.cpp +++ b/onedal/datatypes/data_conversion_sua_iface.cpp @@ -34,6 +34,7 @@ namespace oneapi::dal::python { +using namespace pybind11::literals; // Please follow // for the description of `__sycl_usm_array_interface__` protocol. 
@@ -42,6 +43,8 @@ namespace oneapi::dal::python { // of `__sycl_usm_array_interface__` protocol. template <typename Type> dal::table convert_to_homogen_impl(py::object obj) { + dal::table res{}; + // Get `__sycl_usm_array_interface__` dictionary representing USM allocations. auto sua_iface_dict = get_sua_interface(obj); @@ -64,6 +67,25 @@ dal::table convert_to_homogen_impl(py::object obj) { // Get oneDAL Homogen DataLayout enumeration from input object shape and strides. const auto layout = get_sua_iface_layout(sua_iface_dict, r_count, c_count); + if (layout == dal::data_layout::unknown){ + // NOTE: this will make a C-contiguous deep copy of the data + // if possible, this is expected to be a special case + py::object copy; + if (py::hasattr(obj, "copy")){ + copy = obj.attr("copy")(); + } + else if (py::hasattr(obj, "__array_namespace__")){ + const auto space = obj.attr("__array_namespace__")(); + copy = space.attr("asarray")(obj, "copy"_a = true); + } + else { + throw std::runtime_error("Wrong strides"); + } + res = convert_to_homogen_impl<Type>(copy); + copy.dec_ref(); + return res; + } + // Get `__sycl_usm_array_interface__['data'][0]`, the first element of data entry, // which is a Python integer encoding USM pointer value. const auto* const ptr = reinterpret_cast<const Type*>(get_sua_ptr(sua_iface_dict)); @@ -79,8 +101,6 @@ dal::table convert_to_homogen_impl(py::object obj) { // Use read-only accessor for onedal table. 
bool is_readonly = is_sua_readonly(sua_iface_dict); - dal::table res{}; - if (is_readonly) { res = dal::homogen_table(queue, ptr, diff --git a/onedal/datatypes/tests/test_data.py b/onedal/datatypes/tests/test_data.py index de47e18ad4..7ce2171bbe 100644 --- a/onedal/datatypes/tests/test_data.py +++ b/onedal/datatypes/tests/test_data.py @@ -377,23 +377,14 @@ def test_sua_iface_interop_unsupported_dtypes(dataframe, queue, dtype): def test_to_table_non_contiguous_input(dataframe, queue): if dataframe in "dpnp,dpctl" and not _is_dpc_backend: pytest.skip("__sycl_usm_array_interface__ support requires DPC backend.") - X = np.mgrid[:10, :10] + X, _ = np.mgrid[:10, :10] X = _convert_to_dataframe(X, sycl_queue=queue, target_df=dataframe) X = X[:, :3] sua_iface, _, _ = _get_sycl_namespace(X) # X expected to be non-contiguous. assert not X.flags.c_contiguous and not X.flags.f_contiguous - - # TODO: - # consistent error message. - if dataframe in "dpnp,dpctl": - expected_err_msg = ( - "Unable to convert from SUA interface: only 1D & 2D tensors are allowed" - ) - else: - expected_err_msg = "Numpy input Could not convert Python object to onedal table." 
- with pytest.raises(ValueError, match=expected_err_msg): - to_table(X) + X_t = to_table(X) + assert X_t and X_t.shape == (10, 3) and X_t.has_data @pytest.mark.skipif( diff --git a/onedal/datatypes/utils/sua_iface_helpers.cpp b/onedal/datatypes/utils/sua_iface_helpers.cpp index a5f0567534..d345a35645 100644 --- a/onedal/datatypes/utils/sua_iface_helpers.cpp +++ b/onedal/datatypes/utils/sua_iface_helpers.cpp @@ -163,7 +163,7 @@ dal::data_layout get_sua_iface_layout(const py::dict& sua_dict, return dal::data_layout::column_major; } else { - throw std::runtime_error("Wrong strides"); + return dal::data_layout::unknown; } } else { diff --git a/onedal/utils/validation.py b/onedal/utils/validation.py index c97b77a577..7559c43e4a 100644 --- a/onedal/utils/validation.py +++ b/onedal/utils/validation.py @@ -153,15 +153,6 @@ def _check_array( if sp.issparse(array): return array - - # TODO: Convert this kind of arrays to a table like in daal4py - if not array.flags.aligned and not array.flags.writeable: - array = np.array(array.tolist()) - - # TODO: If data is not contiguous copy to contiguous - # Need implemeted numpy table in oneDAL - if not array.flags.c_contiguous and not array.flags.f_contiguous: - array = np.ascontiguousarray(array, array.dtype) return array