From 0189eeeedf9693799bfaf861cad79486d2d09749 Mon Sep 17 00:00:00 2001
From: Alexander Druz
Date: Mon, 20 Nov 2023 17:05:33 +0100
Subject: [PATCH] Update embedding column tests

---
 tests/integration/dataset/test_embedding.py   | 244 --------------
 .../dataset/test_embedding_column.py          | 306 ++++++++++++++++++
 2 files changed, 306 insertions(+), 244 deletions(-)
 delete mode 100644 tests/integration/dataset/test_embedding.py
 create mode 100644 tests/integration/dataset/test_embedding_column.py

diff --git a/tests/integration/dataset/test_embedding.py b/tests/integration/dataset/test_embedding.py
deleted file mode 100644
index 928b7509..00000000
--- a/tests/integration/dataset/test_embedding.py
+++ /dev/null
@@ -1,244 +0,0 @@
-import numpy as np
-import pytest
-
-from renumics import spotlight
-from renumics.spotlight import dtypes
-from renumics.spotlight.dataset.exceptions import InvalidShapeError
-
-
-@pytest.mark.parametrize("length", [1, 2, 8])
-def test_default(empty_dataset: spotlight.Dataset, length: int) -> None:
-    """
-    Test default embedding column creation and afterwards filling row-by-row.
-    """
-    empty_dataset.append_embedding_column("embedding")
-    assert empty_dataset.get_dtype("embedding") == dtypes.embedding_dtype
-
-    valid_inputs = (
-        [0] * length,
-        range(length),
-        tuple(range(length)),
-        np.ones(length),
-        np.full(length, np.nan),
-    )
-    for embedding in valid_inputs:
-        empty_dataset.append_row(embedding=embedding)
-        assert empty_dataset.get_dtype("embedding") == dtypes.EmbeddingDType(
-            length=length
-        )
-
-    invalid_inputs: tuple = ([], range(length + 1))
-    for embedding in invalid_inputs:
-        with pytest.raises(InvalidShapeError):
-            empty_dataset.append_row(embedding=embedding)
-
-    embeddings = empty_dataset["embedding"]
-    assert len(embeddings) == len(valid_inputs)
-    assert all(embedding.shape == (length,) for embedding in embeddings)
-    assert all(embedding.dtype == np.float32 for embedding in embeddings)
-
-
-def test_default_zero_length(empty_dataset: spotlight.Dataset) -> None:
-    """
-    Test default embedding column creation and afterwards filling row-by-row
-    with embeddings of length 0.
-    """
-    empty_dataset.append_embedding_column("embedding")
-    assert empty_dataset.get_dtype("embedding") == dtypes.embedding_dtype
-
-    valid_inputs: tuple = ([], (), range(0), np.array([]))
-    for embedding in valid_inputs:
-        empty_dataset.append_row(embedding=embedding)
-        assert empty_dataset.get_dtype("embedding") == dtypes.EmbeddingDType(length=0)
-
-    invalid_inputs: tuple = ([1], (1, 2), range(5), np.zeros(10))
-    for embedding in invalid_inputs:
-        with pytest.raises(InvalidShapeError):
-            empty_dataset.append_row(embedding=embedding)
-
-    embeddings = empty_dataset["embedding"]
-    assert len(embeddings) == len(valid_inputs)
-    assert all(embedding is None for embedding in embeddings)
-
-
-@pytest.mark.parametrize("length", [1, 2, 8])
-def test_default_with_values(empty_dataset: spotlight.Dataset, length: int) -> None:
-    """
-    Test default embedding column creation with given values and afterwards
-    filling row-by-row.
-    """
-    valid_inputs = (
-        [0] * length,
-        range(length),
-        tuple(range(length)),
-        np.ones(length),
-        np.full(length, np.nan),
-    )
-    empty_dataset.append_embedding_column("embedding", values=valid_inputs)
-    assert empty_dataset.get_dtype("embedding") == dtypes.EmbeddingDType(length=length)
-
-    for embedding in valid_inputs:
-        empty_dataset.append_row(embedding=embedding)
-        assert empty_dataset.get_dtype("embedding") == dtypes.EmbeddingDType(
-            length=length
-        )
-
-    invalid_inputs: tuple = ([], range(length + 1))
-    for embedding in invalid_inputs:
-        with pytest.raises(InvalidShapeError):
-            empty_dataset.append_row(embedding=embedding)
-
-    embeddings = empty_dataset["embedding"]
-    assert len(embeddings) == len(valid_inputs) * 2
-    assert all(embedding.shape == (length,) for embedding in embeddings)
-    assert all(embedding.dtype == np.float32 for embedding in embeddings)
-
-
-def test_default_with_values_zero_length(empty_dataset: spotlight.Dataset) -> None:
-    """
-    Test default embedding column creation with given values and afterwards
-    filling row-by-row with embeddings of length 0.
-    """
-    valid_inputs: tuple = ([], (), range(0), np.array([]))
-    empty_dataset.append_embedding_column("embedding", values=valid_inputs)
-    assert empty_dataset.get_dtype("embedding") == dtypes.EmbeddingDType(length=0)
-
-    for embedding in valid_inputs:
-        empty_dataset.append_row(embedding=embedding)
-        assert empty_dataset.get_dtype("embedding") == dtypes.EmbeddingDType(length=0)
-
-    invalid_inputs: tuple = ([1], (1, 2), range(5), np.zeros(10))
-    for embedding in invalid_inputs:
-        with pytest.raises(InvalidShapeError):
-            empty_dataset.append_row(embedding=embedding)
-
-    embeddings = empty_dataset["embedding"]
-    assert len(embeddings) == len(valid_inputs) * 2
-    assert all(embedding is None for embedding in embeddings)
-
-
-@pytest.mark.parametrize("length", [1, 2, 8])
-def test_optional(empty_dataset: spotlight.Dataset, length: int) -> None:
-    """
-    Test optional embedding column creation and afterwards filling row-by-row.
-    """
-    empty_dataset.append_embedding_column("embedding", optional=True)
-    assert empty_dataset.get_dtype("embedding") == dtypes.embedding_dtype
-
-    valid_inputs: tuple = (
-        [0] * length,
-        range(length),
-        tuple(range(length)),
-        np.ones(length),
-        np.full(length, np.nan),
-        None,
-        [],
-        (),
-        range(0),
-        np.array([]),
-    )
-    for embedding in valid_inputs:
-        empty_dataset.append_row(embedding=embedding)
-        assert empty_dataset.get_dtype("embedding") == dtypes.EmbeddingDType(
-            length=length
-        )
-
-    invalid_inputs: tuple = (range(length + 1), np.full(length + 1, np.nan))
-    for embedding in invalid_inputs:
-        with pytest.raises(InvalidShapeError):
-            empty_dataset.append_row(embedding=embedding)
-
-    embeddings = empty_dataset["embedding"]
-    assert len(embeddings) == len(valid_inputs)
-    none_mask = np.array([False] * 5 + [True] * 5)
-    assert all(embedding is None for embedding in embeddings[none_mask])
-    assert all(embedding.shape == (length,) for embedding in embeddings[~none_mask])
-    assert all(embedding.dtype == np.float32 for embedding in embeddings[~none_mask])
-
-
-def test_optional_zero_length(empty_dataset: spotlight.Dataset) -> None:
-    """
-    Test optional embedding column creation and afterwards filling row-by-row
-    with embeddings of length 0.
-    """
-    empty_dataset.append_embedding_column("embedding", optional=True)
-    assert empty_dataset.get_dtype("embedding") == dtypes.embedding_dtype
-
-    valid_inputs: tuple = (
-        None,
-        [],
-        (),
-        range(0),
-        np.array([]),
-    )
-    for embedding in valid_inputs:
-        empty_dataset.append_row(embedding=embedding)
-        assert empty_dataset.get_dtype("embedding") == dtypes.embedding_dtype
-
-    embeddings = empty_dataset["embedding"]
-    assert len(embeddings) == len(valid_inputs)
-    assert all(embedding is None for embedding in embeddings)
-
-    empty_dataset.append_row(embedding=range(5))
-    assert empty_dataset.get_dtype("embedding") == dtypes.EmbeddingDType(length=5)
-
-
-@pytest.mark.parametrize("length", [1, 2, 8])
-@pytest.mark.parametrize("np_dtype", [np.float16, np.float32, np.float64])
-def test_dtype(
-    empty_dataset: spotlight.Dataset, length: int, np_dtype: np.dtype
-) -> None:
-    """
-    Test embedding column creation with dtype and afterwards filling row-by-row.
-    """
-    empty_dataset.append_embedding_column("embedding", dtype=np_dtype)
-    assert empty_dataset.get_dtype("embedding") == dtypes.embedding_dtype
-
-    valid_inputs = (
-        [0] * length,
-        range(length),
-        tuple(range(length)),
-        np.ones(length),
-        np.full(length, np.nan),
-    )
-    for embedding in valid_inputs:
-        empty_dataset.append_row(embedding=embedding)
-        assert empty_dataset.get_dtype("embedding") == dtypes.EmbeddingDType(
-            length=length
-        )
-
-    embeddings = empty_dataset["embedding"]
-    assert len(embeddings) == len(valid_inputs)
-    assert all(embedding.shape == (length,) for embedding in embeddings)
-    assert all(embedding.dtype == np_dtype for embedding in embeddings)
-
-
-@pytest.mark.parametrize("length", [1, 2, 8])
-def test_generic(empty_dataset: spotlight.Dataset, length: int) -> None:
-    """
-    Test generic embedding column creation and afterwards filling row-by-row.
-    """
-    dtype = dtypes.EmbeddingDType(length=length)
-    empty_dataset.append_column("embedding", dtype)
-    assert empty_dataset.get_dtype("embedding") == dtype
-
-    valid_inputs = (
-        [0] * length,
-        range(length),
-        tuple(range(length)),
-        np.ones(length),
-        np.full(length, np.nan),
-    )
-    for embedding in valid_inputs:
-        empty_dataset.append_row(embedding=embedding)
-        assert empty_dataset.get_dtype("embedding") == dtype
-
-    invalid_inputs: tuple = ([], range(length + 1))
-    for embedding in invalid_inputs:
-        with pytest.raises(InvalidShapeError):
-            empty_dataset.append_row(embedding=embedding)
-
-    embeddings = empty_dataset["embedding"]
-    assert len(embeddings) == len(valid_inputs)
-    assert all(embedding.shape == (length,) for embedding in embeddings)
-    assert all(embedding.dtype == np.float32 for embedding in embeddings)
diff --git a/tests/integration/dataset/test_embedding_column.py b/tests/integration/dataset/test_embedding_column.py
new file mode 100644
index 00000000..c1e5aee6
--- /dev/null
+++ b/tests/integration/dataset/test_embedding_column.py
@@ -0,0 +1,306 @@
+import numpy as np
+import pytest
+
+from renumics import spotlight
+from renumics.spotlight import dtypes
+from renumics.spotlight.dataset.exceptions import InvalidDTypeError, InvalidShapeError
+
+
+@pytest.mark.parametrize("length", [1, 2, 8])
+def test_default(empty_dataset: spotlight.Dataset, length: int) -> None:
+    """
+    Test default embedding column creation and afterwards filling row-by-row.
+    """
+    empty_dataset.append_embedding_column("embedding")
+    assert empty_dataset.get_dtype("embedding") == dtypes.embedding_dtype
+
+    valid_values = (
+        [0] * length,
+        range(length),
+        tuple(range(length)),
+        np.ones(length),
+        np.full(length, np.nan),
+    )
+    for value in valid_values:
+        empty_dataset.append_row(embedding=value)
+        assert empty_dataset.get_dtype("embedding") == dtypes.EmbeddingDType(
+            length=length
+        )
+
+    invalid_values: tuple = ([], range(length + 1))
+    for value in invalid_values:
+        with pytest.raises(InvalidShapeError):
+            empty_dataset.append_row(embedding=value)
+
+    values = empty_dataset["embedding"]
+    assert len(values) == len(valid_values)
+    assert all(value.shape == (length,) for value in values)
+    assert all(value.dtype == np.float32 for value in values)
+
+
+def test_default_zero_length(empty_dataset: spotlight.Dataset) -> None:
+    """
+    Test default embedding column creation and afterwards filling row-by-row
+    with values of length 0.
+    """
+    empty_dataset.append_embedding_column("embedding")
+    assert empty_dataset.get_dtype("embedding") == dtypes.embedding_dtype
+
+    valid_values: tuple = ([], (), range(0), np.array([]))
+    for value in valid_values:
+        empty_dataset.append_row(embedding=value)
+        assert empty_dataset.get_dtype("embedding") == dtypes.EmbeddingDType(length=0)
+
+    invalid_values: tuple = ([1], (1, 2), range(5), np.zeros(10))
+    for value in invalid_values:
+        with pytest.raises(InvalidShapeError):
+            empty_dataset.append_row(embedding=value)
+
+    values = empty_dataset["embedding"]
+    assert len(values) == len(valid_values)
+    assert all(value is None for value in values)
+
+
+@pytest.mark.parametrize("length", [1, 2, 8])
+def test_default_with_values(empty_dataset: spotlight.Dataset, length: int) -> None:
+    """
+    Test default embedding column creation with given values and afterwards
+    filling row-by-row.
+    """
+    valid_values = (
+        [0] * length,
+        range(length),
+        tuple(range(length)),
+        np.ones(length),
+        np.full(length, np.nan),
+    )
+    empty_dataset.append_embedding_column("embedding", values=valid_values)
+    assert empty_dataset.get_dtype("embedding") == dtypes.EmbeddingDType(length=length)
+
+    for value in valid_values:
+        empty_dataset.append_row(embedding=value)
+        assert empty_dataset.get_dtype("embedding") == dtypes.EmbeddingDType(
+            length=length
+        )
+
+    invalid_values: tuple = ([], range(length + 1))
+    for value in invalid_values:
+        with pytest.raises(InvalidShapeError):
+            empty_dataset.append_row(embedding=value)
+
+    values = empty_dataset["embedding"]
+    assert len(values) == len(valid_values) * 2
+    assert all(value.shape == (length,) for value in values)
+    assert all(value.dtype == np.float32 for value in values)
+
+
+def test_default_with_values_zero_length(empty_dataset: spotlight.Dataset) -> None:
+    """
+    Test default embedding column creation with given values and afterwards
+    filling row-by-row with values of length 0.
+    """
+    valid_values: tuple = ([], (), range(0), np.array([]))
+    empty_dataset.append_embedding_column("embedding", values=valid_values)
+    assert empty_dataset.get_dtype("embedding") == dtypes.EmbeddingDType(length=0)
+
+    for value in valid_values:
+        empty_dataset.append_row(embedding=value)
+        assert empty_dataset.get_dtype("embedding") == dtypes.EmbeddingDType(length=0)
+
+    invalid_values: tuple = ([1], (1, 2), range(5), np.zeros(10))
+    for value in invalid_values:
+        with pytest.raises(InvalidShapeError):
+            empty_dataset.append_row(embedding=value)
+
+    values = empty_dataset["embedding"]
+    assert len(values) == len(valid_values) * 2
+    assert all(value is None for value in values)
+
+
+@pytest.mark.parametrize("length", [1, 2, 8])
+def test_optional(empty_dataset: spotlight.Dataset, length: int) -> None:
+    """
+    Test optional embedding column creation and afterwards filling row-by-row.
+    """
+    empty_dataset.append_embedding_column("embedding", optional=True)
+    assert empty_dataset.get_dtype("embedding") == dtypes.embedding_dtype
+
+    valid_values: tuple = (
+        [0] * length,
+        range(length),
+        tuple(range(length)),
+        np.ones(length),
+        np.full(length, np.nan),
+        None,
+        [],
+        (),
+        range(0),
+        np.array([]),
+    )
+    for value in valid_values:
+        empty_dataset.append_row(embedding=value)
+        assert empty_dataset.get_dtype("embedding") == dtypes.EmbeddingDType(
+            length=length
+        )
+
+    invalid_values: tuple = (range(length + 1), np.full(length + 1, np.nan))
+    for value in invalid_values:
+        with pytest.raises(InvalidShapeError):
+            empty_dataset.append_row(embedding=value)
+
+    values = empty_dataset["embedding"]
+    assert len(values) == len(valid_values)
+    none_mask = np.array([False] * 5 + [True] * 5)  # 5 embeddings, then 5 None rows
+    assert all(value is None for value in values[none_mask])
+    assert all(value.shape == (length,) for value in values[~none_mask])
+    assert all(value.dtype == np.float32 for value in values[~none_mask])
+
+
+def test_optional_zero_length(empty_dataset: spotlight.Dataset) -> None:
+    """
+    Test optional embedding column creation and afterwards filling row-by-row
+    with values of length 0.
+    """
+    empty_dataset.append_embedding_column("embedding", optional=True)
+    assert empty_dataset.get_dtype("embedding") == dtypes.embedding_dtype
+
+    valid_values: tuple = (
+        None,
+        [],
+        (),
+        range(0),
+        np.array([]),
+    )
+    for value in valid_values:
+        empty_dataset.append_row(embedding=value)
+        assert empty_dataset.get_dtype("embedding") == dtypes.embedding_dtype
+
+    values = empty_dataset["embedding"]
+    assert len(values) == len(valid_values)
+    assert all(value is None for value in values)
+
+    empty_dataset.append_row(embedding=range(5))
+    assert empty_dataset.get_dtype("embedding") == dtypes.EmbeddingDType(length=5)
+
+
+@pytest.mark.parametrize("length", [1, 2, 8])
+def test_length(empty_dataset: spotlight.Dataset, length: int) -> None:
+    """
+    Test embedding column creation with a fixed length and afterwards filling
+    row-by-row.
+    """
+    empty_dataset.append_embedding_column("embedding", length=length)
+    assert empty_dataset.get_dtype("embedding") == dtypes.EmbeddingDType(length=length)
+
+    valid_values = (
+        [0] * length,
+        range(length),
+        tuple(range(length)),
+        np.ones(length),
+        np.full(length, np.nan),
+    )
+    for value in valid_values:
+        empty_dataset.append_row(embedding=value)
+        assert empty_dataset.get_dtype("embedding") == dtypes.EmbeddingDType(
+            length=length
+        )
+
+    invalid_values: tuple = ([], range(length + 1))
+    for value in invalid_values:
+        with pytest.raises(InvalidShapeError):
+            empty_dataset.append_row(embedding=value)
+    with pytest.raises(InvalidDTypeError):
+        empty_dataset.append_row(embedding=None)
+
+    values = empty_dataset["embedding"]
+    assert len(values) == len(valid_values)
+    assert all(value.shape == (length,) for value in values)
+    assert all(value.dtype == np.float32 for value in values)
+
+
+def test_zero_length(empty_dataset: spotlight.Dataset) -> None:
+    """
+    Test embedding column creation with a fixed length of 0 and afterwards
+    filling row-by-row.
+    """
+    empty_dataset.append_embedding_column("embedding", length=0)
+    assert empty_dataset.get_dtype("embedding") == dtypes.EmbeddingDType(length=0)
+
+    valid_values: tuple = ([], (), range(0), np.array([]))
+    for value in valid_values:
+        empty_dataset.append_row(embedding=value)
+        assert empty_dataset.get_dtype("embedding") == dtypes.EmbeddingDType(length=0)
+
+    invalid_values: tuple = ([1], (1, 2), range(5), np.zeros(10))
+    for value in invalid_values:
+        with pytest.raises(InvalidShapeError):
+            empty_dataset.append_row(embedding=value)
+    with pytest.raises(InvalidDTypeError):
+        empty_dataset.append_row(embedding=None)
+
+    values = empty_dataset["embedding"]
+    assert len(values) == len(valid_values)
+    assert all(value is None for value in values)
+
+
+@pytest.mark.parametrize("length", [1, 2, 8])
+@pytest.mark.parametrize("np_dtype", [np.float16, np.float32, np.float64])
+def test_dtype(
+    empty_dataset: spotlight.Dataset, length: int, np_dtype: np.dtype
+) -> None:
+    """
+    Test embedding column creation with dtype and afterwards filling row-by-row.
+    """
+    empty_dataset.append_embedding_column("embedding", dtype=np_dtype)
+    assert empty_dataset.get_dtype("embedding") == dtypes.embedding_dtype
+
+    valid_values = (
+        [0] * length,
+        range(length),
+        tuple(range(length)),
+        np.ones(length),
+        np.full(length, np.nan),
+    )
+    for value in valid_values:
+        empty_dataset.append_row(embedding=value)
+        assert empty_dataset.get_dtype("embedding") == dtypes.EmbeddingDType(
+            length=length
+        )
+
+    values = empty_dataset["embedding"]
+    assert len(values) == len(valid_values)
+    assert all(value.shape == (length,) for value in values)
+    assert all(value.dtype == np_dtype for value in values)
+
+
+@pytest.mark.parametrize("length", [1, 2, 8])
+def test_generic(empty_dataset: spotlight.Dataset, length: int) -> None:
+    """
+    Test generic embedding column creation and afterwards filling row-by-row.
+    """
+    dtype = dtypes.EmbeddingDType(length=length)
+    empty_dataset.append_column("embedding", dtype)
+    assert empty_dataset.get_dtype("embedding") == dtype
+
+    valid_values = (
+        [0] * length,
+        range(length),
+        tuple(range(length)),
+        np.ones(length),
+        np.full(length, np.nan),
+    )
+    for value in valid_values:
+        empty_dataset.append_row(embedding=value)
+        assert empty_dataset.get_dtype("embedding") == dtype
+
+    invalid_values: tuple = ([], range(length + 1))
+    for value in invalid_values:
+        with pytest.raises(InvalidShapeError):
+            empty_dataset.append_row(embedding=value)
+    with pytest.raises(InvalidDTypeError):
+        empty_dataset.append_row(embedding=None)
+
+    values = empty_dataset["embedding"]
+    assert len(values) == len(valid_values)
+    assert all(value.shape == (length,) for value in values)
+    assert all(value.dtype == np.float32 for value in values)