From 81fd14c807ee770af20eb82b404928689c6edbd4 Mon Sep 17 00:00:00 2001 From: Marvin Poul Date: Thu, 21 Oct 2021 12:26:21 +0200 Subject: [PATCH 1/4] Add methods to get padded and ragged arrays to FlattenedStorage --- pyiron_base/generic/flattenedstorage.py | 53 +++++++++++++++++++++++++ tests/generic/test_flattenedstorage.py | 27 +++++++++++++ 2 files changed, 80 insertions(+) diff --git a/pyiron_base/generic/flattenedstorage.py b/pyiron_base/generic/flattenedstorage.py index 2af2076d6..141e29a3e 100644 --- a/pyiron_base/generic/flattenedstorage.py +++ b/pyiron_base/generic/flattenedstorage.py @@ -339,6 +339,59 @@ def get_array(self, name, frame=None): else: raise KeyError(f"no array named {name}") + def get_array_ragged(self, name): + """ + Return elements of array `name` in all chunks. Values are returned in a ragged array of dtype=object. + + If `name` specifies a per chunk array, there's nothing to pad and this method is equivalent to + :meth:`.get_array`. + + Args: + name (str): name of array to fetch + + Returns: + numpy.ndarray, dtype=object: ragged arrray of all elements in all chunks + """ + if name in self._per_chunk_arrays: + return self._per_chunk_arrays[name].copy() + return np.array([self.get_array(name, i) for i in range(len(self))], + dtype=object) + + def get_array_filled(self, name): + """ + Return elements of array `name` in all chunks. Arrays are padded to be all of the same length. + + The padding value depends on the datatype of the array or can be configured via the `fill` parameter of + :meth:`.add_array`. + + If `name` specifies a per chunk array, there's nothing to pad and this method is equivalent to + :meth:`.get_array`. 
+ + Args: + name (str): name of array to fetch + + Returns: + numpy.ndarray: padded arrray of all elements in all chunks + """ + if name in self._per_chunk_arrays: + return self._per_chunk_arrays[name].copy() + values = self.get_array_ragged(name) + max_len = self._per_chunk_arrays["length"].max() + def resize_and_pad(v): + l = len(v) + v = np.resize(v, max_len) + if name in self._fill_values: + fill = self._fill_values[name] + else: + fill = {np.dtype("int32"): -1, + np.dtype("int64"): -1, + np.dtype("float32"): np.nan, + np.dtype("float64"): np.nan, + }[self._per_element_arrays[name].dtype] + v[l:] = fill + return v + return np.array([ resize_and_pad(v) for v in values ]) + def set_array(self, name, frame, value): """ Add array for given structure. diff --git a/tests/generic/test_flattenedstorage.py b/tests/generic/test_flattenedstorage.py index 89b375bc6..03f63734e 100644 --- a/tests/generic/test_flattenedstorage.py +++ b/tests/generic/test_flattenedstorage.py @@ -192,6 +192,33 @@ def test_get_array_full(self): self.assertTrue(np.array_equal(chunk, [-1, -2, -3]), f"get_array return did not return correct flat array, but {chunk}.") + def test_get_array_filled(self): + """get_array_filled should return a padded array of all elements in the storage.""" + + store = FlattenedStorage(elem=[ [1], [2, 3], [4, 5, 6] ], chunk=[-1, -2, -3]) + store.add_array("fill", fill=23.42) + store.set_array("fill", 0, [-1]) + store.set_array("fill", 1, [-2, -3]) + store.set_array("fill", 2, [-4, -5, -6]) + val = store.get_array_filled("elem") + self.assertEqual(val.shape, (3, 3), "shape not correct!") + self.assertTrue(np.array_equal(val, [[1, -1, -1], [2, 3, -1], [4, 5, 6]]), + "values in returned array not the same as in original array!") + self.assertEqual(store.get_array_filled("fill")[0, 1], 23.42, + "incorrect fill value!") + + def test_get_array_ragged(self): + """get_array_ragged should return a raggend array of all elements in the storage.""" + + store = 
FlattenedStorage(elem=[ [1], [2, 3], [4, 5, 6] ], chunk=[-1, -2, -3]) + val = store.get_array_ragged("elem") + self.assertEqual(val.shape, (3,), "shape not correct!") + for i, v in enumerate(val): + self.assertEqual(len(v), store._per_chunk_arrays["length"][i], + f"array {i} has incorrect length!") + self.assertTrue(np.array_equal(v, [[1], [2, 3], [4, 5, 6]][i]), + f"array {i} has incorrect values, {v}!") + def test_has_array(self): """hasarray should return correct information for added array; None otherwise.""" From 4c09aec1061c5c3c3534f7b2366ebd99bbe52a1d Mon Sep 17 00:00:00 2001 From: Marvin Poul Date: Thu, 21 Oct 2021 13:20:57 +0200 Subject: [PATCH 2/4] Fix resizing for arrays with shape!=() & more tests --- pyiron_base/generic/flattenedstorage.py | 8 +++++--- tests/generic/test_flattenedstorage.py | 19 +++++++++++++++++++ 2 files changed, 24 insertions(+), 3 deletions(-) diff --git a/pyiron_base/generic/flattenedstorage.py b/pyiron_base/generic/flattenedstorage.py index 141e29a3e..bce39bfec 100644 --- a/pyiron_base/generic/flattenedstorage.py +++ b/pyiron_base/generic/flattenedstorage.py @@ -353,7 +353,7 @@ def get_array_ragged(self, name): numpy.ndarray, dtype=object: ragged arrray of all elements in all chunks """ if name in self._per_chunk_arrays: - return self._per_chunk_arrays[name].copy() + return self.get_array(name) return np.array([self.get_array(name, i) for i in range(len(self))], dtype=object) @@ -374,12 +374,14 @@ def get_array_filled(self, name): numpy.ndarray: padded arrray of all elements in all chunks """ if name in self._per_chunk_arrays: - return self._per_chunk_arrays[name].copy() + return self.get_array(name) values = self.get_array_ragged(name) max_len = self._per_chunk_arrays["length"].max() def resize_and_pad(v): l = len(v) - v = np.resize(v, max_len) + per_shape = self._per_element_arrays[name].shape[1:] + v = np.resize(v, max_len * np.prod(per_shape, dtype=int)) + v = v.reshape((max_len,) + per_shape) if name in self._fill_values: 
fill = self._fill_values[name] else: diff --git a/tests/generic/test_flattenedstorage.py b/tests/generic/test_flattenedstorage.py index 03f63734e..2f4202a4b 100644 --- a/tests/generic/test_flattenedstorage.py +++ b/tests/generic/test_flattenedstorage.py @@ -200,12 +200,27 @@ def test_get_array_filled(self): store.set_array("fill", 0, [-1]) store.set_array("fill", 1, [-2, -3]) store.set_array("fill", 2, [-4, -5, -6]) + store.add_array("complex", shape=(3,), dtype=np.float64) + store.set_array("complex", 0, [ [1, 1, 1] ]) + store.set_array("complex", 1, [ [2, 2, 2], + [2, 2, 2], + ]) + store.set_array("complex", 2, [ [3, 3, 3], + [3, 3, 3], + [3, 3, 3], + ]) val = store.get_array_filled("elem") self.assertEqual(val.shape, (3, 3), "shape not correct!") self.assertTrue(np.array_equal(val, [[1, -1, -1], [2, 3, -1], [4, 5, 6]]), "values in returned array not the same as in original array!") self.assertEqual(store.get_array_filled("fill")[0, 1], 23.42, "incorrect fill value!") + val = store.get_array_filled("complex") + self.assertEqual(val.shape, (3, 3, 3), "shape not correct!") + self.assertTrue(np.array_equal( + store.get_array("chunk"), + store.get_array_filled("chunk"), + ), "get_array_filled does not give same result as get_array for per chunk array") def test_get_array_ragged(self): """get_array_ragged should return a raggend array of all elements in the storage.""" @@ -218,6 +233,10 @@ def test_get_array_ragged(self): f"array {i} has incorrect length!") self.assertTrue(np.array_equal(v, [[1], [2, 3], [4, 5, 6]][i]), f"array {i} has incorrect values, {v}!") + self.assertTrue(np.array_equal( + store.get_array("chunk"), + store.get_array_ragged("chunk"), + ), "get_array_ragged does not give same result as get_array for per chunk array") def test_has_array(self): """hasarray should return correct information for added array; None otherwise.""" From c0d37d9d709c5fa4d6d6745a61257c493de839a2 Mon Sep 17 00:00:00 2001 From: Marvin Poul Date: Fri, 22 Oct 2021 09:33:26 +0000 
Subject: [PATCH 3/4] Add type hints Co-authored-by: Sudarsan Surendralal --- pyiron_base/generic/flattenedstorage.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pyiron_base/generic/flattenedstorage.py b/pyiron_base/generic/flattenedstorage.py index bce39bfec..bb89731eb 100644 --- a/pyiron_base/generic/flattenedstorage.py +++ b/pyiron_base/generic/flattenedstorage.py @@ -339,7 +339,7 @@ def get_array(self, name, frame=None): else: raise KeyError(f"no array named {name}") - def get_array_ragged(self, name): + def get_array_ragged(self, name: str) -> np.ndarray: """ Return elements of array `name` in all chunks. Values are returned in a ragged array of dtype=object. @@ -357,7 +357,7 @@ def get_array_ragged(self, name): return np.array([self.get_array(name, i) for i in range(len(self))], dtype=object) - def get_array_filled(self, name): + def get_array_filled(self, name: str) -> np.ndarray: """ Return elements of array `name` in all chunks. Arrays are padded to be all of the same length. 
From 89dcb782fb4271ebfb61fef8261b385db8d573e9 Mon Sep 17 00:00:00 2001 From: Marvin Poul Date: Mon, 25 Oct 2021 10:53:23 +0200 Subject: [PATCH 4/4] Move default fill values to add_array Every array now has a default fill value associated --- pyiron_base/generic/flattenedstorage.py | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/pyiron_base/generic/flattenedstorage.py b/pyiron_base/generic/flattenedstorage.py index bb89731eb..372d4b43e 100644 --- a/pyiron_base/generic/flattenedstorage.py +++ b/pyiron_base/generic/flattenedstorage.py @@ -304,6 +304,16 @@ def add_array(self, name, shape=(), dtype=np.float64, fill=None, per="element"): store[name] = np.empty(shape=shape, dtype=dtype) else: store[name] = np.full(shape=shape, fill_value=fill, dtype=dtype) + + _default_fill_values = { + np.dtype("int32"): -1, + np.dtype("int64"): -1, + np.dtype("float32"): np.nan, + np.dtype("float64"): np.nan, + } + if fill is None and store[name].dtype in _default_fill_values: + fill = _default_fill_values[store[name].dtype] + if fill is not None: self._fill_values[name] = fill def get_array(self, name, frame=None): @@ -385,11 +395,7 @@ def resize_and_pad(v): if name in self._fill_values: fill = self._fill_values[name] else: - fill = {np.dtype("int32"): -1, - np.dtype("int64"): -1, - np.dtype("float32"): np.nan, - np.dtype("float64"): np.nan, - }[self._per_element_arrays[name].dtype] + fill = np.zeros(1, dtype=self._per_element_arrays[name].dtype)[0] v[l:] = fill return v return np.array([ resize_and_pad(v) for v in values ])