Skip to content

Commit

Permalink
Add methods to get padded and ragged arrays to FlattenedStorage
Browse files Browse the repository at this point in the history
  • Loading branch information
pmrv committed Oct 21, 2021
1 parent 4b5d0e2 commit 81fd14c
Show file tree
Hide file tree
Showing 2 changed files with 80 additions and 0 deletions.
53 changes: 53 additions & 0 deletions pyiron_base/generic/flattenedstorage.py
Original file line number Diff line number Diff line change
Expand Up @@ -339,6 +339,59 @@ def get_array(self, name, frame=None):
else:
raise KeyError(f"no array named {name}")

def get_array_ragged(self, name):
    """
    Return elements of array `name` in all chunks as a ragged array.

    The result holds one entry per chunk, each entry being whatever :meth:`.get_array` returns for that chunk.
    If `name` specifies a per chunk array, there is nothing to split up and a copy of that array is returned
    instead.

    Args:
        name (str): name of array to fetch

    Returns:
        numpy.ndarray, dtype=object: one dimensional ragged array with one entry per chunk; for per chunk arrays
                                     a copy of the stored array

    Raises:
        KeyError: propagated from :meth:`.get_array` if it does not know `name`
    """
    if name in self._per_chunk_arrays:
        return self._per_chunk_arrays[name].copy()

    # Do not build the result with np.array(list_of_arrays, dtype=object): whenever all chunks happen to have the
    # same length (or there is only a single chunk) numpy creates a dense N-d object array instead of a 1d array
    # of arrays, so the shape of the result would depend on the data.  Filling a pre-allocated object array slot
    # by slot always gives shape (n_chunks,).
    n_chunks = len(self)
    ragged = np.empty(n_chunks, dtype=object)
    for i in range(n_chunks):
        ragged[i] = self.get_array(name, i)
    return ragged

def get_array_filled(self, name):
    """
    Return elements of array `name` in all chunks. Arrays are padded to be all of the same length.

    The padding value is the one registered for `name` in `self._fill_values` (configured via the `fill`
    parameter of :meth:`.add_array`); otherwise it depends on the datatype of the array: -1 for signed integers
    and NaN for floats.

    If `name` specifies a per chunk array, there's nothing to pad and a copy of that array is returned.

    Args:
        name (str): name of array to fetch

    Returns:
        numpy.ndarray: padded array of all elements in all chunks; the first axis runs over chunks, the second
                       over the (padded) elements of each chunk, any further axes are those of a single element

    Raises:
        KeyError: if no fill value was configured for `name` and its dtype is neither a signed integer nor a
                  float type
    """
    if name in self._per_chunk_arrays:
        return self._per_chunk_arrays[name].copy()

    elements = self._per_element_arrays[name]
    dtype = elements.dtype

    # The fill value is the same for every chunk, so resolve it once up front.
    if name in self._fill_values:
        fill = self._fill_values[name]
    elif dtype.kind == "i":
        fill = -1
    elif dtype.kind == "f":
        fill = np.nan
    else:
        raise KeyError(f"no default fill value for dtype {dtype} of array {name}; "
                       "pass `fill` when adding the array")

    values = self.get_array_ragged(name)
    lengths = self._per_chunk_arrays["length"]
    # max() of an empty array raises; an empty storage simply has nothing to pad
    max_len = lengths.max() if lengths.size > 0 else 0

    # Allocate the padded result in one go instead of np.resize-ing every chunk: np.resize flattens its input,
    # which garbles arrays that have more than one value per element (e.g. shape (n, 3)).
    # NOTE(review): assumes the first axis of the per element array runs over elements -- confirm.
    filled = np.empty((len(values), max_len) + elements.shape[1:], dtype=dtype)
    filled[...] = fill
    for i, chunk in enumerate(values):
        filled[i, :len(chunk)] = chunk
    return filled

def set_array(self, name, frame, value):
"""
Add array for given structure.
Expand Down
27 changes: 27 additions & 0 deletions tests/generic/test_flattenedstorage.py
Original file line number Diff line number Diff line change
Expand Up @@ -192,6 +192,33 @@ def test_get_array_full(self):
self.assertTrue(np.array_equal(chunk, [-1, -2, -3]),
f"get_array return did not return correct flat array, but {chunk}.")

def test_get_array_filled(self):
    """Padded retrieval must give one equally long row per chunk and honour the configured fill value."""

    storage = FlattenedStorage(elem=[ [1], [2, 3], [4, 5, 6] ], chunk=[-1, -2, -3])

    # second per element array with an explicit, non-default padding value
    storage.add_array("fill", fill=23.42)
    for frame, data in enumerate(([-1], [-2, -3], [-4, -5, -6])):
        storage.set_array("fill", frame, data)

    # integer array without explicit fill: shorter chunks are padded with -1
    expected = [[1, -1, -1], [2, 3, -1], [4, 5, 6]]
    padded = storage.get_array_filled("elem")
    self.assertEqual(padded.shape, (3, 3), "shape not correct!")
    self.assertTrue(np.array_equal(padded, expected),
                    "values in returned array not the same as in original array!")

    # first chunk has a single element, so index [0, 1] is the first padded slot
    padded_custom = storage.get_array_filled("fill")
    self.assertEqual(padded_custom[0, 1], 23.42,
                     "incorrect fill value!")

def test_get_array_ragged(self):
    """get_array_ragged should return a ragged array with one entry per chunk in the storage."""

    expected = [[1], [2, 3], [4, 5, 6]]
    storage = FlattenedStorage(elem=[ [1], [2, 3], [4, 5, 6] ], chunk=[-1, -2, -3])

    ragged = storage.get_array_ragged("elem")
    self.assertEqual(ragged.shape, (3,), "shape not correct!")

    # every entry must match both the recorded chunk length and the data it was created from
    chunk_lengths = storage._per_chunk_arrays["length"]
    for i, (v, wanted) in enumerate(zip(ragged, expected)):
        self.assertEqual(len(v), chunk_lengths[i],
                         f"array {i} has incorrect length!")
        self.assertTrue(np.array_equal(v, wanted),
                        f"array {i} has incorrect values, {v}!")

def test_has_array(self):
"""hasarray should return correct information for added array; None otherwise."""

Expand Down

0 comments on commit 81fd14c

Please sign in to comment.