Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Operators work directly on raw arrays if most agents are active #843

Draft
wants to merge 2 commits into
base: main
Choose a base branch
from
Draft
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
115 changes: 98 additions & 17 deletions starsim/arrays.py
Original file line number Diff line number Diff line change
Expand Up @@ -233,12 +233,47 @@ def __setitem__(self, key, value):
self.raw[key] = value
return

def __gt__(self, other):
    """Evaluate ``self.values > other`` and wrap the resulting mask in a BoolArr."""
    mask = self.values > other
    return self.asnew(mask, cls=BoolArr)
def __lt__(self, other):
    """Evaluate ``self.values < other`` and wrap the resulting mask in a BoolArr."""
    mask = self.values < other
    return self.asnew(mask, cls=BoolArr)
def __ge__(self, other):
    """Evaluate ``self.values >= other`` and wrap the resulting mask in a BoolArr."""
    mask = self.values >= other
    return self.asnew(mask, cls=BoolArr)
def __le__(self, other):
    """Evaluate ``self.values <= other`` and wrap the resulting mask in a BoolArr."""
    mask = self.values <= other
    return self.asnew(mask, cls=BoolArr)
def __eq__(self, other):
    """Evaluate ``self.values == other`` and wrap the resulting mask in a BoolArr."""
    mask = self.values == other
    return self.asnew(mask, cls=BoolArr)
def __ne__(self, other):
    """Evaluate ``self.values != other`` and wrap the resulting mask in a BoolArr."""
    mask = self.values != other
    return self.asnew(mask, cls=BoolArr)
@property
def _use_raw(self):
    """Whether operators should work on the full raw array rather than the active values

    If most of the agents are active, then it's cheaper to perform logical operations on the
    inactive agents as well than it is to work out which array indices the result of those
    operations must be inserted into. Working on the raw array also removes the need to create
    an intermediate array, so there is a corresponding speedup there as well.

    This is a property because the operator methods read it as a plain attribute
    (``self._use_raw``). As an ordinary method, that reference would evaluate to a bound
    method, which is always truthy, so the threshold would never be applied.

    The current threshold of 0.5 is arbitrary and should eventually be adjusted based on a
    suite of use cases.

    Returns:
        bool: True if ``len(self)`` is more than half of ``len(self.raw)``; False otherwise,
        including when the raw array is empty.
    """
    n_raw = len(self.raw)
    if n_raw == 0:
        return False  # Nothing to gain on an empty array, and avoids dividing by zero
    return len(self) / n_raw > 0.5

def __gt__(self, other):
    """Elementwise ``>`` comparison returning a BoolArr

    When ``other`` is an Arr and ``self._use_raw`` is truthy, the two raw arrays are compared
    directly and the result is handed to ``asnew`` as the new raw array. In every other case
    ``self.values`` is compared against ``other`` and ``asnew`` receives the result as values.
    """
    # NOTE(review): `_use_raw` is read as an attribute, not called -- it has to be a property
    # for the active-fraction check to actually gate the raw path.
    if not (isinstance(other, Arr) and self._use_raw):
        return self.asnew(self.values > other, cls=BoolArr)
    return self.asnew(raw=self.raw > other.raw, cls=BoolArr)
def __lt__(self, other):
    """Elementwise ``<`` comparison returning a BoolArr

    When ``other`` is an Arr and ``self._use_raw`` is truthy, the two raw arrays are compared
    directly and the result is handed to ``asnew`` as the new raw array. In every other case
    ``self.values`` is compared against ``other`` and ``asnew`` receives the result as values.
    """
    # NOTE(review): `_use_raw` is read as an attribute, not called -- it has to be a property
    # for the active-fraction check to actually gate the raw path.
    if not (isinstance(other, Arr) and self._use_raw):
        return self.asnew(self.values < other, cls=BoolArr)
    return self.asnew(raw=self.raw < other.raw, cls=BoolArr)
def __ge__(self, other):
    """Elementwise ``>=`` comparison returning a BoolArr

    When ``other`` is an Arr and ``self._use_raw`` is truthy, the two raw arrays are compared
    directly and the result is handed to ``asnew`` as the new raw array. In every other case
    ``self.values`` is compared against ``other`` and ``asnew`` receives the result as values.
    """
    # NOTE(review): `_use_raw` is read as an attribute, not called -- it has to be a property
    # for the active-fraction check to actually gate the raw path.
    if not (isinstance(other, Arr) and self._use_raw):
        return self.asnew(self.values >= other, cls=BoolArr)
    return self.asnew(raw=self.raw >= other.raw, cls=BoolArr)
def __le__(self, other):
    """Elementwise ``<=`` comparison returning a BoolArr

    When ``other`` is an Arr and ``self._use_raw`` is truthy, the two raw arrays are compared
    directly and the result is handed to ``asnew`` as the new raw array. In every other case
    ``self.values`` is compared against ``other`` and ``asnew`` receives the result as values.
    """
    # NOTE(review): `_use_raw` is read as an attribute, not called -- it has to be a property
    # for the active-fraction check to actually gate the raw path.
    if not (isinstance(other, Arr) and self._use_raw):
        return self.asnew(self.values <= other, cls=BoolArr)
    return self.asnew(raw=self.raw <= other.raw, cls=BoolArr)
def __eq__(self, other):
    """Elementwise ``==`` comparison returning a BoolArr

    When ``other`` is an Arr and ``self._use_raw`` is truthy, the two raw arrays are compared
    directly and the result is handed to ``asnew`` as the new raw array. In every other case
    ``self.values`` is compared against ``other`` and ``asnew`` receives the result as values.
    """
    # NOTE(review): `_use_raw` is read as an attribute, not called -- it has to be a property
    # for the active-fraction check to actually gate the raw path.
    if not (isinstance(other, Arr) and self._use_raw):
        return self.asnew(self.values == other, cls=BoolArr)
    return self.asnew(raw=self.raw == other.raw, cls=BoolArr)
def __ne__(self, other):
    """Elementwise ``!=`` comparison returning a BoolArr

    When ``other`` is an Arr and ``self._use_raw`` is truthy, the two raw arrays are compared
    directly and the result is handed to ``asnew`` as the new raw array. In every other case
    ``self.values`` is compared against ``other`` and ``asnew`` receives the result as values.
    """
    # NOTE(review): `_use_raw` is read as an attribute, not called -- it has to be a property
    # for the active-fraction check to actually gate the raw path.
    if not (isinstance(other, Arr) and self._use_raw):
        return self.asnew(self.values != other, cls=BoolArr)
    return self.asnew(raw=self.raw != other.raw, cls=BoolArr)

def __and__(self, other):
    """Reject the ``&`` operator by raising BooleanOperationError for this array."""
    raise BooleanOperationError(self)
def __or__(self, other):
    """Reject the ``|`` operator by raising BooleanOperationError for this array."""
    raise BooleanOperationError(self)
Expand Down Expand Up @@ -333,21 +368,51 @@ def init_vals(self):
self.initialized = True
return

def asnew(self, arr=None, cls=None, name=None, raw=None):
    """
    Duplicate and copy (rather than link) data, optionally resetting the array

    The values in the new array are drawn from one of three options:

    - If a `raw` array is specified, it will be used for the new array directly (not copied)
    - Otherwise, if `arr` is specified, it will be used for the values in the new array
      (for active agents only)
    - Otherwise, a copy of the `raw` array for this Arr instance will be used

    Passing in a `raw` array directly is a performance optimization when the calling code has
    already generated a suitable array that does not need to be copied again. If both `raw`
    and `arr` are supplied, `raw` takes precedence and `arr` is ignored.

    Args:
        arr: The array to use for values, with length matching the number of active UIDs
        cls: The class to use for the new array (defaults to the class of this instance)
        name: The name to use for the new array (defaults to None rather than this instance's name)
        raw: The raw array to use for the new array

    Returns:
        A new instance of `cls` whose attributes are a shallow copy of this instance's
        attributes, except for `raw`, `dtype` and `name`.
    """
    if cls is None:
        cls = self.__class__

    if raw is None and arr is None:
        raw = self.raw.copy() # Copy so the new array does not share data with this one

    new = object.__new__(cls) # Create a new Arr instance
    new.__dict__ = self.__dict__.copy() # Copy pointers
    new.name = name # In most cases, the asnew Arr has different values to the original Arr so the original name no longer makes sense

    if raw is not None:
        new.raw = raw # Used as-is: the caller has already built a suitable array
    else:
        new.raw = np.empty(self.raw.shape, dtype=arr.dtype) # Fresh buffer, breaking reference
        new.raw[new.auids] = arr # Only the active entries are populated

    new.dtype = new.raw.dtype # Set to correct dtype
    return new

def true(self):
    """ Return the entries of ``auids`` whose corresponding value is truthy """
    truthy_mask = self.values.astype(bool)
    return self.auids[truthy_mask]

Expand Down Expand Up @@ -405,10 +470,26 @@ def __init__(self, name=None, nan=False, **kwargs): # No good NaN equivalent for
super().__init__(name=name, dtype=ss_bool, nan=nan, **kwargs)
return

def __and__(self, other):
    """Apply ``&`` between ``self.values`` and ``other`` and return the result via ``asnew``."""
    combined = self.values & other
    return self.asnew(combined)
def __or__(self, other):
    """Apply ``|`` between ``self.values`` and ``other`` and return the result via ``asnew``."""
    combined = self.values | other
    return self.asnew(combined)
def __xor__(self, other):
    """Apply ``^`` between ``self.values`` and ``other`` and return the result via ``asnew``."""
    combined = self.values ^ other
    return self.asnew(combined)
def __invert__(self):
    """Apply ``~`` to ``self.values`` and return the result via ``asnew``."""
    flipped = ~self.values
    return self.asnew(flipped)
def __and__(self, other):
    """Elementwise ``&`` returning a new array via ``asnew``

    When ``other`` is an Arr and ``self._use_raw`` is truthy, the two raw arrays are combined
    directly and the result is handed to ``asnew`` as the new raw array. In every other case
    ``self.values`` is combined with ``other`` and ``asnew`` receives the result as values.
    """
    # NOTE(review): `_use_raw` is read as an attribute, not called -- it has to be a property
    # for the active-fraction check to actually gate the raw path.
    if not (isinstance(other, Arr) and self._use_raw):
        return self.asnew(self.values & other)
    return self.asnew(raw=self.raw & other.raw)
def __or__(self, other):
    """Elementwise ``|`` returning a new array via ``asnew``

    When ``other`` is an Arr and ``self._use_raw`` is truthy, the two raw arrays are combined
    directly and the result is handed to ``asnew`` as the new raw array. In every other case
    ``self.values`` is combined with ``other`` and ``asnew`` receives the result as values.
    """
    # NOTE(review): `_use_raw` is read as an attribute, not called -- it has to be a property
    # for the active-fraction check to actually gate the raw path.
    if not (isinstance(other, Arr) and self._use_raw):
        return self.asnew(self.values | other)
    return self.asnew(raw=self.raw | other.raw)
def __xor__(self, other):
    """Elementwise ``^`` returning a new array via ``asnew``

    When ``other`` is an Arr and ``self._use_raw`` is truthy, the two raw arrays are combined
    directly and the result is handed to ``asnew`` as the new raw array. In every other case
    ``self.values`` is combined with ``other`` and ``asnew`` receives the result as values.
    """
    # NOTE(review): `_use_raw` is read as an attribute, not called -- it has to be a property
    # for the active-fraction check to actually gate the raw path.
    if not (isinstance(other, Arr) and self._use_raw):
        return self.asnew(self.values ^ other)
    return self.asnew(raw=self.raw ^ other.raw)
def __invert__(self):
    """Elementwise ``~`` returning a new array via ``asnew``

    When ``self._use_raw`` is truthy the whole raw array is inverted and handed to ``asnew``
    as the new raw array; otherwise only ``self.values`` is inverted and passed as values.
    """
    # NOTE(review): `_use_raw` is read as an attribute, not called -- it has to be a property
    # for the active-fraction check to actually gate the raw path.
    if not self._use_raw:
        return self.asnew(~self.values)
    return self.asnew(raw=~self.raw)

# BoolArr cannot store NaNs so report all entries as being not-NaN
@property
Expand Down
Loading