Skip to content

Commit

Permalink
Cleanup
Browse files Browse the repository at this point in the history
* move functions from `helper` that were only used in `inhibitory_neuron_densities_optimization`
* removed unused `with_descendants` argument from `_compute_region_cell_counts`
* made `compute_region_volumes` faster
  • Loading branch information
mgeplf committed Mar 1, 2024
1 parent 6e76e8c commit aeec00b
Show file tree
Hide file tree
Showing 6 changed files with 213 additions and 242 deletions.
123 changes: 1 addition & 122 deletions atlas_densities/densities/inhibitory_neuron_densities_helper.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,16 +2,12 @@
"""

import logging
from typing import List, Optional, Set, Tuple
from typing import List

import numpy as np
import pandas as pd
from atlas_commons.typing import FloatArray

from atlas_densities.exceptions import AtlasDensitiesError

L = logging.getLogger(__name__)
MinMaxPair = Tuple[float, Optional[float]]


def average_densities_to_cell_counts(
Expand All @@ -34,11 +30,6 @@ def average_densities_to_cell_counts(
:func:`atlas_densities.densities.utils.compute_region_volumes`.
The volumes to be used are stored in the "volume" column.
Note: Volumes can refer to volumes of entire regions, including all descendants
subregions, or volumes of single region identifiers, depending on the value of the
option `with_descendants` used when creating `region_volumes`.
Returns:
data frame containing the cell counts of brain regions and their associated standard
deviations. The data frame index is `average_densities.index` (brain region names) and
Expand All @@ -54,35 +45,6 @@ def average_densities_to_cell_counts(
return result


def resize_average_densities(
average_densities: pd.DataFrame, hierarchy_info: pd.DataFrame
) -> pd.DataFrame:
"""
Resize the `average_densities` data frame in such a way that the resized index coincides with
`hierarchy_info["brain_region"]`.
Missing entries are set to ``np.nan``.
average_densities: a data frame whose columns are described in
:func:`atlas_densities.densities.fitting.linear_fitting` containing the average
cell densities of brain regions and their associated standard deviations. Columns are
labelled by T and T_standard_deviation for various cell types T. The index of
`average_densities` is a list of region names.
hierarchy_info: data frame returned by
:func:`atlas_densities.densities.utils.get_hierarchy_info`.
Returns: a data frame containing all the entries of `average_densities` and whose index is
`hierarchy_info["brain_region"]`. New entries are set to ``np.nan``.
"""
resized = pd.DataFrame(
np.nan, index=hierarchy_info["brain_region"], columns=average_densities.columns
)
resized.loc[average_densities.index] = average_densities

return resized


def get_cell_types(data_frame: pd.DataFrame) -> List[str]:
"""
Extract cell types from column labels.
Expand All @@ -96,86 +58,3 @@ def get_cell_types(data_frame: pd.DataFrame) -> List[str]:
"""

return sorted({column.replace("_standard_deviation", "") for column in data_frame.columns})


def check_region_counts_consistency(
region_counts: pd.DataFrame, hierarchy_info: pd.DataFrame, tolerance: float = 0.0
) -> None:
"""
Check that cell counts considered as certain are consistent across the brain regions hierarchy.
Args:
region_counts: data frame returned by
:fun:`densities.inhibitory_densities_helper.average_densities_to_cell_counts`.
A region is understood as a region of the brain hierarchy and includes all descendant
subregions.
hierarchy_info: data frame returned by
:func:`atlas_densities.densities.utils.get_hierarchy_info`.
tolerance: non-negative float number used as tolerance when comparing counts.
Defaults to 0.0.
Raises:
AtlasDensitiesError if the sum of the cell counts of some leaf regions, all given with
certainty, exceeds the cell count of an ancestor, also given with certainty.
"""

def _check_descendants_consistency(
region_counts, hierarchy_info, region_name: str, id_set: Set[int], cell_type: str
):
if region_counts.at[region_name, f"{cell_type}_standard_deviation"] == 0.0:
count = region_counts.at[region_name, cell_type]
descendants_names = hierarchy_info.loc[list(id_set), "brain_region"]
zero_std_mask = (
region_counts.loc[descendants_names, f"{cell_type}_standard_deviation"] == 0.0
)
mask = zero_std_mask & (
region_counts.loc[descendants_names, cell_type] > count + tolerance
)
descendants_counts = region_counts.loc[descendants_names, cell_type][mask]
if not descendants_counts.empty:
raise AtlasDensitiesError(
f"Counts of {cell_type} cells in regions {list(descendants_names)} all exceed "
f"the count of its ancestor region {region_name} and each count is given "
f"for certain. The counts {descendants_counts} are all larger than "
f"{count}."
)
names_with_certainty = descendants_names[zero_std_mask.to_list()].to_list()
leaf_names = [
hierarchy_info.at[id_, "brain_region"]
for id_ in id_set
if len(hierarchy_info.at[id_, "descendant_id_set"]) == 1
and hierarchy_info.at[id_, "brain_region"] in names_with_certainty
]
leaves_count_sum = np.sum(region_counts.loc[leaf_names, cell_type])
if leaves_count_sum > count + tolerance:
raise AtlasDensitiesError(
f"The sum of the counts of {cell_type} cells in leaf regions",
f" which are given with certainty, exceeds the count of the ancestor"
f" region {region_name}, also given with certainty: "
f"{leaves_count_sum} > {count}.",
)

cell_types = get_cell_types(region_counts)
for region_name, id_set in zip(
hierarchy_info["brain_region"], hierarchy_info["descendant_id_set"]
):
for cell_type in cell_types:
_check_descendants_consistency(
region_counts, hierarchy_info, region_name, id_set, cell_type
)


def replace_inf_with_none(bounds: FloatArray) -> List[MinMaxPair]:
"""
Replace the upper bounds equal to ``np.inf`` by None so as to comply with
the `bounds` interface of scipy.optimize.linprog.
Args:
bounds: float array of shape (N, 2). Values in `bounds[..., 1]` can be
``np.inf`` to indicate the absence of a constraining upper bound.
Return:
list of pairs (min, max) where min is a float and max either a float or None.
"""
return [(float(min_), None if np.isinf(max_) else float(max_)) for min_, max_ in bounds]
139 changes: 119 additions & 20 deletions atlas_densities/densities/inhibitory_neuron_densities_optimization.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,10 +36,11 @@
variable deltas used in this module corresponds to the decision variables ``delta_{r, m}`` of
the pdf file.
"""
from __future__ import annotations

import logging
import warnings
from typing import Dict, List, Tuple
from typing import Dict, List, Optional, Set, Tuple

import numpy as np
import pandas as pd
Expand All @@ -51,25 +52,134 @@
from atlas_densities.densities import utils
from atlas_densities.densities.inhibitory_neuron_densities_helper import (
average_densities_to_cell_counts,
check_region_counts_consistency,
get_cell_types,
replace_inf_with_none,
resize_average_densities,
)
from atlas_densities.exceptions import AtlasDensitiesError, AtlasDensitiesWarning

L = logging.getLogger(__name__)

SKIP = np.inf # Used to signify that a delta variable of the linear program should be removed
KEEP = np.nan # Used to signify that a delta variable should be added to the linear program
MinMaxPair = Tuple[float, Optional[float]]


def _resize_average_densities(
average_densities: pd.DataFrame, hierarchy_info: pd.DataFrame
) -> pd.DataFrame:
"""
Resize the `average_densities` data frame in such a way that the resized index coincides with
`hierarchy_info["brain_region"]`.
Missing entries are set to ``np.nan``.
average_densities: a data frame whose columns are described in
:func:`atlas_densities.densities.fitting.linear_fitting` containing the average
cell densities of brain regions and their associated standard deviations. Columns are
labelled by T and T_standard_deviation for various cell types T. The index of
`average_densities` is a list of region names.
hierarchy_info: data frame returned by
:func:`atlas_densities.densities.utils.get_hierarchy_info`.
Returns: a data frame containing all the entries of `average_densities` and whose index is
`hierarchy_info["brain_region"]`. New entries are set to ``np.nan``.
"""
resized = pd.DataFrame(
np.nan, index=hierarchy_info["brain_region"], columns=average_densities.columns
)
resized.loc[average_densities.index] = average_densities

return resized


def _check_region_counts_consistency(
region_counts: pd.DataFrame, hierarchy_info: pd.DataFrame, tolerance: float = 0.0
) -> None:
"""
Check that cell counts considered as certain are consistent across the brain regions hierarchy.
Args:
region_counts: data frame returned by
:fun:`densities.inhibitory_densities_helper.average_densities_to_cell_counts`.
A region is understood as a region of the brain hierarchy and includes all descendant
subregions.
hierarchy_info: data frame returned by
:func:`atlas_densities.densities.utils.get_hierarchy_info`.
tolerance: non-negative float number used as tolerance when comparing counts.
Defaults to 0.0.
Raises:
AtlasDensitiesError if the sum of the cell counts of some leaf regions, all given with
certainty, exceeds the cell count of an ancestor, also given with certainty.
"""

def _check_descendants_consistency(
region_counts, hierarchy_info, region_name: str, id_set: Set[int], cell_type: str
):
if region_counts.at[region_name, f"{cell_type}_standard_deviation"] == 0.0:
count = region_counts.at[region_name, cell_type]
descendants_names = hierarchy_info.loc[list(id_set), "brain_region"]
zero_std_mask = (
region_counts.loc[descendants_names, f"{cell_type}_standard_deviation"] == 0.0
)
mask = zero_std_mask & (
region_counts.loc[descendants_names, cell_type] > count + tolerance
)
descendants_counts = region_counts.loc[descendants_names, cell_type][mask]
if not descendants_counts.empty:
raise AtlasDensitiesError(
f"Counts of {cell_type} cells in regions {list(descendants_names)} all exceed "
f"the count of its ancestor region {region_name} and each count is given "
f"for certain. The counts {descendants_counts} are all larger than "
f"{count}."
)
names_with_certainty = descendants_names[zero_std_mask.to_list()].to_list()
leaf_names = [
hierarchy_info.at[id_, "brain_region"]
for id_ in id_set
if len(hierarchy_info.at[id_, "descendant_id_set"]) == 1
and hierarchy_info.at[id_, "brain_region"] in names_with_certainty
]
leaves_count_sum = np.sum(region_counts.loc[leaf_names, cell_type])
if leaves_count_sum > count + tolerance:
raise AtlasDensitiesError(
f"The sum of the counts of {cell_type} cells in leaf regions",
f" which are given with certainty, exceeds the count of the ancestor"
f" region {region_name}, also given with certainty: "
f"{leaves_count_sum} > {count}.",
)

cell_types = get_cell_types(region_counts)
for region_name, id_set in zip(
hierarchy_info["brain_region"], hierarchy_info["descendant_id_set"]
):
for cell_type in cell_types:
_check_descendants_consistency(
region_counts, hierarchy_info, region_name, id_set, cell_type
)


def _replace_inf_with_none(bounds: FloatArray) -> List[MinMaxPair]:
"""
Replace the upper bounds equal to ``np.inf`` by None so as to comply with
the `bounds` interface of scipy.optimize.linprog.
Args:
bounds: float array of shape (N, 2). Values in `bounds[..., 1]` can be
``np.inf`` to indicate the absence of a constraining upper bound.
Return:
list of pairs (min, max) where min is a float and max either a float or None.
"""
return [(float(min_), None if np.isinf(max_) else float(max_)) for min_, max_ in bounds]


def _compute_region_cell_counts(
annotation: AnnotationT,
density: FloatArray,
voxel_volume: float,
hierarchy_info: "pd.DataFrame",
with_descendants: bool = True,
) -> "pd.DataFrame":
"""
Compute the cell count of every 3D region in `annotation` labeled by a unique
Expand All @@ -85,10 +195,6 @@ def _compute_region_cell_counts(
This is (25 * 1e-6) ** 3 for an AIBS atlas nrrd file with 25um resolution.
hierarchy_info: data frame returned by
:func:`atlas_densities.densities.utils.get_hierarchy_info`.
with_descendants: if True, a computed cell count refers to the entire 3D region
designated by a region name, including all descendants subregions. Otherwise, the
computed cell count is the cell count of the 3D region labeled by the unique region
identifier. Defaults to True.
Returns:
DataFrame of the following form (values are fake):
Expand All @@ -108,13 +214,6 @@ def _compute_region_cell_counts(
index=hierarchy_info.index,
)

if with_descendants:
counts = []
for id_set in tqdm(hierarchy_info["descendant_id_set"]):
count = result.loc[list(id_set), "cell_count"].sum()
counts.append(count)
result["cell_count"] = counts

return result


Expand Down Expand Up @@ -492,7 +591,7 @@ def _compute_initial_cell_counts(

# Detect cell counts inconsistency between a region and its descendants when
# estimates are deemed as certain.
check_region_counts_consistency(region_counts, hierarchy_info)
_check_region_counts_consistency(region_counts, hierarchy_info)

L.info("Computing cell count estimates in atomic 3D region ...")
volumes.drop("volume", axis=1, inplace=True)
Expand Down Expand Up @@ -662,7 +761,7 @@ def create_inhibitory_neuron_densities( # pylint: disable=too-many-locals
"""

hierarchy_info = utils.get_hierarchy_info(RegionMap.from_dict(hierarchy), root=region_name)
average_densities = resize_average_densities(average_densities, hierarchy_info)
average_densities = _resize_average_densities(average_densities, hierarchy_info)

L.info("Initialization of the linear program: started")
region_counts, id_counts = _compute_initial_cell_counts(
Expand All @@ -671,7 +770,7 @@ def create_inhibitory_neuron_densities( # pylint: disable=too-many-locals

L.info("Retrieving overall neuron counts in atomic 3D regions ...")
neuron_counts = _compute_region_cell_counts(
annotation, neuron_density, voxel_volume, hierarchy_info, with_descendants=False
annotation, neuron_density, voxel_volume, hierarchy_info
)
assert np.all(neuron_counts["cell_count"] >= 0.0)

Expand Down Expand Up @@ -712,7 +811,7 @@ def create_inhibitory_neuron_densities( # pylint: disable=too-many-locals
c=c_row,
A_ub=a_ub,
b_ub=b_ub,
bounds=replace_inf_with_none(bounds),
bounds=_replace_inf_with_none(bounds),
method="highs",
)
if not result.success:
Expand Down
Loading

0 comments on commit aeec00b

Please sign in to comment.