Skip to content

Commit

Permalink
Fix bug in `visium_hd` reader when the feature slice file has no `dataset_id` prefix
Browse files Browse the repository at this point in the history
  • Loading branch information
LucaMarconato committed Feb 3, 2025
1 parent ca8b9aa commit 70f5060
Show file tree
Hide file tree
Showing 2 changed files with 29 additions and 7 deletions.
10 changes: 9 additions & 1 deletion src/spatialdata_io/readers/visium.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ def visium(
fullres_image_file: str | Path | None = None,
tissue_positions_file: str | Path | None = None,
scalefactors_file: str | Path | None = None,
var_names_make_unique: bool = True,
imread_kwargs: Mapping[str, Any] = MappingProxyType({}),
image_models_kwargs: Mapping[str, Any] = MappingProxyType({}),
**kwargs: Any,
Expand Down Expand Up @@ -72,6 +73,8 @@ def visium(
Path to the tissue positions file.
scalefactors_file
Path to the scalefactors file.
var_names_make_unique
If `True`, call `.var_names_make_unique()` on each `AnnData` table.
imread_kwargs
Keyword arguments passed to :func:`dask_image.imread.imread`.
image_models_kwargs
Expand Down Expand Up @@ -114,7 +117,10 @@ def visium(
assert counts_file is not None

if library_id is None and dataset_id is None:
raise ValueError("Cannot determine the `library_id`. Please provide `dataset_id`.")
raise ValueError(
"Cannot determine the `library_id`. Please provide `dataset_id`; the `dataset_id` value will be used to "
"name the elements in the `SpatialData` object."
)

if dataset_id is not None:
if dataset_id != library_id and library_id is not None:
Expand Down Expand Up @@ -210,6 +216,8 @@ def visium(
shapes[dataset_id] = circles
adata.obs["region"] = dataset_id
table = TableModel.parse(adata, region=dataset_id, region_key="region", instance_key="spot_id")
if var_names_make_unique:
table.var_names_make_unique()

images = {}
if fullres_image_file is not None:
Expand Down
26 changes: 20 additions & 6 deletions src/spatialdata_io/readers/visium_hd.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ def visium_hd(
annotate_table_by_labels: bool = False,
fullres_image_file: str | Path | None = None,
load_all_images: bool = False,
var_names_make_unique: bool = True,
imread_kwargs: Mapping[str, Any] = MappingProxyType({}),
image_models_kwargs: Mapping[str, Any] = MappingProxyType({}),
anndata_kwargs: Mapping[str, Any] = MappingProxyType({}),
Expand All @@ -61,7 +62,8 @@ def visium_hd(
path
Path to directory containing the *10x Genomics* Visium HD output.
dataset_id
Unique identifier of the dataset. If `None`, it tries to infer it from the file name of the feature slice file.
Unique identifier of the dataset, used to name the elements of the `SpatialData` object. If `None`, it tries to
infer it from the file name of the feature slice file.
filtered_counts_file
It sets the value of `counts_file` to ``{vx.FILTERED_COUNTS_FILE!r}`` (when `True`) or to
``{vx.RAW_COUNTS_FILE!r}`` (when `False`).
Expand All @@ -80,6 +82,8 @@ def visium_hd(
load_all_images
If `False`, load only the full resolution, high resolution and low resolution images. If `True`, also the
following images: ``{vx.IMAGE_CYTASSIST!r}``.
var_names_make_unique
If `True`, call `.var_names_make_unique()` on each `AnnData` table.
imread_kwargs
Keyword arguments for :func:`imageio.imread`.
image_models_kwargs
Expand All @@ -101,10 +105,7 @@ def visium_hd(
if dataset_id is None:
dataset_id = _infer_dataset_id(path)

if dataset_id == "":
filename_prefix = ""
else:
filename_prefix = f"{dataset_id}_"
filename_prefix = _get_filename_prefix(path, dataset_id)

def load_image(path: Path, suffix: str, scale_factors: list[int] | None = None) -> None:
_load_image(
Expand Down Expand Up @@ -269,6 +270,8 @@ def _get_bins(path_bins: Path) -> list[str]:
region_key=str(VisiumHDKeys.REGION_KEY),
instance_key=str(VisiumHDKeys.INSTANCE_KEY),
)
if var_names_make_unique:
tables[bin_size_str].var_names_make_unique()

# read full resolution image
if fullres_image_file is not None:
Expand Down Expand Up @@ -392,7 +395,8 @@ def _infer_dataset_id(path: Path) -> str:
files = [file.name for file in path.iterdir() if file.is_file() and file.name.endswith(suffix)]
if len(files) == 0 or len(files) > 1:
raise ValueError(
f"Cannot infer `dataset_id` from the feature slice file in {path}, please pass `dataset_id` as an argument."
f"Cannot infer `dataset_id` from the feature slice file in {path}, please pass `dataset_id` as an "
f"argument. The `dataset_id` value will be used to name the elements in the `SpatialData` object."
)
return files[0].replace(suffix, "")

Expand Down Expand Up @@ -444,6 +448,16 @@ def _get_affine(coefficients: list[int]) -> Affine:
return Affine(matrix, input_axes=("x", "y"), output_axes=("x", "y"))


def _get_filename_prefix(path: Path, dataset_id: str) -> str:
    """Determine the filename prefix used by the Visium HD output files.

    10x Genomics outputs may name the feature slice file either
    ``{dataset_id}_<feature_slice_file>`` or plain ``<feature_slice_file>``
    (no dataset prefix). Probe for the prefixed variant first and fall back
    to the unprefixed one.

    Parameters
    ----------
    path
        Directory containing the Visium HD output.
    dataset_id
        Dataset identifier to try as a filename prefix.

    Returns
    -------
    ``f"{dataset_id}_"`` if the prefixed feature slice file exists, otherwise ``""``.

    Raises
    ------
    AssertionError
        If neither the prefixed nor the unprefixed feature slice file is found.
    """
    if (path / f"{dataset_id}_{VisiumHDKeys.FEATURE_SLICE_FILE.value}").exists():
        return f"{dataset_id}_"
    # NOTE(review): `assert` is stripped under `python -O`; a raised
    # FileNotFoundError would be more robust, but the AssertionError type is
    # kept for backward compatibility with existing callers.
    assert (path / VisiumHDKeys.FEATURE_SLICE_FILE.value).exists(), (
        # bug fix: the two literals previously joined as "adjustthe" (missing space)
        f"Cannot locate the feature slice file, please ensure the file is present in the {path} directory and/or "
        "adjust the `dataset_id` parameter"
    )
    return ""


def _parse_metadata(path: Path, filename_prefix: str) -> tuple[dict[str, Any], dict[str, Any]]:
with h5py.File(path / f"{filename_prefix}{VisiumHDKeys.FEATURE_SLICE_FILE.value}", "r") as f5:
metadata = json.loads(dict(f5.attrs)[VisiumHDKeys.METADATA_JSON])
Expand Down

0 comments on commit 70f5060

Please sign in to comment.