diff --git a/src/spatialdata_io/readers/visium.py b/src/spatialdata_io/readers/visium.py
index 19bc740a..4bcdf5ab 100644
--- a/src/spatialdata_io/readers/visium.py
+++ b/src/spatialdata_io/readers/visium.py
@@ -33,6 +33,7 @@ def visium(
     fullres_image_file: str | Path | None = None,
     tissue_positions_file: str | Path | None = None,
     scalefactors_file: str | Path | None = None,
+    var_names_make_unique: bool = True,
     imread_kwargs: Mapping[str, Any] = MappingProxyType({}),
     image_models_kwargs: Mapping[str, Any] = MappingProxyType({}),
     **kwargs: Any,
@@ -72,6 +73,8 @@ def visium(
         Path to the tissue positions file.
     scalefactors_file
         Path to the scalefactors file.
+    var_names_make_unique
+        If `True`, call `.var_names_make_unique()` on each `AnnData` table.
     imread_kwargs
         Keyword arguments passed to :func:`dask_image.imread.imread`.
     image_models_kwargs
@@ -114,7 +117,10 @@ def visium(
     assert counts_file is not None
 
     if library_id is None and dataset_id is None:
-        raise ValueError("Cannot determine the `library_id`. Please provide `dataset_id`.")
+        raise ValueError(
+            "Cannot determine the `library_id`. Please provide `dataset_id`; the `dataset_id` value will be used to "
+            "name the elements in the `SpatialData` object."
+        )
 
     if dataset_id is not None:
         if dataset_id != library_id and library_id is not None:
@@ -210,6 +216,8 @@ def visium(
     shapes[dataset_id] = circles
     adata.obs["region"] = dataset_id
     table = TableModel.parse(adata, region=dataset_id, region_key="region", instance_key="spot_id")
+    if var_names_make_unique:
+        table.var_names_make_unique()
 
     images = {}
     if fullres_image_file is not None:
diff --git a/src/spatialdata_io/readers/visium_hd.py b/src/spatialdata_io/readers/visium_hd.py
index bef200ef..b15e3816 100644
--- a/src/spatialdata_io/readers/visium_hd.py
+++ b/src/spatialdata_io/readers/visium_hd.py
@@ -45,6 +45,7 @@ def visium_hd(
     annotate_table_by_labels: bool = False,
     fullres_image_file: str | Path | None = None,
     load_all_images: bool = False,
+    var_names_make_unique: bool = True,
     imread_kwargs: Mapping[str, Any] = MappingProxyType({}),
     image_models_kwargs: Mapping[str, Any] = MappingProxyType({}),
     anndata_kwargs: Mapping[str, Any] = MappingProxyType({}),
@@ -61,7 +62,8 @@ def visium_hd(
     path
         Path to directory containing the *10x Genomics* Visium HD output.
     dataset_id
-        Unique identifier of the dataset. If `None`, it tries to infer it from the file name of the feature slice file.
+        Unique identifier of the dataset, used to name the elements of the `SpatialData` object. If `None`, it tries to
+        infer it from the file name of the feature slice file.
     filtered_counts_file
         It sets the value of `counts_file` to ``{vx.FILTERED_COUNTS_FILE!r}`` (when `True`) or to
         ``{vx.RAW_COUNTS_FILE!r}`` (when `False`).
@@ -80,6 +82,8 @@ def visium_hd(
     load_all_images
         If `False`, load only the full resolution, high resolution and low resolution images.
         If `True`, also the following images: ``{vx.IMAGE_CYTASSIST!r}``.
+    var_names_make_unique
+        If `True`, call `.var_names_make_unique()` on each `AnnData` table.
     imread_kwargs
         Keyword arguments for :func:`imageio.imread`.
     image_models_kwargs
@@ -101,10 +105,7 @@ def visium_hd(
     if dataset_id is None:
         dataset_id = _infer_dataset_id(path)
 
-    if dataset_id == "":
-        filename_prefix = ""
-    else:
-        filename_prefix = f"{dataset_id}_"
+    filename_prefix = _get_filename_prefix(path, dataset_id)
 
     def load_image(path: Path, suffix: str, scale_factors: list[int] | None = None) -> None:
         _load_image(
@@ -269,6 +270,8 @@ def _get_bins(path_bins: Path) -> list[str]:
             region_key=str(VisiumHDKeys.REGION_KEY),
             instance_key=str(VisiumHDKeys.INSTANCE_KEY),
         )
+        if var_names_make_unique:
+            tables[bin_size_str].var_names_make_unique()
 
     # read full resolution image
     if fullres_image_file is not None:
@@ -392,7 +395,8 @@ def _infer_dataset_id(path: Path) -> str:
     files = [file.name for file in path.iterdir() if file.is_file() and file.name.endswith(suffix)]
     if len(files) == 0 or len(files) > 1:
         raise ValueError(
-            f"Cannot infer `dataset_id` from the feature slice file in {path}, please pass `dataset_id` as an argument."
+            f"Cannot infer `dataset_id` from the feature slice file in {path}, please pass `dataset_id` as an "
+            f"argument. The `dataset_id` value will be used to name the elements in the `SpatialData` object."
         )
     return files[0].replace(suffix, "")
 
@@ -444,6 +448,16 @@ def _get_affine(coefficients: list[int]) -> Affine:
     return Affine(matrix, input_axes=("x", "y"), output_axes=("x", "y"))
 
 
+def _get_filename_prefix(path: Path, dataset_id: str) -> str:
+    if (path / f"{dataset_id}_{VisiumHDKeys.FEATURE_SLICE_FILE.value}").exists():
+        return f"{dataset_id}_"
+    assert (path / VisiumHDKeys.FEATURE_SLICE_FILE.value).exists(), (
+        f"Cannot locate the feature slice file, please ensure the file is present in the {path} directory and/or "
+        "adjust the `dataset_id` parameter"
+    )
+    return ""
+
+
 def _parse_metadata(path: Path, filename_prefix: str) -> tuple[dict[str, Any], dict[str, Any]]:
     with h5py.File(path / f"{filename_prefix}{VisiumHDKeys.FEATURE_SLICE_FILE.value}", "r") as f5:
         metadata = json.loads(dict(f5.attrs)[VisiumHDKeys.METADATA_JSON])
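
A minimal usage sketch of the new `var_names_make_unique` parameter introduced by this patch (not part of the diff; the directory paths and dataset name below are placeholders, not files shipped with the library):

    from spatialdata_io import visium, visium_hd

    # Default behaviour: duplicated gene names in the counts matrix are
    # deduplicated on the resulting AnnData table(s).
    sdata = visium("path/to/visium_output", dataset_id="sample1")

    # Opt out to keep var names exactly as stored in the counts file.
    sdata_hd = visium_hd("path/to/visium_hd_output", var_names_make_unique=False)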