# Source code for squidpy.datasets._datasets

"""Public dataset interface functions using hardcoded dataset names.

This module provides the public API for downloading squidpy datasets.
All functions fetch datasets by their known names from the registry.
"""

from __future__ import annotations

from dataclasses import dataclass
from pathlib import Path
from typing import TYPE_CHECKING, Any, Literal

from scanpy import settings

from squidpy.datasets._downloader import get_downloader
from squidpy.datasets._registry import DatasetType, get_registry
from squidpy.read._utils import PathLike

if TYPE_CHECKING:
    import spatialdata as sd
    from anndata import AnnData


# =============================================================================
# Hardcoded dataset name types
# =============================================================================

# 10x Genomics Visium datasets (visium_10x type).
# This alias is the annotation of the ``sample_id`` parameter of :func:`visium`.
# Entries are grouped by the spaceranger version named in the comment above each group.
VisiumDatasets = Literal[
    # spaceranger version 1.1.0 datasets
    "V1_Breast_Cancer_Block_A_Section_1",
    "V1_Breast_Cancer_Block_A_Section_2",
    "V1_Human_Heart",
    "V1_Human_Lymph_Node",
    "V1_Mouse_Kidney",
    "V1_Adult_Mouse_Brain",
    "V1_Mouse_Brain_Sagittal_Posterior",
    "V1_Mouse_Brain_Sagittal_Posterior_Section_2",
    "V1_Mouse_Brain_Sagittal_Anterior",
    "V1_Mouse_Brain_Sagittal_Anterior_Section_2",
    "V1_Human_Brain_Section_1",
    "V1_Human_Brain_Section_2",
    "V1_Adult_Mouse_Brain_Coronal_Section_1",
    "V1_Adult_Mouse_Brain_Coronal_Section_2",
    # spaceranger version 1.2.0 datasets
    "Targeted_Visium_Human_Cerebellum_Neuroscience",
    "Parent_Visium_Human_Cerebellum",
    "Targeted_Visium_Human_SpinalCord_Neuroscience",
    "Parent_Visium_Human_SpinalCord",
    "Targeted_Visium_Human_Glioblastoma_Pan_Cancer",
    "Parent_Visium_Human_Glioblastoma",
    "Targeted_Visium_Human_BreastCancer_Immunology",
    "Parent_Visium_Human_BreastCancer",
    "Targeted_Visium_Human_OvarianCancer_Pan_Cancer",
    "Targeted_Visium_Human_OvarianCancer_Immunology",
    "Parent_Visium_Human_OvarianCancer",
    "Targeted_Visium_Human_ColorectalCancer_GeneSignature",
    "Parent_Visium_Human_ColorectalCancer",
    # spaceranger version 1.3.0 datasets
    "Visium_FFPE_Mouse_Brain",
    "Visium_FFPE_Mouse_Brain_IF",
    "Visium_FFPE_Mouse_Kidney",
    "Visium_FFPE_Human_Breast_Cancer",
    "Visium_FFPE_Human_Prostate_Acinar_Cell_Carcinoma",
    "Visium_FFPE_Human_Prostate_Cancer",
    "Visium_FFPE_Human_Prostate_IF",
    "Visium_FFPE_Human_Normal_Prostate",
]

# AnnData datasets (.h5ad).
# Each name listed here also has a same-named module-level loader function,
# created further down in this module with ``_make_loader(<name>)``.
AnnDataDatasets = Literal[
    "four_i",
    "imc",
    "seqfish",
    "visium_hne_adata",
    "visium_hne_adata_crop",
    "visium_fluo_adata",
    "visium_fluo_adata_crop",
    "sc_mouse_cortex",
    "mibitof",
    "merfish",
    "slideseqv2",
]

# Image datasets (.tiff).
# Each name listed here also has a same-named module-level loader function,
# created further down in this module with ``_make_loader(<name>)``.
ImageDatasets = Literal[
    "visium_fluo_image_crop",
    "visium_hne_image_crop",
    "visium_hne_image",
]

# SpatialData datasets (.zarr).
# These are the names passed to the downloader by :func:`visium_hne_sdata` and :func:`cells`.
SpatialDataDatasets = Literal["visium_hne_sdata", "cells"]


# =============================================================================
# 10x Genomics Visium functions
# =============================================================================


def visium(
    sample_id: VisiumDatasets,
    *,
    include_hires_tiff: bool = False,
    base_dir: PathLike | None = None,
) -> AnnData:
    """
    Download Visium `datasets <https://support.10xgenomics.com/spatial-gene-expression/datasets>`_ from *10x Genomics*.

    Parameters
    ----------
    sample_id
        Name of the Visium dataset.
    include_hires_tiff
        Whether to download the high-resolution tissue section into
        :attr:`anndata.AnnData.uns` ``['spatial']['{sample_id}']['metadata']['source_image_path']``.
    base_dir
        Directory where to download the data. If `None`, uses the ``visium`` subdirectory of
        :attr:`scanpy.settings.datasetdir`.

    Returns
    -------
    :class:`anndata.AnnData`
        Spatial AnnData object.

    Raises
    ------
    ValueError
        If ``sample_id`` is not contained in the downloader's registry.
    """
    downloader = get_downloader()
    registry = downloader.registry

    # Validate sample_id against known names before touching the filesystem or network.
    # NOTE(review): membership is tested against the whole registry, while the error
    # message lists only ``registry.visium_datasets``. A registered non-Visium name
    # would presumably pass this check -- confirm against the registry implementation.
    if sample_id not in registry:
        raise ValueError(
            f"Unknown Visium sample: {sample_id}. "
            f"Available samples: {registry.visium_datasets}"
        )

    # Default to scanpy.settings.datasetdir/visium if base_dir is not specified.
    if base_dir is None:
        base_dir = Path(settings.datasetdir) / "visium"

    return downloader.download(sample_id, base_dir, include_hires_tiff=include_hires_tiff)
# =============================================================================
# SpatialData functions
# =============================================================================


def visium_hne_sdata(folderpath: Path | str | None = None) -> sd.SpatialData:
    """
    Download a Visium H&E dataset as a SpatialData object.

    Parameters
    ----------
    folderpath
        A folder path where the dataset will be downloaded and extracted.
        If `None`, uses :attr:`scanpy.settings.datasetdir`.

    Returns
    -------
    :class:`spatialdata.SpatialData`
        The downloaded and extracted Visium H&E dataset.
    """
    return get_downloader().download("visium_hne_sdata", folderpath)


def cells(folderpath: Path | str | None = None) -> sd.SpatialData:
    """
    Download the cells dataset as a SpatialData object.

    Parameters
    ----------
    folderpath
        A folder path where the dataset will be downloaded and extracted.
        If `None`, uses :attr:`scanpy.settings.datasetdir`.

    Returns
    -------
    :class:`spatialdata.SpatialData`
        The downloaded and extracted cells dataset.
    """
    return get_downloader().download("cells", folderpath)


# =============================================================================
# Dataset loader factory
# =============================================================================


@dataclass(frozen=True)
class _DocParts:
    """Documentation parts for dataset loader functions.

    Attributes
    ----------
    shape_prefix
        Sentence fragment placed in front of the dataset's shape.
    path_desc
        Description of the loader's ``path`` parameter.
    kwargs_desc
        Description of the loader's ``kwargs`` parameter.
    return_type
        Text of the ``Returns`` entry: the type on the first line and, after a
        newline, its description.
    """

    shape_prefix: str
    path_desc: str
    kwargs_desc: str
    return_type: str


_ANNDATA_DOC = _DocParts(
    shape_prefix="The shape of this :class:`anndata.AnnData` object",
    path_desc="Path where to save the dataset.",
    kwargs_desc="Keyword arguments for ``anndata.read_h5ad``.",
    return_type=":class:`anndata.AnnData`\n The dataset.",
)

_IMAGE_DOC = _DocParts(
    shape_prefix="The shape of this image is",
    path_desc="Path where to save the .tiff image.",
    kwargs_desc="Keyword arguments for :meth:`squidpy.im.ImageContainer.add_img`.",
    return_type=":class:`squidpy.im.ImageContainer`\n The image data.",
)

# Only dataset types listed here can be turned into loaders by ``_make_loader``.
_DOC_PARTS_BY_TYPE: dict[DatasetType, _DocParts] = {
    DatasetType.ANNDATA: _ANNDATA_DOC,
    DatasetType.IMAGE: _IMAGE_DOC,
}


def _make_loader(dataset_name: str):
    """Factory function to create dataset loader functions.

    Automatically derives documentation from the registry based on dataset type.

    Parameters
    ----------
    dataset_name
        Name of the dataset in the registry. It is also used as the ``__name__``
        and ``__qualname__`` of the returned function.

    Returns
    -------
    A function ``loader(path=None, **kwargs)`` that forwards its arguments to the
    downloader's ``download`` method for ``dataset_name``.

    Raises
    ------
    ValueError
        If ``dataset_name`` is not in the registry, or if the entry's type has no
        documentation parts in ``_DOC_PARTS_BY_TYPE``.
    """
    entry = get_registry().get(dataset_name)
    if entry is None:
        raise ValueError(f"Unknown dataset: {dataset_name}")

    doc_parts = _DOC_PARTS_BY_TYPE.get(entry.type)
    if doc_parts is None:
        raise ValueError(f"Unsupported type for loader factory: {entry.type}")

    def loader(path: PathLike | None = None, **kwargs: Any) -> Any:
        return get_downloader().download(dataset_name, path, **kwargs)

    # ``return_type`` packs "<type>\n<description>". Split it and re-indent the
    # description here so the Returns entry lines up with the template below,
    # whatever whitespace follows the newline in the stored string.
    returns_type, _, returns_desc = doc_parts.return_type.partition("\n")

    loader.__doc__ = f"""
    {entry.doc_header}

    {doc_parts.shape_prefix} ``{entry.shape}``.

    Parameters
    ----------
    path
        {doc_parts.path_desc}
    kwargs
        {doc_parts.kwargs_desc}

    Returns
    -------
    {returns_type.strip()}
        {returns_desc.strip()}
    """
    loader.__name__ = dataset_name
    # Keep repr(), introspection and pickling-by-name consistent with the
    # module-level name the loader is bound to, not the factory's local scope.
    loader.__qualname__ = dataset_name
    return loader


# AnnData datasets
four_i = _make_loader("four_i")
imc = _make_loader("imc")
seqfish = _make_loader("seqfish")
visium_hne_adata = _make_loader("visium_hne_adata")
visium_fluo_adata = _make_loader("visium_fluo_adata")
visium_hne_adata_crop = _make_loader("visium_hne_adata_crop")
visium_fluo_adata_crop = _make_loader("visium_fluo_adata_crop")
sc_mouse_cortex = _make_loader("sc_mouse_cortex")
mibitof = _make_loader("mibitof")
merfish = _make_loader("merfish")
slideseqv2 = _make_loader("slideseqv2")

# Image datasets
visium_fluo_image_crop = _make_loader("visium_fluo_image_crop")
visium_hne_image_crop = _make_loader("visium_hne_image_crop")
visium_hne_image = _make_loader("visium_hne_image")