diff --git a/.vscode/settings.json b/.vscode/settings.json index 17d70d9420..9f5e9f8407 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -19,6 +19,6 @@ "python.testing.pytestArgs": ["-vv", "--color=yes", "--internet-tests"], "python.testing.pytestEnabled": true, "python.terminal.activateEnvironment": true, - "python-envs.defaultEnvManager": "flying-sheep.hatch:hatch", - "python-envs.defaultPackageManager": "flying-sheep.hatch:hatch", + "python-envs.defaultEnvManager": "ms-python.python:venv", + "python-envs.defaultPackageManager": "ms-python.python:pip", } diff --git a/docs/api/deprecated.md b/docs/api/deprecated.md index ba6868d68a..a95ddda4d1 100644 --- a/docs/api/deprecated.md +++ b/docs/api/deprecated.md @@ -9,6 +9,7 @@ :nosignatures: :toctree: ../generated/ + read_visium pp.subsample tl.louvain logging.print_versions diff --git a/docs/api/io.md b/docs/api/io.md index 2254b30c39..a1b55b223f 100644 --- a/docs/api/io.md +++ b/docs/api/io.md @@ -42,7 +42,6 @@ Read 10x formatted hdf5 files and directories containing `.mtx` files using read_10x_h5 read_10x_mtx - read_visium ``` Read other formats using functions borrowed from {mod}`anndata` diff --git a/src/scanpy/_settings/presets.py b/src/scanpy/_settings/presets.py index bef0280b39..728631c904 100644 --- a/src/scanpy/_settings/presets.py +++ b/src/scanpy/_settings/presets.py @@ -69,6 +69,10 @@ def _get_value(self, preset: Preset) -> object: return getattr(params, param) +class Read10xPreset(NamedTuple): + layer: str | None + + class HVGPreset(NamedTuple): flavor: HVGFlavor return_df: bool @@ -165,6 +169,14 @@ def _generate_next_value_( ScanpyV2Preview = enum.auto() """: Scanpy 2.*’s feature default settings. (Preview: subject to change!)""" + @preset_property + def read_10x() -> Mapping[Preset, Read10xPreset]: + """Target for :func:`~scanpy.read_10x_h5` and :func:`~scanpy.read_10x_mtx`.""" + return { + Preset.ScanpyV1: Read10xPreset(layer=None), + Preset.ScanpyV2Preview: Read10xPreset(layer="counts"), + } + @preset_property def highly_variable_genes() -> Mapping[Preset, HVGPreset]: """Flavor for :func:`~scanpy.pp.highly_variable_genes`.""" diff --git a/src/scanpy/readwrite.py b/src/scanpy/readwrite.py index d01520f404..3be03be42b 100644 --- a/src/scanpy/readwrite.py +++ b/src/scanpy/readwrite.py @@ -4,7 +4,6 @@ import json import warnings -from functools import partial from pathlib import Path, PurePath from typing import TYPE_CHECKING, cast, get_args, overload @@ -44,10 +43,11 @@ if TYPE_CHECKING: from collections.abc import Callable from os import PathLike - from typing import IO, Literal + from typing import IO, Concatenate, Literal from numpy.typing import DTypeLike + # .gz and .bz2 suffixes are also allowed for text formats text_exts = { "csv", @@ -166,6 +166,7 @@ def read( def read_10x_h5( filename: PathLike[str] | str, + layer: str | None | Default = Default(preset=("read_10x", "layer")), *, genome: str | None = None, gex_only: bool = True, @@ -208,6 +209,8 @@ def read_10x_h5( """ path = Path(filename) + if isinstance(layer, Default): + layer = settings.preset.read_10x.layer start = logg.info(f"reading {path}") is_present = _check_datafile_present_and_download(path, backup_url=backup_url) if not is_present: @@ -221,7 +224,7 @@ def read_10x_h5( warnings.filterwarnings( "ignore", r".*names are not unique", UserWarning ) - adata = _read_10x_h5(path, _read_v3_10x_h5) + adata = _read_10x_h5(path, _read_v3_10x_h5, layer=layer) if genome: if genome not in adata.var["genome"].values: msg = ( @@ -235,16 +238,21 @@ def read_10x_h5( if adata.is_view: adata = adata.copy() else: - adata = _read_10x_h5(path, partial(_read_legacy_10x_h5, genome=genome)) + adata = _read_10x_h5(path, _read_legacy_10x_h5, layer=layer, genome=genome) logg.info("", time=start) return adata -def _read_10x_h5(path: Path, cb: Callable[[h5py.File], AnnData]) -> AnnData: +def _read_10x_h5[**P]( + path: Path, + cb: Callable[Concatenate[h5py.File, P], AnnData], + *args: P.args, + **kwargs: P.kwargs, +) -> AnnData: """Read hdf5 file from Cell Ranger v3 or later versions.""" with h5py.File(str(path), "r") as f: try: - return cb(f) + return cb(f, *args, **kwargs) except KeyError as e: msg = "File is missing one or more required datasets." raise Exception(msg) from e @@ -258,7 +266,7 @@ def _collect_datasets(dsets: dict, group: h5py.Group) -> None: _collect_datasets(dsets, v) -def _read_v3_10x_h5(f: h5py.File) -> AnnData: +def _read_v3_10x_h5(f: h5py.File, *, layer: str | None) -> AnnData: dsets = {} _collect_datasets(dsets, f["matrix"]) @@ -307,10 +315,13 @@ def _read_v3_10x_h5(f: h5py.File) -> AnnData: else: msg = "10x h5 has no features group" raise ValueError(msg) - return AnnData(matrix, obs=obs_dict, var=var_dict) + x, layers = (matrix, None) if layer is None else (None, {layer: matrix}) + return AnnData(x, layers=layers, obs=obs_dict, var=var_dict) -def _read_legacy_10x_h5(f: h5py.File, genome: str | None) -> AnnData: +def _read_legacy_10x_h5( + f: h5py.File, *, layer: str | None, genome: str | None +) -> AnnData: children = list(f.keys()) if not genome: if len(children) > 1: @@ -347,8 +358,10 @@ def _read_legacy_10x_h5(f: h5py.File, genome: str | None) -> AnnData: ) # the csc matrix is automatically the transposed csr matrix # as scanpy expects it, so, no need for a further transpostion + x, layers = (matrix, None) if layer is None else (None, {layer: matrix}) adata = AnnData( - matrix, + x, + layers=layers, obs=dict(obs_names=dsets["barcodes"].astype(str)), var=dict( var_names=dsets["gene_names"].astype(str), @@ -525,6 +538,7 @@ def read_visium( def read_10x_mtx( path: PathLike[str] | str, *, + layer: str | None | Default = Default(preset=("read_10x", "layer")), var_names: Literal["gene_symbols", "gene_ids"] = "gene_symbols", make_unique: bool = True, cache: bool = False, @@ -575,6 +589,8 @@ def read_10x_mtx( """ path = Path(path) + if isinstance(layer, Default): + layer = settings.preset.read_10x.layer prefix = "" if prefix is None else prefix is_legacy = (path / f"{prefix}genes.tsv").is_file() with warnings.catch_warnings(): @@ -582,6 +598,7 @@ def read_10x_mtx( warnings.filterwarnings("ignore", r".*names are not unique", UserWarning) adata = _read_10x_mtx( path, + layer=layer, var_names=var_names, make_unique=make_unique, cache=cache, @@ -600,6 +617,7 @@ def read_10x_mtx( def _read_mtx( filename: Path, *, + layer: str | None, dtype: DTypeLike, sparse_format: Literal["csr", "csc", "coo"], ) -> AnnData: @@ -614,12 +632,14 @@ def _read_mtx( x = csr_matrix(x) elif sparse_format == "csc": x = csc_matrix(x) - return AnnData(x) + x, layers = (x, None) if layer is None else (None, {layer: x}) + return AnnData(x, layers=layers) def _read_10x_mtx( path: Path, *, + layer: str | None, var_names: Literal["gene_symbols", "gene_ids"], make_unique: bool, cache: bool, @@ -634,6 +654,7 @@ def _read_10x_mtx( suffix = "" if is_legacy else (".gz" if compressed else "") adata = read( path / f"{prefix}matrix.mtx{suffix}", + layer=layer, cache=cache, cache_compression=cache_compression, # transposing will convert e.g. CSR to CSC and vice versa