Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions .vscode/settings.json
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,6 @@
"python.testing.pytestArgs": ["-vv", "--color=yes", "--internet-tests"],
"python.testing.pytestEnabled": true,
"python.terminal.activateEnvironment": true,
"python-envs.defaultEnvManager": "flying-sheep.hatch:hatch",
"python-envs.defaultPackageManager": "flying-sheep.hatch:hatch",
"python-envs.defaultEnvManager": "ms-python.python:venv",
"python-envs.defaultPackageManager": "ms-python.python:pip",
}
1 change: 1 addition & 0 deletions docs/api/deprecated.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
:nosignatures:
:toctree: ../generated/

read_visium
pp.subsample
tl.louvain
logging.print_versions
Expand Down
1 change: 0 additions & 1 deletion docs/api/io.md
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,6 @@ Read 10x formatted hdf5 files and directories containing `.mtx` files using

read_10x_h5
read_10x_mtx
read_visium
```

Read other formats using functions borrowed from {mod}`anndata`
Expand Down
12 changes: 12 additions & 0 deletions src/scanpy/_settings/presets.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,10 @@ def _get_value(self, preset: Preset) -> object:
return getattr(params, param)


class Read10xPreset(NamedTuple):
    """Preset-dependent defaults for the 10x readers (``read_10x_h5`` / ``read_10x_mtx``)."""

    # Name of the layer that receives the count matrix;
    # ``None`` means the matrix is stored as ``X`` instead of in a layer.
    layer: str | None


class HVGPreset(NamedTuple):
flavor: HVGFlavor
return_df: bool
Expand Down Expand Up @@ -165,6 +169,14 @@ def _generate_next_value_(
ScanpyV2Preview = enum.auto()
""": Scanpy 2.*’s feature default settings. (Preview: subject to change!)"""

@preset_property
def read_10x() -> Mapping[Preset, Read10xPreset]:
    """Target for :func:`~scanpy.read_10x_h5` and :func:`~scanpy.read_10x_mtx`."""
    # ``layer=None`` keeps the matrix in ``X``; a string names the layer to fill.
    targets: dict[Preset, Read10xPreset] = {}
    targets[Preset.ScanpyV1] = Read10xPreset(layer=None)
    targets[Preset.ScanpyV2Preview] = Read10xPreset(layer="counts")
    return targets

@preset_property
def highly_variable_genes() -> Mapping[Preset, HVGPreset]:
"""Flavor for :func:`~scanpy.pp.highly_variable_genes`."""
Expand Down
43 changes: 32 additions & 11 deletions src/scanpy/readwrite.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@

import json
import warnings
from functools import partial
from pathlib import Path, PurePath
from typing import TYPE_CHECKING, cast, get_args, overload

Expand Down Expand Up @@ -44,10 +43,11 @@
if TYPE_CHECKING:
from collections.abc import Callable
from os import PathLike
from typing import IO, Literal
from typing import IO, Concatenate, Literal

from numpy.typing import DTypeLike


# .gz and .bz2 suffixes are also allowed for text formats
text_exts = {
"csv",
Expand Down Expand Up @@ -166,6 +166,7 @@ def read(

def read_10x_h5(
filename: PathLike[str] | str,
layer: str | None | Default = Default(preset=("read_10x", "layer")),
*,
genome: str | None = None,
gex_only: bool = True,
Expand Down Expand Up @@ -208,6 +209,8 @@ def read_10x_h5(

"""
path = Path(filename)
if isinstance(layer, Default):
layer = settings.preset.read_10x.layer
start = logg.info(f"reading {path}")
is_present = _check_datafile_present_and_download(path, backup_url=backup_url)
if not is_present:
Expand All @@ -221,7 +224,7 @@ def read_10x_h5(
warnings.filterwarnings(
"ignore", r".*names are not unique", UserWarning
)
adata = _read_10x_h5(path, _read_v3_10x_h5)
adata = _read_10x_h5(path, _read_v3_10x_h5, layer=layer)
if genome:
if genome not in adata.var["genome"].values:
msg = (
Expand All @@ -235,16 +238,21 @@ def read_10x_h5(
if adata.is_view:
adata = adata.copy()
else:
adata = _read_10x_h5(path, partial(_read_legacy_10x_h5, genome=genome))
adata = _read_10x_h5(path, _read_legacy_10x_h5, layer=layer, genome=genome)
logg.info("", time=start)
return adata


def _read_10x_h5(path: Path, cb: Callable[[h5py.File], AnnData]) -> AnnData:
def _read_10x_h5[**P](
path: Path,
cb: Callable[Concatenate[h5py.File, P], AnnData],
*args: P.args,
**kwargs: P.kwargs,
) -> AnnData:
"""Read hdf5 file from Cell Ranger v3 or later versions."""
with h5py.File(str(path), "r") as f:
try:
return cb(f)
return cb(f, *args, **kwargs)
except KeyError as e:
msg = "File is missing one or more required datasets."
raise Exception(msg) from e
Expand All @@ -258,7 +266,7 @@ def _collect_datasets(dsets: dict, group: h5py.Group) -> None:
_collect_datasets(dsets, v)


def _read_v3_10x_h5(f: h5py.File) -> AnnData:
def _read_v3_10x_h5(f: h5py.File, *, layer: str | None) -> AnnData:
dsets = {}
_collect_datasets(dsets, f["matrix"])

Expand Down Expand Up @@ -307,10 +315,13 @@ def _read_v3_10x_h5(f: h5py.File) -> AnnData:
else:
msg = "10x h5 has no features group"
raise ValueError(msg)
return AnnData(matrix, obs=obs_dict, var=var_dict)
x, layers = (matrix, None) if layer is None else (None, {layer: matrix})
return AnnData(x, layers=layers, obs=obs_dict, var=var_dict)


def _read_legacy_10x_h5(f: h5py.File, genome: str | None) -> AnnData:
def _read_legacy_10x_h5(
f: h5py.File, *, layer: str | None, genome: str | None
) -> AnnData:
children = list(f.keys())
if not genome:
if len(children) > 1:
Expand Down Expand Up @@ -347,8 +358,10 @@ def _read_legacy_10x_h5(f: h5py.File, genome: str | None) -> AnnData:
)
# the csc matrix is automatically the transposed csr matrix
# as scanpy expects it, so, no need for a further transposition
x, layers = (matrix, None) if layer is None else (None, {layer: matrix})
adata = AnnData(
matrix,
x,
layers=layers,
obs=dict(obs_names=dsets["barcodes"].astype(str)),
var=dict(
var_names=dsets["gene_names"].astype(str),
Expand Down Expand Up @@ -525,6 +538,7 @@ def read_visium(
def read_10x_mtx(
path: PathLike[str] | str,
*,
layer: str | None | Default = Default(preset=("read_10x", "layer")),
var_names: Literal["gene_symbols", "gene_ids"] = "gene_symbols",
make_unique: bool = True,
cache: bool = False,
Expand Down Expand Up @@ -575,13 +589,16 @@ def read_10x_mtx(

"""
path = Path(path)
if isinstance(layer, Default):
layer = settings.preset.read_10x.layer
prefix = "" if prefix is None else prefix
is_legacy = (path / f"{prefix}genes.tsv").is_file()
with warnings.catch_warnings():
# this will be thrown below in “adata[:, ...].copy()”
warnings.filterwarnings("ignore", r".*names are not unique", UserWarning)
adata = _read_10x_mtx(
path,
layer=layer,
var_names=var_names,
make_unique=make_unique,
cache=cache,
Expand All @@ -600,6 +617,7 @@ def read_10x_mtx(
def _read_mtx(
filename: Path,
*,
layer: str | None,
dtype: DTypeLike,
sparse_format: Literal["csr", "csc", "coo"],
) -> AnnData:
Expand All @@ -614,12 +632,14 @@ def _read_mtx(
x = csr_matrix(x)
elif sparse_format == "csc":
x = csc_matrix(x)
return AnnData(x)
x, layers = (x, None) if layer is None else (None, {layer: x})
return AnnData(x, layers=layers)


def _read_10x_mtx(
path: Path,
*,
layer: str | None,
var_names: Literal["gene_symbols", "gene_ids"],
make_unique: bool,
cache: bool,
Expand All @@ -634,6 +654,7 @@ def _read_10x_mtx(
suffix = "" if is_legacy else (".gz" if compressed else "")
adata = read(
path / f"{prefix}matrix.mtx{suffix}",
layer=layer,
cache=cache,
cache_compression=cache_compression,
# transposing will convert e.g. CSR to CSC and vice versa
Expand Down
Loading