Skip to content
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 6 additions & 11 deletions .github/workflows/python-package.yml
Original file line number Diff line number Diff line change
Expand Up @@ -15,26 +15,21 @@ jobs:
runs-on: ubuntu-latest
strategy:
matrix:
python-version: [3.6, 3.7, 3.8]
python-version: ['3.10', '3.11', '3.12']

steps:
- uses: actions/checkout@v2
- uses: actions/checkout@v4
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v2
uses: actions/setup-python@v5
with:
python-version: ${{ matrix.python-version }}
- name: Install dependencies
run: |
python -m pip install --upgrade pip
pip install flake8 pytest
pip install .
if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
- name: Lint with flake8
pip install ".[dev]"
- name: Lint with ruff
run: |
# stop the build if there are Python syntax errors or undefined names
flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics
# exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics
ruff check scnym/ tests/
- name: Test with pytest
run: |
pytest
29 changes: 29 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,5 +1,34 @@
dist/
**/__pycache__/
*.pyc
.ipynb_checkpoints/
build/
*.egg-info/
.claude/
CLAUDE.md
data/
tmp/

# Testing / linting caches
.pytest_cache/
.ruff_cache/
.mypy_cache/
htmlcov/
.coverage

# Environment
.env
*.env

# Editors
.vscode/
.idea/

# OS
.DS_Store
Thumbs.db

# Model outputs
*.pt
*.pth
hallmark.gmt
16 changes: 16 additions & 0 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
repos:
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v4.6.0
hooks:
- id: trailing-whitespace
- id: end-of-file-fixer
- id: check-yaml
- id: check-merge-conflict
- id: check-added-large-files
args: ['--maxkb=500']

- repo: https://github.com/astral-sh/ruff-pre-commit
rev: v0.4.4
hooks:
- id: ruff
args: [--fix, --exit-non-zero-on-fix]
6 changes: 3 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -91,17 +91,17 @@ First, clone the repository:

We recommend creating a virtual environment for use with `scNym`.
This is easily accomplished using Python's built-in `venv` module (shown below), `virtualenv`, or `conda`.
We recommend using `python=3.8` for `scNym`, as some of our dependencies don't currently support the newest Python versions.
We recommend using `python=3.10` or newer for `scNym`.

```bash
$ python3 -m venv scnym_env # python3 is python3.8
$ python3 -m venv scnym_env
$ source scnym_env/bin/activate
```

or

```bash
$ conda create -n scnym_env -c conda-forge python=3.8
$ conda create -n scnym_env -c conda-forge python=3.10
$ conda activate scnym_env
```

Expand Down
2 changes: 1 addition & 1 deletion VERSION
Original file line number Diff line number Diff line change
@@ -1 +1 @@
0.3.3
0.4.0
2 changes: 1 addition & 1 deletion demo_script.sh
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ mv 15467792 lung.h5ad
# export metadata as a separate CSV for scNym
echo "EXPORTING METADATA AND GENE NAMES"
echo "NORMALIZING COUNTS TO LOG(CPM + 1)"
python -c "import anndata; import numpy as np; import scanpy.api as sc; a=anndata.read_h5ad('lung.h5ad'); a.obs.to_csv('metadata.csv'); np.savetxt('gene_names.csv', a.var_names, fmt='%s'); sc.pp.normalize_per_cell(a, counts_per_cell_after=1e6); sc.pp.log1p(a); a.write_h5ad('lung.h5ad')"
python -c "import anndata; import numpy as np; import scanpy as sc; a=anndata.read_h5ad('lung.h5ad'); a.obs.to_csv('metadata.csv'); np.savetxt('gene_names.csv', a.var_names, fmt='%s'); sc.pp.normalize_total(a, target_sum=1e6); sc.pp.log1p(a); a.write_h5ad('lung.h5ad')"

# return to the original directory
cd -
Expand Down
25 changes: 25 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
[tool.ruff]
line-length = 127
target-version = "py310"

[tool.ruff.lint]
select = [
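"E9", # Syntax and I/O errors (E902, E999)
"F63", # Invalid comparisons/assertions (F631-F634: assert on tuple, `is` with a literal, `>>` with print, `if` on tuple)
"F7", # Misplaced statements (F701-F707: break/continue/return/yield outside a valid scope; F722: bad forward annotation)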
"F82", # Undefined names
"F", # Pyflakes
"E", # pycodestyle errors
"W", # pycodestyle warnings
]
ignore = [
"E501", # line too long (handled by formatter if desired)
"E741", # ambiguous variable name (common in scientific code)
"F401", # unused imports (star imports in losses)
"F403", # star imports
"F405", # may be undefined from star import
"E722", # bare except
]

[tool.pytest.ini_options]
testpaths = ["tests"]
44 changes: 14 additions & 30 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,32 +1,16 @@
anndata==0.8.*
anndata==0.11.*
ConfigArgParse==1.1
h5py==3.10.*
leidenalg==0.8.10
louvain==0.7.2
numba==0.60.*
numpy==1.26.*
numpy-groupies==0.10.*
leidenalg==0.11.*
numpy==2.2.*
pandas==2.2.*
pytest==5.4.*
python-dateutil==2.8.*
PyYAML==5.3.*
requests==2.26.*
requests-cache==0.5.*
requests-oauthlib==1.3.*
requests-toolbelt==0.9.*
matplotlib==3.6.*
scanpy==1.9.*
scikit-learn==1.3.*
scikit-misc==0.2.*
scipy==1.14
six==1.17.*
tensorboard==2.6.*
tensorboard-plugin-wit==1.6.*
tensorboardX==2.1
torch==2.2.*
torchvision==0.17.*
tqdm==4.44.*
umap-learn==0.3.*
urllib3==1.26.*
protobuf==3.20.*

PyYAML==6.0.*
requests==2.32.*
scanpy==1.11.*
scikit-learn==1.7.*
scikit-misc==0.5.*
scipy==1.15.*
tensorboardX==2.6.*
torch==2.6.*
torchvision==0.21.*
tqdm==4.67.*
umap-learn==0.5.*
2 changes: 1 addition & 1 deletion scnym/__init__.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
__author__ = "Jacob C. Kimmel, David R. Kelley"
__email__ = "jacobkimmel+scnym@gmail.com, drk@calicolabs.com"
__version__ = "0.3.4"
__version__ = "0.4.0"
Comment thread
davek44 marked this conversation as resolved.

# populate the namespace so top level imports work
# e.g.
Expand Down
10 changes: 6 additions & 4 deletions scnym/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
them onto a user supplied target dataset.
"""
from typing import Optional, Union, List, Tuple
import anndata
from anndata import AnnData
import scanpy as sc
import numpy as np
Expand Down Expand Up @@ -437,7 +438,7 @@ def scnym_train(
# set all samples for training
train_adata = adata
# set no samples as `target_bidx`
target_bidx = np.zeros(adata.shape[0], dtype=np.bool)
target_bidx = np.zeros(adata.shape[0], dtype=bool)
else:
print(f"{n_unlabeled} unlabeled observations found.")
print(
Expand Down Expand Up @@ -583,7 +584,8 @@ def scnym_train(
"traintest_idx": traintest_idx,
"val_idx": val_idx,
}
assert osp.exists(results["model_path"])
if not osp.exists(results["model_path"]):
raise FileNotFoundError(f"Model path not found: {results['model_path']}")

adata.uns["scNym_train_results"] = results

Expand Down Expand Up @@ -909,8 +911,8 @@ def atlas2target(
logger.info(msg)

# join the target and atlas data
joint_adata = atlas.concatenate(
adata,
joint_adata = anndata.concat(
[atlas, adata],
join="inner",
)

Expand Down
24 changes: 11 additions & 13 deletions scnym/dataprep.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,9 +30,9 @@ class SingleCellDS(Dataset):

def __init__(
self,
X: Union[sparse.csr.csr_matrix, np.ndarray],
y: Union[sparse.csr.csr_matrix, np.ndarray],
domain: Union[sparse.csr.csr_matrix, np.ndarray] = None,
X: Union[sparse.csr_matrix, np.ndarray],
y: Union[sparse.csr_matrix, np.ndarray],
domain: Union[sparse.csr_matrix, np.ndarray] = None,
transform: Callable = None,
num_classes: int = -1,
num_domains: int = -1,
Expand Down Expand Up @@ -139,7 +139,7 @@ def __getitem__(
# retrieve relevant sample vector and associated label
# store in a hash table for later manipulation and retrieval

# input_ is either an `np.ndarray` or `sparse.csr.csr_matrix`
# input_ is either an `np.ndarray` or `sparse.csr_matrix`
input_ = self.X[idx, ...]
# label is already a `torch.Tensor`
label = self.y[idx]
Expand Down Expand Up @@ -365,7 +365,7 @@ def __call__(
if self.depth_ratio is None:
# tile the specified depth for all cells
depth = np.tile(np.array(self.depth).reshape(1, -1), (x.size(0), 1)).astype(
np.int
int
)
else:
# compute a range of depths based on the library size
Expand All @@ -376,20 +376,18 @@ def __call__(
np.ceil(self.depth_ratio[1] * size).reshape(-1, 1),
],
axis=1,
).astype(np.int)
).astype(int)

# sample from a multinomial
# np.random.multinomial is ~100X faster than the native
# torch.distributions.Multinomial, implemented in Notes
m = np.zeros(x.size())
for i in range(x.size(0)):

d = int(
np.random.choice(
np.arange(depth[i, 0], depth[i, 1]),
size=1,
)
)
d = np.random.choice(
np.arange(depth[i, 0], depth[i, 1]),
size=1,
).item()

m[i, :] = np.random.multinomial(
d,
Expand Down Expand Up @@ -460,7 +458,7 @@ def __call__(
np.arange(n_genes),
size=int(np.floor(n_genes * p_drop)),
replace=False,
).astype(np.int)
).astype(int)
x[i, idx] = 0

sample["input"] = x
Expand Down
8 changes: 4 additions & 4 deletions scnym/interpret.py
Original file line number Diff line number Diff line change
Expand Up @@ -241,7 +241,7 @@ class in `.class_names` for which to compute gradients.
msg = f"{target_class} is not in `.class_names`"
raise ValueError(msg)

target_idx = np.where(target_class == self.class_names)[0].astype(np.int)
target_idx = np.where(target_class == self.class_names)[0].astype(int)
target_idx = int(target_idx)

self.model.zero_grad()
Expand Down Expand Up @@ -297,7 +297,7 @@ def rank_genes_by_saliency(
s = self.get_saliency(**kwargs)
sort_idx = torch.argsort(s)
idx = sort_idx[0].numpy()[::-1]
return self.gene_names[idx.astype(np.int)]
return self.gene_names[idx.astype(int)]


class IntegratedGradient(object):
Expand Down Expand Up @@ -709,7 +709,7 @@ class in `self.class_names` and `adata.obs[groupby]`
raise ValueError(msg)

# get the indices for cells of the target class
cell_idx = np.where(adata.obs[groupby] == target_class)[0].astype(np.int)
cell_idx = np.where(adata.obs[groupby] == target_class)[0].astype(int)
if n_cells is not None:
if n_cells < len(cell_idx):
# subset if a specific number of cells was specified
Expand Down Expand Up @@ -1033,7 +1033,7 @@ class name for source class to use as reference cells for expected

target_bidx = adata.obs[self.cell_type_col] == target
if source in self.background_vals:
source_bidx = np.ones(adata.shape[0], dtype=np.bool)
source_bidx = np.ones(adata.shape[0], dtype=bool)
# ensure target cells aren't in the source data
source_bidx[target_bidx] = False
else:
Expand Down
14 changes: 10 additions & 4 deletions scnym/losses.py
Original file line number Diff line number Diff line change
Expand Up @@ -371,7 +371,11 @@ def _update_teacher(
# normalization statistics
for m in self.teacher.modules():
if isinstance(m, nn.BatchNorm1d):
assert m.track_running_stats == self.teacher_bn_running_stats
if m.track_running_stats != self.teacher_bn_running_stats:
raise RuntimeError(
f"Teacher BatchNorm track_running_stats={m.track_running_stats} "
f"does not match expected={self.teacher_bn_running_stats}"
)

return

Expand All @@ -398,7 +402,7 @@ def _update_teacher_params(
# new parameters
zipped_params = zip(self.teacher.parameters(), model.parameters())
for teacher_param, model_param in zipped_params:
(teacher_param.data.mul_(alpha).add_(1 - alpha, model_param.data))
(teacher_param.data.mul_(alpha).add_(model_param.data, alpha=1 - alpha))
return

def __call__(
Expand Down Expand Up @@ -483,7 +487,8 @@ def __call__(
mixed_output = F.softmax(
model(mixed_sample["input"]),
)
assert mixed_output.requires_grad
if not mixed_output.requires_grad:
raise RuntimeError("mixed_output does not require grad")

# set outputs as attributes for later access
self.mixed_output = mixed_output
Expand Down Expand Up @@ -1771,7 +1776,8 @@ def __init__(
# if the prior_matrix was provided, always prefer it.
self.prior_matrix = prior_matrix

assert self.prior_matrix is not None
if self.prior_matrix is None:
raise ValueError("prior_matrix must be set, either via argument or gene sets")
return

def _set_prior_matrix_from_gene_sets(
Expand Down
Loading
Loading