calico · davek44 · Apr 4, 2026 · Apr 4, 2026 · Apr 4, 2026 · Apr 4, 2026
diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml
@@ -15,26 +15,21 @@ jobs:
     runs-on: ubuntu-latest
     strategy:
       matrix:
-        python-version: [3.6, 3.7, 3.8]
+        python-version: ['3.10', '3.11', '3.12']
 
     steps:
-    - uses: actions/checkout@v2
+    - uses: actions/checkout@v4
     - name: Set up Python ${{ matrix.python-version }}
-      uses: actions/setup-python@v2
+      uses: actions/setup-python@v5
       with:
         python-version: ${{ matrix.python-version }}
     - name: Install dependencies
       run: |
         python -m pip install --upgrade pip
-        pip install flake8 pytest
-        pip install .
-        if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
-    - name: Lint with flake8
+        pip install ".[dev]"
+    - name: Lint with ruff
       run: |
-        # stop the build if there are Python syntax errors or undefined names
-        flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics
-        # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
-        flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics
+        ruff check scnym/ tests/
     - name: Test with pytest
       run: |
         pytest
diff --git a/.gitignore b/.gitignore
@@ -1,5 +1,34 @@
 dist/
 **/__pycache__/
+*.pyc
 .ipynb_checkpoints/
 build/
 *.egg-info/
+.claude/
+CLAUDE.md
+data/
+tmp/
+
+# Testing / linting caches
+.pytest_cache/
+.ruff_cache/
+.mypy_cache/
+htmlcov/
+.coverage
+
+# Environment
+.env
+*.env
+
+# Editors
+.vscode/
+.idea/
+
+# OS
+.DS_Store
+Thumbs.db
+
+# Model outputs
+*.pt
+*.pth
+hallmark.gmt
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -0,0 +1,16 @@
+repos:
+  - repo: https://github.com/pre-commit/pre-commit-hooks
+    rev: v4.6.0
+    hooks:
+      - id: trailing-whitespace
+      - id: end-of-file-fixer
+      - id: check-yaml
+      - id: check-merge-conflict
+      - id: check-added-large-files
+        args: ['--maxkb=500']
+
+  - repo: https://github.com/astral-sh/ruff-pre-commit
+    rev: v0.4.4
+    hooks:
+      - id: ruff
+        args: [--fix, --exit-non-zero-on-fix]
diff --git a/README.md b/README.md
@@ -91,17 +91,17 @@ First, clone the repository:
 
 We recommend creating a virtual environment for use with `scNym`. 
 This is easily accomplished using `virtualenv` or `conda`.
-We recommend using `python=3.8` for `scNym`, as some of our dependencies don't currently support the newest Python versions.
+We recommend using `python=3.10` or newer for `scNym`.
 
 ```bash
-$ python3 -m venv scnym_env # python3 is python3.8
+$ python3 -m venv scnym_env
 $ source scnym_env/bin/activate
 ```
 
 or 
 
 ```bash
-$ conda create -n scnym_env -c conda-forge python=3.8
+$ conda create -n scnym_env -c conda-forge python=3.10
 $ conda activate scnym_env
 ```
 

diff --git a/VERSION b/VERSION
@@ -1 +1 @@
-0.3.3
+0.4.0
diff --git a/demo_script.sh b/demo_script.sh
@@ -26,7 +26,7 @@ mv 15467792 lung.h5ad
 # export metadata as a separate CSV for scNym
 echo "EXPORTING METADATA AND GENE NAMES"
 echo "NORMALIZING COUNTS TO LOG(CPM + 1)"
-python -c "import anndata; import numpy as np; import scanpy.api as sc; a=anndata.read_h5ad('lung.h5ad'); a.obs.to_csv('metadata.csv'); np.savetxt('gene_names.csv', a.var_names, fmt='%s'); sc.pp.normalize_per_cell(a, counts_per_cell_after=1e6); sc.pp.log1p(a); a.write_h5ad('lung.h5ad')"
+python -c "import anndata; import numpy as np; import scanpy as sc; a=anndata.read_h5ad('lung.h5ad'); a.obs.to_csv('metadata.csv'); np.savetxt('gene_names.csv', a.var_names, fmt='%s'); sc.pp.normalize_total(a, target_sum=1e6); sc.pp.log1p(a); a.write_h5ad('lung.h5ad')"
 
 # return to the original directory
 cd -

diff --git a/pyproject.toml b/pyproject.toml
@@ -0,0 +1,25 @@
+[tool.ruff]
+line-length = 127
+target-version = "py310"
+
+[tool.ruff.lint]
+select = [
+    "E9",   # Runtime errors
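+    "F63",  # Invalid comparisons / assert-on-tuple / `print >>` syntax (F631-F634)
+    "F7",   # Misplaced break/continue/return/yield and related syntax-level errors
+    "F82",  # Undefined names
+    "F",    # Pyflakes (already includes the F63/F7/F82 prefixes listed above)
+    "E",    # pycodestyle errors (already includes the E9 prefix listed above)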
+    "W",    # pycodestyle warnings
+]
+ignore = [
+    "E501",  # line too long (handled by formatter if desired)
+    "E741",  # ambiguous variable name (common in scientific code)
+    "F401",  # unused imports (star imports in losses)
+    "F403",  # star imports
+    "F405",  # may be undefined from star import
+    "E722",  # bare except
+]
+
+[tool.pytest.ini_options]
+testpaths = ["tests"]
diff --git a/requirements.txt b/requirements.txt
@@ -1,32 +1,16 @@
-anndata==0.8.*
+anndata==0.11.*
 ConfigArgParse==1.1
-h5py==3.10.*
-leidenalg==0.8.10
-louvain==0.7.2
-numba==0.60.*
-numpy==1.26.*
-numpy-groupies==0.10.*
+leidenalg==0.11.*
+numpy==2.2.*
 pandas==2.2.*
-pytest==5.4.*
-python-dateutil==2.8.*
-PyYAML==5.3.*
-requests==2.26.*
-requests-cache==0.5.*
-requests-oauthlib==1.3.*
-requests-toolbelt==0.9.*
-matplotlib==3.6.*
-scanpy==1.9.*
-scikit-learn==1.3.*
-scikit-misc==0.2.*
-scipy==1.14
-six==1.17.*
-tensorboard==2.6.*
-tensorboard-plugin-wit==1.6.*
-tensorboardX==2.1
-torch==2.2.*
-torchvision==0.17.*
-tqdm==4.44.*
-umap-learn==0.3.*
-urllib3==1.26.*
-protobuf==3.20.*
-
+PyYAML==6.0.*
+requests==2.32.*
+scanpy==1.11.*
+scikit-learn==1.7.*
+scikit-misc==0.5.*
+scipy==1.15.*
+tensorboardX==2.6.*
+torch==2.6.*
+torchvision==0.21.*
+tqdm==4.67.*
+umap-learn==0.5.*
diff --git a/scnym/__init__.py b/scnym/__init__.py
@@ -1,6 +1,6 @@
 __author__ = "Jacob C. Kimmel, David R. Kelley"
 __email__ = "jacobkimmel+scnym@gmail.com, drk@calicolabs.com"
-__version__ = "0.3.4"
+__version__ = "0.4.0"
 
 # populate the namespace so top level imports work
 # e.g.

diff --git a/scnym/api.py b/scnym/api.py
@@ -12,6 +12,7 @@
 them onto a user supplied target dataset.
 """
 from typing import Optional, Union, List, Tuple
+import anndata
 from anndata import AnnData
 import scanpy as sc
 import numpy as np
@@ -437,7 +438,7 @@ def scnym_train(
         # set all samples for training
         train_adata = adata
         # set no samples as `target_bidx`
-        target_bidx = np.zeros(adata.shape[0], dtype=np.bool)
+        target_bidx = np.zeros(adata.shape[0], dtype=bool)
     else:
         print(f"{n_unlabeled} unlabeled observations found.")
         print(
@@ -583,7 +584,8 @@ def scnym_train(
         "traintest_idx": traintest_idx,
         "val_idx": val_idx,
     }
-    assert osp.exists(results["model_path"])
+    if not osp.exists(results["model_path"]):
+        raise FileNotFoundError(f"Model path not found: {results['model_path']}")
 
     adata.uns["scNym_train_results"] = results
 
@@ -909,8 +911,8 @@ def atlas2target(
         logger.info(msg)
 
     # join the target and atlas data
-    joint_adata = atlas.concatenate(
-        adata,
+    joint_adata = anndata.concat(
+        [atlas, adata],
         join="inner",
     )
 

diff --git a/scnym/dataprep.py b/scnym/dataprep.py
@@ -30,9 +30,9 @@ class SingleCellDS(Dataset):
 
     def __init__(
         self,
-        X: Union[sparse.csr.csr_matrix, np.ndarray],
-        y: Union[sparse.csr.csr_matrix, np.ndarray],
-        domain: Union[sparse.csr.csr_matrix, np.ndarray] = None,
+        X: Union[sparse.csr_matrix, np.ndarray],
+        y: Union[sparse.csr_matrix, np.ndarray],
+        domain: Union[sparse.csr_matrix, np.ndarray] = None,
         transform: Callable = None,
         num_classes: int = -1,
         num_domains: int = -1,
@@ -139,7 +139,7 @@ def __getitem__(
         # retrieve relevant sample vector and associated label
         # store in a hash table for later manipulation and retrieval
 
-        # input_ is either an `np.ndarray` or `sparse.csr.csr_matrix`
+        # input_ is either an `np.ndarray` or `sparse.csr_matrix`
         input_ = self.X[idx, ...]
         # label is already a `torch.Tensor`
         label = self.y[idx]
@@ -365,7 +365,7 @@ def __call__(
         if self.depth_ratio is None:
             # tile the specified depth for all cells
             depth = np.tile(np.array(self.depth).reshape(1, -1), (x.size(0), 1)).astype(
-                np.int
+                int
             )
         else:
             # compute a range of depths based on the library size
@@ -376,20 +376,18 @@ def __call__(
                     np.ceil(self.depth_ratio[1] * size).reshape(-1, 1),
                 ],
                 axis=1,
-            ).astype(np.int)
+            ).astype(int)
 
         # sample from a multinomial
         # np.random.multinomial is ~100X faster than the native
         # torch.distributions.Multinomial, implemented in Notes
         m = np.zeros(x.size())
         for i in range(x.size(0)):
 
-            d = int(
-                np.random.choice(
-                    np.arange(depth[i, 0], depth[i, 1]),
-                    size=1,
-                )
-            )
+            d = np.random.choice(
+                np.arange(depth[i, 0], depth[i, 1]),
+                size=1,
+            ).item()
 
             m[i, :] = np.random.multinomial(
                 d,
@@ -460,7 +458,7 @@ def __call__(
                 np.arange(n_genes),
                 size=int(np.floor(n_genes * p_drop)),
                 replace=False,
-            ).astype(np.int)
+            ).astype(int)
             x[i, idx] = 0
 
         sample["input"] = x

diff --git a/scnym/interpret.py b/scnym/interpret.py
@@ -241,7 +241,7 @@ class in `.class_names` for which to compute gradients.
             msg = f"{target_class} is not in `.class_names`"
             raise ValueError(msg)
 
-        target_idx = np.where(target_class == self.class_names)[0].astype(np.int)
+        target_idx = np.where(target_class == self.class_names)[0].astype(int)
         target_idx = int(target_idx)
 
         self.model.zero_grad()
@@ -297,7 +297,7 @@ def rank_genes_by_saliency(
         s = self.get_saliency(**kwargs)
         sort_idx = torch.argsort(s)
         idx = sort_idx[0].numpy()[::-1]
-        return self.gene_names[idx.astype(np.int)]
+        return self.gene_names[idx.astype(int)]
 
 
 class IntegratedGradient(object):
@@ -709,7 +709,7 @@ class in `self.class_names` and `adata.obs[groupby]`
             raise ValueError(msg)
 
         # get the indices for cells of the target class
-        cell_idx = np.where(adata.obs[groupby] == target_class)[0].astype(np.int)
+        cell_idx = np.where(adata.obs[groupby] == target_class)[0].astype(int)
         if n_cells is not None:
             if n_cells < len(cell_idx):
                 # subset if a specific number of cells was specified
@@ -1033,7 +1033,7 @@ class name for source class to use as reference cells for expected
 
         target_bidx = adata.obs[self.cell_type_col] == target
         if source in self.background_vals:
-            source_bidx = np.ones(adata.shape[0], dtype=np.bool)
+            source_bidx = np.ones(adata.shape[0], dtype=bool)
             # ensure target cells aren't in the source data
             source_bidx[target_bidx] = False
         else:

diff --git a/scnym/losses.py b/scnym/losses.py
@@ -371,7 +371,11 @@ def _update_teacher(
             # normalization statistics
             for m in self.teacher.modules():
                 if isinstance(m, nn.BatchNorm1d):
-                    assert m.track_running_stats == self.teacher_bn_running_stats
+                    if m.track_running_stats != self.teacher_bn_running_stats:
+                        raise RuntimeError(
+                            f"Teacher BatchNorm track_running_stats={m.track_running_stats} "
+                            f"does not match expected={self.teacher_bn_running_stats}"
+                        )
 
         return
 
@@ -398,7 +402,7 @@ def _update_teacher_params(
         # new parameters
         zipped_params = zip(self.teacher.parameters(), model.parameters())
         for teacher_param, model_param in zipped_params:
-            (teacher_param.data.mul_(alpha).add_(1 - alpha, model_param.data))
+            (teacher_param.data.mul_(alpha).add_(model_param.data, alpha=1 - alpha))
         return
 
     def __call__(
@@ -483,7 +487,8 @@ def __call__(
         mixed_output = F.softmax(
             model(mixed_sample["input"]),
         )
-        assert mixed_output.requires_grad
+        if not mixed_output.requires_grad:
+            raise RuntimeError("mixed_output does not require grad")
 
         # set outputs as attributes for later access
         self.mixed_output = mixed_output
@@ -1771,7 +1776,8 @@ def __init__(
             # if the prior_matrix was provided, always prefer it.
             self.prior_matrix = prior_matrix
 
-        assert self.prior_matrix is not None
+        if self.prior_matrix is None:
+            raise ValueError("prior_matrix must be set, either via argument or gene sets")
         return
 
     def _set_prior_matrix_from_gene_sets(