diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml
index bc26fee..43e21b6 100644
--- a/.github/workflows/python-package.yml
+++ b/.github/workflows/python-package.yml
@@ -15,26 +15,21 @@ jobs:
     runs-on: ubuntu-latest
     strategy:
       matrix:
-        python-version: [3.6, 3.7, 3.8]
+        python-version: ['3.10', '3.11', '3.12']
 
     steps:
-    - uses: actions/checkout@v2
+    - uses: actions/checkout@v4
     - name: Set up Python ${{ matrix.python-version }}
-      uses: actions/setup-python@v2
+      uses: actions/setup-python@v5
       with:
         python-version: ${{ matrix.python-version }}
     - name: Install dependencies
       run: |
         python -m pip install --upgrade pip
-        pip install flake8 pytest
-        pip install .
-        if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
-    - name: Lint with flake8
+        pip install ".[dev]"
+    - name: Lint with ruff
       run: |
-        # stop the build if there are Python syntax errors or undefined names
-        flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics
-        # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
-        flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics
+        ruff check scnym/ tests/
     - name: Test with pytest
       run: |
         pytest
diff --git a/.gitignore b/.gitignore
index b0f5e9b..296a2ac 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,5 +1,34 @@
 dist/
 **/__pycache__/
+*.pyc
 .ipynb_checkpoints/
 build/
 *.egg-info/
+.claude/
+CLAUDE.md
+data/
+tmp/
+
+# Testing / linting caches
+.pytest_cache/
+.ruff_cache/
+.mypy_cache/
+htmlcov/
+.coverage
+
+# Environment
+.env
+*.env
+
+# Editors
+.vscode/
+.idea/
+
+# OS
+.DS_Store
+Thumbs.db
+
+# Model outputs
+*.pt
+*.pth
+hallmark.gmt
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
new file mode 100644
index 0000000..5aa0fe5
--- /dev/null
+++ b/.pre-commit-config.yaml
@@ -0,0 +1,16 @@
+repos:
+  - repo: https://github.com/pre-commit/pre-commit-hooks
+    rev: v4.6.0
+    hooks:
+      - id: trailing-whitespace
+      - id: end-of-file-fixer
+      - id: check-yaml
+      - id: check-merge-conflict
+      - id: check-added-large-files
+        args: ['--maxkb=500']  # reject newly added files larger than 500 kB
+
+  - repo: https://github.com/astral-sh/ruff-pre-commit
+    rev: v0.4.4  # NOTE(review): CI installs an unpinned `ruff` via the `dev` extra; confirm the versions agree
+    hooks:
+      - id: ruff
+        args: [--fix, --exit-non-zero-on-fix]  # apply autofixes, but fail the hook whenever a fix was made
diff --git a/MANIFEST.in b/MANIFEST.in
new file mode 100644
index 0000000..510b4af
--- /dev/null
+++ b/MANIFEST.in
@@ -0,0 +1,2 @@
+include VERSION
+include requirements.txt
diff --git a/README.md b/README.md
index 4fec6a1..7e7400a 100644
--- a/README.md
+++ b/README.md
@@ -91,17 +91,17 @@ First, clone the repository:
 
 We recommend creating a virtual environment for use with `scNym`. 
 This is easily accomplished using `virtualenv` or `conda`.
-We recommend using `python=3.8` for `scNym`, as some of our dependencies don't currently support the newest Python versions.
+We recommend using `python=3.10` or newer for `scNym`.
 
 ```bash
-$ python3 -m venv scnym_env # python3 is python3.8
+$ python3 -m venv scnym_env
 $ source scnym_env/bin/activate
 ```
 
 or 
 
 ```bash
-$ conda create -n scnym_env -c conda-forge python=3.8
+$ conda create -n scnym_env -c conda-forge python=3.10
 $ conda activate scnym_env
 ```
 
diff --git a/VERSION b/VERSION
index 1c09c74..1d0ba9e 100644
--- a/VERSION
+++ b/VERSION
@@ -1 +1 @@
-0.3.3
+0.4.0
diff --git a/demo_script.sh b/demo_script.sh
index dc7502b..cc8e954 100755
--- a/demo_script.sh
+++ b/demo_script.sh
@@ -26,7 +26,7 @@ mv 15467792 lung.h5ad
 # export metadata as a separate CSV for scNym
 echo "EXPORTING METADATA AND GENE NAMES"
 echo "NORMALIZING COUNTS TO LOG(CPM + 1)"
-python -c "import anndata; import numpy as np; import scanpy.api as sc; a=anndata.read_h5ad('lung.h5ad'); a.obs.to_csv('metadata.csv'); np.savetxt('gene_names.csv', a.var_names, fmt='%s'); sc.pp.normalize_per_cell(a, counts_per_cell_after=1e6); sc.pp.log1p(a); a.write_h5ad('lung.h5ad')"
+python -c "import anndata; import numpy as np; import scanpy as sc; a=anndata.read_h5ad('lung.h5ad'); a.obs.to_csv('metadata.csv'); np.savetxt('gene_names.csv', a.var_names, fmt='%s'); sc.pp.normalize_total(a, target_sum=1e6); sc.pp.log1p(a); a.write_h5ad('lung.h5ad')"
 
 # return to the original directory
 cd -
diff --git a/pyproject.toml b/pyproject.toml
new file mode 100644
index 0000000..2ca4a0d
--- /dev/null
+++ b/pyproject.toml
@@ -0,0 +1,25 @@
+[tool.ruff]
+line-length = 127
+target-version = "py310"
+
+[tool.ruff.lint]
+select = [
+    "E9",   # pycodestyle E9xx; kept from the old flake8 --select list, redundant with "E" below
+    "F63",  # Pyflakes F63x; kept from the old flake8 --select list, redundant with "F" below
+    "F7",   # Pyflakes F7xx; kept from the old flake8 --select list, redundant with "F" below
+    "F82",  # Pyflakes F82x (undefined names); redundant with "F" below
+    "F",    # Pyflakes
+    "E",    # pycodestyle errors
+    "W",    # pycodestyle warnings
+]
+ignore = [
+    "E501",  # line too long (handled by formatter if desired)
+    "E741",  # ambiguous variable name (common in scientific code)
+    "F401",  # unused imports (star imports in losses)
+    "F403",  # star imports
+    "F405",  # may be undefined from star import
+    "E722",  # bare except
+]
+
+[tool.pytest.ini_options]
+testpaths = ["tests"]
diff --git a/requirements.txt b/requirements.txt
index e895253..0608143 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,32 +1,16 @@
-anndata==0.8.*
+anndata==0.11.*
 ConfigArgParse==1.1
-h5py==3.10.*
-leidenalg==0.8.10
-louvain==0.7.2
-numba==0.60.*
-numpy==1.26.*
-numpy-groupies==0.10.*
+leidenalg==0.11.*
+numpy==2.2.*
 pandas==2.2.*
-pytest==5.4.*
-python-dateutil==2.8.*
-PyYAML==5.3.*
-requests==2.26.*
-requests-cache==0.5.*
-requests-oauthlib==1.3.*
-requests-toolbelt==0.9.*
-matplotlib==3.6.*
-scanpy==1.9.*
-scikit-learn==1.3.*
-scikit-misc==0.2.*
-scipy==1.14
-six==1.17.*
-tensorboard==2.6.*
-tensorboard-plugin-wit==1.6.*
-tensorboardX==2.1
-torch==2.2.*
-torchvision==0.17.*
-tqdm==4.44.*
-umap-learn==0.3.*
-urllib3==1.26.*
-protobuf==3.20.*
-
+PyYAML==6.0.*
+requests==2.32.*
+scanpy==1.11.*
+scikit-learn==1.7.*
+scikit-misc==0.5.*
+scipy==1.15.*
+tensorboardX==2.6.*
+torch==2.6.*
+torchvision==0.21.*
+tqdm==4.67.*
+umap-learn==0.5.*
diff --git a/scnym/__init__.py b/scnym/__init__.py
index 14f386b..ee2650f 100644
--- a/scnym/__init__.py
+++ b/scnym/__init__.py
@@ -1,6 +1,6 @@
 __author__ = "Jacob C. Kimmel, David R. Kelley"
 __email__ = "jacobkimmel+scnym@gmail.com, drk@calicolabs.com"
-__version__ = "0.3.4"
+__version__ = "0.4.0"
 
 # populate the namespace so top level imports work
 # e.g.
diff --git a/scnym/api.py b/scnym/api.py
index bdfca1a..db26409 100644
--- a/scnym/api.py
+++ b/scnym/api.py
@@ -12,6 +12,7 @@
 them onto a user supplied target dataset.
 """
 from typing import Optional, Union, List, Tuple
+import anndata
 from anndata import AnnData
 import scanpy as sc
 import numpy as np
@@ -437,7 +438,7 @@ def scnym_train(
         # set all samples for training
         train_adata = adata
         # set no samples as `target_bidx`
-        target_bidx = np.zeros(adata.shape[0], dtype=np.bool)
+        target_bidx = np.zeros(adata.shape[0], dtype=bool)
     else:
         print(f"{n_unlabeled} unlabeled observations found.")
         print(
@@ -583,7 +584,8 @@ def scnym_train(
         "traintest_idx": traintest_idx,
         "val_idx": val_idx,
     }
-    assert osp.exists(results["model_path"])
+    if not osp.exists(results["model_path"]):
+        raise FileNotFoundError(f"Model path not found: {results['model_path']}")
 
     adata.uns["scNym_train_results"] = results
 
@@ -909,8 +911,13 @@ def atlas2target(
         logger.info(msg)
 
     # join the target and atlas data
-    joint_adata = atlas.concatenate(
-        adata,
+    # `label` and `index_unique` reproduce the defaults of the removed
+    # `AnnData.concatenate` (a "batch" obs column and "-<batch>" suffixed obs names).
+    # NOTE(review): `concatenate` also outer-joined `.obs` columns; confirm that the
+    # inner-joined `.obs` produced here is acceptable for callers.
+    joint_adata = anndata.concat(
+        [atlas, adata],
+        label="batch",
+        index_unique="-",
         join="inner",
     )
 
diff --git a/scnym/dataprep.py b/scnym/dataprep.py
index 1dbf1f0..f46e783 100644
--- a/scnym/dataprep.py
+++ b/scnym/dataprep.py
@@ -30,9 +30,9 @@ class SingleCellDS(Dataset):
 
     def __init__(
         self,
-        X: Union[sparse.csr.csr_matrix, np.ndarray],
-        y: Union[sparse.csr.csr_matrix, np.ndarray],
-        domain: Union[sparse.csr.csr_matrix, np.ndarray] = None,
+        X: Union[sparse.csr_matrix, np.ndarray],
+        y: Union[sparse.csr_matrix, np.ndarray],
+        domain: Union[sparse.csr_matrix, np.ndarray] = None,
         transform: Callable = None,
         num_classes: int = -1,
         num_domains: int = -1,
@@ -139,7 +139,7 @@ def __getitem__(
         # retrieve relevant sample vector and associated label
         # store in a hash table for later manipulation and retrieval
 
-        # input_ is either an `np.ndarray` or `sparse.csr.csr_matrix`
+        # input_ is either an `np.ndarray` or `sparse.csr_matrix`
         input_ = self.X[idx, ...]
         # label is already a `torch.Tensor`
         label = self.y[idx]
@@ -365,7 +365,7 @@ def __call__(
         if self.depth_ratio is None:
             # tile the specified depth for all cells
             depth = np.tile(np.array(self.depth).reshape(1, -1), (x.size(0), 1)).astype(
-                np.int
+                int
             )
         else:
             # compute a range of depths based on the library size
@@ -376,7 +376,7 @@ def __call__(
                     np.ceil(self.depth_ratio[1] * size).reshape(-1, 1),
                 ],
                 axis=1,
-            ).astype(np.int)
+            ).astype(int)
 
         # sample from a multinomial
         # np.random.multinomial is ~100X faster than the native
@@ -384,12 +384,10 @@ def __call__(
         m = np.zeros(x.size())
         for i in range(x.size(0)):
 
-            d = int(
-                np.random.choice(
-                    np.arange(depth[i, 0], depth[i, 1]),
-                    size=1,
-                )
-            )
+            d = np.random.choice(
+                np.arange(depth[i, 0], depth[i, 1]),
+                size=1,
+            ).item()
 
             m[i, :] = np.random.multinomial(
                 d,
@@ -460,7 +458,7 @@ def __call__(
                 np.arange(n_genes),
                 size=int(np.floor(n_genes * p_drop)),
                 replace=False,
-            ).astype(np.int)
+            ).astype(int)
             x[i, idx] = 0
 
         sample["input"] = x
diff --git a/scnym/interpret.py b/scnym/interpret.py
index 70c9c98..15ce37a 100644
--- a/scnym/interpret.py
+++ b/scnym/interpret.py
@@ -241,7 +241,7 @@ class in `.class_names` for which to compute gradients.
             msg = f"{target_class} is not in `.class_names`"
             raise ValueError(msg)
 
-        target_idx = np.where(target_class == self.class_names)[0].astype(np.int)
+        target_idx = np.where(target_class == self.class_names)[0].item()  # scalar, not a 1-element array
         target_idx = int(target_idx)
 
         self.model.zero_grad()
@@ -297,7 +297,7 @@ def rank_genes_by_saliency(
         s = self.get_saliency(**kwargs)
         sort_idx = torch.argsort(s)
         idx = sort_idx[0].numpy()[::-1]
-        return self.gene_names[idx.astype(np.int)]
+        return self.gene_names[idx.astype(int)]
 
 
 class IntegratedGradient(object):
@@ -709,7 +709,7 @@ class in `self.class_names` and `adata.obs[groupby]`
             raise ValueError(msg)
 
         # get the indices for cells of the target class
-        cell_idx = np.where(adata.obs[groupby] == target_class)[0].astype(np.int)
+        cell_idx = np.where(adata.obs[groupby] == target_class)[0].astype(int)
         if n_cells is not None:
             if n_cells < len(cell_idx):
                 # subset if a specific number of cells was specified
@@ -1033,7 +1033,7 @@ class name for source class to use as reference cells for expected
 
         target_bidx = adata.obs[self.cell_type_col] == target
         if source in self.background_vals:
-            source_bidx = np.ones(adata.shape[0], dtype=np.bool)
+            source_bidx = np.ones(adata.shape[0], dtype=bool)
             # ensure target cells aren't in the source data
             source_bidx[target_bidx] = False
         else:
diff --git a/scnym/losses.py b/scnym/losses.py
index 8b950a4..310f8c9 100644
--- a/scnym/losses.py
+++ b/scnym/losses.py
@@ -371,7 +371,11 @@ def _update_teacher(
             # normalization statistics
             for m in self.teacher.modules():
                 if isinstance(m, nn.BatchNorm1d):
-                    assert m.track_running_stats == self.teacher_bn_running_stats
+                    if m.track_running_stats != self.teacher_bn_running_stats:
+                        raise RuntimeError(
+                            f"Teacher BatchNorm track_running_stats={m.track_running_stats} "
+                            f"does not match expected={self.teacher_bn_running_stats}"
+                        )
 
         return
 
@@ -398,7 +402,7 @@ def _update_teacher_params(
         # new parameters
         zipped_params = zip(self.teacher.parameters(), model.parameters())
         for teacher_param, model_param in zipped_params:
-            (teacher_param.data.mul_(alpha).add_(1 - alpha, model_param.data))
+            (teacher_param.data.mul_(alpha).add_(model_param.data, alpha=1 - alpha))
         return
 
     def __call__(
@@ -483,7 +487,8 @@ def __call__(
         mixed_output = F.softmax(
             model(mixed_sample["input"]),
         )
-        assert mixed_output.requires_grad
+        if not mixed_output.requires_grad:
+            raise RuntimeError("mixed_output does not require grad")
 
         # set outputs as attributes for later access
         self.mixed_output = mixed_output
@@ -1771,7 +1776,8 @@ def __init__(
             # if the prior_matrix was provided, always prefer it.
             self.prior_matrix = prior_matrix
 
-        assert self.prior_matrix is not None
+        if self.prior_matrix is None:
+            raise ValueError("prior_matrix must be set, either via argument or gene sets")
         return
 
     def _set_prior_matrix_from_gene_sets(
diff --git a/scnym/main.py b/scnym/main.py
index 59d84ee..b6ea24d 100644
--- a/scnym/main.py
+++ b/scnym/main.py
@@ -28,20 +28,6 @@
 from .predict import Predicter
 from . import utils
 
-# allow tensorboard outputs even though TF2 is installed
-# TF2 broke the tensorboard/pytorch API, so we need to alias
-# the old API endpoint below
-try:
-    import tensorflow as tf
-    tfv = int(tf.__version__.split(".")[0])
-except ImportError:
-    print("tensorflow is not installed, assuming tensorboard is independent")
-    tfv = 1
-
-if tfv > 1:
-    import tensorboard as tb
-
-    tf.io.gfile = tb.compat.tensorflow_stub.io.gfile
 
 
 logger = logging.getLogger(__name__)
@@ -83,7 +69,7 @@ def repeater(data_loader):
 
 
 def fit_model(
-    X: Union[np.ndarray, sparse.csr.csr_matrix],
+    X: Union[np.ndarray, sparse.csr_matrix],
     y: np.ndarray,
     traintest_idx: Union[np.ndarray, tuple],
     val_idx: np.ndarray,
@@ -705,7 +691,7 @@ def fit_model(
 
 
 def train_cv(
-    X: Union[np.ndarray, sparse.csr.csr_matrix],
+    X: Union[np.ndarray, sparse.csr_matrix],
     y: np.ndarray,
     batch_size: int,
     n_epochs: int,
@@ -821,7 +807,7 @@ def train_cv(
 
 
 def train_all(
-    X: Union[np.ndarray, sparse.csr.csr_matrix],
+    X: Union[np.ndarray, sparse.csr_matrix],
     y: np.ndarray,
     batch_size: int,
     n_epochs: int,
@@ -930,7 +916,7 @@ def train_all(
 
 
 def train_tissue_independent_cv(
-    X: Union[np.ndarray, sparse.csr.csr_matrix],
+    X: Union[np.ndarray, sparse.csr_matrix],
     metadata: pd.DataFrame,
     out_path: str,
     balanced_classes: bool = False,
@@ -1055,7 +1041,7 @@ def train_tissue_independent_cv(
 
 
 def train_one_tissue_cv(
-    X: Union[np.ndarray, sparse.csr.csr_matrix],
+    X: Union[np.ndarray, sparse.csr_matrix],
     metadata: pd.DataFrame,
     out_path: str,
     balanced_classes: bool = False,
@@ -1171,7 +1157,7 @@ def train_one_tissue_cv(
 
 
 def predict_cell_types(
-    X: Union[np.ndarray, sparse.csr.csr_matrix],
+    X: Union[np.ndarray, sparse.csr_matrix],
     model_path: str,
     out_path: str,
     upper_groups: Union[list, np.ndarray] = None,
@@ -1182,7 +1168,7 @@ def predict_cell_types(
 
     Parameters
     ----------
-    X : np.ndarray, sparse.csr.csr_matrix
+    X : np.ndarray, sparse.csr_matrix
         [Cells, Genes] of log1p transformed, normalized values.
         log1p and normalization performed using scanpy defaults.
     model_path : str
@@ -1248,7 +1234,7 @@ def predict_cell_types(
 
 def load_data(
     path: str,
-) -> Union[np.ndarray, sparse.csr.csr_matrix]:
+) -> Union[np.ndarray, sparse.csr_matrix]:
     """Load a counts matrix from a file path.
 
     Parameters
@@ -1557,7 +1543,7 @@ def main():
         if args.ssl_config is not None:
             print(f"Loading Semi-Supervised Learning parameters for {args.ssl_method}")
             with open(args.ssl_config, "r") as f:
-                ssl_kwargs = yaml.load(f, Loader=yaml.Loader)
+                ssl_kwargs = yaml.safe_load(f)
             print("SSL kwargs:")
             for k, v in ssl_kwargs.items():
                 print(f"{k}\t\t:\t\t{v}")
@@ -1586,7 +1572,7 @@ def main():
         if args.unlabeled_domain is not None:
             unlabeled_domain = np.loadtxt(
                 args.unlabeled_domain,
-            ).astype(np.int)
+            ).astype(int)
         else:
             unlabeled_domain = None
     else:
diff --git a/scnym/predict.py b/scnym/predict.py
index 8b84b42..fa84556 100644
--- a/scnym/predict.py
+++ b/scnym/predict.py
@@ -106,7 +106,7 @@ def __init__(
 
     def predict(
         self,
-        X: Union[np.ndarray, sparse.csr.csr_matrix, torch.FloatTensor],
+        X: Union[np.ndarray, sparse.csr_matrix, torch.FloatTensor],
         output: str = None,
         batch_size: int = 1024,
         **kwargs,
@@ -116,7 +116,7 @@ def predict(
 
         Parameters
         ----------
-        X : np.ndarray, sparse.csr.csr_matrix, torch.FloatTensor
+        X : np.ndarray, sparse.csr_matrix, torch.FloatTensor
             [Cells, Genes]
         output : str
             additional output to include as an optional third tuple.
diff --git a/scnym/trainer.py b/scnym/trainer.py
index c3c8522..fdd3216 100644
--- a/scnym/trainer.py
+++ b/scnym/trainer.py
@@ -7,15 +7,11 @@
 import json
 import logging
 from typing import Callable, Iterable, Union, List
-from .dataprep import SampleMixUp
-from .utils import compute_entropy_of_mixing
-from .model import CellTypeCLF, DANN
-import copy
-from torch.utils.tensorboard import SummaryWriter
-
 from .dataprep import SampleMixUp
 from .utils import compute_entropy_of_mixing
 from .model import CellTypeCLF, DANN, AE
+import copy
+from tensorboardX import SummaryWriter
 from .losses import *
 
 
diff --git a/scnym/utils.py b/scnym/utils.py
index 1e4cab1..8e4316c 100644
--- a/scnym/utils.py
+++ b/scnym/utils.py
@@ -80,15 +80,15 @@ def l1_layer0(
 
 
 def append_categorical_to_data(
-    X: Union[np.ndarray, sparse.csr.csr_matrix],
+    X: Union[np.ndarray, sparse.csr_matrix],
     categorical: np.ndarray,
-) -> (Union[np.ndarray, sparse.csr.csr_matrix], np.ndarray):
+) -> (Union[np.ndarray, sparse.csr_matrix], np.ndarray):
     """Convert `categorical` to a one-hot vector and append
     this vector to each sample in `X`.
 
     Parameters
     ----------
-    X : np.ndarray, sparse.csr.csr_matrix
+    X : np.ndarray, sparse.csr_matrix
         [Cells, Features]
     categorical : np.ndarray
         [Cells,]
@@ -126,7 +126,7 @@ def append_categorical_to_data(
 
 def get_adata_asarray(
     adata: anndata.AnnData,
-) -> Union[np.ndarray, sparse.csr.csr_matrix]:
+) -> Union[np.ndarray, sparse.csr_matrix]:
     """Get the gene expression matrix `.X` of an
     AnnData object as an array rather than a view.
 
@@ -137,7 +137,7 @@ def get_adata_asarray(
 
     Returns
     -------
-    X : np.ndarray, sparse.csr.csr_matrix
+    X : np.ndarray, sparse.csr_matrix
         [Cells, Genes] `.X` attribute as an array
         in memory.
 
@@ -146,18 +146,18 @@ def get_adata_asarray(
     Returned `X` will match the type of `adata.X` view.
     """
     if sparse.issparse(adata.X):
-        X = sparse.csr.csr_matrix(adata.X)
+        X = sparse.csr_matrix(adata.X)
     else:
         X = np.array(adata.X)
     return X
 
 
 def build_classification_matrix(
-    X: Union[np.ndarray, sparse.csr.csr_matrix],
+    X: Union[np.ndarray, sparse.csr_matrix],
     model_genes: np.ndarray,
     sample_genes: np.ndarray,
     gene_batch_size: int = 512,
-) -> Union[np.ndarray, sparse.csr.csr_matrix]:
+) -> Union[np.ndarray, sparse.csr_matrix]:
     """
     Build a matrix for classification using only genes that overlap
     between the current sample and the pre-trained model.
@@ -182,7 +182,7 @@ def build_classification_matrix(
         as zeros. `type(N)` will match `type(X)`.
     """
     # check types
-    if type(X) not in (np.ndarray, sparse.csr.csr_matrix):
+    if type(X) not in (np.ndarray, sparse.csr_matrix):
         msg = f"X is type {type(X)}, must `np.ndarray` or `sparse.csr_matrix`"
         raise TypeError(msg)
     n_cells = X.shape[0]
@@ -211,7 +211,7 @@ def build_classification_matrix(
     common_genes = 0
     for i, g in tqdm.tqdm(enumerate(sample_genes), desc="mapping genes"):
         if np.sum(g == model_genes) > 0:
-            model_genes_indices.append(int(np.where(g == model_genes)[0]))
+            model_genes_indices.append(np.where(g == model_genes)[0].item())
             sample_genes_indices.append(
                 i,
             )
@@ -396,7 +396,7 @@ def __call__(
         self,
         distances: np.ndarray,
     ) -> np.ndarray:
-        """Generate a set of weights based on distances to a point
+        r"""Generate a set of weights based on distances to a point
         with a radial basis function kernel.
 
         Parameters
@@ -667,13 +667,13 @@ def compute_entropy_of_mixing(
 def _optimize_clustering(adata, resolution: list = [0.1, 0.2, 0.3, 0.5, 1.0]):
     scores = []
     for r in resolution:
-        sc.tl.leiden(adata, resolution=r)
+        sc.tl.leiden(adata, resolution=r, flavor="igraph", n_iterations=2)
         s = calinski_harabasz_score(adata.obsm["X_scnym"], adata.obs["leiden"])
         scores.append(s)
     cl_opt_df = pd.DataFrame({"resolution": resolution, "score": scores})
     best_idx = np.argmax(cl_opt_df["score"])
     res = cl_opt_df.iloc[best_idx, 0]
-    sc.tl.leiden(adata, resolution=res)
+    sc.tl.leiden(adata, resolution=res, flavor="igraph", n_iterations=2)
     print("Best resolution: ", res)
     return cl_opt_df
 
diff --git a/setup.py b/setup.py
index f3566cd..d29cadb 100644
--- a/setup.py
+++ b/setup.py
@@ -1,29 +1,38 @@
-import sys
-if sys.version_info < (3,):
-    sys.exit('scnym requires Python >= 3.6')
 from pathlib import Path
 
 from setuptools import setup, find_packages
 
+_here = Path(__file__).resolve().parent
+
 try:
     from scnym import __author__, __email__
 except ImportError:  # Deps not yet installed
     __author__ = __email__ = ''
 
+# Version is read from the VERSION file; keep `__version__` in scnym/__init__.py in sync
+_version = (_here / 'VERSION').read_text('utf-8').strip()
+
 setup(
     name='scnym',
-    version='0.3.3',
-    description="Semi supervised adversarial network networks for single cell classification",
+    version=_version,
+    description="Semi supervised adversarial neural networks for single cell classification",
     long_description="scNym uses the semi-supervised MixMatch framework and domain adversarial training to take advantage of information in both the labeled and unlabeled datasets.",
     url='http://github.com/calico/scnym',
     author=__author__,
     author_email=__email__,
     license='Apache',
-    python_requires='>=3.6',
+    python_requires='>=3.10',
     install_requires=[
         l.strip() for l in
-        Path('requirements.txt').read_text('utf-8').splitlines()
+        (_here / 'requirements.txt').read_text('utf-8').splitlines()
+        if l.strip()
     ],
+    extras_require={
+        'dev': [
+            'pytest>=7.0',
+            'ruff',
+        ],
+    },
     packages=find_packages(),
     entry_points=dict(
         console_scripts=['scnym=scnym.main:main', 'scnym_ad=scnym.scnym_ad:main'],
diff --git a/tests/test_api.py b/tests/test_api.py
index 06dac73..48fe5ef 100644
--- a/tests/test_api.py
+++ b/tests/test_api.py
@@ -121,23 +121,27 @@ def test_assumption_checking():
     )
     adata = adata[ridx, :].copy()
 
-    # test that an input anndata with duplicate genes
-    # throws an error
+    # test that duplicate genes are rejected — either by anndata
+    # (>= 0.11 raises at assignment) or by scnym's own check
     adata_dup_genes = adata.copy()
-    var_names_with_dups = np.array(adata_dup_genes.var_names)
+    var_names_with_dups = np.array(adata_dup_genes.var_names).copy()
     var_names_with_dups[-1] = var_names_with_dups[-2]
-    adata_dup_genes.var_names = var_names_with_dups
-
-    with pytest.raises(ValueError, match="Duplicate Genes"):
-        # this should throw an error about duplicate genes
-        config = {"n_epochs": 1}
-        scnym_api(
-            adata=adata_dup_genes,
-            task="train",
-            groupby="cell",
-            out_path=str(sc.settings.datasetdir),
-            config=config,
-        )
+    try:
+        adata_dup_genes.var_names = var_names_with_dups
+    except ValueError as exc:
+        # anndata >= 0.11 rejects duplicate var_names at assignment
+        assert "duplicate" in str(exc).lower() or "unique" in str(exc).lower()
+    else:
+        # older anndata accepted duplicates; scnym should catch them
+        with pytest.raises(ValueError, match="Duplicate Genes"):
+            config = {"n_epochs": 1}
+            scnym_api(
+                adata=adata_dup_genes,
+                task="train",
+                groupby="cell",
+                out_path=str(sc.settings.datasetdir),
+                config=config,
+            )
 
     # test that an input anndata with `.X` formatted as something
     # other than log1p(CPM) will throw an error
diff --git a/tests/test_guide.py b/tests/test_guide.py
index a46f29b..739a876 100644
--- a/tests/test_guide.py
+++ b/tests/test_guide.py
@@ -101,13 +101,13 @@ def test_sparsity_loss():
 
     # load 10x human PBMC data as a sample
     adata = sc.datasets.pbmc3k()
-    sc.pp.normalize_per_cell(adata, counts_per_cell_after=1e6)
+    sc.pp.normalize_total(adata, target_sum=1e6)
     sc.pp.log1p(adata)
     sc.pp.highly_variable_genes(adata, n_top_genes=2000)
     sc.pp.pca(adata)
     sc.pp.neighbors(adata, n_neighbors=15)
     # generate clusters to use as class labels
-    sc.tl.leiden(adata, resolution=0.5, key_added="leiden")
+    sc.tl.leiden(adata, resolution=0.5, key_added="leiden", flavor="igraph", n_iterations=2)
 
     adata.obs["class"] = pd.Categorical(
         adata.obs["leiden"],
@@ -281,13 +281,13 @@ def test_nonneg_guide():
 
     # load 10x human PBMC data as a sample
     adata = sc.datasets.pbmc3k()
-    sc.pp.normalize_per_cell(adata, counts_per_cell_after=1e6)
+    sc.pp.normalize_total(adata, target_sum=1e6)
     sc.pp.log1p(adata)
     sc.pp.highly_variable_genes(adata, n_top_genes=2000)
     sc.pp.pca(adata)
     sc.pp.neighbors(adata, n_neighbors=15)
     # generate clusters to use as class labels
-    sc.tl.leiden(adata, resolution=0.5, key_added="leiden")
+    sc.tl.leiden(adata, resolution=0.5, key_added="leiden", flavor="igraph", n_iterations=2)
 
     adata.obs["class"] = pd.Categorical(
         adata.obs["leiden"],
diff --git a/tests/test_interpret.py b/tests/test_interpret.py
index ed7be93..96d2cc2 100644
--- a/tests/test_interpret.py
+++ b/tests/test_interpret.py
@@ -14,18 +14,18 @@ def _load_10x_pbmc():
     adata = sc.datasets.pbmc3k()
     sc.pp.filter_cells(adata, min_counts=100)
     sc.pp.filter_genes(adata, min_cells=100)
-    sc.pp.normalize_per_cell(adata, counts_per_cell_after=1e6)
+    sc.pp.normalize_total(adata, target_sum=1e6)
     sc.pp.log1p(adata)
     sc.pp.highly_variable_genes(adata, n_top_genes=3000)
     sc.pp.pca(adata)
     sc.pp.neighbors(adata, n_neighbors=15)
-    sc.tl.leiden(adata, resolution=0.3)
+    sc.tl.leiden(adata, resolution=0.3, flavor="igraph", n_iterations=2)
     # name one class T cell and one B cell
     cd4 = adata.obs_vector("CD4")
     cd22 = adata.obs_vector("CD22")
     leiden = adata.obs_vector("leiden")
     tmp = pd.DataFrame({"CD4": cd4, "CD22": cd22, "leiden": leiden})
-    grp = tmp.groupby("leiden").mean().reset_index()
+    grp = tmp.groupby("leiden", observed=True).mean().reset_index()
     print(grp)
     t_cell_cl = grp.sort_values("CD4", ascending=False)["leiden"].tolist()[0]
     b_cell_cl = grp.sort_values("CD22", ascending=False)["leiden"].tolist()[0]
diff --git a/tests/test_mixmatch.py b/tests/test_mixmatch.py
index 6677ef3..0247042 100644
--- a/tests/test_mixmatch.py
+++ b/tests/test_mixmatch.py
@@ -82,7 +82,7 @@ def test_mixmatch_forward():
     adata = sc.datasets.pbmc3k()
     sc.pp.filter_cells(adata, min_counts=100)
     sc.pp.filter_genes(adata, min_cells=100)
-    sc.pp.normalize_per_cell(adata, counts_per_cell_after=1e6)
+    sc.pp.normalize_total(adata, target_sum=1e6)
     sc.pp.log1p(adata)
 
     # generate fake class labels
@@ -181,7 +181,7 @@ def test_mixmatch_forward_with_confthresh():
     adata = sc.datasets.pbmc3k()
     sc.pp.filter_cells(adata, min_counts=100)
     sc.pp.filter_genes(adata, min_cells=100)
-    sc.pp.normalize_per_cell(adata, counts_per_cell_after=1e6)
+    sc.pp.normalize_total(adata, target_sum=1e6)
     sc.pp.log1p(adata)
 
     # generate fake class labels
@@ -343,7 +343,7 @@ def test_mixmatch_forward_with_teacher_bn_runnning_stats():
     adata = sc.datasets.pbmc3k()
     sc.pp.filter_cells(adata, min_counts=100)
     sc.pp.filter_genes(adata, min_cells=100)
-    sc.pp.normalize_per_cell(adata, counts_per_cell_after=1e6)
+    sc.pp.normalize_total(adata, target_sum=1e6)
     sc.pp.log1p(adata)
 
     # generate fake class labels
@@ -435,7 +435,7 @@ def test_train_mixmatch():
     adata = sc.datasets.pbmc3k()
     sc.pp.filter_cells(adata, min_counts=100)
     sc.pp.filter_genes(adata, min_cells=100)
-    sc.pp.normalize_per_cell(adata, counts_per_cell_after=1e6)
+    sc.pp.normalize_total(adata, target_sum=1e6)
     sc.pp.log1p(adata)
 
     # generate fake class labels
diff --git a/tests/test_multitask.py b/tests/test_multitask.py
index 398c2e6..94473c1 100644
--- a/tests/test_multitask.py
+++ b/tests/test_multitask.py
@@ -23,7 +23,7 @@ def test_multitask_mixmatch():
     adata = sc.datasets.pbmc3k()
     sc.pp.filter_cells(adata, min_counts=100)
     sc.pp.filter_genes(adata, min_cells=100)
-    sc.pp.normalize_per_cell(adata, counts_per_cell_after=1e6)
+    sc.pp.normalize_total(adata, target_sum=1e6)
     sc.pp.log1p(adata)
 
     # generate fake class labels
@@ -133,7 +133,7 @@ def test_multitask_dan():
     adata = sc.datasets.pbmc3k()
     sc.pp.filter_cells(adata, min_counts=100)
     sc.pp.filter_genes(adata, min_cells=100)
-    sc.pp.normalize_per_cell(adata, counts_per_cell_after=1e6)
+    sc.pp.normalize_total(adata, target_sum=1e6)
     sc.pp.log1p(adata)
 
     # create dataloaders
@@ -196,7 +196,7 @@ def test_multitask_trainer():
     sc.pp.pca(adata)
     sc.pp.neighbors(adata, n_neighbors=15)
     # generate clusters to use as class labels
-    sc.tl.leiden(adata, resolution=0.5, key_added="leiden")
+    sc.tl.leiden(adata, resolution=0.5, key_added="leiden", flavor="igraph", n_iterations=2)
     adata.obs["class"] = pd.Categorical(
         adata.obs["leiden"],
     ).codes
diff --git a/tests/test_reconstruction.py b/tests/test_reconstruction.py
index 093ce8b..7ec7035 100644
--- a/tests/test_reconstruction.py
+++ b/tests/test_reconstruction.py
@@ -30,7 +30,7 @@ def test_reconstruction_loss():
     sc.pp.pca(adata)
     sc.pp.neighbors(adata, n_neighbors=15)
     # generate clusters to use as class labels
-    sc.tl.leiden(adata, resolution=0.5, key_added="leiden")
+    sc.tl.leiden(adata, resolution=0.5, key_added="leiden", flavor="igraph", n_iterations=2)
     adata.obs["class"] = pd.Categorical(
         adata.obs["leiden"],
     ).codes
diff --git a/tests/test_trainer.py b/tests/test_trainer.py
index 66fbb4d..6d5adfc 100644
--- a/tests/test_trainer.py
+++ b/tests/test_trainer.py
@@ -16,13 +16,13 @@ def test_trainer():
 
     # load 10x human PBMC data as a sample
     adata = sc.datasets.pbmc3k()
-    sc.pp.normalize_per_cell(adata, counts_per_cell_after=1e6)
+    sc.pp.normalize_total(adata, target_sum=1e6)
     sc.pp.log1p(adata)
     sc.pp.highly_variable_genes(adata, n_top_genes=2000)
     sc.pp.pca(adata)
     sc.pp.neighbors(adata, n_neighbors=15)
     # generate clusters to use as class labels
-    sc.tl.leiden(adata, resolution=0.5, key_added="leiden")
+    sc.tl.leiden(adata, resolution=0.5, key_added="leiden", flavor="igraph", n_iterations=2)
 
     adata.obs["class"] = pd.Categorical(
         adata.obs["leiden"],
diff --git a/tests/test_utils.py b/tests/test_utils.py
index 6724172..262b715 100644
--- a/tests/test_utils.py
+++ b/tests/test_utils.py
@@ -34,7 +34,7 @@ def test_build_classification_matrix_dense():
 
     # X should have the genes of B in the order of A
     for i, g in enumerate(A_genes):
-        j = int(np.where(B_genes == g)[0])
+        j = np.where(B_genes == g)[0].item()
         assert np.all(X[:, i] == B[:, j])
     return
 
@@ -68,7 +68,7 @@ def test_build_classification_matrix_sparse():
 
     # X should have the genes of B in the order of A
     for i, g in enumerate(A_genes):
-        j = int(np.where(B_genes == g)[0])
+        j = np.where(B_genes == g)[0].item()
         assert np.all(X[:, i].toarray() == B[:, j].toarray())
     return
 
@@ -78,7 +78,7 @@ def test_get_adata_asarray():
     # test getting a dense matrix
     import scnym
 
-    adata = anndata.AnnData(X=np.random.random((100, 100)))
+    adata = anndata.AnnData(X=np.random.random((100, 100)).astype(np.float32))
     X = scnym.utils.get_adata_asarray(adata=adata)
     assert type(X) == np.ndarray
 
@@ -87,7 +87,7 @@ def test_get_adata_asarray():
     ridx = np.random.choice(A.size, size=1000, replace=True)
     A.flat[ridx] = 1
     A = sparse.csr_matrix(A)
-    adata = anndata.AnnData(X=A)
+    adata = anndata.AnnData(X=A.astype(np.float32))
     X = scnym.utils.get_adata_asarray(adata=adata)
     assert sparse.issparse(X)
     return