From 5c23a0cde16970a795cc2fc78a9b6a0f1fb6355a Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Wed, 17 Dec 2025 17:47:20 +0000 Subject: [PATCH 1/8] Initial plan From 416502083d540f2f958085116a63e7b6989638cb Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Wed, 17 Dec 2025 17:57:33 +0000 Subject: [PATCH 2/8] Add migrate command with migration framework and initial migrations - Add migrate.py module with base migration framework including Migration class, MigrationRegistry, and decorators - Add migrations.py with three specific migrations: - standardize_generatedby: Convert pre-standard provenance to GeneratedBy (BEP028) - fix_inheritance_overloading: Check for deprecated inheritance overloading patterns (PR #1834) - fix_tsv_entity_prefix: Check for missing entity prefixes in TSV files (PR #2281) - Add CLI commands: 'bst migrate list', 'bst migrate run', 'bst migrate all' - Add comprehensive tests for migration framework and specific migrations - All tests passing (24 tests total) Co-authored-by: yarikoptic <39889+yarikoptic@users.noreply.github.com> --- .../src/bidsschematools/__main__.py | 149 +++++++ .../schemacode/src/bidsschematools/migrate.py | 258 ++++++++++++ .../src/bidsschematools/migrations.py | 388 ++++++++++++++++++ .../src/bidsschematools/tests/test_migrate.py | 198 +++++++++ .../bidsschematools/tests/test_migrations.py | 256 ++++++++++++ 5 files changed, 1249 insertions(+) create mode 100644 tools/schemacode/src/bidsschematools/migrate.py create mode 100644 tools/schemacode/src/bidsschematools/migrations.py create mode 100644 tools/schemacode/src/bidsschematools/tests/test_migrate.py create mode 100644 tools/schemacode/src/bidsschematools/tests/test_migrations.py diff --git a/tools/schemacode/src/bidsschematools/__main__.py b/tools/schemacode/src/bidsschematools/__main__.py index b0f8e5f967..3979fcd141 100644 --- a/tools/schemacode/src/bidsschematools/__main__.py +++ b/tools/schemacode/src/bidsschematools/__main__.py @@ -4,6 +4,7 @@ import re import sys from itertools import chain +from pathlib import Path import click @@ -180,5 +181,153 @@ def pre_receive_hook(schema, input_, output): sys.exit(rc) +@cli.group() +def migrate(): + """Migrate BIDS datasets to adopt standardized conventions""" + pass + + +@migrate.command("list") +def migrate_list(): + """List all available migrations""" + from .migrate import registry + from . import migrations # noqa: F401 - Import to register migrations + + migrations_list = registry.list_migrations() + + if not migrations_list: + lgr.info("No migrations available") + return + + click.echo("Available migrations:\n") + for mig in sorted(migrations_list, key=lambda x: x["version"]): + click.echo(f" {mig['name']} (version {mig['version']})") + click.echo(f" {mig['description']}\n") + + +@migrate.command("run") +@click.argument("migration_name") +@click.argument("dataset_path", type=click.Path(exists=True)) +@click.option( + "--dry-run", + is_flag=True, + help="Show what would be changed without modifying files", +) +def migrate_run(migration_name, dataset_path, dry_run): + """Run a specific migration on a BIDS dataset + + MIGRATION_NAME is the name of the migration to run (use 'migrate list' to see available) + + DATASET_PATH is the path to the BIDS dataset root directory + """ + from .migrate import registry + from . 
import migrations # noqa: F401 - Import to register migrations + + dataset_path = Path(dataset_path).resolve() + + if not (dataset_path / "dataset_description.json").exists(): + lgr.error( + f"No dataset_description.json found in {dataset_path}. " + "Is this a valid BIDS dataset?" + ) + sys.exit(1) + + try: + result = registry.run(migration_name, dataset_path, dry_run=dry_run) + except ValueError as e: + lgr.error(str(e)) + lgr.info("Use 'bst migrate list' to see available migrations") + sys.exit(1) + + # Display results + if result.get("modified_files"): + click.echo(f"\nModified files ({len(result['modified_files'])}):") + for filepath in result["modified_files"]: + click.echo(f" - {filepath}") + + if result.get("warnings"): + click.echo(f"\nWarnings ({len(result['warnings'])}):") + for warning in result["warnings"]: + click.echo(f" - {warning}") + + if result.get("suggestions"): + click.echo(f"\nSuggestions ({len(result['suggestions'])}):") + for suggestion in result["suggestions"]: + click.echo(f" - {suggestion}") + + click.echo(f"\n{result['message']}") + + if not result["success"]: + sys.exit(1) + + +@migrate.command("all") +@click.argument("dataset_path", type=click.Path(exists=True)) +@click.option( + "--dry-run", + is_flag=True, + help="Show what would be changed without modifying files", +) +@click.option( + "--skip", + multiple=True, + help="Skip specific migrations (can be used multiple times)", +) +def migrate_all(dataset_path, dry_run, skip): + """Run all available migrations on a BIDS dataset + + DATASET_PATH is the path to the BIDS dataset root directory + """ + from .migrate import registry + from . import migrations # noqa: F401 - Import to register migrations + + dataset_path = Path(dataset_path).resolve() + + if not (dataset_path / "dataset_description.json").exists(): + lgr.error( + f"No dataset_description.json found in {dataset_path}. " + "Is this a valid BIDS dataset?" + ) + sys.exit(1) + + migrations_list = registry.list_migrations() + skip_set = set(skip) + + if not migrations_list: + click.echo("No migrations available") + return + + click.echo(f"Running {len(migrations_list)} migration(s) on {dataset_path}") + if dry_run: + click.echo("DRY RUN: No files will be modified\n") + + results = [] + for mig in sorted(migrations_list, key=lambda x: x["version"]): + if mig["name"] in skip_set: + click.echo(f"Skipping: {mig['name']}") + continue + + click.echo(f"\nRunning: {mig['name']} (version {mig['version']})") + result = registry.run(mig["name"], dataset_path, dry_run=dry_run) + results.append((mig["name"], result)) + + if result.get("modified_files"): + click.echo(f" Modified {len(result['modified_files'])} file(s)") + if result.get("warnings"): + click.echo(f" {len(result['warnings'])} warning(s)") + if result.get("suggestions"): + click.echo(f" {len(result['suggestions'])} suggestion(s)") + click.echo(f" {result['message']}") + + # Summary + click.echo("\n" + "=" * 60) + click.echo("Migration Summary:") + click.echo("=" * 60) + + for name, result in results: + status = "✓" if result["success"] else "✗" + click.echo(f"{status} {name}: {result['message']}") + + if __name__ == "__main__": cli() diff --git a/tools/schemacode/src/bidsschematools/migrate.py b/tools/schemacode/src/bidsschematools/migrate.py new file mode 100644 index 0000000000..19c4a8d04d --- /dev/null +++ b/tools/schemacode/src/bidsschematools/migrate.py @@ -0,0 +1,258 @@ +"""BIDS dataset migration utilities. 
+ +This module provides functionality to migrate BIDS datasets from older versions +to newer standardized forms, helping users adopt new conventions and metadata fields. +""" + +import json +import os +from pathlib import Path +from typing import Callable, Dict, List, Optional, Any +import logging + +lgr = logging.getLogger(__name__) + + +class Migration: + """Represents a single migration operation. + + Attributes + ---------- + name : str + Human-readable name of the migration + version : str + BIDS version when this migration applies (e.g., "1.10.0") + description : str + Description of what this migration does + func : Callable + The function that performs the migration + """ + + def __init__( + self, + name: str, + version: str, + description: str, + func: Callable[[Path], Dict[str, Any]], + ): + """Initialize a migration. + + Parameters + ---------- + name : str + Human-readable name of the migration + version : str + BIDS version when this migration applies + description : str + Description of what this migration does + func : Callable + The function that performs the migration, takes dataset path + and returns a dict with 'success', 'modified_files', and 'message' keys + """ + self.name = name + self.version = version + self.description = description + self.func = func + + def __call__(self, dataset_path: Path, **kwargs) -> Dict[str, Any]: + """Execute the migration. + + Parameters + ---------- + dataset_path : Path + Path to the BIDS dataset root + **kwargs + Additional keyword arguments for the migration function + + Returns + ------- + dict + Result of the migration with keys: + - success: bool indicating if migration succeeded + - modified_files: list of modified file paths + - message: str with details about the migration + """ + return self.func(dataset_path, **kwargs) + + +class MigrationRegistry: + """Registry for BIDS dataset migrations.""" + + def __init__(self): + """Initialize the migration registry.""" + self._migrations: Dict[str, Migration] = {} + + def register( + self, + name: str, + version: str, + description: str, + ) -> Callable: + """Decorator to register a migration function. + + Parameters + ---------- + name : str + Human-readable name of the migration + version : str + BIDS version when this migration applies + description : str + Description of what this migration does + + Returns + ------- + Callable + Decorator function + + Examples + -------- + >>> registry = MigrationRegistry() + >>> @registry.register( + ... name="standardize_generatedby", + ... version="1.10.0", + ... description="Convert pre-standard provenance to GeneratedBy field" + ... ) + ... def migrate_generatedby(dataset_path): + ... # migration code here + ... return {"success": True, "modified_files": [], "message": "Done"} + """ + def decorator(func: Callable) -> Migration: + migration = Migration(name, version, description, func) + self._migrations[name] = migration + lgr.debug(f"Registered migration: {name} (version {version})") + return migration + return decorator + + def get(self, name: str) -> Optional[Migration]: + """Get a migration by name. + + Parameters + ---------- + name : str + Name of the migration + + Returns + ------- + Migration or None + The migration if found, None otherwise + """ + return self._migrations.get(name) + + def list_migrations(self) -> List[Dict[str, str]]: + """List all registered migrations. 
+ + Returns + ------- + list of dict + List of migration metadata with name, version, and description + """ + return [ + { + "name": mig.name, + "version": mig.version, + "description": mig.description, + } + for mig in self._migrations.values() + ] + + def run( + self, + name: str, + dataset_path: Path, + dry_run: bool = False, + **kwargs, + ) -> Dict[str, Any]: + """Run a specific migration. + + Parameters + ---------- + name : str + Name of the migration to run + dataset_path : Path + Path to the BIDS dataset root + dry_run : bool, optional + If True, don't actually modify files, default False + **kwargs + Additional keyword arguments for the migration + + Returns + ------- + dict + Result of the migration + + Raises + ------ + ValueError + If migration not found + """ + migration = self.get(name) + if migration is None: + raise ValueError(f"Migration '{name}' not found") + + lgr.info(f"Running migration: {migration.name} (version {migration.version})") + lgr.info(f"Description: {migration.description}") + + if dry_run: + lgr.info("DRY RUN: No files will be modified") + kwargs["dry_run"] = True + + result = migration(dataset_path, **kwargs) + + if result.get("success"): + lgr.info(f"Migration completed successfully: {result.get('message', '')}") + else: + lgr.warning(f"Migration failed or had issues: {result.get('message', '')}") + + return result + + +# Global registry instance +registry = MigrationRegistry() + + +def load_json(filepath: Path) -> Optional[Dict]: + """Load JSON file safely. + + Parameters + ---------- + filepath : Path + Path to JSON file + + Returns + ------- + dict or None + Loaded JSON data, or None if error + """ + try: + with open(filepath, 'r') as f: + return json.load(f) + except (json.JSONDecodeError, FileNotFoundError) as e: + lgr.error(f"Error loading {filepath}: {e}") + return None + + +def save_json(filepath: Path, data: Dict, indent: int = 2) -> bool: + """Save JSON file safely. + + Parameters + ---------- + filepath : Path + Path to JSON file + data : dict + Data to save + indent : int, optional + JSON indentation, default 2 + + Returns + ------- + bool + True if successful, False otherwise + """ + try: + with open(filepath, 'w') as f: + json.dump(data, f, indent=indent, ensure_ascii=False) + f.write('\n') # Add newline at end of file + return True + except Exception as e: + lgr.error(f"Error saving {filepath}: {e}") + return False diff --git a/tools/schemacode/src/bidsschematools/migrations.py b/tools/schemacode/src/bidsschematools/migrations.py new file mode 100644 index 0000000000..b2b05d4679 --- /dev/null +++ b/tools/schemacode/src/bidsschematools/migrations.py @@ -0,0 +1,388 @@ +"""Specific BIDS dataset migrations. + +This module contains individual migration functions for various BIDS version updates. +""" + +import json +import logging +from pathlib import Path +from typing import Dict, Any, List + +from .migrate import registry, load_json, save_json + +lgr = logging.getLogger(__name__) + + +@registry.register( + name="standardize_generatedby", + version="1.10.0", + description=( + "Convert pre-standard provenance metadata to standardized GeneratedBy field. " + "This migration helps adopt BEP028 conventions for dataset provenance tracking." + ), +) +def migrate_generatedby(dataset_path: Path, dry_run: bool = False) -> Dict[str, Any]: + """Migrate pre-standard provenance to GeneratedBy field. + + This migration looks for common pre-standard provenance fields and converts + them to the standardized GeneratedBy array format introduced in BEP028. 
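
    Only dataset_description.json files are examined: the copy at the
    dataset root and those one or two directory levels under derivatives/.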
+ + Common pre-standard fields that may be migrated: + - Provenance + - Pipeline + - ProcessingPipeline + - Software + + Parameters + ---------- + dataset_path : Path + Path to the BIDS dataset root + dry_run : bool, optional + If True, don't actually modify files, default False + + Returns + ------- + dict + Migration result with success status, modified files, and message + """ + modified_files = [] + issues = [] + + # Look for dataset_description.json files (including in derivatives) + desc_files = list(dataset_path.glob("dataset_description.json")) + desc_files.extend(dataset_path.glob("derivatives/*/dataset_description.json")) + desc_files.extend(dataset_path.glob("derivatives/*/*/dataset_description.json")) + + if not desc_files: + return { + "success": True, + "modified_files": [], + "message": "No dataset_description.json files found" + } + + for desc_file in desc_files: + data = load_json(desc_file) + if data is None: + issues.append(f"Could not load {desc_file}") + continue + + # Check if GeneratedBy already exists + if "GeneratedBy" in data: + lgr.debug(f"GeneratedBy already exists in {desc_file}, skipping") + continue + + # Look for pre-standard fields to migrate + generated_by = [] + migrated_fields = [] + + # Check for common pre-standard fields + pre_standard_fields = { + "Provenance": "provenance", + "Pipeline": "pipeline", + "ProcessingPipeline": "processing_pipeline", + "Software": "software", + "Tool": "tool", + } + + for old_field, field_type in pre_standard_fields.items(): + if old_field in data: + value = data[old_field] + migrated_fields.append(old_field) + + # Convert to GeneratedBy format + if isinstance(value, str): + # Simple string, create basic entry + entry = { + "Name": value, + "Description": f"Migrated from {old_field} field" + } + generated_by.append(entry) + elif isinstance(value, dict): + # Already structured, try to map fields + entry = {} + if "Name" in value: + entry["Name"] = value["Name"] + elif "name" in value: + entry["Name"] = value["name"] + else: + entry["Name"] = old_field + + if "Version" in value: + entry["Version"] = value["Version"] + elif "version" in value: + entry["Version"] = value["version"] + + if "Description" in value: + entry["Description"] = value["Description"] + elif "description" in value: + entry["Description"] = value["description"] + else: + entry["Description"] = f"Migrated from {old_field} field" + + # Handle container info if present + if "Container" in value or "container" in value: + entry["Container"] = value.get("Container", value.get("container")) + + generated_by.append(entry) + elif isinstance(value, list): + # List of entries + for item in value: + if isinstance(item, str): + generated_by.append({ + "Name": item, + "Description": f"Migrated from {old_field} field" + }) + elif isinstance(item, dict): + entry = {} + entry["Name"] = item.get("Name", item.get("name", old_field)) + if "Version" in item or "version" in item: + entry["Version"] = item.get("Version", item.get("version")) + entry["Description"] = item.get( + "Description", + item.get("description", f"Migrated from {old_field} field") + ) + if "Container" in item or "container" in item: + entry["Container"] = item.get("Container", item.get("container")) + generated_by.append(entry) + + if generated_by: + if not dry_run: + # Add GeneratedBy field + data["GeneratedBy"] = generated_by + + # Remove old fields + for field in migrated_fields: + del data[field] + + # Save updated file + if save_json(desc_file, data): + 
modified_files.append(str(desc_file.relative_to(dataset_path))) + lgr.info( + f"Migrated {len(generated_by)} provenance entries in {desc_file.name}: " + f"removed fields {migrated_fields}" + ) + else: + issues.append(f"Failed to save {desc_file}") + else: + lgr.info( + f"[DRY RUN] Would migrate {len(generated_by)} provenance entries in " + f"{desc_file}: remove fields {migrated_fields}" + ) + modified_files.append(f"{desc_file.relative_to(dataset_path)} (dry run)") + + if issues: + message = f"Completed with issues: {'; '.join(issues)}" + success = False + elif modified_files: + message = f"Migrated provenance in {len(modified_files)} file(s)" + success = True + else: + message = "No pre-standard provenance fields found to migrate" + success = True + + return { + "success": success, + "modified_files": modified_files, + "message": message, + } + + +@registry.register( + name="fix_inheritance_overloading", + version="1.10.1", + description=( + "Warn about deprecated inheritance overloading patterns. " + "Per PR #1834, overloading metadata values in the inheritance principle " + "will be deprecated in BIDS 2.0. This migration scans for potential issues." + ), +) +def check_inheritance_overloading(dataset_path: Path, dry_run: bool = False) -> Dict[str, Any]: + """Check for inheritance overloading patterns that will be deprecated. + + This migration scans for cases where the inheritance principle is being + used to overload values (e.g., using different values in different scopes + for the same metadata field). This pattern is deprecated per PR #1834. + + Parameters + ---------- + dataset_path : Path + Path to the BIDS dataset root + dry_run : bool, optional + Not used for this check-only migration + + Returns + ------- + dict + Migration result with warnings about overloading patterns + """ + warnings = [] + metadata_by_scope = {} + + # Find all JSON sidecar files + json_files = list(dataset_path.rglob("*.json")) + json_files = [f for f in json_files if f.name != "dataset_description.json"] + + if not json_files: + return { + "success": True, + "modified_files": [], + "message": "No JSON sidecar files found to check" + } + + # Analyze metadata fields across different scopes + for json_file in json_files: + data = load_json(json_file) + if data is None: + continue + + # Determine scope level (dataset, subject, session, etc.) 
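        # e.g. "sub-01/ses-01/func/bold.json" -> subject-session scope,
        # "sub-01/anat/T1w.json" -> subject scope, top-level sidecars -> dataset scope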
+ parts = json_file.relative_to(dataset_path).parts + if len(parts) > 1 and parts[0].startswith('sub-'): + if len(parts) > 2 and parts[1].startswith('ses-'): + scope = f"subject-session ({parts[0]}/{parts[1]})" + else: + scope = f"subject ({parts[0]})" + else: + scope = "dataset" + + # Track metadata by field name + for field, value in data.items(): + if field not in metadata_by_scope: + metadata_by_scope[field] = {} + + if scope not in metadata_by_scope[field]: + metadata_by_scope[field][scope] = [] + + metadata_by_scope[field][scope].append({ + "file": str(json_file.relative_to(dataset_path)), + "value": value, + }) + + # Check for overloading (same field with different values in different scopes) + for field, scopes in metadata_by_scope.items(): + if len(scopes) > 1: + # Get unique values across scopes + all_values = [] + for scope_data in scopes.values(): + for item in scope_data: + value_str = json.dumps(item["value"], sort_keys=True) + if value_str not in all_values: + all_values.append(value_str) + + # If multiple different values, this is potential overloading + if len(all_values) > 1: + scope_summary = [] + for scope, items in scopes.items(): + scope_summary.append(f"{scope}: {len(items)} file(s)") + + warnings.append( + f"Field '{field}' has different values across scopes " + f"({', '.join(scope_summary)}). This inheritance overloading pattern " + "is deprecated and will not be supported in BIDS 2.0. " + "Consider using separate metadata fields or entity labels instead." + ) + + if warnings: + message = ( + f"Found {len(warnings)} potential inheritance overloading pattern(s). " + "See warnings for details." + ) + lgr.warning(message) + for warning in warnings: + lgr.warning(warning) + else: + message = "No inheritance overloading patterns detected" + + return { + "success": True, + "modified_files": [], + "message": message, + "warnings": warnings, + } + + +@registry.register( + name="fix_tsv_entity_prefix", + version="1.10.1", + description=( + "Check for missing entity prefixes in TSV column headers. " + "Per PR #2281, certain TSV files should use entity- prefix for their columns." + ), +) +def check_tsv_entity_prefix(dataset_path: Path, dry_run: bool = False) -> Dict[str, Any]: + """Check for and optionally fix missing entity prefixes in TSV files. + + Some TSV files are expected to have entity prefixes (e.g., 'sample-' prefix + in samples.tsv for columns that identify samples). This migration helps + identify inconsistencies. 
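
    This is a check-only migration: it reports suggestions but does not
    modify any files.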
+ + Parameters + ---------- + dataset_path : Path + Path to the BIDS dataset root + dry_run : bool, optional + If True, don't actually modify files, default False + + Returns + ------- + dict + Migration result with findings about entity prefix consistency + """ + issues = [] + suggestions = [] + + # Files that should use entity prefixes + # Format: {filename_pattern: expected_entity_prefix} + expected_prefixes = { + "samples.tsv": "sample", + "participants.tsv": "participant", + } + + for pattern, entity in expected_prefixes.items(): + tsv_files = list(dataset_path.glob(pattern)) + tsv_files.extend(dataset_path.glob(f"*/{pattern}")) + + for tsv_file in tsv_files: + try: + with open(tsv_file, 'r') as f: + header = f.readline().strip() + if not header: + continue + + columns = header.split('\t') + + # Check if first column lacks proper prefix + if columns and not columns[0].startswith(f"{entity}_"): + # Special case: participant_id is standard, don't flag it + if tsv_file.name == "participants.tsv" and columns[0] == "participant_id": + continue + + suggestions.append( + f"{tsv_file.relative_to(dataset_path)}: " + f"Column '{columns[0]}' should likely be '{entity}_{columns[0]}'" + ) + except Exception as e: + issues.append(f"Error reading {tsv_file}: {e}") + + if suggestions: + message = ( + f"Found {len(suggestions)} TSV files with potential entity prefix issues. " + "Manual review recommended." + ) + lgr.info(message) + for suggestion in suggestions: + lgr.info(f" - {suggestion}") + else: + message = "No entity prefix issues detected in TSV files" + + if issues: + for issue in issues: + lgr.warning(issue) + + return { + "success": True, + "modified_files": [], + "message": message, + "suggestions": suggestions, + "issues": issues, + } diff --git a/tools/schemacode/src/bidsschematools/tests/test_migrate.py b/tools/schemacode/src/bidsschematools/tests/test_migrate.py new file mode 100644 index 0000000000..ce4bb8e2e6 --- /dev/null +++ b/tools/schemacode/src/bidsschematools/tests/test_migrate.py @@ -0,0 +1,198 @@ +"""Tests for BIDS dataset migration functionality.""" + +import json +import pytest +from pathlib import Path +from bidsschematools.migrate import Migration, MigrationRegistry, load_json, save_json + + +class TestMigration: + """Tests for Migration class.""" + + def test_migration_creation(self): + """Test creating a migration.""" + def dummy_func(dataset_path): + return {"success": True, "modified_files": [], "message": "Done"} + + mig = Migration( + name="test_migration", + version="1.0.0", + description="Test migration", + func=dummy_func, + ) + + assert mig.name == "test_migration" + assert mig.version == "1.0.0" + assert mig.description == "Test migration" + assert callable(mig.func) + + def test_migration_call(self, tmp_path): + """Test calling a migration.""" + def dummy_func(dataset_path, **kwargs): + return { + "success": True, + "modified_files": ["test.json"], + "message": f"Processed {dataset_path}", + } + + mig = Migration( + name="test_migration", + version="1.0.0", + description="Test migration", + func=dummy_func, + ) + + result = mig(tmp_path) + assert result["success"] is True + assert result["modified_files"] == ["test.json"] + assert str(tmp_path) in result["message"] + + +class TestMigrationRegistry: + """Tests for MigrationRegistry class.""" + + def test_registry_creation(self): + """Test creating a registry.""" + registry = MigrationRegistry() + assert registry.list_migrations() == [] + + def test_register_migration(self): + """Test registering a migration.""" 
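        # Use a fresh registry so the test does not touch the module-level one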
+ registry = MigrationRegistry() + + @registry.register( + name="test_migration", + version="1.0.0", + description="Test migration", + ) + def test_func(dataset_path): + return {"success": True, "modified_files": [], "message": "Done"} + + migrations = registry.list_migrations() + assert len(migrations) == 1 + assert migrations[0]["name"] == "test_migration" + assert migrations[0]["version"] == "1.0.0" + + def test_get_migration(self): + """Test getting a migration by name.""" + registry = MigrationRegistry() + + @registry.register( + name="test_migration", + version="1.0.0", + description="Test migration", + ) + def test_func(dataset_path): + return {"success": True, "modified_files": [], "message": "Done"} + + mig = registry.get("test_migration") + assert mig is not None + assert mig.name == "test_migration" + + mig = registry.get("nonexistent") + assert mig is None + + def test_run_migration(self, tmp_path): + """Test running a migration.""" + registry = MigrationRegistry() + + @registry.register( + name="test_migration", + version="1.0.0", + description="Test migration", + ) + def test_func(dataset_path, **kwargs): + return { + "success": True, + "modified_files": [], + "message": "Migration completed", + } + + result = registry.run("test_migration", tmp_path) + assert result["success"] is True + assert result["message"] == "Migration completed" + + def test_run_nonexistent_migration(self, tmp_path): + """Test running a nonexistent migration raises error.""" + registry = MigrationRegistry() + + with pytest.raises(ValueError, match="Migration 'nonexistent' not found"): + registry.run("nonexistent", tmp_path) + + def test_dry_run(self, tmp_path): + """Test dry run mode.""" + registry = MigrationRegistry() + + @registry.register( + name="test_migration", + version="1.0.0", + description="Test migration", + ) + def test_func(dataset_path, dry_run=False): + return { + "success": True, + "modified_files": [] if dry_run else ["test.json"], + "message": "Dry run" if dry_run else "Modified files", + } + + result = registry.run("test_migration", tmp_path, dry_run=True) + assert result["success"] is True + assert result["modified_files"] == [] + assert "Dry run" in result["message"] + + +class TestJsonHelpers: + """Tests for JSON helper functions.""" + + def test_load_json(self, tmp_path): + """Test loading JSON file.""" + test_file = tmp_path / "test.json" + test_data = {"key": "value", "number": 42} + + with open(test_file, 'w') as f: + json.dump(test_data, f) + + loaded = load_json(test_file) + assert loaded == test_data + + def test_load_json_nonexistent(self, tmp_path): + """Test loading nonexistent JSON file returns None.""" + test_file = tmp_path / "nonexistent.json" + loaded = load_json(test_file) + assert loaded is None + + def test_load_json_invalid(self, tmp_path): + """Test loading invalid JSON returns None.""" + test_file = tmp_path / "invalid.json" + + with open(test_file, 'w') as f: + f.write("not valid json{") + + loaded = load_json(test_file) + assert loaded is None + + def test_save_json(self, tmp_path): + """Test saving JSON file.""" + test_file = tmp_path / "test.json" + test_data = {"key": "value", "number": 42} + + result = save_json(test_file, test_data) + assert result is True + + # Verify file contents + with open(test_file, 'r') as f: + loaded = json.load(f) + assert loaded == test_data + + def test_save_json_with_indentation(self, tmp_path): + """Test saving JSON file with custom indentation.""" + test_file = tmp_path / "test.json" + test_data = {"key": "value"} + + 
save_json(test_file, test_data, indent=4) + + # Check that file is properly indented + with open(test_file, 'r') as f: + content = f.read() + assert " " in content # Should have 4-space indent + assert content.endswith('\n') # Should have trailing newline diff --git a/tools/schemacode/src/bidsschematools/tests/test_migrations.py b/tools/schemacode/src/bidsschematools/tests/test_migrations.py new file mode 100644 index 0000000000..217737ca95 --- /dev/null +++ b/tools/schemacode/src/bidsschematools/tests/test_migrations.py @@ -0,0 +1,256 @@ +"""Tests for specific BIDS dataset migrations.""" + +import json +import pytest +from pathlib import Path +from bidsschematools.migrations import ( + migrate_generatedby, + check_inheritance_overloading, + check_tsv_entity_prefix, +) + + +class TestMigrateGeneratedBy: + """Tests for GeneratedBy migration.""" + + @pytest.fixture + def dataset_with_old_provenance(self, tmp_path): + """Create a dataset with pre-standard provenance fields.""" + dataset_path = tmp_path / "dataset" + dataset_path.mkdir() + + # Create dataset_description.json with old-style provenance + desc_data = { + "Name": "Test Dataset", + "BIDSVersion": "1.9.0", + "Pipeline": { + "Name": "fmriprep", + "Version": "1.4.1", + } + } + + desc_file = dataset_path / "dataset_description.json" + with open(desc_file, 'w') as f: + json.dump(desc_data, f, indent=2) + + return dataset_path + + @pytest.fixture + def dataset_with_new_provenance(self, tmp_path): + """Create a dataset with standard GeneratedBy field.""" + dataset_path = tmp_path / "dataset" + dataset_path.mkdir() + + desc_data = { + "Name": "Test Dataset", + "BIDSVersion": "1.10.0", + "GeneratedBy": [ + { + "Name": "fmriprep", + "Version": "1.4.1", + } + ] + } + + desc_file = dataset_path / "dataset_description.json" + with open(desc_file, 'w') as f: + json.dump(desc_data, f, indent=2) + + return dataset_path + + def test_migrate_old_pipeline_field(self, dataset_with_old_provenance): + """Test migrating old Pipeline field to GeneratedBy.""" + result = migrate_generatedby(dataset_with_old_provenance) + + assert result["success"] is True + assert len(result["modified_files"]) == 1 + assert "dataset_description.json" in result["modified_files"][0] + + # Check that file was actually modified + desc_file = dataset_with_old_provenance / "dataset_description.json" + with open(desc_file, 'r') as f: + data = json.load(f) + + assert "GeneratedBy" in data + assert "Pipeline" not in data + assert data["GeneratedBy"][0]["Name"] == "fmriprep" + assert data["GeneratedBy"][0]["Version"] == "1.4.1" + + def test_skip_if_generatedby_exists(self, dataset_with_new_provenance): + """Test that migration skips if GeneratedBy already exists.""" + result = migrate_generatedby(dataset_with_new_provenance) + + assert result["success"] is True + assert len(result["modified_files"]) == 0 + assert "already exists" in result["message"] or "No pre-standard" in result["message"] + + def test_dry_run(self, dataset_with_old_provenance): + """Test dry run mode doesn't modify files.""" + result = migrate_generatedby(dataset_with_old_provenance, dry_run=True) + + assert result["success"] is True + assert len(result["modified_files"]) > 0 + assert "dry run" in result["modified_files"][0].lower() + + # Check that file was NOT modified + desc_file = dataset_with_old_provenance / "dataset_description.json" + with open(desc_file, 'r') as f: + data = json.load(f) + + assert "Pipeline" in data + assert "GeneratedBy" not in data + + def test_migrate_multiple_fields(self, tmp_path): + 
"""Test migrating multiple pre-standard fields.""" + dataset_path = tmp_path / "dataset" + dataset_path.mkdir() + + desc_data = { + "Name": "Test Dataset", + "BIDSVersion": "1.9.0", + "Pipeline": "fmriprep", + "Software": "SPM12", + } + + desc_file = dataset_path / "dataset_description.json" + with open(desc_file, 'w') as f: + json.dump(desc_data, f, indent=2) + + result = migrate_generatedby(dataset_path) + + assert result["success"] is True + + with open(desc_file, 'r') as f: + data = json.load(f) + + assert "GeneratedBy" in data + assert len(data["GeneratedBy"]) == 2 + assert "Pipeline" not in data + assert "Software" not in data + + def test_no_dataset_description(self, tmp_path): + """Test with no dataset_description.json file.""" + result = migrate_generatedby(tmp_path) + + assert result["success"] is True + assert len(result["modified_files"]) == 0 + assert "No dataset_description.json" in result["message"] + + +class TestCheckInheritanceOverloading: + """Tests for inheritance overloading check.""" + + @pytest.fixture + def dataset_with_overloading(self, tmp_path): + """Create a dataset with inheritance overloading.""" + dataset_path = tmp_path / "dataset" + dataset_path.mkdir() + + # Create dataset-level metadata + (dataset_path / "task-rest_bold.json").write_text( + json.dumps({"RepetitionTime": 2.0}) + ) + + # Create subject-level metadata with different value + sub_dir = dataset_path / "sub-01" + sub_dir.mkdir() + (sub_dir / "task-rest_bold.json").write_text( + json.dumps({"RepetitionTime": 1.5}) + ) + + return dataset_path + + @pytest.fixture + def dataset_without_overloading(self, tmp_path): + """Create a dataset without inheritance overloading.""" + dataset_path = tmp_path / "dataset" + dataset_path.mkdir() + + # Create dataset-level metadata + (dataset_path / "task-rest_bold.json").write_text( + json.dumps({"TaskName": "rest"}) + ) + + # Create subject-level metadata with same value + sub_dir = dataset_path / "sub-01" + sub_dir.mkdir() + (sub_dir / "task-rest_bold.json").write_text( + json.dumps({"TaskName": "rest"}) + ) + + return dataset_path + + def test_detect_overloading(self, dataset_with_overloading): + """Test detection of inheritance overloading.""" + result = check_inheritance_overloading(dataset_with_overloading) + + assert result["success"] is True + assert len(result["warnings"]) > 0 + assert any("RepetitionTime" in w for w in result["warnings"]) + assert any("deprecated" in w for w in result["warnings"]) + + def test_no_overloading(self, dataset_without_overloading): + """Test dataset without overloading issues.""" + result = check_inheritance_overloading(dataset_without_overloading) + + assert result["success"] is True + assert len(result.get("warnings", [])) == 0 + assert "No inheritance overloading" in result["message"] + + def test_empty_dataset(self, tmp_path): + """Test with empty dataset.""" + result = check_inheritance_overloading(tmp_path) + + assert result["success"] is True + assert "No JSON sidecar files" in result["message"] + + +class TestCheckTsvEntityPrefix: + """Tests for TSV entity prefix check.""" + + @pytest.fixture + def dataset_with_participants(self, tmp_path): + """Create a dataset with participants.tsv.""" + dataset_path = tmp_path / "dataset" + dataset_path.mkdir() + + # Create participants.tsv with correct prefix + participants_file = dataset_path / "participants.tsv" + participants_file.write_text("participant_id\tage\tsex\n01\t25\tF\n") + + return dataset_path + + @pytest.fixture + def dataset_with_samples(self, tmp_path): + 
"""Create a dataset with samples.tsv without proper prefix.""" + dataset_path = tmp_path / "dataset" + dataset_path.mkdir() + + # Create samples.tsv without proper prefix + samples_file = dataset_path / "samples.tsv" + samples_file.write_text("id\ttype\ttissue\n01\tblood\tWB\n") + + return dataset_path + + def test_correct_prefix(self, dataset_with_participants): + """Test dataset with correct entity prefix.""" + result = check_tsv_entity_prefix(dataset_with_participants) + + assert result["success"] is True + assert len(result.get("suggestions", [])) == 0 + assert "No entity prefix issues" in result["message"] + + def test_missing_prefix(self, dataset_with_samples): + """Test dataset with missing entity prefix.""" + result = check_tsv_entity_prefix(dataset_with_samples) + + assert result["success"] is True + assert len(result.get("suggestions", [])) > 0 + assert any("sample_" in s for s in result["suggestions"]) + + def test_empty_dataset(self, tmp_path): + """Test with empty dataset.""" + result = check_tsv_entity_prefix(tmp_path) + + assert result["success"] is True + assert len(result.get("suggestions", [])) == 0 From 9434f0e822e9b7a4bc8d06a500b683faf7131039 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Wed, 17 Dec 2025 17:59:33 +0000 Subject: [PATCH 3/8] Add documentation and CLI integration tests for migrate command - Add comprehensive MIGRATE.md documentation with usage examples - Add CLI integration tests to verify end-to-end functionality - Update README.md to mention migrate command feature - All 29 tests passing Co-authored-by: yarikoptic <39889+yarikoptic@users.noreply.github.com> --- tools/schemacode/MIGRATE.md | 209 ++++++++++++++++++ tools/schemacode/README.md | 1 + .../bidsschematools/tests/test_migrate_cli.py | 125 +++++++++++ 3 files changed, 335 insertions(+) create mode 100644 tools/schemacode/MIGRATE.md create mode 100644 tools/schemacode/src/bidsschematools/tests/test_migrate_cli.py diff --git a/tools/schemacode/MIGRATE.md b/tools/schemacode/MIGRATE.md new file mode 100644 index 0000000000..b116c518f5 --- /dev/null +++ b/tools/schemacode/MIGRATE.md @@ -0,0 +1,209 @@ +# BIDS Dataset Migration Tools + +The BIDS Schema Tools package includes a `migrate` command to help dataset maintainers adopt new standardized conventions introduced in BIDS specification updates. + +## Overview + +As the BIDS specification evolves, new conventions and metadata fields are introduced. The migration tools help you upgrade existing datasets to use these new standards without manual editing. 
+ +## Usage + +### List Available Migrations + +To see all available migrations: + +```bash +bst migrate list +``` + +This will display: +- Migration name +- BIDS version when it was introduced +- Description of what the migration does + +### Run a Specific Migration + +To run a single migration on a dataset: + +```bash +bst migrate run MIGRATION_NAME /path/to/dataset +``` + +For example: +```bash +bst migrate run standardize_generatedby /data/my_bids_dataset +``` + +### Dry Run Mode + +To preview what would be changed without modifying files: + +```bash +bst migrate run MIGRATION_NAME /path/to/dataset --dry-run +``` + +### Run All Migrations + +To run all available migrations on a dataset: + +```bash +bst migrate all /path/to/dataset +``` + +Skip specific migrations: + +```bash +bst migrate all /path/to/dataset --skip standardize_generatedby --skip another_migration +``` + +## Available Migrations + +### standardize_generatedby (version 1.10.0) + +**Purpose:** Convert pre-standard provenance metadata to the standardized `GeneratedBy` field introduced in BEP028. + +**What it does:** +- Looks for legacy provenance fields: `Pipeline`, `ProcessingPipeline`, `Software`, `Tool`, `Provenance` +- Converts them to the standard `GeneratedBy` array format +- Removes the legacy fields after migration + +**Example:** + +Before migration: +```json +{ + "Name": "My Dataset", + "BIDSVersion": "1.9.0", + "Pipeline": { + "Name": "fmriprep", + "Version": "1.4.1" + }, + "Software": "SPM12" +} +``` + +After migration: +```json +{ + "Name": "My Dataset", + "BIDSVersion": "1.9.0", + "GeneratedBy": [ + { + "Name": "fmriprep", + "Version": "1.4.1", + "Description": "Migrated from Pipeline field" + }, + { + "Name": "SPM12", + "Description": "Migrated from Software field" + } + ] +} +``` + +### fix_inheritance_overloading (version 1.10.1) + +**Purpose:** Check for deprecated inheritance overloading patterns (PR #1834). + +**What it does:** +- Scans JSON sidecar files across different inheritance scopes +- Identifies fields that have different values at different levels (dataset, subject, session) +- Reports warnings about this deprecated pattern +- Does not modify files (check-only migration) + +**Background:** Using different values for the same metadata field at different inheritance levels will not be supported in BIDS 2.0. This migration helps identify such cases so they can be addressed. + +### fix_tsv_entity_prefix (version 1.10.1) + +**Purpose:** Check for missing entity prefixes in TSV column headers (PR #2281). + +**What it does:** +- Checks TSV files that should use entity prefixes (e.g., `samples.tsv`, `participants.tsv`) +- Identifies columns that lack proper entity prefixes +- Provides suggestions for corrections +- Does not modify files (check-only migration) + +**Example issue:** +In `samples.tsv`, a column named `id` should be `sample_id`. + +## Creating New Migrations + +Migrations are registered using a decorator pattern. To add a new migration: + +```python +from bidsschematools.migrate import registry + +@registry.register( + name="my_migration", + version="1.11.0", + description="Description of what this migration does" +) +def my_migration(dataset_path, dry_run=False): + """ + Perform the migration. + + Parameters + ---------- + dataset_path : Path + Path to the BIDS dataset root + dry_run : bool + If True, don't modify files + + Returns + ------- + dict + Result with keys: success, modified_files, message + """ + # Migration implementation + modified_files = [] + + # ... 
your migration code ...

    return {
        "success": True,
        "modified_files": modified_files,
        "message": "Migration completed"
    }
```

Add your migration to `bidsschematools/migrations.py` and it will automatically be available through the CLI.

## Migration Best Practices

1. **Always use dry-run first:** Preview changes before applying them
   ```bash
   bst migrate run MIGRATION_NAME /path/to/dataset --dry-run
   ```

2. **Back up your data:** While migrations are designed to be safe, always back up important datasets before running migrations

3. **Version control:** If your dataset is in git, commit before running migrations so you can review and revert if needed

4. **Review output:** Check the list of modified files and review the changes to ensure they are correct

5. **Check-only migrations:** Some migrations only report issues without making changes. Review their output and make manual corrections as needed

## Exit Codes

- `0`: Success
- `1`: Migration failed, errors were encountered, or no `dataset_description.json` was found
- `2`: Invalid command-line usage

## Logging

Use `-v` for verbose output:
```bash
bst -v migrate run MIGRATION_NAME /path/to/dataset
```

Use `-vv` for even more detailed logging:
```bash
bst -vv migrate run MIGRATION_NAME /path/to/dataset
```

## Related Resources

- [BIDS Specification](https://bids-specification.readthedocs.io/)
- [BEP028 (Provenance)](https://bids.neuroimaging.io/bep028)
- [PR #1834 (Inheritance Overloading)](https://github.com/bids-standard/bids-specification/pull/1834)
- [PR #2281 (Entity Prefixes)](https://github.com/bids-standard/bids-specification/pull/2281)
diff --git a/tools/schemacode/README.md b/tools/schemacode/README.md
index 3fe4fc2705..956df5ab9a 100644
--- a/tools/schemacode/README.md
+++ b/tools/schemacode/README.md
@@ -12,6 +12,7 @@ Features:
 * lightweight
 * reference schema parsing implementation used for schema testing
 * simple CLI bindings (e.g. `bst export`)
+* dataset migration tools to adopt new BIDS conventions (e.g. 
`bst migrate`) If you have questions, you can post them in one of several channels where BIDS members are active: - the [NeuroStars](https://neurostars.org/tags/bids) discourse forum diff --git a/tools/schemacode/src/bidsschematools/tests/test_migrate_cli.py b/tools/schemacode/src/bidsschematools/tests/test_migrate_cli.py new file mode 100644 index 0000000000..6266fc04aa --- /dev/null +++ b/tools/schemacode/src/bidsschematools/tests/test_migrate_cli.py @@ -0,0 +1,125 @@ +"""Integration tests for the migrate CLI commands.""" + +import json +import subprocess +from pathlib import Path +import pytest + + +class TestMigrateCLI: + """Test the bst migrate CLI commands.""" + + def test_migrate_list_command(self): + """Test that 'bst migrate list' works.""" + result = subprocess.run( + ["bst", "migrate", "list"], + capture_output=True, + text=True, + ) + + assert result.returncode == 0 + assert "standardize_generatedby" in result.stdout + assert "fix_inheritance_overloading" in result.stdout + assert "fix_tsv_entity_prefix" in result.stdout + assert "version" in result.stdout + + def test_migrate_run_command(self, tmp_path): + """Test that 'bst migrate run' works on a real dataset.""" + # Create a test dataset + dataset_path = tmp_path / "test_dataset" + dataset_path.mkdir() + + desc_file = dataset_path / "dataset_description.json" + desc_data = { + "Name": "Test Dataset", + "BIDSVersion": "1.9.0", + "Pipeline": "my_pipeline" + } + + with open(desc_file, 'w') as f: + json.dump(desc_data, f) + + # Run the migration + result = subprocess.run( + ["bst", "migrate", "run", "standardize_generatedby", str(dataset_path)], + capture_output=True, + text=True, + ) + + assert result.returncode == 0 + assert "Modified files" in result.stdout + assert "dataset_description.json" in result.stdout + + # Verify the file was actually migrated + with open(desc_file, 'r') as f: + migrated_data = json.load(f) + + assert "GeneratedBy" in migrated_data + assert "Pipeline" not in migrated_data + + def test_migrate_run_dry_run(self, tmp_path): + """Test that 'bst migrate run --dry-run' doesn't modify files.""" + # Create a test dataset + dataset_path = tmp_path / "test_dataset" + dataset_path.mkdir() + + desc_file = dataset_path / "dataset_description.json" + desc_data = { + "Name": "Test Dataset", + "BIDSVersion": "1.9.0", + "Pipeline": "my_pipeline" + } + + with open(desc_file, 'w') as f: + json.dump(desc_data, f) + + # Run the migration in dry-run mode + result = subprocess.run( + ["bst", "migrate", "run", "standardize_generatedby", str(dataset_path), "--dry-run"], + capture_output=True, + text=True, + ) + + assert result.returncode == 0 + assert "dry run" in result.stdout.lower() + + # Verify the file was NOT modified + with open(desc_file, 'r') as f: + data = json.load(f) + + assert "Pipeline" in data + assert "GeneratedBy" not in data + + def test_migrate_run_invalid_dataset(self, tmp_path): + """Test that running migration on invalid dataset fails gracefully.""" + # Create empty directory (no dataset_description.json) + dataset_path = tmp_path / "invalid_dataset" + dataset_path.mkdir() + + result = subprocess.run( + ["bst", "migrate", "run", "standardize_generatedby", str(dataset_path)], + capture_output=True, + text=True, + ) + + assert result.returncode == 1 + assert "dataset_description.json" in result.stderr.lower() + + def test_migrate_run_nonexistent_migration(self, tmp_path): + """Test that running nonexistent migration fails gracefully.""" + # Create a valid dataset + dataset_path = tmp_path / "test_dataset" 
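        # Minimal valid dataset: the migrate CLI only checks for dataset_description.json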
+ dataset_path.mkdir() + + desc_file = dataset_path / "dataset_description.json" + with open(desc_file, 'w') as f: + json.dump({"Name": "Test", "BIDSVersion": "1.0.0"}, f) + + result = subprocess.run( + ["bst", "migrate", "run", "nonexistent_migration", str(dataset_path)], + capture_output=True, + text=True, + ) + + assert result.returncode == 1 + assert "not found" in result.stderr.lower() From 835260e3f8781df987783407295f87275016d305 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Wed, 17 Dec 2025 18:02:16 +0000 Subject: [PATCH 4/8] Fix code style issues with ruff linter - Fix quote style inconsistencies (use double quotes) - Remove unnecessary mode arguments from open() - Remove trailing whitespace from blank lines - Replace Dict/Any with dict for modern type hints - All 29 migration tests still passing Co-authored-by: yarikoptic <39889+yarikoptic@users.noreply.github.com> --- .../schemacode/src/bidsschematools/migrate.py | 83 +++++++------ .../src/bidsschematools/migrations.py | 116 +++++++++--------- 2 files changed, 99 insertions(+), 100 deletions(-) diff --git a/tools/schemacode/src/bidsschematools/migrate.py b/tools/schemacode/src/bidsschematools/migrate.py index 19c4a8d04d..292e325327 100644 --- a/tools/schemacode/src/bidsschematools/migrate.py +++ b/tools/schemacode/src/bidsschematools/migrate.py @@ -5,17 +5,16 @@ """ import json -import os -from pathlib import Path -from typing import Callable, Dict, List, Optional, Any import logging +from pathlib import Path +from typing import Any, Callable, Optional lgr = logging.getLogger(__name__) class Migration: """Represents a single migration operation. - + Attributes ---------- name : str @@ -27,16 +26,16 @@ class Migration: func : Callable The function that performs the migration """ - + def __init__( self, name: str, version: str, description: str, - func: Callable[[Path], Dict[str, Any]], + func: Callable[[Path], dict[str, Any]], ): """Initialize a migration. - + Parameters ---------- name : str @@ -53,17 +52,17 @@ def __init__( self.version = version self.description = description self.func = func - - def __call__(self, dataset_path: Path, **kwargs) -> Dict[str, Any]: + + def __call__(self, dataset_path: Path, **kwargs) -> dict[str, Any]: """Execute the migration. - + Parameters ---------- dataset_path : Path Path to the BIDS dataset root **kwargs Additional keyword arguments for the migration function - + Returns ------- dict @@ -77,11 +76,11 @@ def __call__(self, dataset_path: Path, **kwargs) -> Dict[str, Any]: class MigrationRegistry: """Registry for BIDS dataset migrations.""" - + def __init__(self): """Initialize the migration registry.""" - self._migrations: Dict[str, Migration] = {} - + self._migrations: dict[str, Migration] = {} + def register( self, name: str, @@ -89,7 +88,7 @@ def register( description: str, ) -> Callable: """Decorator to register a migration function. - + Parameters ---------- name : str @@ -98,12 +97,12 @@ def register( BIDS version when this migration applies description : str Description of what this migration does - + Returns ------- Callable Decorator function - + Examples -------- >>> registry = MigrationRegistry() @@ -122,25 +121,25 @@ def decorator(func: Callable) -> Migration: lgr.debug(f"Registered migration: {name} (version {version})") return migration return decorator - + def get(self, name: str) -> Optional[Migration]: """Get a migration by name. 
- + Parameters ---------- name : str Name of the migration - + Returns ------- Migration or None The migration if found, None otherwise """ return self._migrations.get(name) - - def list_migrations(self) -> List[Dict[str, str]]: + + def list_migrations(self) -> list[dict[str, str]]: """List all registered migrations. - + Returns ------- list of dict @@ -154,16 +153,16 @@ def list_migrations(self) -> List[Dict[str, str]]: } for mig in self._migrations.values() ] - + def run( self, name: str, dataset_path: Path, dry_run: bool = False, **kwargs, - ) -> Dict[str, Any]: + ) -> dict[str, Any]: """Run a specific migration. - + Parameters ---------- name : str @@ -174,12 +173,12 @@ def run( If True, don't actually modify files, default False **kwargs Additional keyword arguments for the migration - + Returns ------- dict Result of the migration - + Raises ------ ValueError @@ -188,21 +187,21 @@ def run( migration = self.get(name) if migration is None: raise ValueError(f"Migration '{name}' not found") - + lgr.info(f"Running migration: {migration.name} (version {migration.version})") lgr.info(f"Description: {migration.description}") - + if dry_run: lgr.info("DRY RUN: No files will be modified") kwargs["dry_run"] = True - + result = migration(dataset_path, **kwargs) - + if result.get("success"): lgr.info(f"Migration completed successfully: {result.get('message', '')}") else: lgr.warning(f"Migration failed or had issues: {result.get('message', '')}") - + return result @@ -210,30 +209,30 @@ def run( registry = MigrationRegistry() -def load_json(filepath: Path) -> Optional[Dict]: +def load_json(filepath: Path) -> Optional[dict]: """Load JSON file safely. - + Parameters ---------- filepath : Path Path to JSON file - + Returns ------- dict or None Loaded JSON data, or None if error """ try: - with open(filepath, 'r') as f: + with open(filepath) as f: return json.load(f) except (json.JSONDecodeError, FileNotFoundError) as e: lgr.error(f"Error loading {filepath}: {e}") return None -def save_json(filepath: Path, data: Dict, indent: int = 2) -> bool: +def save_json(filepath: Path, data: dict, indent: int = 2) -> bool: """Save JSON file safely. - + Parameters ---------- filepath : Path @@ -242,16 +241,16 @@ def save_json(filepath: Path, data: Dict, indent: int = 2) -> bool: Data to save indent : int, optional JSON indentation, default 2 - + Returns ------- bool True if successful, False otherwise """ try: - with open(filepath, 'w') as f: + with open(filepath, "w") as f: json.dump(data, f, indent=indent, ensure_ascii=False) - f.write('\n') # Add newline at end of file + f.write("\n") # Add newline at end of file return True except Exception as e: lgr.error(f"Error saving {filepath}: {e}") diff --git a/tools/schemacode/src/bidsschematools/migrations.py b/tools/schemacode/src/bidsschematools/migrations.py index b2b05d4679..ee978f5fd4 100644 --- a/tools/schemacode/src/bidsschematools/migrations.py +++ b/tools/schemacode/src/bidsschematools/migrations.py @@ -6,9 +6,9 @@ import json import logging from pathlib import Path -from typing import Dict, Any, List +from typing import Any -from .migrate import registry, load_json, save_json +from .migrate import load_json, registry, save_json lgr = logging.getLogger(__name__) @@ -21,25 +21,25 @@ "This migration helps adopt BEP028 conventions for dataset provenance tracking." 
), ) -def migrate_generatedby(dataset_path: Path, dry_run: bool = False) -> Dict[str, Any]: +def migrate_generatedby(dataset_path: Path, dry_run: bool = False) -> dict[str, Any]: """Migrate pre-standard provenance to GeneratedBy field. - + This migration looks for common pre-standard provenance fields and converts them to the standardized GeneratedBy array format introduced in BEP028. - + Common pre-standard fields that may be migrated: - Provenance - Pipeline - ProcessingPipeline - Software - + Parameters ---------- dataset_path : Path Path to the BIDS dataset root dry_run : bool, optional If True, don't actually modify files, default False - + Returns ------- dict @@ -47,48 +47,48 @@ def migrate_generatedby(dataset_path: Path, dry_run: bool = False) -> Dict[str, """ modified_files = [] issues = [] - + # Look for dataset_description.json files (including in derivatives) desc_files = list(dataset_path.glob("dataset_description.json")) desc_files.extend(dataset_path.glob("derivatives/*/dataset_description.json")) desc_files.extend(dataset_path.glob("derivatives/*/*/dataset_description.json")) - + if not desc_files: return { "success": True, "modified_files": [], "message": "No dataset_description.json files found" } - + for desc_file in desc_files: data = load_json(desc_file) if data is None: issues.append(f"Could not load {desc_file}") continue - + # Check if GeneratedBy already exists if "GeneratedBy" in data: lgr.debug(f"GeneratedBy already exists in {desc_file}, skipping") continue - + # Look for pre-standard fields to migrate generated_by = [] migrated_fields = [] - + # Check for common pre-standard fields pre_standard_fields = { "Provenance": "provenance", - "Pipeline": "pipeline", + "Pipeline": "pipeline", "ProcessingPipeline": "processing_pipeline", "Software": "software", "Tool": "tool", } - + for old_field, field_type in pre_standard_fields.items(): if old_field in data: value = data[old_field] migrated_fields.append(old_field) - + # Convert to GeneratedBy format if isinstance(value, str): # Simple string, create basic entry @@ -106,23 +106,23 @@ def migrate_generatedby(dataset_path: Path, dry_run: bool = False) -> Dict[str, entry["Name"] = value["name"] else: entry["Name"] = old_field - + if "Version" in value: entry["Version"] = value["Version"] elif "version" in value: entry["Version"] = value["version"] - + if "Description" in value: entry["Description"] = value["Description"] elif "description" in value: entry["Description"] = value["description"] else: entry["Description"] = f"Migrated from {old_field} field" - + # Handle container info if present if "Container" in value or "container" in value: entry["Container"] = value.get("Container", value.get("container")) - + generated_by.append(entry) elif isinstance(value, list): # List of entries @@ -144,16 +144,16 @@ def migrate_generatedby(dataset_path: Path, dry_run: bool = False) -> Dict[str, if "Container" in item or "container" in item: entry["Container"] = item.get("Container", item.get("container")) generated_by.append(entry) - + if generated_by: if not dry_run: # Add GeneratedBy field data["GeneratedBy"] = generated_by - + # Remove old fields for field in migrated_fields: del data[field] - + # Save updated file if save_json(desc_file, data): modified_files.append(str(desc_file.relative_to(dataset_path))) @@ -169,7 +169,7 @@ def migrate_generatedby(dataset_path: Path, dry_run: bool = False) -> Dict[str, f"{desc_file}: remove fields {migrated_fields}" ) modified_files.append(f"{desc_file.relative_to(dataset_path)} (dry 
run)") - + if issues: message = f"Completed with issues: {'; '.join(issues)}" success = False @@ -179,7 +179,7 @@ def migrate_generatedby(dataset_path: Path, dry_run: bool = False) -> Dict[str, else: message = "No pre-standard provenance fields found to migrate" success = True - + return { "success": success, "modified_files": modified_files, @@ -196,20 +196,20 @@ def migrate_generatedby(dataset_path: Path, dry_run: bool = False) -> Dict[str, "will be deprecated in BIDS 2.0. This migration scans for potential issues." ), ) -def check_inheritance_overloading(dataset_path: Path, dry_run: bool = False) -> Dict[str, Any]: +def check_inheritance_overloading(dataset_path: Path, dry_run: bool = False) -> dict[str, Any]: """Check for inheritance overloading patterns that will be deprecated. - + This migration scans for cases where the inheritance principle is being used to overload values (e.g., using different values in different scopes for the same metadata field). This pattern is deprecated per PR #1834. - + Parameters ---------- dataset_path : Path Path to the BIDS dataset root dry_run : bool, optional Not used for this check-only migration - + Returns ------- dict @@ -217,47 +217,47 @@ def check_inheritance_overloading(dataset_path: Path, dry_run: bool = False) -> """ warnings = [] metadata_by_scope = {} - + # Find all JSON sidecar files json_files = list(dataset_path.rglob("*.json")) json_files = [f for f in json_files if f.name != "dataset_description.json"] - + if not json_files: return { "success": True, "modified_files": [], "message": "No JSON sidecar files found to check" } - + # Analyze metadata fields across different scopes for json_file in json_files: data = load_json(json_file) if data is None: continue - + # Determine scope level (dataset, subject, session, etc.) parts = json_file.relative_to(dataset_path).parts - if len(parts) > 1 and parts[0].startswith('sub-'): - if len(parts) > 2 and parts[1].startswith('ses-'): + if len(parts) > 1 and parts[0].startswith("sub-"): + if len(parts) > 2 and parts[1].startswith("ses-"): scope = f"subject-session ({parts[0]}/{parts[1]})" else: scope = f"subject ({parts[0]})" else: scope = "dataset" - + # Track metadata by field name for field, value in data.items(): if field not in metadata_by_scope: metadata_by_scope[field] = {} - + if scope not in metadata_by_scope[field]: metadata_by_scope[field][scope] = [] - + metadata_by_scope[field][scope].append({ "file": str(json_file.relative_to(dataset_path)), "value": value, }) - + # Check for overloading (same field with different values in different scopes) for field, scopes in metadata_by_scope.items(): if len(scopes) > 1: @@ -268,20 +268,20 @@ def check_inheritance_overloading(dataset_path: Path, dry_run: bool = False) -> value_str = json.dumps(item["value"], sort_keys=True) if value_str not in all_values: all_values.append(value_str) - + # If multiple different values, this is potential overloading if len(all_values) > 1: scope_summary = [] for scope, items in scopes.items(): scope_summary.append(f"{scope}: {len(items)} file(s)") - + warnings.append( f"Field '{field}' has different values across scopes " f"({', '.join(scope_summary)}). This inheritance overloading pattern " "is deprecated and will not be supported in BIDS 2.0. " "Consider using separate metadata fields or entity labels instead." ) - + if warnings: message = ( f"Found {len(warnings)} potential inheritance overloading pattern(s). 
" @@ -292,7 +292,7 @@ def check_inheritance_overloading(dataset_path: Path, dry_run: bool = False) -> lgr.warning(warning) else: message = "No inheritance overloading patterns detected" - + return { "success": True, "modified_files": [], @@ -309,20 +309,20 @@ def check_inheritance_overloading(dataset_path: Path, dry_run: bool = False) -> "Per PR #2281, certain TSV files should use entity- prefix for their columns." ), ) -def check_tsv_entity_prefix(dataset_path: Path, dry_run: bool = False) -> Dict[str, Any]: +def check_tsv_entity_prefix(dataset_path: Path, dry_run: bool = False) -> dict[str, Any]: """Check for and optionally fix missing entity prefixes in TSV files. - + Some TSV files are expected to have entity prefixes (e.g., 'sample-' prefix in samples.tsv for columns that identify samples). This migration helps identify inconsistencies. - + Parameters ---------- dataset_path : Path Path to the BIDS dataset root dry_run : bool, optional If True, don't actually modify files, default False - + Returns ------- dict @@ -330,40 +330,40 @@ def check_tsv_entity_prefix(dataset_path: Path, dry_run: bool = False) -> Dict[s """ issues = [] suggestions = [] - + # Files that should use entity prefixes # Format: {filename_pattern: expected_entity_prefix} expected_prefixes = { "samples.tsv": "sample", "participants.tsv": "participant", } - + for pattern, entity in expected_prefixes.items(): tsv_files = list(dataset_path.glob(pattern)) tsv_files.extend(dataset_path.glob(f"*/{pattern}")) - + for tsv_file in tsv_files: try: - with open(tsv_file, 'r') as f: + with open(tsv_file) as f: header = f.readline().strip() if not header: continue - - columns = header.split('\t') - + + columns = header.split("\t") + # Check if first column lacks proper prefix if columns and not columns[0].startswith(f"{entity}_"): # Special case: participant_id is standard, don't flag it if tsv_file.name == "participants.tsv" and columns[0] == "participant_id": continue - + suggestions.append( f"{tsv_file.relative_to(dataset_path)}: " f"Column '{columns[0]}' should likely be '{entity}_{columns[0]}'" ) except Exception as e: issues.append(f"Error reading {tsv_file}: {e}") - + if suggestions: message = ( f"Found {len(suggestions)} TSV files with potential entity prefix issues. 
" @@ -374,11 +374,11 @@ def check_tsv_entity_prefix(dataset_path: Path, dry_run: bool = False) -> Dict[s lgr.info(f" - {suggestion}") else: message = "No entity prefix issues detected in TSV files" - + if issues: for issue in issues: lgr.warning(issue) - + return { "success": True, "modified_files": [], From ca2f71e13913408849b521a167184d8a65bd0820 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Wed, 17 Dec 2025 18:06:08 +0000 Subject: [PATCH 5/8] Address code review feedback: improve code quality and performance - Use rglob() for finding dataset_description.json to handle arbitrary nesting depths - Extract dataset validation into helper function to reduce duplication - Use set instead of list for O(1) lookup performance when checking unique values - All 29 migration tests and 110 existing tests still passing Co-authored-by: yarikoptic <39889+yarikoptic@users.noreply.github.com> --- .../src/bidsschematools/__main__.py | 47 +++++++++++-------- .../src/bidsschematools/migrations.py | 13 +++-- 2 files changed, 34 insertions(+), 26 deletions(-) diff --git a/tools/schemacode/src/bidsschematools/__main__.py b/tools/schemacode/src/bidsschematools/__main__.py index 3979fcd141..f14fdd8602 100644 --- a/tools/schemacode/src/bidsschematools/__main__.py +++ b/tools/schemacode/src/bidsschematools/__main__.py @@ -187,6 +187,27 @@ def migrate(): pass +def _validate_bids_dataset(dataset_path: Path) -> None: + """Validate that a path contains a BIDS dataset. + + Parameters + ---------- + dataset_path : Path + Path to check for BIDS dataset + + Raises + ------ + SystemExit + If dataset_description.json is not found + """ + if not (dataset_path / "dataset_description.json").exists(): + lgr.error( + f"No dataset_description.json found in {dataset_path}. " + "Is this a valid BIDS dataset?" + ) + sys.exit(1) + + @migrate.command("list") def migrate_list(): """List all available migrations""" @@ -215,22 +236,16 @@ def migrate_list(): ) def migrate_run(migration_name, dataset_path, dry_run): """Run a specific migration on a BIDS dataset - + MIGRATION_NAME is the name of the migration to run (use 'migrate list' to see available) - + DATASET_PATH is the path to the BIDS dataset root directory """ from .migrate import registry from . import migrations # noqa: F401 - Import to register migrations - + dataset_path = Path(dataset_path).resolve() - - if not (dataset_path / "dataset_description.json").exists(): - lgr.error( - f"No dataset_description.json found in {dataset_path}. " - "Is this a valid BIDS dataset?" - ) - sys.exit(1) + _validate_bids_dataset(dataset_path) try: result = registry.run(migration_name, dataset_path, dry_run=dry_run) @@ -275,20 +290,14 @@ def migrate_run(migration_name, dataset_path, dry_run): ) def migrate_all(dataset_path, dry_run, skip): """Run all available migrations on a BIDS dataset - + DATASET_PATH is the path to the BIDS dataset root directory """ from .migrate import registry from . import migrations # noqa: F401 - Import to register migrations - + dataset_path = Path(dataset_path).resolve() - - if not (dataset_path / "dataset_description.json").exists(): - lgr.error( - f"No dataset_description.json found in {dataset_path}. " - "Is this a valid BIDS dataset?" 
- ) - sys.exit(1) + _validate_bids_dataset(dataset_path) migrations_list = registry.list_migrations() skip_set = set(skip) diff --git a/tools/schemacode/src/bidsschematools/migrations.py b/tools/schemacode/src/bidsschematools/migrations.py index ee978f5fd4..745a86ac3d 100644 --- a/tools/schemacode/src/bidsschematools/migrations.py +++ b/tools/schemacode/src/bidsschematools/migrations.py @@ -49,9 +49,9 @@ def migrate_generatedby(dataset_path: Path, dry_run: bool = False) -> dict[str, issues = [] # Look for dataset_description.json files (including in derivatives) - desc_files = list(dataset_path.glob("dataset_description.json")) - desc_files.extend(dataset_path.glob("derivatives/*/dataset_description.json")) - desc_files.extend(dataset_path.glob("derivatives/*/*/dataset_description.json")) + # Use rglob to handle arbitrary nesting depths + desc_files = list(dataset_path.rglob("dataset_description.json")) + if not desc_files: return { @@ -261,13 +261,12 @@ def check_inheritance_overloading(dataset_path: Path, dry_run: bool = False) -> # Check for overloading (same field with different values in different scopes) for field, scopes in metadata_by_scope.items(): if len(scopes) > 1: - # Get unique values across scopes - all_values = [] + # Get unique values across scopes using a set for O(1) lookup + all_values = set() for scope_data in scopes.values(): for item in scope_data: value_str = json.dumps(item["value"], sort_keys=True) - if value_str not in all_values: - all_values.append(value_str) + all_values.add(value_str) # If multiple different values, this is potential overloading if len(all_values) > 1: From 70d3d238ac0df5a0ed8538eac22e5dbae045a654 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Thu, 18 Dec 2025 04:07:30 +0000 Subject: [PATCH 6/8] Add validate_bids-examples workflow for testing migrations - Add workflow based on PR #1775 to test migrations on example datasets - Uses bst migrate command to migrate datasets from bids-examples - Runs validation before and after migration with deno validator - Skips check-only migrations (inheritance overloading and TSV prefix checks) - Adapted to use new 'bst migrate all' command instead of 'bst migrate-datasets' Co-authored-by: yarikoptic <39889+yarikoptic@users.noreply.github.com> --- .github/workflows/validate_bids-examples.yml | 111 +++++++++++++++++++ 1 file changed, 111 insertions(+) create mode 100644 .github/workflows/validate_bids-examples.yml diff --git a/.github/workflows/validate_bids-examples.yml b/.github/workflows/validate_bids-examples.yml new file mode 100644 index 0000000000..1cb3fc934c --- /dev/null +++ b/.github/workflows/validate_bids-examples.yml @@ -0,0 +1,111 @@ +name: validate_datasets + +on: + push: + branches: ["master"] + pull_request: + branches: ["**"] + +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + +jobs: + build: + strategy: + fail-fast: false + matrix: + platform: [ubuntu-latest] + bids-validator: [master-deno] + python-version: ["3.11"] + + runs-on: ${{ matrix.platform }} + + env: + TZ: Europe/Berlin + FORCE_COLOR: 1 + + steps: + - uses: actions/checkout@v4 + + # Setup Python with bst + - uses: actions/setup-python@v5 + with: + python-version: ${{ matrix.python-version }} + - name: "Install build dependencies" + run: pip install --upgrade build twine + - name: "Build source distribution and wheel" + run: python -m build tools/schemacode + - name: "Check distribution metadata" + run: twine check 
tools/schemacode/dist/* + - name: "Install bst tools from the build" + run: pip install $( ls tools/schemacode/dist/*.whl )[all] + - name: "Produce dump of the schema as schema.json" + run: bst -v export --output src/schema.json + + - uses: denoland/setup-deno@v1 + if: "matrix.bids-validator == 'master-deno'" + with: + deno-version: v1.x + + - name: Install BIDS validator (master deno build) + if: "matrix.bids-validator == 'master-deno'" + run: | + pushd .. + # Let's use specific commit for now + # TODO: progress it once in a while + commit=a7b291b882a8c6184219ccb84faae255ba96203a + git clone --depth 1 https://github.com/bids-standard/bids-validator + cd bids-validator + git fetch --depth 1 origin $commit + echo -e '#!/bin/sh\n'"$PWD/bids-validator/bids-validator-deno \"\$@\"" >| /tmp/bids-validator + chmod a+x /tmp/bids-validator + sudo mv /tmp/bids-validator /usr/local/bin/bids-validator + which -a bids-validator + bids-validator --help + popd + + - name: Display versions and environment information + run: | + echo $TZ + date + echo -n "npm: "; npm --version + echo -n "node: "; node --version + echo -n "bids-validator: "; bids-validator --version + echo -n "python: "; python --version + + # Checkout bids-examples + - uses: actions/checkout@v4 + with: + # For now use the forked repository with support for deno validator + # from https://github.com/bids-standard/bids-examples/pull/435 + repository: yarikoptic/bids-examples + ref: deno-validator + path: bids-examples + + - name: Mark known not yet to be deno-legit BIDS datasets + run: touch {ds000117,ds000246,ds000247,ds000248,eeg_ds003645s_hed_demo,ieeg_motorMiller2007,ieeg_visual}/.SKIP_VALIDATION + shell: bash + working-directory: bids-examples + + - name: Validate using bids-validator without migration + run: ./run_tests.sh + working-directory: bids-examples + + - name: Migrate all BIDS datasets + run: | + for dataset in */dataset_description.json; do + dataset_dir=$(dirname "$dataset") + echo "Migrating $dataset_dir..." 
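+          # Only the file-modifying migration runs here; the two check-only
+          # migrations are skipped below because they merely emit warnings
+          # and suggestions and never change files on disk.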
+ bst migrate all "$dataset_dir" --skip fix_inheritance_overloading --skip fix_tsv_entity_prefix || echo "Migration failed for $dataset_dir" + done + shell: bash + working-directory: bids-examples + + - name: Show migrated datasets diff + run: git diff HEAD + working-directory: bids-examples + + - name: Validate all BIDS datasets using bids-validator after migration + run: VALIDATOR_ARGS="--schema file://$PWD/../src/schema.json" bash ./run_tests.sh + working-directory: bids-examples From cd1849ab23c716f2858e4202b7d4c5ce4d09ffef Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Thu, 18 Dec 2025 16:02:59 +0000 Subject: [PATCH 7/8] Fix code formatting issues for CI validation workflow - Apply ruff format to fix whitespace and line breaks - Fix import ordering with ruff check --fix - All 119 tests still passing after formatting fixes Co-authored-by: yarikoptic <39889+yarikoptic@users.noreply.github.com> --- .../src/bidsschematools/__main__.py | 43 +++--- .../schemacode/src/bidsschematools/migrate.py | 2 + .../src/bidsschematools/migrations.py | 29 ++-- .../src/bidsschematools/tests/test_migrate.py | 85 ++++++----- .../bidsschematools/tests/test_migrate_cli.py | 70 ++++----- .../bidsschematools/tests/test_migrations.py | 143 +++++++++--------- 6 files changed, 178 insertions(+), 194 deletions(-) diff --git a/tools/schemacode/src/bidsschematools/__main__.py b/tools/schemacode/src/bidsschematools/__main__.py index f14fdd8602..3b651bd3c8 100644 --- a/tools/schemacode/src/bidsschematools/__main__.py +++ b/tools/schemacode/src/bidsschematools/__main__.py @@ -202,8 +202,7 @@ def _validate_bids_dataset(dataset_path: Path) -> None: """ if not (dataset_path / "dataset_description.json").exists(): lgr.error( - f"No dataset_description.json found in {dataset_path}. " - "Is this a valid BIDS dataset?" + f"No dataset_description.json found in {dataset_path}. Is this a valid BIDS dataset?" ) sys.exit(1) @@ -211,15 +210,15 @@ def _validate_bids_dataset(dataset_path: Path) -> None: @migrate.command("list") def migrate_list(): """List all available migrations""" - from .migrate import registry from . import migrations # noqa: F401 - Import to register migrations - + from .migrate import registry + migrations_list = registry.list_migrations() - + if not migrations_list: lgr.info("No migrations available") return - + click.echo("Available migrations:\n") for mig in sorted(migrations_list, key=lambda x: x["version"]): click.echo(f" {mig['name']} (version {mig['version']})") @@ -241,37 +240,37 @@ def migrate_run(migration_name, dataset_path, dry_run): DATASET_PATH is the path to the BIDS dataset root directory """ - from .migrate import registry from . 
import migrations # noqa: F401 - Import to register migrations + from .migrate import registry dataset_path = Path(dataset_path).resolve() _validate_bids_dataset(dataset_path) - + try: result = registry.run(migration_name, dataset_path, dry_run=dry_run) except ValueError as e: lgr.error(str(e)) lgr.info("Use 'bst migrate list' to see available migrations") sys.exit(1) - + # Display results if result.get("modified_files"): click.echo(f"\nModified files ({len(result['modified_files'])}):") for filepath in result["modified_files"]: click.echo(f" - {filepath}") - + if result.get("warnings"): click.echo(f"\nWarnings ({len(result['warnings'])}):") for warning in result["warnings"]: click.echo(f" - {warning}") - + if result.get("suggestions"): click.echo(f"\nSuggestions ({len(result['suggestions'])}):") for suggestion in result["suggestions"]: click.echo(f" - {suggestion}") - + click.echo(f"\n{result['message']}") - + if not result["success"]: sys.exit(1) @@ -293,33 +292,33 @@ def migrate_all(dataset_path, dry_run, skip): DATASET_PATH is the path to the BIDS dataset root directory """ - from .migrate import registry from . import migrations # noqa: F401 - Import to register migrations + from .migrate import registry dataset_path = Path(dataset_path).resolve() _validate_bids_dataset(dataset_path) - + migrations_list = registry.list_migrations() skip_set = set(skip) - + if not migrations_list: click.echo("No migrations available") return - + click.echo(f"Running {len(migrations_list)} migration(s) on {dataset_path}") if dry_run: click.echo("DRY RUN: No files will be modified\n") - + results = [] for mig in sorted(migrations_list, key=lambda x: x["version"]): if mig["name"] in skip_set: click.echo(f"Skipping: {mig['name']}") continue - + click.echo(f"\nRunning: {mig['name']} (version {mig['version']})") result = registry.run(mig["name"], dataset_path, dry_run=dry_run) results.append((mig["name"], result)) - + if result.get("modified_files"): click.echo(f" Modified {len(result['modified_files'])} file(s)") if result.get("warnings"): @@ -327,12 +326,12 @@ def migrate_all(dataset_path, dry_run, skip): if result.get("suggestions"): click.echo(f" {len(result['suggestions'])} suggestion(s)") click.echo(f" {result['message']}") - + # Summary click.echo("\n" + "=" * 60) click.echo("Migration Summary:") click.echo("=" * 60) - + for name, result in results: status = "✓" if result["success"] else "✗" click.echo(f"{status} {name}: {result['message']}") diff --git a/tools/schemacode/src/bidsschematools/migrate.py b/tools/schemacode/src/bidsschematools/migrate.py index 292e325327..f988d9739c 100644 --- a/tools/schemacode/src/bidsschematools/migrate.py +++ b/tools/schemacode/src/bidsschematools/migrate.py @@ -115,11 +115,13 @@ def register( ... # migration code here ... 
return {"success": True, "modified_files": [], "message": "Done"} """ + def decorator(func: Callable) -> Migration: migration = Migration(name, version, description, func) self._migrations[name] = migration lgr.debug(f"Registered migration: {name} (version {version})") return migration + return decorator def get(self, name: str) -> Optional[Migration]: diff --git a/tools/schemacode/src/bidsschematools/migrations.py b/tools/schemacode/src/bidsschematools/migrations.py index 745a86ac3d..e32d695bae 100644 --- a/tools/schemacode/src/bidsschematools/migrations.py +++ b/tools/schemacode/src/bidsschematools/migrations.py @@ -52,12 +52,11 @@ def migrate_generatedby(dataset_path: Path, dry_run: bool = False) -> dict[str, # Use rglob to handle arbitrary nesting depths desc_files = list(dataset_path.rglob("dataset_description.json")) - if not desc_files: return { "success": True, "modified_files": [], - "message": "No dataset_description.json files found" + "message": "No dataset_description.json files found", } for desc_file in desc_files: @@ -92,10 +91,7 @@ def migrate_generatedby(dataset_path: Path, dry_run: bool = False) -> dict[str, # Convert to GeneratedBy format if isinstance(value, str): # Simple string, create basic entry - entry = { - "Name": value, - "Description": f"Migrated from {old_field} field" - } + entry = {"Name": value, "Description": f"Migrated from {old_field} field"} generated_by.append(entry) elif isinstance(value, dict): # Already structured, try to map fields @@ -128,10 +124,9 @@ def migrate_generatedby(dataset_path: Path, dry_run: bool = False) -> dict[str, # List of entries for item in value: if isinstance(item, str): - generated_by.append({ - "Name": item, - "Description": f"Migrated from {old_field} field" - }) + generated_by.append( + {"Name": item, "Description": f"Migrated from {old_field} field"} + ) elif isinstance(item, dict): entry = {} entry["Name"] = item.get("Name", item.get("name", old_field)) @@ -139,7 +134,7 @@ def migrate_generatedby(dataset_path: Path, dry_run: bool = False) -> dict[str, entry["Version"] = item.get("Version", item.get("version")) entry["Description"] = item.get( "Description", - item.get("description", f"Migrated from {old_field} field") + item.get("description", f"Migrated from {old_field} field"), ) if "Container" in item or "container" in item: entry["Container"] = item.get("Container", item.get("container")) @@ -226,7 +221,7 @@ def check_inheritance_overloading(dataset_path: Path, dry_run: bool = False) -> return { "success": True, "modified_files": [], - "message": "No JSON sidecar files found to check" + "message": "No JSON sidecar files found to check", } # Analyze metadata fields across different scopes @@ -253,10 +248,12 @@ def check_inheritance_overloading(dataset_path: Path, dry_run: bool = False) -> if scope not in metadata_by_scope[field]: metadata_by_scope[field][scope] = [] - metadata_by_scope[field][scope].append({ - "file": str(json_file.relative_to(dataset_path)), - "value": value, - }) + metadata_by_scope[field][scope].append( + { + "file": str(json_file.relative_to(dataset_path)), + "value": value, + } + ) # Check for overloading (same field with different values in different scopes) for field, scopes in metadata_by_scope.items(): diff --git a/tools/schemacode/src/bidsschematools/tests/test_migrate.py b/tools/schemacode/src/bidsschematools/tests/test_migrate.py index ce4bb8e2e6..5ba7208727 100644 --- a/tools/schemacode/src/bidsschematools/tests/test_migrate.py +++ 
b/tools/schemacode/src/bidsschematools/tests/test_migrate.py @@ -1,47 +1,50 @@ """Tests for BIDS dataset migration functionality.""" import json + import pytest -from pathlib import Path + from bidsschematools.migrate import Migration, MigrationRegistry, load_json, save_json class TestMigration: """Tests for Migration class.""" - + def test_migration_creation(self): """Test creating a migration.""" + def dummy_func(dataset_path): return {"success": True, "modified_files": [], "message": "Done"} - + mig = Migration( name="test_migration", version="1.0.0", description="Test migration", func=dummy_func, ) - + assert mig.name == "test_migration" assert mig.version == "1.0.0" assert mig.description == "Test migration" assert callable(mig.func) - + def test_migration_call(self, tmp_path): """Test calling a migration.""" + def dummy_func(dataset_path, **kwargs): return { "success": True, "modified_files": ["test.json"], "message": f"Processed {dataset_path}", } - + mig = Migration( name="test_migration", version="1.0.0", description="Test migration", func=dummy_func, ) - + result = mig(tmp_path) assert result["success"] is True assert result["modified_files"] == ["test.json"] @@ -50,16 +53,16 @@ def dummy_func(dataset_path, **kwargs): class TestMigrationRegistry: """Tests for MigrationRegistry class.""" - + def test_registry_creation(self): """Test creating a registry.""" registry = MigrationRegistry() assert registry.list_migrations() == [] - + def test_register_migration(self): """Test registering a migration.""" registry = MigrationRegistry() - + @registry.register( name="test_migration", version="1.0.0", @@ -67,16 +70,16 @@ def test_register_migration(self): ) def test_func(dataset_path): return {"success": True, "modified_files": [], "message": "Done"} - + migrations = registry.list_migrations() assert len(migrations) == 1 assert migrations[0]["name"] == "test_migration" assert migrations[0]["version"] == "1.0.0" - + def test_get_migration(self): """Test getting a migration by name.""" registry = MigrationRegistry() - + @registry.register( name="test_migration", version="1.0.0", @@ -84,18 +87,18 @@ def test_get_migration(self): ) def test_func(dataset_path): return {"success": True, "modified_files": [], "message": "Done"} - + mig = registry.get("test_migration") assert mig is not None assert mig.name == "test_migration" - + mig = registry.get("nonexistent") assert mig is None - + def test_run_migration(self, tmp_path): """Test running a migration.""" registry = MigrationRegistry() - + @registry.register( name="test_migration", version="1.0.0", @@ -107,22 +110,22 @@ def test_func(dataset_path, **kwargs): "modified_files": [], "message": "Migration completed", } - + result = registry.run("test_migration", tmp_path) assert result["success"] is True assert result["message"] == "Migration completed" - + def test_run_nonexistent_migration(self, tmp_path): """Test running a nonexistent migration raises error.""" registry = MigrationRegistry() - + with pytest.raises(ValueError, match="Migration 'nonexistent' not found"): registry.run("nonexistent", tmp_path) - + def test_dry_run(self, tmp_path): """Test dry run mode.""" registry = MigrationRegistry() - + @registry.register( name="test_migration", version="1.0.0", @@ -134,7 +137,7 @@ def test_func(dataset_path, dry_run=False): "modified_files": [] if dry_run else ["test.json"], "message": "Dry run" if dry_run else "Modified files", } - + result = registry.run("test_migration", tmp_path, dry_run=True) assert result["success"] is True assert 
result["modified_files"] == [] @@ -143,56 +146,56 @@ def test_func(dataset_path, dry_run=False): class TestJsonHelpers: """Tests for JSON helper functions.""" - + def test_load_json(self, tmp_path): """Test loading JSON file.""" test_file = tmp_path / "test.json" test_data = {"key": "value", "number": 42} - - with open(test_file, 'w') as f: + + with open(test_file, "w") as f: json.dump(test_data, f) - + loaded = load_json(test_file) assert loaded == test_data - + def test_load_json_nonexistent(self, tmp_path): """Test loading nonexistent JSON file returns None.""" test_file = tmp_path / "nonexistent.json" loaded = load_json(test_file) assert loaded is None - + def test_load_json_invalid(self, tmp_path): """Test loading invalid JSON returns None.""" test_file = tmp_path / "invalid.json" - - with open(test_file, 'w') as f: + + with open(test_file, "w") as f: f.write("not valid json{") - + loaded = load_json(test_file) assert loaded is None - + def test_save_json(self, tmp_path): """Test saving JSON file.""" test_file = tmp_path / "test.json" test_data = {"key": "value", "number": 42} - + result = save_json(test_file, test_data) assert result is True - + # Verify file contents - with open(test_file, 'r') as f: + with open(test_file) as f: loaded = json.load(f) assert loaded == test_data - + def test_save_json_with_indentation(self, tmp_path): """Test saving JSON file with custom indentation.""" test_file = tmp_path / "test.json" test_data = {"key": "value"} - + save_json(test_file, test_data, indent=4) - + # Check that file is properly indented - with open(test_file, 'r') as f: + with open(test_file) as f: content = f.read() assert " " in content # Should have 4-space indent - assert content.endswith('\n') # Should have trailing newline + assert content.endswith("\n") # Should have trailing newline diff --git a/tools/schemacode/src/bidsschematools/tests/test_migrate_cli.py b/tools/schemacode/src/bidsschematools/tests/test_migrate_cli.py index 6266fc04aa..b4f236cd91 100644 --- a/tools/schemacode/src/bidsschematools/tests/test_migrate_cli.py +++ b/tools/schemacode/src/bidsschematools/tests/test_migrate_cli.py @@ -2,13 +2,11 @@ import json import subprocess -from pathlib import Path -import pytest class TestMigrateCLI: """Test the bst migrate CLI commands.""" - + def test_migrate_list_command(self): """Test that 'bst migrate list' works.""" result = subprocess.run( @@ -16,110 +14,102 @@ def test_migrate_list_command(self): capture_output=True, text=True, ) - + assert result.returncode == 0 assert "standardize_generatedby" in result.stdout assert "fix_inheritance_overloading" in result.stdout assert "fix_tsv_entity_prefix" in result.stdout assert "version" in result.stdout - + def test_migrate_run_command(self, tmp_path): """Test that 'bst migrate run' works on a real dataset.""" # Create a test dataset dataset_path = tmp_path / "test_dataset" dataset_path.mkdir() - + desc_file = dataset_path / "dataset_description.json" - desc_data = { - "Name": "Test Dataset", - "BIDSVersion": "1.9.0", - "Pipeline": "my_pipeline" - } - - with open(desc_file, 'w') as f: + desc_data = {"Name": "Test Dataset", "BIDSVersion": "1.9.0", "Pipeline": "my_pipeline"} + + with open(desc_file, "w") as f: json.dump(desc_data, f) - + # Run the migration result = subprocess.run( ["bst", "migrate", "run", "standardize_generatedby", str(dataset_path)], capture_output=True, text=True, ) - + assert result.returncode == 0 assert "Modified files" in result.stdout assert "dataset_description.json" in result.stdout - + # Verify the 
file was actually migrated - with open(desc_file, 'r') as f: + with open(desc_file) as f: migrated_data = json.load(f) - + assert "GeneratedBy" in migrated_data assert "Pipeline" not in migrated_data - + def test_migrate_run_dry_run(self, tmp_path): """Test that 'bst migrate run --dry-run' doesn't modify files.""" # Create a test dataset dataset_path = tmp_path / "test_dataset" dataset_path.mkdir() - + desc_file = dataset_path / "dataset_description.json" - desc_data = { - "Name": "Test Dataset", - "BIDSVersion": "1.9.0", - "Pipeline": "my_pipeline" - } - - with open(desc_file, 'w') as f: + desc_data = {"Name": "Test Dataset", "BIDSVersion": "1.9.0", "Pipeline": "my_pipeline"} + + with open(desc_file, "w") as f: json.dump(desc_data, f) - + # Run the migration in dry-run mode result = subprocess.run( ["bst", "migrate", "run", "standardize_generatedby", str(dataset_path), "--dry-run"], capture_output=True, text=True, ) - + assert result.returncode == 0 assert "dry run" in result.stdout.lower() - + # Verify the file was NOT modified - with open(desc_file, 'r') as f: + with open(desc_file) as f: data = json.load(f) - + assert "Pipeline" in data assert "GeneratedBy" not in data - + def test_migrate_run_invalid_dataset(self, tmp_path): """Test that running migration on invalid dataset fails gracefully.""" # Create empty directory (no dataset_description.json) dataset_path = tmp_path / "invalid_dataset" dataset_path.mkdir() - + result = subprocess.run( ["bst", "migrate", "run", "standardize_generatedby", str(dataset_path)], capture_output=True, text=True, ) - + assert result.returncode == 1 assert "dataset_description.json" in result.stderr.lower() - + def test_migrate_run_nonexistent_migration(self, tmp_path): """Test that running nonexistent migration fails gracefully.""" # Create a valid dataset dataset_path = tmp_path / "test_dataset" dataset_path.mkdir() - + desc_file = dataset_path / "dataset_description.json" - with open(desc_file, 'w') as f: + with open(desc_file, "w") as f: json.dump({"Name": "Test", "BIDSVersion": "1.0.0"}, f) - + result = subprocess.run( ["bst", "migrate", "run", "nonexistent_migration", str(dataset_path)], capture_output=True, text=True, ) - + assert result.returncode == 1 assert "not found" in result.stderr.lower() diff --git a/tools/schemacode/src/bidsschematools/tests/test_migrations.py b/tools/schemacode/src/bidsschematools/tests/test_migrations.py index 217737ca95..456b20802f 100644 --- a/tools/schemacode/src/bidsschematools/tests/test_migrations.py +++ b/tools/schemacode/src/bidsschematools/tests/test_migrations.py @@ -1,24 +1,25 @@ """Tests for specific BIDS dataset migrations.""" import json + import pytest -from pathlib import Path + from bidsschematools.migrations import ( - migrate_generatedby, check_inheritance_overloading, check_tsv_entity_prefix, + migrate_generatedby, ) class TestMigrateGeneratedBy: """Tests for GeneratedBy migration.""" - + @pytest.fixture def dataset_with_old_provenance(self, tmp_path): """Create a dataset with pre-standard provenance fields.""" dataset_path = tmp_path / "dataset" dataset_path.mkdir() - + # Create dataset_description.json with old-style provenance desc_data = { "Name": "Test Dataset", @@ -26,21 +27,21 @@ def dataset_with_old_provenance(self, tmp_path): "Pipeline": { "Name": "fmriprep", "Version": "1.4.1", - } + }, } - + desc_file = dataset_path / "dataset_description.json" - with open(desc_file, 'w') as f: + with open(desc_file, "w") as f: json.dump(desc_data, f, indent=2) - + return dataset_path - + @pytest.fixture 
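     # Control-case fixture: this dataset already carries a standard
     # GeneratedBy entry, so migrate_generatedby should leave it untouched
     # (exercised by test_skip_if_generatedby_exists below).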
def dataset_with_new_provenance(self, tmp_path): """Create a dataset with standard GeneratedBy field.""" dataset_path = tmp_path / "dataset" dataset_path.mkdir() - + desc_data = { "Name": "Test Dataset", "BIDSVersion": "1.10.0", @@ -49,89 +50,89 @@ def dataset_with_new_provenance(self, tmp_path): "Name": "fmriprep", "Version": "1.4.1", } - ] + ], } - + desc_file = dataset_path / "dataset_description.json" - with open(desc_file, 'w') as f: + with open(desc_file, "w") as f: json.dump(desc_data, f, indent=2) - + return dataset_path - + def test_migrate_old_pipeline_field(self, dataset_with_old_provenance): """Test migrating old Pipeline field to GeneratedBy.""" result = migrate_generatedby(dataset_with_old_provenance) - + assert result["success"] is True assert len(result["modified_files"]) == 1 assert "dataset_description.json" in result["modified_files"][0] - + # Check that file was actually modified desc_file = dataset_with_old_provenance / "dataset_description.json" - with open(desc_file, 'r') as f: + with open(desc_file) as f: data = json.load(f) - + assert "GeneratedBy" in data assert "Pipeline" not in data assert data["GeneratedBy"][0]["Name"] == "fmriprep" assert data["GeneratedBy"][0]["Version"] == "1.4.1" - + def test_skip_if_generatedby_exists(self, dataset_with_new_provenance): """Test that migration skips if GeneratedBy already exists.""" result = migrate_generatedby(dataset_with_new_provenance) - + assert result["success"] is True assert len(result["modified_files"]) == 0 assert "already exists" in result["message"] or "No pre-standard" in result["message"] - + def test_dry_run(self, dataset_with_old_provenance): """Test dry run mode doesn't modify files.""" result = migrate_generatedby(dataset_with_old_provenance, dry_run=True) - + assert result["success"] is True assert len(result["modified_files"]) > 0 assert "dry run" in result["modified_files"][0].lower() - + # Check that file was NOT modified desc_file = dataset_with_old_provenance / "dataset_description.json" - with open(desc_file, 'r') as f: + with open(desc_file) as f: data = json.load(f) - + assert "Pipeline" in data assert "GeneratedBy" not in data - + def test_migrate_multiple_fields(self, tmp_path): """Test migrating multiple pre-standard fields.""" dataset_path = tmp_path / "dataset" dataset_path.mkdir() - + desc_data = { "Name": "Test Dataset", "BIDSVersion": "1.9.0", "Pipeline": "fmriprep", "Software": "SPM12", } - + desc_file = dataset_path / "dataset_description.json" - with open(desc_file, 'w') as f: + with open(desc_file, "w") as f: json.dump(desc_data, f, indent=2) - + result = migrate_generatedby(dataset_path) - + assert result["success"] is True - - with open(desc_file, 'r') as f: + + with open(desc_file) as f: data = json.load(f) - + assert "GeneratedBy" in data assert len(data["GeneratedBy"]) == 2 assert "Pipeline" not in data assert "Software" not in data - + def test_no_dataset_description(self, tmp_path): """Test with no dataset_description.json file.""" result = migrate_generatedby(tmp_path) - + assert result["success"] is True assert len(result["modified_files"]) == 0 assert "No dataset_description.json" in result["message"] @@ -139,118 +140,110 @@ def test_no_dataset_description(self, tmp_path): class TestCheckInheritanceOverloading: """Tests for inheritance overloading check.""" - + @pytest.fixture def dataset_with_overloading(self, tmp_path): """Create a dataset with inheritance overloading.""" dataset_path = tmp_path / "dataset" dataset_path.mkdir() - + # Create dataset-level metadata - 
(dataset_path / "task-rest_bold.json").write_text( - json.dumps({"RepetitionTime": 2.0}) - ) - + (dataset_path / "task-rest_bold.json").write_text(json.dumps({"RepetitionTime": 2.0})) + # Create subject-level metadata with different value sub_dir = dataset_path / "sub-01" sub_dir.mkdir() - (sub_dir / "task-rest_bold.json").write_text( - json.dumps({"RepetitionTime": 1.5}) - ) - + (sub_dir / "task-rest_bold.json").write_text(json.dumps({"RepetitionTime": 1.5})) + return dataset_path - + @pytest.fixture def dataset_without_overloading(self, tmp_path): """Create a dataset without inheritance overloading.""" dataset_path = tmp_path / "dataset" dataset_path.mkdir() - + # Create dataset-level metadata - (dataset_path / "task-rest_bold.json").write_text( - json.dumps({"TaskName": "rest"}) - ) - + (dataset_path / "task-rest_bold.json").write_text(json.dumps({"TaskName": "rest"})) + # Create subject-level metadata with same value sub_dir = dataset_path / "sub-01" sub_dir.mkdir() - (sub_dir / "task-rest_bold.json").write_text( - json.dumps({"TaskName": "rest"}) - ) - + (sub_dir / "task-rest_bold.json").write_text(json.dumps({"TaskName": "rest"})) + return dataset_path - + def test_detect_overloading(self, dataset_with_overloading): """Test detection of inheritance overloading.""" result = check_inheritance_overloading(dataset_with_overloading) - + assert result["success"] is True assert len(result["warnings"]) > 0 assert any("RepetitionTime" in w for w in result["warnings"]) assert any("deprecated" in w for w in result["warnings"]) - + def test_no_overloading(self, dataset_without_overloading): """Test dataset without overloading issues.""" result = check_inheritance_overloading(dataset_without_overloading) - + assert result["success"] is True assert len(result.get("warnings", [])) == 0 assert "No inheritance overloading" in result["message"] - + def test_empty_dataset(self, tmp_path): """Test with empty dataset.""" result = check_inheritance_overloading(tmp_path) - + assert result["success"] is True assert "No JSON sidecar files" in result["message"] class TestCheckTsvEntityPrefix: """Tests for TSV entity prefix check.""" - + @pytest.fixture def dataset_with_participants(self, tmp_path): """Create a dataset with participants.tsv.""" dataset_path = tmp_path / "dataset" dataset_path.mkdir() - + # Create participants.tsv with correct prefix participants_file = dataset_path / "participants.tsv" participants_file.write_text("participant_id\tage\tsex\n01\t25\tF\n") - + return dataset_path - + @pytest.fixture def dataset_with_samples(self, tmp_path): """Create a dataset with samples.tsv without proper prefix.""" dataset_path = tmp_path / "dataset" dataset_path.mkdir() - + # Create samples.tsv without proper prefix samples_file = dataset_path / "samples.tsv" samples_file.write_text("id\ttype\ttissue\n01\tblood\tWB\n") - + return dataset_path - + def test_correct_prefix(self, dataset_with_participants): """Test dataset with correct entity prefix.""" result = check_tsv_entity_prefix(dataset_with_participants) - + assert result["success"] is True assert len(result.get("suggestions", [])) == 0 assert "No entity prefix issues" in result["message"] - + def test_missing_prefix(self, dataset_with_samples): """Test dataset with missing entity prefix.""" result = check_tsv_entity_prefix(dataset_with_samples) - + assert result["success"] is True assert len(result.get("suggestions", [])) > 0 assert any("sample_" in s for s in result["suggestions"]) - + def test_empty_dataset(self, tmp_path): """Test with empty 
dataset.""" result = check_tsv_entity_prefix(tmp_path) - + assert result["success"] is True assert len(result.get("suggestions", [])) == 0 From b43da8f572c657e1807d6896c81ccece3b287082 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Thu, 18 Dec 2025 16:05:23 +0000 Subject: [PATCH 8/8] Fix YAML formatting issues in validate_bids-examples workflow - Add document start marker (---) - Fix indentation (use 6 spaces for steps) - Split long lines using YAML multiline syntax (>-) - All yamllint errors resolved Co-authored-by: yarikoptic <39889+yarikoptic@users.noreply.github.com> --- .github/workflows/validate_bids-examples.yml | 174 ++++++++++--------- 1 file changed, 91 insertions(+), 83 deletions(-) diff --git a/.github/workflows/validate_bids-examples.yml b/.github/workflows/validate_bids-examples.yml index 1cb3fc934c..f85d81cacf 100644 --- a/.github/workflows/validate_bids-examples.yml +++ b/.github/workflows/validate_bids-examples.yml @@ -1,3 +1,4 @@ +--- name: validate_datasets on: @@ -26,86 +27,93 @@ jobs: FORCE_COLOR: 1 steps: - - uses: actions/checkout@v4 - - # Setup Python with bst - - uses: actions/setup-python@v5 - with: - python-version: ${{ matrix.python-version }} - - name: "Install build dependencies" - run: pip install --upgrade build twine - - name: "Build source distribution and wheel" - run: python -m build tools/schemacode - - name: "Check distribution metadata" - run: twine check tools/schemacode/dist/* - - name: "Install bst tools from the build" - run: pip install $( ls tools/schemacode/dist/*.whl )[all] - - name: "Produce dump of the schema as schema.json" - run: bst -v export --output src/schema.json - - - uses: denoland/setup-deno@v1 - if: "matrix.bids-validator == 'master-deno'" - with: - deno-version: v1.x - - - name: Install BIDS validator (master deno build) - if: "matrix.bids-validator == 'master-deno'" - run: | - pushd .. - # Let's use specific commit for now - # TODO: progress it once in a while - commit=a7b291b882a8c6184219ccb84faae255ba96203a - git clone --depth 1 https://github.com/bids-standard/bids-validator - cd bids-validator - git fetch --depth 1 origin $commit - echo -e '#!/bin/sh\n'"$PWD/bids-validator/bids-validator-deno \"\$@\"" >| /tmp/bids-validator - chmod a+x /tmp/bids-validator - sudo mv /tmp/bids-validator /usr/local/bin/bids-validator - which -a bids-validator - bids-validator --help - popd - - - name: Display versions and environment information - run: | - echo $TZ - date - echo -n "npm: "; npm --version - echo -n "node: "; node --version - echo -n "bids-validator: "; bids-validator --version - echo -n "python: "; python --version - - # Checkout bids-examples - - uses: actions/checkout@v4 - with: - # For now use the forked repository with support for deno validator - # from https://github.com/bids-standard/bids-examples/pull/435 - repository: yarikoptic/bids-examples - ref: deno-validator - path: bids-examples - - - name: Mark known not yet to be deno-legit BIDS datasets - run: touch {ds000117,ds000246,ds000247,ds000248,eeg_ds003645s_hed_demo,ieeg_motorMiller2007,ieeg_visual}/.SKIP_VALIDATION - shell: bash - working-directory: bids-examples - - - name: Validate using bids-validator without migration - run: ./run_tests.sh - working-directory: bids-examples - - - name: Migrate all BIDS datasets - run: | - for dataset in */dataset_description.json; do - dataset_dir=$(dirname "$dataset") - echo "Migrating $dataset_dir..." 
+            bst migrate all "$dataset_dir" \
+              --skip fix_inheritance_overloading \
+              --skip fix_tsv_entity_prefix || \
+              echo "Migration failed for $dataset_dir"
+          done
+        shell: bash
+        working-directory: bids-examples
+
+      - name: Show migrated datasets diff
+        run: git diff HEAD
+        working-directory: bids-examples
+
+      - name: Validate all BIDS datasets using bids-validator after migration
+        run: >-
+          VALIDATOR_ARGS="--schema file://$PWD/../src/schema.json"
+          bash ./run_tests.sh
+        working-directory: bids-examples
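
Usage sketch (illustrative note, not part of the patches above): the registry
API added earlier in this series is enough to write a third-party migration.
The migration name "add_dataset_notice", the NOTICE file it writes, and the
/data/ds000001 path are all hypothetical, invented for this example; only
registry.register(), registry.run(), and the result-dict convention
(success/modified_files/message) come from the patches themselves.

    from pathlib import Path
    from typing import Any

    from bidsschematools.migrate import registry


    @registry.register(
        name="add_dataset_notice",  # hypothetical name, for illustration only
        version="0.1.0",
        description="Example-only migration that writes a NOTICE file at the dataset root",
    )
    def add_dataset_notice(
        dataset_path: Path, dry_run: bool = False, **kwargs
    ) -> dict[str, Any]:
        """Write a NOTICE marker file unless running in dry-run mode."""
        notice = dataset_path / "NOTICE"
        if not dry_run:
            notice.write_text("Processed by add_dataset_notice.\n")
        return {
            "success": True,
            "modified_files": [] if dry_run else ["NOTICE"],
            "message": "Dry run: NOTICE not written" if dry_run else "Wrote NOTICE",
        }


    # registry.run() injects dry_run into the migration's kwargs and logs
    # progress, mirroring what 'bst migrate run' does on the command line:
    result = registry.run("add_dataset_notice", Path("/data/ds000001"), dry_run=True)
    assert result["success"]

The matching CLI entry points added by these patches, again with a
hypothetical dataset path:

    bst migrate list
    bst migrate run standardize_generatedby /data/ds000001 --dry-run
    bst migrate all /data/ds000001 --skip fix_tsv_entity_prefix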