1 change: 1 addition & 0 deletions .agents/skills
145 changes: 145 additions & 0 deletions .claude/skills/sphinx-debug/SKILL.md
@@ -0,0 +1,145 @@
---
name: sphinx-debug
description: >
Use when `make html` or `sphinx-build` fails with
`Extension error (sphinx.ext.autosummary)`,
`ImportExceptionGroup`, or other Sphinx autosummary / autodoc
import failures. Provides automated pdb diagnosis for
`autodoc_mock_imports` issues.
---

# Sphinx Debug

## Quick Diagnostic Command

Run the build with maximum verbosity and pdb on exception:

```bash
make html SPHINXOPTS="-vv -T -P"
```

Flags:
- `-vv`: Verbose autodoc mock logging
- `-T`: Full traceback
- `-P`: Drop into pdb on exception

## For agents: automate pdb via monkey-patch

If you are an agent and cannot interact with an interactive pdb session,
**do not build a subprocess/pty capture harness**. Instead, monkey-patch
`pdb.post_mortem` so that it runs scripted commands and prints their output
directly to stdout.

Run this one-shot script from the docs directory (`docs/en` or `docs/zh_cn`):

```bash
cd docs/en # or docs/zh_cn
python -c "
import pdb, sys, io, traceback

# Hijack pdb.post_mortem to auto-run diagnostic commands
_original_post_mortem = pdb.post_mortem

def scripted_post_mortem(tb=None):
out = io.StringIO()
p = pdb.Pdb(stdout=out, stdin=io.StringIO(''))
p.use_rawinput = False
p.reset()
if tb is None:
tb = sys.exc_info()[2]
p.setup(None, tb)

old_stderr = sys.stderr
sys.stderr = out

# List all inner exceptions
p.onecmd('for i, exc in enumerate(exceptions): print(f\"[{i}] {type(exc).__name__}: {exc}\")')
# Print full traceback of the first TypeError (the real root cause)
p.onecmd('type_errs = [exc for exc in exceptions if type(exc).__name__ == \"TypeError\"]')
p.onecmd('if type_errs: import traceback; traceback.print_exception(type(type_errs[0]), type_errs[0], type_errs[0].__traceback__)')
p.do_quit('')

sys.stderr = old_stderr
print('=== PDB AUTOMATED OUTPUT ===')
print(out.getvalue())
print('=== END OUTPUT ===')

pdb.post_mortem = scripted_post_mortem

from sphinx.cmd.build import build_main
build_main(['-b', 'html', '-vv', '-T', '-P', '.', '_build/html'])
"
```

This runs the real Sphinx build with `-P`, but when pdb drops in it
automatically executes your commands and dumps the results to stdout.
You read the output exactly as if you had typed the pdb commands yourself.

## What to look for in the automated output

Sphinx autosummary raises `ImportExceptionGroup` when it cannot resolve a module.
The **top-level message is often misleading** (e.g. `no module named xtuner.v1.ray.dataflow`).
The real root cause is hidden inside the grouped exceptions.
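A minimal sketch of why the summary misleads: the stand-in class below mimics the shape of sphinx's `ImportExceptionGroup` (a summary message plus an `exceptions` attribute — the attribute name is assumed from the sphinx source, so treat this as illustrative rather than the real class):

```python
# Illustrative stand-in for sphinx's ImportExceptionGroup: a summary
# message plus the individual import errors it wraps
class ImportExceptionGroup(Exception):
    def __init__(self, message, exceptions):
        super().__init__(message)
        self.exceptions = exceptions

eg = ImportExceptionGroup(
    "no module named xtuner.v1.ray.dataflow",  # misleading top-level message
    [
        TypeError("expected string or bytes-like object, got '__version__'"),
        AttributeError("module 'xtuner.v1.ray' has no attribute 'dataflow'"),
    ],
)

print(eg)  # prints only the summary
for exc in eg.exceptions:  # the real root causes
    print(f"{type(exc).__name__}: {exc}")
```

Printing the exception alone gives only the summary; iterating `exceptions` surfaces the `TypeError` that actually matters.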

### Common root-cause patterns

1. **TypeError about `__version__`**
```
TypeError: expected string or bytes-like object, got '__version__'
```
This happens when a mocked module (e.g. `torch`) is imported for real by an
intermediate library (e.g. `fla`) that calls `packaging.version.parse()` on
the mocked `__version__` object.

**Fix**: Add the intermediate library to `autodoc_mock_imports` in `conf.py`.
To identify the exact library, read the traceback: the frame just above
`packaging/version.py` is the culprit (e.g. `fla/utils.py`).

```python
autodoc_mock_imports = [
...
"fla", # or whatever library triggers the real import
]
```

2. **Missing-comma string-concatenation bug**

If `autodoc_mock_imports` contains an entry like `"scipytorchvision"`,
a comma is missing between two string literals in `conf.py`:

```python
# WRONG
autodoc_mock_imports = [
...
"scipy"
"torchvision",
...
]

# CORRECT
autodoc_mock_imports = [
...
"scipy",
"torchvision",
...
]
```
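The merged entry comes from Python's implicit concatenation of adjacent string literals, so the file stays syntactically valid and the bug only shows up at build time:

```python
# Adjacent string literals are concatenated at compile time, so the
# missing comma yields one merged entry instead of a SyntaxError
mock_imports = [
    "scipy"        # <- comma missing here
    "torchvision",
]
print(mock_imports)  # ['scipytorchvision']
```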

3. **AttributeError after the TypeErrors**

```
AttributeError: module 'xtuner.v1.ray' has no attribute 'dataflow'
```
This is usually a *secondary* failure caused by the earlier `TypeError`s.
Fix the `TypeError` first, then rebuild.

## Step-by-step workflow

1. Run the automated pdb script (see "For agents" section above).
2. Read the `=== PDB AUTOMATED OUTPUT ===` block.
3. If you see `TypeError: expected string or bytes-like object, got '__version__'`,
read the traceback to find the intermediate library name.
4. Add the identified library to `autodoc_mock_imports` in **both**
`docs/en/conf.py` and `docs/zh_cn/conf.py`.
5. Also eyeball `autodoc_mock_imports` for missing commas while you are there.
6. Re-run `make html` (without `-P`) to verify the fix.
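The comma check in step 5 can be mechanized: the stdlib `tokenize` module sees two consecutive `STRING` tokens wherever a comma is missing, which never happens in a well-formed list. A sketch (the `conf_src` sample is hypothetical — point `generate_tokens` at the real `conf.py` instead):

```python
import io
import tokenize

# Hypothetical conf.py fragment containing the missing-comma bug
conf_src = '''autodoc_mock_imports = [
    "scipy"
    "torchvision",
    "xxhash",
]
'''

# Drop layout-only tokens so adjacent STRING tokens become neighbors
SKIP = {tokenize.NL, tokenize.NEWLINE, tokenize.COMMENT,
        tokenize.INDENT, tokenize.DEDENT}
toks = [t for t in tokenize.generate_tokens(io.StringIO(conf_src).readline)
        if t.type not in SKIP]

for prev, cur in zip(toks, toks[1:]):
    if prev.type == tokenize.STRING and cur.type == tokenize.STRING:
        print(f"line {prev.start[0]}: missing comma between "
              f"{prev.string} and {cur.string}")
```

To scan the real files, replace `conf_src` with the contents of `docs/en/conf.py` (and `docs/zh_cn/conf.py`).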
123 changes: 123 additions & 0 deletions .claude/skills/xtuner-sync-supported-models/SKILL.md
@@ -0,0 +1,123 @@
---
name: xtuner-sync-supported-models
description: Synchronize xtuner's supported model documentation (docs/en/pretrain_sft/advanced_tutorial/model.md and docs/zh_cn/pretrain_sft/advanced_tutorial/model.md) with the actual Config classes defined under xtuner/v1/model/. Use when (1) new TransformerConfig, MoEConfig, or BaseComposeConfig subclasses are added, removed, or renamed in xtuner/v1/model/, (2) existing model configs change their inheritance hierarchy, scale, or HuggingFace counterpart, or (3) a code review or user request points out that model.md is out of sync with the codebase.
---

# Update XTuner Supported Model Docs

Keep the English and Chinese `model.md` files synchronized with the actual Config classes in `xtuner/v1/model/`.

## Scan the Codebase

Run the bundled scan script from the xtuner project root to discover all Config classes and their inheritance:

```bash
python3 .agents/skills/xtuner-sync-supported-models/scripts/scan_model_configs.py
```

The script outputs JSON with two keys:
- `configs`: list of every `*Config` class under `xtuner/v1/model/` with its parent classes and file path
- `children`: parent-to-children mapping for the hierarchy tree
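For orientation, the output has roughly this shape (one abbreviated entry per key; the real class names, parents, and file paths come from the scan itself):

```json
{
  "configs": [
    {
      "class": "Qwen3Dense8BConfig",
      "parents": ["Qwen3DenseConfig"],
      "file": "xtuner/v1/model/..."
    }
  ],
  "children": {
    "Qwen3DenseConfig": ["Qwen3Dense4BConfig", "Qwen3Dense8BConfig"]
  }
}
```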

## What to Update

Compare the script output against the two files:
- `docs/en/pretrain_sft/advanced_tutorial/model.md`
- `docs/zh_cn/pretrain_sft/advanced_tutorial/model.md`

Both files share the same structure and must stay in sync:

1. **Base Config Classes** — configs that directly inherit from `TransformerConfig` (or `MoEConfig`) and provide a `from_hf` classmethod for loading HuggingFace weights
2. **Concrete Model Configs** — fixed-scale subclasses of the base configs above
3. **Compose Models** — multimodal configs that inherit from `BaseComposeConfig`
4. **Inheritance Hierarchy** — a text tree showing the full `XTunerBaseModelConfig` hierarchy

### Rules for the Base Config table

Include these direct descendants of `TransformerConfig`/`MoEConfig`:
- `Qwen2DenseConfig`
- `Qwen3DenseConfig`
- `DeepSeekV3Config`
- `GptOssConfig`
- `Qwen3MoEConfig`

Exclude from the base table:
- `MoEConfig` — it is an intermediate base class, not a usable model family
- `Qwen3_5_VLTextMoEConfig` — it is an intermediate base with only one concrete child; its child `Qwen3_5_VLTextMoE35BA3BConfig` belongs under the MoE concrete table

### Rules for the Concrete Model table

Include every concrete subclass that has fixed parameter defaults. For each row note:
- `Config Class`
- `Base Class / Family`
- `Architecture Type`: `Dense`, `MoE`, `Dense (VL backbone)`, `MoE (VL backbone)`
- `Scale / Notes`: parameter count or total/activated size; for VL backbones note "for multimodal"

`DeepSeekV3Config` appears here even though it has no separate base entry (it is both base and concrete).
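An illustrative row in the expected column order (the scale value here is inferred from the class name — verify it against the class defaults before committing):

```markdown
| Config Class | Base Class / Family | Architecture Type | Scale / Notes |
| --- | --- | --- | --- |
| `Qwen3Dense8BConfig` | `Qwen3DenseConfig` | Dense | ~8B parameters |
```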

### Rules for the Compose Models section

Include two sub-tables:
1. **Compose Base Config Classes** — `Qwen3VLBaseConfig`, `InternVLBaseConfig`, `InternS1BaseConfig`
- `Qwen3VLBaseConfig`: VL model based on Qwen3 text backbone
- `InternVLBaseConfig`: VL model based on InternViT + Qwen3
- `InternS1BaseConfig`: Science multimodal model based on InternViT + Qwen3
2. **Concrete Compose Model Configs** — every subclass of the above bases; for each row note the wrapped `Text Config` and scale

### Rules for the Inheritance Hierarchy tree

Rebuild the tree from `XTunerBaseModelConfig` with two top-level branches:

```text
XTunerBaseModelConfig
├── TransformerConfig
│ ├── Dense Models
│ │ ├── Qwen2DenseConfig
│ │ │ └── Qwen2Dense7BConfig
│ │ └── Qwen3DenseConfig
│ │ ├── Qwen3Dense8BConfig
│ │ │ └── Qwen3VLTextDense8BConfig
│ │ ├── Qwen3Dense4BConfig
│ │ │ └── Qwen3VLTextDense4BConfig
│ │ └── Qwen3Dense0P6BConfig
│ └── MoE Models (via MoEConfig)
│ ├── DeepSeekV3Config
│ ├── GptOssConfig
│ │ ├── GptOss21BA3P6Config
│ │ └── GptOss117BA5P8Config
│ ├── Qwen3MoEConfig
│ │ ├── Qwen3MoE30BA3Config
│ │ │ └── Qwen3VLTextMoE30BA3Config
│ │ ├── Qwen3MoE235BA22Config
│ │ │ └── Qwen3VLTextMoE235BA22Config
│ │ └── Qwen3MoEFoPEConfig
│ └── Qwen3_5_VLTextMoEConfig
│ └── Qwen3_5_VLTextMoE35BA3BConfig
└── BaseComposeConfig
├── Qwen3VLBaseConfig
│ ├── Qwen3VLMoE30BA3Config
│ ├── Qwen3VLMoE235BA22Config
│ ├── Qwen3VLDense4BConfig
│ ├── Qwen3VLDense8BConfig
│ └── Qwen3_5_BaseConfig
│ └── Qwen3_5_VLMoE35BA3Config
├── InternVLBaseConfig
│ ├── InternVL3P5Dense8BConfig
│ ├── InternVL3P5MoE30BA3Config
│ └── InternVL3P5Dense1BConfig
└── InternS1BaseConfig
├── InternS1Config
└── InternS1MiniConfig
```

When new configs are added, insert them into the appropriate branch following the same indentation style.
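The insertion can also be scripted from the scan output: the renderer below rebuilds the same `├──`/`└──` style from a `children` mapping (sample data abbreviated from the hierarchy above; real data comes from the scan script's JSON):

```python
# Abbreviated sample of the scan script's parent -> children mapping
children = {
    "XTunerBaseModelConfig": ["TransformerConfig", "BaseComposeConfig"],
    "TransformerConfig": ["Qwen2DenseConfig", "Qwen3DenseConfig"],
    "Qwen2DenseConfig": ["Qwen2Dense7BConfig"],
    "Qwen3DenseConfig": ["Qwen3Dense0P6BConfig", "Qwen3Dense4BConfig",
                         "Qwen3Dense8BConfig"],
}

def render(node: str, prefix: str = "") -> list[str]:
    """Recursively render a node's subtree in box-drawing style."""
    lines = []
    kids = children.get(node, [])
    for i, kid in enumerate(kids):
        last = i == len(kids) - 1
        lines.append(f"{prefix}{'└── ' if last else '├── '}{kid}")
        lines.extend(render(kid, prefix + ("    " if last else "│   ")))
    return lines

print("\n".join(["XTunerBaseModelConfig", *render("XTunerBaseModelConfig")]))
```

For the real tree, load the `children` mapping from the scan script's JSON output; manual grouping (e.g. the "Dense Models" / "MoE Models" labels) still needs a human touch.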

## Translation Notes

Keep the Chinese `model.md` (`docs/zh_cn/...`) structurally identical to the English one. Translate:
- Section headings
- Table header cells
- Description cells (e.g., "Image / Video + Text" → "图像/视频 + 文本")
- Scale descriptions (e.g., "~7B parameters" → "约 7B 参数", "FoPE variant" → "FoPE 变体")

Do **not** translate Config class names, file paths, or code identifiers.
@@ -0,0 +1,52 @@
#!/usr/bin/env python3
"""Scan xtuner/v1/model for all Config classes and output model info as JSON."""

import json
import re
import sys
from pathlib import Path


def scan_file(path: Path) -> list[dict[str, str | list[str]]]:
text = path.read_text()
# Match class definitions like: class FooConfig(BarConfig):
pattern = r"^class\s+(\w+Config)\s*\(([^)]+)\):"
results: list[dict[str, str | list[str]]] = []
for m in re.finditer(pattern, text, re.MULTILINE):
class_name = m.group(1)
parents = [p.strip() for p in m.group(2).split(",")]
results.append({"class": class_name, "parents": parents, "file": str(path)})
return results


def main() -> None:
root = Path(sys.argv[1]) if len(sys.argv) > 1 else Path(".")
model_dir = root / "xtuner" / "v1" / "model"
if not model_dir.exists():
print(f"Model directory not found: {model_dir}", file=sys.stderr)
sys.exit(1)

all_configs = []
for py_file in sorted(model_dir.rglob("*.py")):
all_configs.extend(scan_file(py_file))

# Build parent -> children map
children: dict[str, list[str]] = {}
for cfg in all_configs:
for p in cfg["parents"]:
if p.endswith("Config"):
children.setdefault(p, []).append(cfg["class"])

# Deduplicate
for k in children:
children[k] = sorted(set(children[k]))

output = {
"configs": all_configs,
"children": children,
}
print(json.dumps(output, indent=2, ensure_ascii=False))


if __name__ == "__main__":
main()
5 changes: 2 additions & 3 deletions docs/en/conf.py
@@ -106,22 +106,21 @@
"loguru",
"datasets",
"scikit-image",
"scipy"
"scipy",
"torchvision",
"xxhash",
"timm",
"imageio",
"torchvision",
"httpx",
"cv2",
"addict",
"torchvision",
"PIL",
"uvicorn",
"fastapi",
"huggingface_hub",
"more_itertools",
"pyarrow",
"fla",
]

