diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 50722fa..8a826cd 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -14,6 +14,7 @@ jobs: strategy: matrix: python-version: ["3.11"] + ray-version: ["latest", "2.55.0"] timeout-minutes: 10 steps: - name: Config mirrors @@ -25,36 +26,43 @@ jobs: - name: Checkout code uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - - name: Build Python ${{ matrix.python-version }} wheel + - name: Build Python ${{ matrix.python-version }} wheel (Ray ${{ matrix.ray-version }}) run: | echo "Building for Python ${{ matrix.python-version }}..." pip${{ matrix.python-version }} install build pip${{ matrix.python-version }} install -r requirements.txt python${{ matrix.python-version }} -m build --wheel pip${{ matrix.python-version }} install "$(ls dist/*.whl)[test,yr]" --force-reinstall + if [ "${{ matrix.ray-version }}" != "latest" ]; then + pip${{ matrix.python-version }} install "ray[default]==${{ matrix.ray-version }}" --force-reinstall + fi - name: Run tests for Python ${{ matrix.python-version }} run: | echo "Running tests for Python ${{ matrix.python-version }}..." python${{ matrix.python-version }} -m pytest -v tests/ - name: Install etcd + if: matrix.ray-version == 'latest' run: | apt-get update && apt-get install -y curl curl -s -L https://mirrors.huaweicloud.com/etcd/v3.6.5/etcd-v3.6.5-linux-arm64.tar.gz -o etcd-v3.6.5-linux-arm64.tar.gz tar -xzf etcd-v3.6.5-linux-arm64.tar.gz mv etcd-v3.6.5-linux-arm64/etcd /usr/local/bin/ - name: Install openyuanrong-datasystem + if: matrix.ray-version == 'latest' run: | pip${{ matrix.python-version }} install "openyuanrong-datasystem>=0.8.0" - name: Run performance test (etcd mode) + if: matrix.ray-version == 'latest' run: | echo "start ray cluster" ray start --head echo "Running direct_transport_perftest.py (etcd mode)" - python${{ matrix.python-version }} tests/benchmarks/direct_transport_perftest.py --backend=yr --placement=local --device=npu --init-mode=etcd + python${{ matrix.python-version }} tests/benchmarks/direct_transport_perftest.py --backend=yr --placement=local --device=cpu --init-mode=etcd echo "stop ray cluster" ray stop - name: Run performance test (metastore mode) + if: matrix.ray-version == 'latest' run: | echo "start ray cluster" ray start --head diff --git a/ray_ascend/__init__.py b/ray_ascend/__init__.py index 2bb2603..0309265 100644 --- a/ray_ascend/__init__.py +++ b/ray_ascend/__init__.py @@ -157,10 +157,11 @@ def __init__(self): def transfer_npu_tensor_via_hccs(self): return torch.tensor([1, 2, 3]).npu() """ + register_hccl_collective_backend() + import torch from ray.experimental import register_tensor_transport from .direct_transport.hccl_tensor_transport import HCCLTensorTransport - register_hccl_collective_backend() register_tensor_transport("HCCL", ["npu"], HCCLTensorTransport, torch.Tensor) diff --git a/tests/test_backward_compat.py b/tests/test_backward_compat.py new file mode 100644 index 0000000..16bfc15 --- /dev/null +++ b/tests/test_backward_compat.py @@ -0,0 +1,35 @@ +"""Verify backward compatibility of registration functions on Ray < 2.56. + +These tests use mock to simulate the absence of +`ray.util.collective.backend_registry` (which was introduced in Ray 2.56), +ensuring that both registration functions raise RuntimeError with a clear +upgrade message instead of failing silently or crashing. +""" + +from unittest.mock import patch + +import pytest + +from ray_ascend import register_hccl_collective_backend, register_hccl_tensor_transport + + +class TestBackwardCompatibility: + """Verify registration functions raise RuntimeError on Ray < 2.56.""" + + @patch.dict("sys.modules", {"ray.util.collective.backend_registry": None}) + def test_register_hccl_collective_backend_raises_on_old_ray(self): + """register_hccl_collective_backend should raise RuntimeError when + backend_registry module is unavailable (Ray < 2.56).""" + with pytest.raises(RuntimeError, match="requires Ray >= 2.56"): + register_hccl_collective_backend() + + @patch.dict("sys.modules", {"ray.util.collective.backend_registry": None}) + def test_register_hccl_tensor_transport_raises_on_old_ray(self): + """register_hccl_tensor_transport should raise RuntimeError when + backend_registry module is unavailable (Ray < 2.56). + + It calls register_hccl_collective_backend first, so it inherits + the same RuntimeError behavior. + """ + with pytest.raises(RuntimeError, match="requires Ray >= 2.56"): + register_hccl_tensor_transport()