Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 10 additions & 2 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ jobs:
strategy:
matrix:
python-version: ["3.11"]
ray-version: ["latest", "2.55.0"]
timeout-minutes: 10
steps:
- name: Config mirrors
Expand All @@ -25,36 +26,43 @@ jobs:

- name: Checkout code
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
- name: Build Python ${{ matrix.python-version }} wheel
- name: Build Python ${{ matrix.python-version }} wheel (Ray ${{ matrix.ray-version }})
run: |
echo "Building for Python ${{ matrix.python-version }}..."
pip${{ matrix.python-version }} install build
pip${{ matrix.python-version }} install -r requirements.txt
python${{ matrix.python-version }} -m build --wheel
pip${{ matrix.python-version }} install "$(ls dist/*.whl)[test,yr]" --force-reinstall
if [ "${{ matrix.ray-version }}" != "latest" ]; then
pip${{ matrix.python-version }} install "ray[default]==${{ matrix.ray-version }}" --force-reinstall
fi
- name: Run tests for Python ${{ matrix.python-version }}
run: |
echo "Running tests for Python ${{ matrix.python-version }}..."
python${{ matrix.python-version }} -m pytest -v tests/
- name: Install etcd
if: matrix.ray-version == 'latest'
run: |
apt-get update && apt-get install -y curl
curl -s -L https://mirrors.huaweicloud.com/etcd/v3.6.5/etcd-v3.6.5-linux-arm64.tar.gz -o etcd-v3.6.5-linux-arm64.tar.gz
tar -xzf etcd-v3.6.5-linux-arm64.tar.gz
mv etcd-v3.6.5-linux-arm64/etcd /usr/local/bin/
- name: Install openyuanrong-datasystem
if: matrix.ray-version == 'latest'
run: |
pip${{ matrix.python-version }} install "openyuanrong-datasystem>=0.8.0"
- name: Run performance test (etcd mode)
if: matrix.ray-version == 'latest'
run: |
echo "start ray cluster"
ray start --head
echo "Running direct_transport_perftest.py (etcd mode)"
python${{ matrix.python-version }} tests/benchmarks/direct_transport_perftest.py --backend=yr --placement=local --device=npu --init-mode=etcd
python${{ matrix.python-version }} tests/benchmarks/direct_transport_perftest.py --backend=yr --placement=local --device=cpu --init-mode=etcd
echo "stop ray cluster"
ray stop

- name: Run performance test (metastore mode)
if: matrix.ray-version == 'latest'
run: |
echo "start ray cluster"
ray start --head
Expand Down
3 changes: 2 additions & 1 deletion ray_ascend/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -157,10 +157,11 @@ def __init__(self):
def transfer_npu_tensor_via_hccs(self):
return torch.tensor([1, 2, 3]).npu()
"""
register_hccl_collective_backend()

Comment on lines +160 to +161

Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Unnecessary changes?

Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This makes register_hccl_tensor_transport raise RuntimeError, like register_hccl_collective_backend, instead of ImportError.

import torch
from ray.experimental import register_tensor_transport

from .direct_transport.hccl_tensor_transport import HCCLTensorTransport

register_hccl_collective_backend()
register_tensor_transport("HCCL", ["npu"], HCCLTensorTransport, torch.Tensor)
35 changes: 35 additions & 0 deletions tests/test_backward_compat.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
"""Verify backward compatibility of registration functions on Ray < 2.56.

These tests use mock to simulate the absence of
`ray.util.collective.backend_registry` (which was introduced in Ray 2.56),
ensuring that both registration functions raise RuntimeError with a clear
upgrade message instead of failing silently or crashing.
"""

from unittest.mock import patch

import pytest

from ray_ascend import register_hccl_collective_backend, register_hccl_tensor_transport


class TestBackwardCompatibility:
    """Check that both registration entry points raise RuntimeError on Ray < 2.56."""

    def test_register_hccl_collective_backend_raises_on_old_ray(self):
        """Registering the collective backend must raise RuntimeError when
        the backend_registry module cannot be imported (Ray < 2.56)."""
        # A None entry in sys.modules makes any import of that name fail,
        # which stands in for a Ray release that lacks the module.
        hidden_registry = {"ray.util.collective.backend_registry": None}
        with patch.dict("sys.modules", hidden_registry):
            with pytest.raises(RuntimeError, match="requires Ray >= 2.56"):
                register_hccl_collective_backend()

    def test_register_hccl_tensor_transport_raises_on_old_ray(self):
        """Registering the tensor transport must raise RuntimeError when
        the backend_registry module cannot be imported (Ray < 2.56).

        The transport registration invokes register_hccl_collective_backend
        first, so the same RuntimeError is expected to surface here.
        """
        # Same simulation as above: hide backend_registry from the importer.
        hidden_registry = {"ray.util.collective.backend_registry": None}
        with patch.dict("sys.modules", hidden_registry):
            with pytest.raises(RuntimeError, match="requires Ray >= 2.56"):
                register_hccl_tensor_transport()
Loading