diff --git a/playbooks/supplemental/pytorch-kernels/README.md b/playbooks/supplemental/pytorch-kernels/README.md index 1056bb36..b7467d6c 100644 --- a/playbooks/supplemental/pytorch-kernels/README.md +++ b/playbooks/supplemental/pytorch-kernels/README.md @@ -136,7 +136,7 @@ source rocm-env/bin/activate -```bash +```powershell python -m venv rocm-env rocm-env\Scripts\activate ``` @@ -150,46 +150,64 @@ rocm-env\Scripts\activate ```bash source ~/rocm-env/bin/activate -pip install --upgrade pip setuptools wheel -pip install --index-url https://rocm.nightlies.amd.com/v2/gfx1151/ "rocm[libraries,devel]" -# sudo reboot +python -m pip install --upgrade pip setuptools wheel -source ~/rocm-env/bin/activate +# Install PyTorch first. This pins the compatible ROCm runtime/library version. +python -m pip install --pre --index-url https://rocm.nightlies.amd.com/v2/gfx1151/ torch==2.10.0 torchaudio torchvision -pip install --pre --index-url https://rocm.nightlies.amd.com/v2/gfx1151/ torch==2.10.0 torchaudio torchvision +# Install ROCm devel packages matching the ROCm version pulled in by PyTorch. 
+ROCM_VERSION="$(python -c 'import importlib.metadata as m; print(m.version("rocm"))')" +python -m pip install --index-url https://rocm.nightlies.amd.com/v2/gfx1151/ "rocm[libraries,devel]==${ROCM_VERSION}" + +# Verify installed package versions +python -m pip list | grep -E '^(rocm|rocm-sdk|torch|torchvision|torchaudio)' || true ``` -```bash +```powershell rocm-env\Scripts\activate -pip install --upgrade pip setuptools wheel -pip install --index-url https://rocm.nightlies.amd.com/v2/gfx1151/ "rocm[libraries,devel]" -# Reboot +python -m pip install --upgrade pip setuptools wheel -# Open a Powershell terminal and activate Visual Studio environment -cmd /c '"C:\Program Files\Microsoft Visual Studio\2022\Community\VC\Auxiliary\Build\vcvars64.bat" >nul 2>&1 && set' | ForEach-Object { if ($_ -match '^([^=]+)=(.*)$') { [System.Environment]::SetEnvironmentVariable($matches[1], $matches[2], 'Process') } } +# Install PyTorch first. This pins the compatible ROCm runtime/library version. +python -m pip install --pre --index-url https://rocm.nightlies.amd.com/v2/gfx1151/ torch==2.10.0 torchaudio torchvision -rocm-env\Scripts\activate +# Install ROCm devel packages matching the ROCm version pulled in by PyTorch. 
+$RocmVersion = (.\rocm-env\Scripts\python.exe -c "import importlib.metadata as m; print(m.version('rocm'))").Trim() +Write-Host "Installing ROCm devel package matching rocm==$RocmVersion" +python -m pip install --index-url https://rocm.nightlies.amd.com/v2/gfx1151/ "rocm[libraries,devel]==$RocmVersion" + +# Verify installed package versions +python -m pip list | Select-String "rocm|torch|torchvision|torchaudio" +``` + -pip install --pre --index-url https://rocm.nightlies.amd.com/v2/gfx1151/ torch==2.10.0 torchaudio torchvision + +Open a new PowerShell terminal and activate the Visual Studio environment: +```powershell +cmd /c '"C:\Program Files\Microsoft Visual Studio\2022\Community\VC\Auxiliary\Build\vcvars64.bat" >nul 2>&1 && set' | ForEach-Object { if ($_ -match '^([^=]+)=(.*)$') { [System.Environment]::SetEnvironmentVariable($matches[1], $matches[2], 'Process') } } ``` #### Set Environment Variables ```bash +source rocm-env/bin/activate + rocm-sdk init # Initialize the devel libraries -export ROCM_HOME="$VIRTUAL_ENV/lib/python3.12/site-packages/_rocm_sdk_devel" +PY_MM="$(python -c 'import sys; print(f"{sys.version_info.major}.{sys.version_info.minor}")')" +export ROCM_HOME="$VIRTUAL_ENV/lib/python${PY_MM}/site-packages/_rocm_sdk_devel" export LD_LIBRARY_PATH="$ROCM_HOME/lib:$LD_LIBRARY_PATH" export PATH="$ROCM_HOME/bin:$PATH" ``` -```bash +```powershell +rocm-env\Scripts\activate + rocm-sdk init # Initialize the devel libraries $ROCM_ROOT = (rocm-sdk path --root).Trim() @@ -206,6 +224,7 @@ $env:PATH = (($RocmPathEntries + @($env:PATH)) -join ";") $env:ROCM_HOME = $ROCM_ROOT $env:HIP_PATH = $ROCM_ROOT +$env:ROCM_BIN = $ROCM_BIN $env:HIP_PLATFORM = "amd" # Set compiler and build settings @@ -237,9 +256,15 @@ fi source "$VENV/bin/activate" -pip install --upgrade pip setuptools wheel -pip install --index-url https://rocm.nightlies.amd.com/v2/gfx1151/ "rocm[libraries,devel]" -pip install --pre --index-url https://rocm.nightlies.amd.com/v2/gfx1151/ torch==2.10.0 
torchaudio torchvision +python -m pip install --upgrade pip setuptools wheel +# Install PyTorch first. This pins the compatible ROCm runtime/library version. +python -m pip install --pre --index-url https://rocm.nightlies.amd.com/v2/gfx1151/ torch==2.10.0 torchaudio torchvision +# Install the matching ROCm devel package for the ROCm version pulled in by torch. +ROCM_VERSION="$(python -c 'import importlib.metadata as m; print(m.version("rocm"))')" +echo "Installing ROCm devel package matching rocm==$ROCM_VERSION" +python -m pip install --index-url https://rocm.nightlies.amd.com/v2/gfx1151/ "rocm[libraries,devel]==${ROCM_VERSION}" +echo "Installed ROCm/PyTorch packages:" +python -m pip list | grep -E '^(rocm|rocm-sdk|torch|torchvision|torchaudio)' || true rocm-sdk init @@ -255,7 +280,6 @@ hipcc --version >/dev/null rocminfo >/dev/null python - <<'PY' -import sys import torch print("torch:", torch.__version__) @@ -297,9 +321,15 @@ $Python = Join-Path $Venv "Scripts\python.exe" if (-not (Test-Path $Python)) {throw "Missing venv at $Venv. Run the setup steps first."} -pip install --upgrade pip setuptools wheel -pip install --index-url https://rocm.nightlies.amd.com/v2/gfx1151/ "rocm[libraries,devel]" -pip install --pre --index-url https://rocm.nightlies.amd.com/v2/gfx1151/ torch==2.10.0 torchaudio torchvision +& $Python -m pip install --upgrade pip setuptools wheel +# Install PyTorch first. This pins the compatible ROCm runtime/library version. +& $Python -m pip install --pre --index-url https://rocm.nightlies.amd.com/v2/gfx1151/ torch==2.10.0 torchaudio torchvision +# Install the matching ROCm devel package for the ROCm version pulled in by torch. 
+$RocmVersion = (& $Python -c "import importlib.metadata as m; print(m.version('rocm'))").Trim() +Write-Host "Installing ROCm devel package matching rocm==$RocmVersion" +& $Python -m pip install --index-url https://rocm.nightlies.amd.com/v2/gfx1151/ "rocm[libraries,devel]==$RocmVersion" +Write-Host "Installed ROCm/PyTorch packages:" +& $Python -m pip list | Select-String "rocm|torch|torchvision|torchaudio" $RocmSdk = Join-Path $Venv "Scripts\rocm-sdk.exe" if (-not (Test-Path $RocmSdk)) {throw "Missing rocm-sdk.exe at $RocmSdk. Run the setup steps first."} @@ -309,10 +339,18 @@ $ROCM_ROOT = (& $RocmSdk path --root).Trim() $ROCM_BIN = (& $RocmSdk path --bin).Trim() $ExpectedHiprtc = Join-Path $ROCM_BIN "hiprtc0701.dll" -$ActualHiprtc = Join-Path $ROCM_BIN "hiprtc07013.dll" -if ((-not (Test-Path $ExpectedHiprtc)) -and (Test-Path $ActualHiprtc)) { - Copy-Item $ActualHiprtc $ExpectedHiprtc -Force - Write-Host "Created HIPRTC compatibility copy: $ExpectedHiprtc" +$ActualHiprtc = Get-ChildItem -Path $ROCM_BIN -Filter "hiprtc*.dll" -ErrorAction SilentlyContinue | + Where-Object { $_.Name -notlike "hiprtc-builtins*" -and $_.Name -ne "hiprtc0701.dll" } | + Sort-Object Name -Descending | + Select-Object -First 1 +if ((-not (Test-Path $ExpectedHiprtc)) -and $ActualHiprtc) { + Copy-Item $ActualHiprtc.FullName $ExpectedHiprtc -Force + Write-Host "Created HIPRTC compatibility copy: $ExpectedHiprtc from $($ActualHiprtc.Name)" +} +if (-not (Test-Path $ExpectedHiprtc)) { + Write-Host "Available HIPRTC DLLs:" + Get-ChildItem -Path $ROCM_BIN -Filter "hiprtc*.dll" -ErrorAction SilentlyContinue | Select-Object FullName | Out-Host + throw "Missing $ExpectedHiprtc and no compatible hiprtc*.dll was found to copy." 
} $RocmPathEntries = @( @@ -325,6 +363,7 @@ $env:PATH = (($RocmPathEntries + @($env:PATH)) -join ";") $env:ROCM_HOME = $ROCM_ROOT $env:HIP_PATH = $ROCM_ROOT +$env:ROCM_BIN = $ROCM_BIN $env:HIP_PLATFORM = "amd" $env:CC = "clang-cl" $env:CXX = "clang-cl" @@ -367,6 +406,21 @@ if not torch.cuda.is_available(): raise SystemExit("torch.cuda.is_available() is False. AMD GPU is not available through HIP.") print("Device:", torch.cuda.get_device_name(0)) + +kernel_source = r""" +extern "C" +__global__ void noop(float* data) { + data[0] = data[0] + 1.0f; +} +""" +kernel = torch.cuda._compile_kernel(kernel_source, "noop") +x = torch.zeros(1, dtype=torch.float32, device="cuda") +kernel(grid=(1, 1, 1), block=(1, 1, 1), args=[x]) +torch.cuda.synchronize() +if abs(x.item() - 1.0) > 1e-6: + raise SystemExit("HIPRTC/JIT sanity check failed.") +print("OK: HIPRTC JIT compilation is ready") + print("OK: ROCm PyTorch environment is ready") '@ @@ -531,10 +585,18 @@ $ROCM_ROOT = (& $RocmSdk path --root).Trim() $ROCM_BIN = (& $RocmSdk path --bin).Trim() $ExpectedHiprtc = Join-Path $ROCM_BIN "hiprtc0701.dll" -$ActualHiprtc = Join-Path $ROCM_BIN "hiprtc07013.dll" -if ((-not (Test-Path $ExpectedHiprtc)) -and (Test-Path $ActualHiprtc)) { - Copy-Item $ActualHiprtc $ExpectedHiprtc -Force - Write-Host "Created HIPRTC compatibility copy: $ExpectedHiprtc" +$ActualHiprtc = Get-ChildItem -Path $ROCM_BIN -Filter "hiprtc*.dll" -ErrorAction SilentlyContinue | + Where-Object { $_.Name -notlike "hiprtc-builtins*" -and $_.Name -ne "hiprtc0701.dll" } | + Sort-Object Name -Descending | + Select-Object -First 1 +if ((-not (Test-Path $ExpectedHiprtc)) -and $ActualHiprtc) { + Copy-Item $ActualHiprtc.FullName $ExpectedHiprtc -Force + Write-Host "Created HIPRTC compatibility copy: $ExpectedHiprtc from $($ActualHiprtc.Name)" +} +if (-not (Test-Path $ExpectedHiprtc)) { + Write-Host "Available HIPRTC DLLs:" + Get-ChildItem -Path $ROCM_BIN -Filter "hiprtc*.dll" -ErrorAction SilentlyContinue | Select-Object FullName 
| Out-Host + throw "Missing $ExpectedHiprtc and no compatible hiprtc*.dll was found to copy." } $RocmPathEntries = @( @@ -1054,10 +1116,18 @@ $ROCM_ROOT = (& $RocmSdk path --root).Trim() $ROCM_BIN = (& $RocmSdk path --bin).Trim() $ExpectedHiprtc = Join-Path $ROCM_BIN "hiprtc0701.dll" -$ActualHiprtc = Join-Path $ROCM_BIN "hiprtc07013.dll" -if ((-not (Test-Path $ExpectedHiprtc)) -and (Test-Path $ActualHiprtc)) { - Copy-Item $ActualHiprtc $ExpectedHiprtc -Force - Write-Host "Created HIPRTC compatibility copy: $ExpectedHiprtc" +$ActualHiprtc = Get-ChildItem -Path $ROCM_BIN -Filter "hiprtc*.dll" -ErrorAction SilentlyContinue | + Where-Object { $_.Name -notlike "hiprtc-builtins*" -and $_.Name -ne "hiprtc0701.dll" } | + Sort-Object Name -Descending | + Select-Object -First 1 +if ((-not (Test-Path $ExpectedHiprtc)) -and $ActualHiprtc) { + Copy-Item $ActualHiprtc.FullName $ExpectedHiprtc -Force + Write-Host "Created HIPRTC compatibility copy: $ExpectedHiprtc from $($ActualHiprtc.Name)" +} +if (-not (Test-Path $ExpectedHiprtc)) { + Write-Host "Available HIPRTC DLLs:" + Get-ChildItem -Path $ROCM_BIN -Filter "hiprtc*.dll" -ErrorAction SilentlyContinue | Select-Object FullName | Out-Host + throw "Missing $ExpectedHiprtc and no compatible hiprtc*.dll was found to copy." } $RocmPathEntries = @(