Skip to content
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -160,11 +160,12 @@ tests:
env:
AWS_SECUREBOOT_ENABLED: "true"
BASE_DOMAIN: edge-sro.rhecoeng.com
MERGE_STAGE_REGISTRY_CREDENTIALS: "true"
NVIDIAGPU_GPU_CLUSTER_POLICY_PATCH: '[{"op": "add", "path": "/spec/driver/usePrecompiled",
"value": true}, {"op": "add", "path": "/spec/driver/repository", "value":
"quay.io/jcastillolema"}, {"op": "add", "path": "/spec/driver/image", "value":
"gpu-driver-rhel9"}, {"op": "add", "path": "/spec/driver/version", "value":
"580.159.03"}]'
"registry.stage.redhat.io/nvidia"}, {"op": "add", "path": "/spec/driver/image",
"value": "gpu-driver-rhel9"}, {"op": "add", "path": "/spec/driver/version",
"value": "580.159.03"}]'
NVIDIAGPU_SUBSCRIPTION_CHANNEL: v26.3
workflow: nvidia-gpu-operator-e2e-aws
zz_generated_metadata:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ workflow:
- chain: ipi-install
test:
- ref: aws-secureboot-verify
- ref: nvidia-gpu-operator-merge-stage-credentials
- as: gpu-operator-e2e
commands: make run-tests
from: nvidia-ci
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
approvers:
- empovit
- fabiendupont
- ggordanired
- josecastillolema
- TomerNewman
- wabouhamad
- ybettan
options: {}
reviewers:
- empovit
- fabiendupont
- ggordanired
- josecastillolema
- TomerNewman
- wabouhamad
- ybettan
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
#!/bin/bash
#
# Merge registry.stage.redhat.io pull credentials into the cluster pull secret
# (secret/pull-secret in the openshift-config namespace).
#
# Environment:
#   MERGE_STAGE_REGISTRY_CREDENTIALS  the step only acts when this is "true";
#                                     any other value (or unset) is a no-op.
# Inputs:
#   /var/run/vault/mirror-registry/registry_stage.json
#       JSON document providing the `.user` and `.password` fields.
# Outputs:
#   /tmp/new-dockerconfigjson  the merged pull secret, read again later in
#                              this script when the cluster secret is updated.

set -euo pipefail

# Default to "false" so an unset variable skips the step instead of tripping
# `set -u`.
if [[ "${MERGE_STAGE_REGISTRY_CREDENTIALS:-false}" != "true" ]]; then
  echo "MERGE_STAGE_REGISTRY_CREDENTIALS is not 'true', skipping."
  exit 0
fi

STAGE_REGISTRY_PATH="/var/run/vault/mirror-registry/registry_stage.json"

if [[ ! -f "${STAGE_REGISTRY_PATH}" ]]; then
  echo "Stage registry credentials not found at ${STAGE_REGISTRY_PATH}"
  exit 1
fi

# Both files below contain registry credentials; make sure they do not outlive
# the script, whichever way it exits.
trap 'rm -f /tmp/.dockerconfigjson /tmp/new-dockerconfigjson' EXIT

echo "Extracting current cluster pull secret..."
oc extract secret/pull-secret -n openshift-config --confirm --to /tmp

echo "Merging registry.stage.redhat.io credentials..."
# Suspend xtrace (if it was on) for every command that has a credential in its
# arguments, including the jq merge below, and restore it only afterwards.
if [[ $- == *x* ]]; then
  WAS_TRACING=true
else
  WAS_TRACING=false
fi
set +x

# `// empty` turns a missing/null field into an empty string rather than the
# literal text "null", so the check below can catch it.
stage_auth_user=$(jq -r '.user // empty' "${STAGE_REGISTRY_PATH}")
stage_auth_password=$(jq -r '.password // empty' "${STAGE_REGISTRY_PATH}")
if [[ -z "${stage_auth_user}" || -z "${stage_auth_password}" ]]; then
  echo "Stage registry credentials at ${STAGE_REGISTRY_PATH} are missing 'user' or 'password'"
  exit 1
fi
stage_registry_auth=$(printf '%s' "${stage_auth_user}:${stage_auth_password}" | base64 -w 0)

# Pass the value with --arg so jq handles the JSON quoting; this sets (or
# replaces) the registry.stage.redhat.io entry and keeps all other auths.
jq --arg auth "${stage_registry_auth}" \
  '.auths["registry.stage.redhat.io"] = {auth: $auth}' \
  /tmp/.dockerconfigjson > /tmp/new-dockerconfigjson

if [[ "${WAS_TRACING}" == "true" ]]; then
  set -x
fi
Comment thread
coderabbitai[bot] marked this conversation as resolved.

# Push the merged pull secret to the cluster, then wait until the worker
# MachineConfigPool reports the change as rolled out. Exits 0 on success;
# falls through (with COUNTER holding the elapsed seconds) when the polling
# budget is exhausted.
echo "Updating cluster pull secret..."
oc set data secret/pull-secret -n openshift-config \
  --from-file=.dockerconfigjson=/tmp/new-dockerconfigjson

echo "Waiting for MCP worker pool to propagate..."
# Overall polling budget in seconds; it also covers the initial wait for the
# rollout to begin. SECONDS is bash's built-in elapsed-time counter.
MCP_WAIT_BUDGET=600
SECONDS=0

total=$(oc get mcp worker -o jsonpath='{.status.machineCount}')
if [[ -z "${total}" ]]; then
  # Without a machine count, an empty "updated" value would compare equal to
  # it and be reported as success.
  echo "Could not read machineCount from MCP worker"
  oc get mcp worker -o yaml
  exit 1
fi

# NOTE: right after the secret changes the pool may not have started rolling
# out yet, in which case updatedMachineCount can still equal machineCount.
# Give the pool a chance to flag itself as Updating before polling. This is
# best-effort: if the secret content did not actually change, no rollout is
# expected and the check below succeeds on the first poll.
if ! oc wait mcp/worker --for=condition=Updating=True --timeout=120s; then
  echo "MCP worker did not report Updating within 120s; checking rollout status anyway."
fi

COUNTER=${SECONDS}
while (( COUNTER < MCP_WAIT_BUDGET )); do
  sleep 20
  COUNTER=${SECONDS}
  # A transient API error must not abort the wait under `set -e`; treat it as
  # "not done yet" and poll again.
  updated=$(oc get mcp worker -o jsonpath='{.status.updatedMachineCount}') || updated=""
  updating=$(oc get mcp worker \
    -o jsonpath='{.status.conditions[?(@.type=="Updating")].status}') || updating=""
  echo "MCP rollout: ${updated:-?}/${total} machines updated (${COUNTER}s elapsed)"
  if [[ "${updated}" == "${total}" && "${updating}" == "False" ]]; then
    echo "MCP rollout complete."
    exit 0
  fi
done
Comment thread
coderabbitai[bot] marked this conversation as resolved.
Outdated

# Reached only when the wait loop above ran out of time: report how long was
# spent, dump the worker pool for diagnosis, and fail the step.
printf '%s\n' "MCP rollout timed out after ${COUNTER}s"
oc get mcp worker --output yaml
exit 1
Comment thread
coderabbitai[bot] marked this conversation as resolved.
Outdated
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
{
"path": "nvidia-gpu-operator/merge-stage-credentials/nvidia-gpu-operator-merge-stage-credentials-ref.yaml",
"owners": {
"approvers": [
"empovit",
"fabiendupont",
"ggordanired",
"josecastillolema",
"TomerNewman",
"wabouhamad",
"ybettan"
],
"reviewers": [
"empovit",
"fabiendupont",
"ggordanired",
"josecastillolema",
"TomerNewman",
"wabouhamad",
"ybettan"
]
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
ref:
  as: nvidia-gpu-operator-merge-stage-credentials
  from: cli
  cli: latest
  grace_period: 10m
  commands: nvidia-gpu-operator-merge-stage-credentials-commands.sh
  # The commands script polls the worker MachineConfigPool for up to 600s by
  # itself. Keep the step timeout above that budget so the script's own
  # timeout diagnostics can run before the step is terminated.
  timeout: 15m
  resources:
    requests:
      cpu: 100m
      memory: 100Mi
  env:
  - name: MERGE_STAGE_REGISTRY_CREDENTIALS
    default: "false"
    documentation: |-
      When set to "true", merges registry.stage.redhat.io credentials into
      the cluster global pull secret. Required for pulling precompiled driver
      images from the Red Hat staging registry. When "false", the step is a
      no-op.
  credentials:
  - namespace: test-credentials
    name: openshift-custom-mirror-registry
    mount_path: /var/run/vault/mirror-registry
  documentation: |-
    Merges registry.stage.redhat.io pull credentials into the cluster global
    pull secret so that the GPU operator can pull precompiled driver images
    from the Red Hat staging registry.

    Controlled by the MERGE_STAGE_REGISTRY_CREDENTIALS env var. When set to
    "false" (default), the step exits immediately as a no-op.