diff --git a/.github/scripts/end2end/configs/jaeger.yaml b/.github/scripts/end2end/configs/jaeger.yaml
new file mode 100644
index 0000000000..b6b48a59d5
--- /dev/null
+++ b/.github/scripts/end2end/configs/jaeger.yaml
@@ -0,0 +1,66 @@
+# Jaeger all-in-one for the end2end tracing tests: OTLP ingestion on 4317
+# (gRPC) / 4318 (HTTP), query API on 16686, trace store capped by MEMORY_MAX_TRACES.
+---
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: jaeger
+  namespace: default
+  labels:
+    app: jaeger
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app: jaeger
+  template:
+    metadata:
+      labels:
+        app: jaeger
+    spec:
+      containers:
+        - name: jaeger
+          image: jaegertracing/all-in-one:1.76.0
+          resources:
+            requests:
+              cpu: 100m
+              memory: 128Mi
+            limits:
+              memory: 512Mi
+          env:
+            - name: COLLECTOR_OTLP_ENABLED
+              value: "true"
+            - name: MEMORY_MAX_TRACES
+              value: "10000"
+          ports:
+            - containerPort: 16686
+              name: query
+            - containerPort: 4317
+              name: otlp-grpc
+            - containerPort: 4318
+              name: otlp-http
+          readinessProbe:
+            httpGet:
+              path: /
+              port: 16686
+            initialDelaySeconds: 5
+            periodSeconds: 5
+---
+apiVersion: v1
+kind: Service
+metadata:
+  name: jaeger
+  namespace: default
+spec:
+  selector:
+    app: jaeger
+  ports:
+    - name: query
+      port: 16686
+      targetPort: 16686
+    - name: otlp-grpc
+      port: 4317
+      targetPort: 4317
+    - name: otlp-http
+      port: 4318
+      targetPort: 4318
diff --git a/.github/scripts/end2end/configs/zenko.yaml b/.github/scripts/end2end/configs/zenko.yaml
index ab771516b1..00535310c7 100644
--- a/.github/scripts/end2end/configs/zenko.yaml
+++ b/.github/scripts/end2end/configs/zenko.yaml
@@ -135,3 +135,8 @@ spec:
     - zenko-operator-image-pull
   veeamSosApi:
     enable: ${ZENKO_ENABLE_SOSAPI}
+  tracing:
+    enabled: true
+    # Global sampling is off; the tracing test injects its own traceparent.
+    samplingRatio: "0"
+    endpoint: "http://jaeger.default.svc.cluster.local:4318/v1/traces"
diff --git a/.github/scripts/end2end/configure-e2e-ctst.sh b/.github/scripts/end2end/configure-e2e-ctst.sh
index d3f7e0d033..12e089e8b7 100755
--- a/.github/scripts/end2end/configure-e2e-ctst.sh
+++ b/.github/scripts/end2end/configure-e2e-ctst.sh
@@ -111,3 +111,4 @@ kubectl run kafka-topics \
 # Deploy PyKMIP server (infra only, does NOT patch the CR).
 # The CR is patched later, after file-backend SSE tests have run.
 bash "$(dirname "$0")/../mocks/setup-kmip.sh"
+
diff --git a/.github/scripts/end2end/install-kind-dependencies.sh b/.github/scripts/end2end/install-kind-dependencies.sh
index 197b1c97a8..2d9fc43fd5 100755
--- a/.github/scripts/end2end/install-kind-dependencies.sh
+++ b/.github/scripts/end2end/install-kind-dependencies.sh
@@ -153,6 +153,9 @@ helm upgrade --install --version ${KEYCLOAK_VERSION} keycloak codecentric/keyclo
 
 kubectl rollout status sts/keycloak --timeout=10m
 
+# jaeger all-in-one (OTLP collector + query UI, memory-only)
+kubectl apply -f "$(dirname "$0")/configs/jaeger.yaml"
+kubectl rollout status --namespace default deployment/jaeger --timeout=5m
 
 # TODO: use zenko-operator install-deps
 kubectl apply -f - </dev/null | grep -q ":${JAEGER_QUERY_PORT}" && \
+       ! lsof -i ":${JAEGER_QUERY_PORT}" &>/dev/null; then
+        kubectl port-forward --namespace default "svc/jaeger" "${JAEGER_QUERY_PORT}:${JAEGER_QUERY_PORT}" &>/dev/null &
+        _JAEGER_PF_PID=$!
+        timeout 10 bash -c "until ss -tlnp 2>/dev/null | grep -q ':${JAEGER_QUERY_PORT}'; do sleep 0.2; done"
+    fi
+    export JAEGER_QUERY_ENDPOINT="http://localhost:${JAEGER_QUERY_PORT}"
+
+    # Cloudserver internal port-forward — bypasses ingress so OTEL tests can
+    # inject a W3C traceparent header (which nginx ingress strips)
+    CLOUDSERVER_INTERNAL_PORT=8000
+    if ! ss -tlnp 2>/dev/null | grep -q ":${CLOUDSERVER_INTERNAL_PORT}" && \
+       ! lsof -i ":${CLOUDSERVER_INTERNAL_PORT}" &>/dev/null; then
+        kubectl port-forward "svc/${ZENKO_NAME}-connector-cloudserver" "${CLOUDSERVER_INTERNAL_PORT}:${CLOUDSERVER_INTERNAL_PORT}" &>/dev/null &
+        _CLOUDSERVER_INTERNAL_PF_PID=$!
+        timeout 10 bash -c "until ss -tlnp 2>/dev/null | grep -q ':${CLOUDSERVER_INTERNAL_PORT}'; do sleep 0.2; done"
+    fi
+    export INTERNAL_CLOUDSERVER_ENDPOINT="http://localhost:${CLOUDSERVER_INTERNAL_PORT}"
+
     # --- 14. Zenko CR metadata ---
     export TIME_PROGRESSION_FACTOR=$(kubectl get zenko ${ZENKO_NAME} -o jsonpath="{.metadata.annotations.zenko\.io/time-progression-factor}")
     export INSTANCE_ID=$(kubectl get zenko ${ZENKO_NAME} -o jsonpath='{.status.instanceID}')
@@ -338,7 +359,9 @@ else
     "DRAdminSecretKey":"${ADMIN_PRA_SECRET_ACCESS_KEY}",
     "UtilizationServiceHost":"${UTILIZATION_SERVICE_HOST}",
     "UtilizationServicePort":"${UTILIZATION_SERVICE_PORT}",
-    "KubeconfigPath":"${KUBECONFIG:-${HOME}/.kube/config}"
+    "KubeconfigPath":"${KUBECONFIG:-${HOME}/.kube/config}",
+    "JaegerQueryEndpoint":"${JAEGER_QUERY_ENDPOINT}",
+    "InternalCloudserverEndpoint":"${INTERNAL_CLOUDSERVER_ENDPOINT}"
 }
 EOF
 )"
diff --git a/tests/functional/ctst/features/otel-tracing.feature b/tests/functional/ctst/features/otel-tracing.feature
new file mode 100644
index 0000000000..5f5e12a532
--- /dev/null
+++ b/tests/functional/ctst/features/otel-tracing.feature
@@ -0,0 +1,13 @@
+@2.15.0
+@PreMerge
+Feature: OpenTelemetry Tracing
+    Even when global sampling is disabled, a request carrying a W3C
+    traceparent header from a trusted (in-cluster) source must produce
+    a trace spanning all the Zenko services it touches.
+
+    Scenario: PutObject with injected traceparent produces a trace spanning cloudserver and vault
+        Given a "Non versioned" bucket
+        When I put an object with an injected traceparent
+        Then the injected trace should be found in Jaeger
+        And the trace should contain spans from service "connector-cloudserver"
+        And the trace should contain spans from service "connector-vault"
diff --git a/tests/functional/ctst/steps/otel-tracing.ts b/tests/functional/ctst/steps/otel-tracing.ts
new file mode 100644
index 0000000000..408588ac03
--- /dev/null
+++ b/tests/functional/ctst/steps/otel-tracing.ts
@@ -0,0 +1,209 @@
+import { Then, When } from '@cucumber/cucumber';
+import { strict as assert } from 'assert';
+import { PutObjectCommand, S3Client } from '@aws-sdk/client-s3';
+import { randomBytes } from 'crypto';
+import { Identity, Utils } from 'cli-testing';
+import Zenko from 'world/Zenko';
+
+const JAEGER_POLL_TIMEOUT = 30000;
+const JAEGER_POLL_INTERVAL = 2000;
+const TRACED_OBJECT_KEY = 'otel-trace-test-object';
+
+/** Subset of the Jaeger query API trace payload that these steps read. */
+interface JaegerTrace {
+    traceID: string;
+    spans: { processID: string }[];
+    processes: Record<string, { serviceName: string }>;
+}
+
+/**
+ * Builds a random W3C trace context.
+ *
+ * @returns the `traceparent` header value and the trace id embedded in it.
+ */
+function generateTraceContext(): { traceparent: string; traceId: string } {
+    const traceId = randomBytes(16).toString('hex');
+    const spanId = randomBytes(8).toString('hex');
+    return { traceparent: `00-${traceId}-${spanId}-01`, traceId };
+}
+
+/**
+ * Creates an S3 client for `endpoint`, using the current identity's
+ * credentials, that adds `traceparent` to every request it sends.
+ */
+function buildInternalS3Client(endpoint: string, traceparent: string): S3Client {
+    const credentials = Identity.getCurrentCredentials();
+    const client = new S3Client({
+        region: 'us-east-1',
+        endpoint,
+        credentials: {
+            accessKeyId: credentials.accessKeyId,
+            secretAccessKey: credentials.secretAccessKey,
+        },
+        forcePathStyle: true,
+    });
+    // Injected at the 'build' step, i.e. before the SDK signs the request,
+    // so the signature is computed over the final set of headers.
+    client.middlewareStack.add(
+        next => async args => {
+            const request = args.request as { headers: Record<string, string> };
+            request.headers.traceparent = traceparent;
+            return next(args);
+        },
+        { step: 'build', name: 'injectTraceparent' },
+    );
+    return client;
+}
+
+/**
+ * Fetches one trace from the Jaeger query API.
+ *
+ * @returns the trace, or `null` if Jaeger answers 404 or returns no data.
+ * @throws on any other non-2xx answer, or if the request takes over 5s.
+ */
+async function fetchTraceById(endpoint: string, traceId: string): Promise<JaegerTrace | null> {
+    const response = await fetch(`${endpoint}/api/traces/${traceId}`, {
+        signal: AbortSignal.timeout(5000),
+    });
+    if (response.status === 404) {
+        return null;
+    }
+    if (!response.ok) {
+        throw new Error(`Jaeger query returned HTTP ${response.status}`);
+    }
+    const body = await response.json() as { data?: JaegerTrace[] };
+    return body.data?.[0] ?? null;
+}
+
+/** Lists the distinct service names found in the trace's process table. */
+function servicesInTrace(trace: JaegerTrace): string[] {
+    return [...new Set(Object.values(trace.processes).map(p => p.serviceName))];
+}
+
+/** Tells whether at least one span of `trace` belongs to `serviceName`. */
+function traceHasSpansFromService(trace: JaegerTrace, serviceName: string): boolean {
+    const processIds = new Set(
+        Object.entries(trace.processes)
+            .filter(([, proc]) => proc.serviceName === serviceName)
+            .map(([id]) => id),
+    );
+    return trace.spans.some(span => processIds.has(span.processID));
+}
+
+/**
+ * Polls Jaeger until the trace exists and `isAcceptable` approves it.
+ *
+ * Spans from different services may reach Jaeger at different times, so a
+ * caller that needs specific spans passes a predicate instead of relying on
+ * the first snapshot.
+ *
+ * @param isAcceptable - extra condition on the fetched trace; by default any
+ * trace is accepted as soon as it is found.
+ * @throws once `timeoutMs` has elapsed; the message includes the last fetch
+ * error and the services of the last snapshot when there are any.
+ */
+async function pollJaegerForTrace(
+    endpoint: string,
+    traceId: string,
+    timeoutMs = JAEGER_POLL_TIMEOUT,
+    intervalMs = JAEGER_POLL_INTERVAL,
+    isAcceptable: (trace: JaegerTrace) => boolean = () => true,
+): Promise<JaegerTrace> {
+    const deadline = Date.now() + timeoutMs;
+    let lastError: Error | null = null;
+    let lastTrace: JaegerTrace | null = null;
+
+    while (Date.now() < deadline) {
+        try {
+            const trace = await fetchTraceById(endpoint, traceId);
+            if (trace) {
+                lastTrace = trace;
+                if (isAcceptable(trace)) {
+                    return trace;
+                }
+            }
+        } catch (err) {
+            lastError = err instanceof Error ? err : new Error(String(err));
+        }
+        await Utils.sleep(intervalMs);
+    }
+
+    throw new Error(
+        `pollJaegerForTrace timed out after ${timeoutMs}ms waiting for trace ${traceId}` +
+        `${lastError ? `: ${lastError.message}` : ''}` +
+        `${lastTrace ? `. Services in trace: ${servicesInTrace(lastTrace).join(', ')}` : ''}`,
+    );
+}
+
+When('I put an object with an injected traceparent',
+    async function (this: Zenko) {
+        const endpoint = this.parameters.InternalCloudserverEndpoint;
+        assert.ok(endpoint, 'InternalCloudserverEndpoint missing from world parameters');
+        const bucketName = this.getSaved<string>('bucketName');
+        assert.ok(bucketName, 'No bucketName saved from a previous step');
+
+        const { traceparent, traceId } = generateTraceContext();
+        const client = buildInternalS3Client(endpoint, traceparent);
+        try {
+            await client.send(new PutObjectCommand({
+                Bucket: bucketName,
+                Key: TRACED_OBJECT_KEY,
+                Body: 'otel-trace-payload',
+            }));
+        } finally {
+            // One-shot client: release its sockets once the request is done.
+            client.destroy();
+        }
+
+        this.addToSaved('jaegerTraceId', traceId);
+    },
+);
+
+Then('the injected trace should be found in Jaeger',
+    { timeout: JAEGER_POLL_TIMEOUT + 10000 },
+    async function (this: Zenko) {
+        const endpoint = this.parameters.JaegerQueryEndpoint;
+        assert.ok(endpoint, 'JaegerQueryEndpoint missing from world parameters');
+        const traceId = this.getSaved<string>('jaegerTraceId');
+        assert.ok(traceId, 'No jaegerTraceId saved from a previous step');
+
+        const trace = await pollJaegerForTrace(endpoint, traceId);
+        this.addToSaved('jaegerTrace', trace);
+    },
+);
+
+Then('the trace should contain spans from service {string}',
+    { timeout: JAEGER_POLL_TIMEOUT + 10000 },
+    async function (this: Zenko, service: string) {
+        const savedTrace = this.getSaved<JaegerTrace>('jaegerTrace');
+        assert.ok(savedTrace, 'No trace saved from the previous step');
+        if (traceHasSpansFromService(savedTrace, service)) {
+            return;
+        }
+
+        // The saved snapshot dates from the first time Jaeger returned the
+        // trace; poll again in case this service's spans arrived later.
+        const endpoint = this.parameters.JaegerQueryEndpoint;
+        assert.ok(endpoint, 'JaegerQueryEndpoint missing from world parameters');
+        const traceId = this.getSaved<string>('jaegerTraceId');
+        assert.ok(traceId, 'No jaegerTraceId saved from a previous step');
+
+        let trace: JaegerTrace;
+        try {
+            trace = await pollJaegerForTrace(
+                endpoint,
+                traceId,
+                JAEGER_POLL_TIMEOUT,
+                JAEGER_POLL_INTERVAL,
+                candidate => traceHasSpansFromService(candidate, service),
+            );
+        } catch (err) {
+            const reason = err instanceof Error ? err.message : String(err);
+            throw new Error(
+                `Trace ${savedTrace.traceID} does not contain spans from service ` +
+                `"${service}". ${reason}`,
+            );
+        }
+        this.addToSaved('jaegerTrace', trace);
+    },
+);
diff --git a/tests/functional/ctst/world/Zenko.ts b/tests/functional/ctst/world/Zenko.ts
index 6b56a1ab38..1b2940bb07 100644
--- a/tests/functional/ctst/world/Zenko.ts
+++ b/tests/functional/ctst/world/Zenko.ts
@@ -103,6 +103,8 @@ export interface ZenkoWorldParameters extends ClientOptions {
     SorbetdRestoreTimeout: string;
     UtilizationServiceHost: string;
     UtilizationServicePort: string;
+    JaegerQueryEndpoint: string;
+    InternalCloudserverEndpoint: string;
     [key: string]: unknown;
 }