From 6b9d9bd452e1150e85e35eb51b08b7b0387825c5 Mon Sep 17 00:00:00 2001 From: Daniil Antoshin Date: Mon, 22 Jun 2026 19:13:42 +0200 Subject: [PATCH 1/2] fix(dra): support Deckhouse GPU attributes --- pkg/virt-controller/watch/dra/dra.go | 63 +++++++++++++--- pkg/virt-controller/watch/dra/dra_test.go | 92 ++++++++++++++++++++--- 2 files changed, 137 insertions(+), 18 deletions(-) diff --git a/pkg/virt-controller/watch/dra/dra.go b/pkg/virt-controller/watch/dra/dra.go index 4961aabb523e..73775f4de94c 100644 --- a/pkg/virt-controller/watch/dra/dra.go +++ b/pkg/virt-controller/watch/dra/dra.go @@ -60,9 +60,14 @@ const ( deleteNotifFailed = "Failed to process delete notification" tombstoneGetObjectErrFmt = "couldn't get object from tombstone %+v" - indexByNodeName = "byNodeName" - PCIAddressDeviceAttributeKey = "resource.kubernetes.io/pcieRoot" - MDevUUIDDeviceAttributeKey = "resource.kubernetes.io/mDevUUID" + indexByNodeName = "byNodeName" + PCIAddressDeviceAttributeKey = "resource.kubernetes.io/pcieRoot" + MDevUUIDDeviceAttributeKey = "resource.kubernetes.io/mDevUUID" + DeckhouseGPUPCIAddressAttributeKey = "gpu.deckhouse.io/pciAddress" + DeckhouseGPUDeviceTypeAttributeKey = "gpu.deckhouse.io/deviceType" + DeckhouseGPUSharingStrategyAttributeKey = "gpu.deckhouse.io/sharingStrategy" + DeckhouseGPUDeviceTypePhysical = "physical" + DeckhouseGPUDeviceTypeMIG = "mig" // USBAddressAttributeKey = "usbAddress" // No Kubernetes resource.kubernetes.io/ prefix is used because this is a // driver-specific attribute. @@ -568,8 +573,8 @@ func (c *DRAStatusController) getGPUStatus(gpuInfo DeviceInfo, pod *k8sv1.Pod) ( if err != nil { return gpuStatus, err } - if info.pciAddress == "" && info.mdevUUID == "" { - return gpuStatus, fmt.Errorf("failed to get pciAddress or mdevUUID for gpu %s", gpuInfo.VMISpecClaimName) + if err := validateGPUDeviceInfo(info, gpuInfo.VMISpecClaimName); err != nil { + return gpuStatus, err } attrs := v1.DeviceAttribute{} if info.pciAddress != "" { @@ -636,10 +641,40 @@ type deviceInfo struct { pciAddress string mdevUUID string usbAddress *v1.USBAddress + deviceType string + sharingStrategy string allowMultipleAllocations bool bindsToNode bool } +func validateGPUDeviceInfo(info deviceInfo, claimName string) error { + if info.pciAddress == "" && info.mdevUUID == "" { + return fmt.Errorf("failed to get pciAddress or mdevUUID for gpu %s", claimName) + } + if info.allowMultipleAllocations { + return fmt.Errorf("gpu %s allows multiple allocations and cannot be used for VM passthrough", claimName) + } + if info.sharingStrategy != "" { + return fmt.Errorf("gpu %s uses sharing strategy %q and cannot be used for VM passthrough", claimName, info.sharingStrategy) + } + if info.deviceType == DeckhouseGPUDeviceTypeMIG && info.mdevUUID == "" { + return fmt.Errorf("gpu %s has MIG device type without mdevUUID", claimName) + } + if info.pciAddress != "" && info.deviceType != "" && info.deviceType != DeckhouseGPUDeviceTypePhysical { + return fmt.Errorf("gpu %s has device type %q and cannot be used for PCI passthrough", claimName, info.deviceType) + } + return nil +} + +func normalizePCIAddress(address string) string { + parts := strings.Split(address, ":") + if len(parts) == 3 && len(parts[0]) == 8 { + parts[0] = parts[0][4:] + return strings.Join(parts, ":") + } + return address +} + // getDeviceInfo returns the pciAddress, mdevUUID, usbAddress of the device. It will return all if found, otherwise it will return empty strings or nil. func (c *DRAStatusController) getDeviceInfo(nodeName string, deviceName, driverName string) (deviceInfo, error) { resourceSlices, err := c.getResourceSlices(nodeName) @@ -654,11 +689,21 @@ func (c *DRAStatusController) getDeviceInfo(nodeName string, deviceName, driverN info := deviceInfo{} for key, value := range device.Attributes { - if string(key) == PCIAddressDeviceAttributeKey && value.StringValue != nil { - info.pciAddress = *value.StringValue - } else if string(key) == MDevUUIDDeviceAttributeKey && value.StringValue != nil { + if value.StringValue == nil { + continue + } + switch string(key) { + case PCIAddressDeviceAttributeKey: + info.pciAddress = normalizePCIAddress(*value.StringValue) + case DeckhouseGPUPCIAddressAttributeKey: + info.pciAddress = normalizePCIAddress(*value.StringValue) + case MDevUUIDDeviceAttributeKey: info.mdevUUID = *value.StringValue - } else if string(key) == USBAddressAttributeKey && value.StringValue != nil { + case DeckhouseGPUDeviceTypeAttributeKey: + info.deviceType = *value.StringValue + case DeckhouseGPUSharingStrategyAttributeKey: + info.sharingStrategy = *value.StringValue + case USBAddressAttributeKey: info.usbAddress, err = resolveUSBAddress(*value.StringValue) if err != nil { return deviceInfo{}, err diff --git a/pkg/virt-controller/watch/dra/dra_test.go b/pkg/virt-controller/watch/dra/dra_test.go index c95d1566c5ab..eceb1dc4db9b 100644 --- a/pkg/virt-controller/watch/dra/dra_test.go +++ b/pkg/virt-controller/watch/dra/dra_test.go @@ -209,6 +209,75 @@ var _ = Describe("DRA Status Controller", func() { }) }) + Context("getGPUStatus", func() { + var gpuInfo DeviceInfo + + BeforeEach(func() { + gpuInfo = DeviceInfo{ + VMISpecClaimName: "claim1", + VMISpecRequestName: "request1", + DeviceStatusInfo: &v1.DeviceStatusInfo{ + Name: "gpu1", + }, + } + pod.Spec.NodeName = "testnode" + pod.Spec.ResourceClaims = []k8sv1.PodResourceClaim{{ + Name: "claim1", + ResourceClaimName: ptr.To("claim1"), + }} + pod.Status.ResourceClaimStatuses = []k8sv1.PodResourceClaimStatus{{ + Name: "claim1", + ResourceClaimName: ptr.To("claim1"), + }} + }) + + It("should use Deckhouse GPU PCI address attribute and normalize it", func() { + resourceSlice := getTestResourceSliceWithAttributes("resourceslice1", "testnode", "device1", "driver1", map[resourcev1.QualifiedName]resourcev1.DeviceAttribute{ + DeckhouseGPUPCIAddressAttributeKey: {StringValue: ptr.To("00000000:01:00.0")}, + DeckhouseGPUDeviceTypeAttributeKey: {StringValue: ptr.To(DeckhouseGPUDeviceTypePhysical)}, + }, false) + draController := testDRAStatusController(kubeClient, nil, pod, + getTestResourceClaim("claim1", "default", "request1", "device1", "driver1"), + resourceSlice) + + status, err := draController.getGPUStatus(gpuInfo, pod) + + Expect(err).ToNot(HaveOccurred()) + Expect(status.DeviceResourceClaimStatus.Attributes.PCIAddress).To(Equal(ptr.To("0000:01:00.0"))) + }) + + DescribeTable("should reject GPU resources that are not safe for passthrough", + func(attributes map[resourcev1.QualifiedName]resourcev1.DeviceAttribute, allowMultipleAllocations bool, expectedError string) { + resourceSlice := getTestResourceSliceWithAttributes("resourceslice1", "testnode", "device1", "driver1", attributes, allowMultipleAllocations) + draController := testDRAStatusController(kubeClient, nil, pod, + getTestResourceClaim("claim1", "default", "request1", "device1", "driver1"), + resourceSlice) + + _, err := draController.getGPUStatus(gpuInfo, pod) + + Expect(err).To(MatchError(ContainSubstring(expectedError))) + }, + Entry("when sharing strategy is mps", map[resourcev1.QualifiedName]resourcev1.DeviceAttribute{ + DeckhouseGPUPCIAddressAttributeKey: {StringValue: ptr.To("0000:01:00.0")}, + DeckhouseGPUDeviceTypeAttributeKey: {StringValue: ptr.To(DeckhouseGPUDeviceTypePhysical)}, + DeckhouseGPUSharingStrategyAttributeKey: {StringValue: ptr.To("mps")}, + }, false, "uses sharing strategy"), + Entry("when sharing strategy is ts", map[resourcev1.QualifiedName]resourcev1.DeviceAttribute{ + DeckhouseGPUPCIAddressAttributeKey: {StringValue: ptr.To("0000:01:00.0")}, + DeckhouseGPUDeviceTypeAttributeKey: {StringValue: ptr.To(DeckhouseGPUDeviceTypePhysical)}, + DeckhouseGPUSharingStrategyAttributeKey: {StringValue: ptr.To("ts")}, + }, false, "uses sharing strategy"), + Entry("when multiple allocations are allowed", map[resourcev1.QualifiedName]resourcev1.DeviceAttribute{ + DeckhouseGPUPCIAddressAttributeKey: {StringValue: ptr.To("0000:01:00.0")}, + DeckhouseGPUDeviceTypeAttributeKey: {StringValue: ptr.To(DeckhouseGPUDeviceTypePhysical)}, + }, true, "allows multiple allocations"), + Entry("when MIG has no mdevUUID", map[resourcev1.QualifiedName]resourcev1.DeviceAttribute{ + DeckhouseGPUPCIAddressAttributeKey: {StringValue: ptr.To("0000:01:00.0")}, + DeckhouseGPUDeviceTypeAttributeKey: {StringValue: ptr.To(DeckhouseGPUDeviceTypeMIG)}, + }, false, "MIG device type without mdevUUID"), + ) + }) + Context("isAllDRAGPUsReconciled", func() { var vmi *v1.VirtualMachineInstance @@ -476,6 +545,17 @@ func getTestResourceSlice(name, nodeName, deviceName, driverName string) *resour pciAddress := "0000:00:01.0" mdevUUID := "mdev-uuid-123" + return getTestResourceSliceWithAttributes(name, nodeName, deviceName, driverName, map[resourcev1.QualifiedName]resourcev1.DeviceAttribute{ + PCIAddressDeviceAttributeKey: { + StringValue: ptr.To(pciAddress), + }, + MDevUUIDDeviceAttributeKey: { + StringValue: ptr.To(mdevUUID), + }, + }, false) +} + +func getTestResourceSliceWithAttributes(name, nodeName, deviceName, driverName string, attributes map[resourcev1.QualifiedName]resourcev1.DeviceAttribute, allowMultipleAllocations bool) *resourcev1.ResourceSlice { return &resourcev1.ResourceSlice{ ObjectMeta: metav1.ObjectMeta{ Name: name, @@ -490,15 +570,9 @@ func getTestResourceSlice(name, nodeName, deviceName, driverName string) *resour }, Devices: []resourcev1.Device{ { - Name: deviceName, - Attributes: map[resourcev1.QualifiedName]resourcev1.DeviceAttribute{ - PCIAddressDeviceAttributeKey: { - StringValue: ptr.To(pciAddress), - }, - MDevUUIDDeviceAttributeKey: { - StringValue: ptr.To(mdevUUID), - }, - }, + Name: deviceName, + Attributes: attributes, + AllowMultipleAllocations: ptr.To(allowMultipleAllocations), }, }, }, From 57d5cf8d3326430a5f5958dfecad89e40b3ac310 Mon Sep 17 00:00:00 2001 From: Daniil Antoshin Date: Mon, 22 Jun 2026 19:25:22 +0200 Subject: [PATCH 2/2] chore(lint): migrate golangci config to v2 --- hack/linter/.golangci.yml | 187 +++++++++++++++++++------------------- 1 file changed, 92 insertions(+), 95 deletions(-) diff --git a/hack/linter/.golangci.yml b/hack/linter/.golangci.yml index ff6e742c18e5..5829187a2d97 100644 --- a/hack/linter/.golangci.yml +++ b/hack/linter/.golangci.yml @@ -1,70 +1,6 @@ -# golangci configuration - -linters-settings: - dupl: - threshold: 100 - funlen: - lines: 100 - statements: 50 - goconst: - min-len: 2 - min-occurrences: 2 - gocritic: - enabled-tags: - - diagnostic - - experimental - - opinionated - - performance - - style - disabled-checks: - - dupImport # https://github.com/go-critic/go-critic/issues/845 - - ifElseChain - - octalLiteral - - paramTypeCombine - - whyNoLint - - wrapperFunc - settings: - hugeParam: - sizeThreshold: 1024 - rangeValCopy: - sizeThreshold: 1024 - gocyclo: - min-complexity: 15 - goimports: - local-prefixes: kubevirt.io/kubevirt - mnd: - # don't include the "operation" and "assign" - checks: - - argument - - case - - condition - - return - ignored-functions: - - '^Eventually$' - - '^EventuallyWithOffset$' - - '^ExpectWithOffset$' - - '^console\.ExpectBatch$' - - '^console\.RunCommand$' - govet: - enable: - - shadow - lll: - line-length: 140 - misspell: - locale: US - nolintlint: - allow-unused: false # report any unused nolint directives - require-explanation: false # don't require an explanation for nolint directives - require-specific: false # don't require nolint directives to be specific about which linter is being skipped - stylecheck: - dot-import-whitelist: - - "github.com/onsi/ginkgo/v2" - - "github.com/onsi/gomega" - gofumpt: - extra-rules: true - +version: "2" linters: - disable-all: true + default: none enable: - bodyclose - copyloopvar @@ -77,47 +13,108 @@ linters: - goconst - gocritic - gocyclo - - gofmt - - gofumpt - goheader - - goimports - - mnd - goprintffuncname - gosec - - gosimple - govet - ineffassign - lll - misspell + - mnd - nakedret - noctx - nolintlint - rowserrcheck - staticcheck - - stylecheck - - typecheck - unconvert - unused - whitespace - - # don't enable: - # - asciicheck - # - scopelint - # - gochecknoglobals - # - gocognit - # - godot - # - godox - # - goerr113 - # - interfacer - # - maligned - # - nestif - # - prealloc - # - testpackage - # - revive - # - wsl - -issues: - exclude-rules: - - text: 'SA1019: checks.SkipTestIfNoCPUManager' - linters: - - staticcheck + settings: + dupl: + threshold: 100 + funlen: + lines: 100 + statements: 50 + goconst: + min-len: 2 + min-occurrences: 2 + gocritic: + disabled-checks: + - dupImport + - ifElseChain + - octalLiteral + - paramTypeCombine + - whyNoLint + - wrapperFunc + enabled-tags: + - diagnostic + - experimental + - opinionated + - performance + - style + settings: + hugeParam: + sizeThreshold: 1024 + rangeValCopy: + sizeThreshold: 1024 + gocyclo: + min-complexity: 15 + govet: + enable: + - shadow + lll: + line-length: 140 + misspell: + locale: US + mnd: + checks: + - argument + - case + - condition + - return + ignored-functions: + - ^Eventually$ + - ^EventuallyWithOffset$ + - ^ExpectWithOffset$ + - ^console\.ExpectBatch$ + - ^console\.RunCommand$ + nolintlint: + require-explanation: false + require-specific: false + allow-unused: false + staticcheck: + dot-import-whitelist: + - github.com/onsi/ginkgo/v2 + - github.com/onsi/gomega + exclusions: + generated: lax + presets: + - comments + - common-false-positives + - legacy + - std-error-handling + rules: + - linters: + - staticcheck + text: 'SA1019: checks.SkipTestIfNoCPUManager' + paths: + - third_party$ + - builtin$ + - examples$ +formatters: + enable: + - gofmt + - gofumpt + - goimports + settings: + gofumpt: + extra-rules: true + goimports: + local-prefixes: + - kubevirt.io/kubevirt + exclusions: + generated: lax + paths: + - third_party$ + - builtin$ + - examples$