diff --git a/jobs/competitive-test.yml b/jobs/competitive-test.yml index 4f2c6a08f2..1df75bac38 100644 --- a/jobs/competitive-test.yml +++ b/jobs/competitive-test.yml @@ -58,6 +58,11 @@ jobs: timeoutInMinutes: ${{ parameters.timeout_in_minutes }} condition: or(eq(variables['Build.Reason'], 'Manual'), and(eq(variables['Build.Reason'], 'Schedule'), eq(variables['Build.SourceBranchName'], 'main'))) steps: + - script: | + sed -i "/name[[:space:]]*=[[:space:]]*\"os-sku\"/,+2s/value[[:space:]]*=[[:space:]]*\"[^\"]*/value = \"$OS_SKU/g" $(Pipeline.Workspace)/s/scenarios/perf-eval/k8s-node-stress/terraform-inputs/azure.tfvars + sed -i "/name[[:space:]]*=[[:space:]]*\"userpool1\"/,+2s/vm_size[[:space:]]*=[[:space:]]*\"[^\"]*/vm_size = \"$VM_SIZE/g" $(Pipeline.Workspace)/s/scenarios/perf-eval/k8s-node-stress/terraform-inputs/azure.tfvars + cat $(pwd)/scenarios/perf-eval/k8s-node-stress/terraform-inputs/azure.tfvars + displayName: "Set TF Overrides" - template: /steps/setup-tests.yml parameters: cloud: ${{ parameters.cloud }} diff --git a/modules/python/clusterloader2/cri/config/config.yaml b/modules/python/clusterloader2/cri/config/config.yaml index b365ecfd32..cf3c65d416 100644 --- a/modules/python/clusterloader2/cri/config/config.yaml +++ b/modules/python/clusterloader2/cri/config/config.yaml @@ -3,12 +3,14 @@ name: resource-consumer {{$deploymentSize := DefaultParam .CL2_DEPLOYMENT_SIZE 10}} {{$memory := DefaultParam .CL2_RESOURCE_CONSUME_MEMORY "100"}} {{$memoryKi := DefaultParam .CL2_RESOURCE_CONSUME_MEMORY_KI "100"}} +{{$testMemoryKi := DefaultParam .CL2_TEST_RESOURCE_CONSUME_MEMORY_KI $memoryKi}} {{$cpu := DefaultParam .CL2_RESOURCE_CONSUME_CPU 100}} {{$repeats := DefaultParam .CL2_REPEATS 1}} {{$steps := DefaultParam .CL2_STEPS 1}} {{$nodePerStep := DefaultParam .CL2_NODE_PER_STEP 1}} {{$totalNodes := MultiplyInt $nodePerStep $steps}} +{{$nodeToMeasure := DefaultParam .CL2_NODE_TO_MEASURE ""}} {{$replicas := MultiplyInt $deploymentSize $totalNodes}} {{$scaleReplicas 
:= MultiplyInt $deploymentSize $nodePerStep}} {{$scaleEnabled := DefaultParam .CL2_SCALE_ENABLED false}} @@ -39,26 +41,31 @@ tuningSets: steps: - name: Start measurements measurements: + - Identifier: TestMetrics + Method: TestMetrics + Params: + action: start + systemPodMetricsEnabled: true + clusterOOMsTrackerEnabled: true + clusterOOMsIgnoredProcesses: "" + enableRestartCountCheck: true + labelSelector: group = resource-consumer - Identifier: PodStartupLatency Method: PodStartupLatency Params: action: start labelSelector: group = resource-consumer - threshold: {{$podStartupLatencyThreshold}} + threshold: "60s" - Identifier: ResourceUsageSummary Method: ResourceUsageSummary Params: action: start labelSelector: group = resource-consumer - - Identifier: WaitForRunningLatencyDeployments - Method: WaitForControlledPodsRunning + - Identifier: SchedulingThroughput + Method: SchedulingThroughput Params: action: start - checkIfPodsAreUpdated: true - apiVersion: apps/v1 - kind: Deployment labelSelector: group = resource-consumer - operationTimeout: {{$operationTimeout}} {{range $i := Loop $repeats}} @@ -76,6 +83,12 @@ steps: action: start {{end}} + - module: + path: /node-measurements.yaml + params: + action: start + node: {{$nodeToMeasure}} + {{range $j := Loop $steps}} - name: Create deployment {{$j}} phases: @@ -101,7 +114,7 @@ steps: {{if eq $osType "windows"}} Memory: {{$memory}} {{else}} - Memory: {{$memoryKi}}K + Memory: {{$testMemoryKi}}K {{end}} CPU: --millicores={{$cpu}} MemoryRequest: {{$memoryKi}} @@ -112,36 +125,56 @@ steps: OSType: {{$osType}} HostNetwork: {{$hostNetwork}} - - name: Waiting for latency pods to be running - measurements: - - Identifier: WaitForRunningLatencyDeployments - Method: WaitForControlledPodsRunning - Params: - action: gather - - name: Wait for resource consumption measurements: - Identifier: Sleep Method: Sleep Params: duration: 1m + {{end}} - - name: Wait for nodes to be ready - measurements: - - Identifier: ConfirmNodeCount - 
Method: WaitForNodes - Params: - action: start - {{if $scaleEnabled}} - minDesiredNodeCount: {{MultiplyInt (AddInt (MultiplyInt $nodePerStep (AddInt $j 1)) 1) 0.8}} - maxDesiredNodeCount: {{AddInt $totalNodes 1}} + {{range $j := Loop $steps}} + - name: Create deployment {{$j}} + phases: + - namespaceRange: + min: 1 + max: 1 + replicasPerNamespace: 1 + tuningSet: Uniform1qps + objectBundle: + - basename: resource-consumer-{{$j}} + objectTemplatePath: deployment_template.yaml + templateFillMap: + {{if $scaleEnabled}} + {{if eq $j 0}} + Replicas: {{AddInt $scaleReplicas $deploymentSize}} {{else}} - minDesiredNodeCount: {{MultiplyInt $totalNodes 0.8}} - maxDesiredNodeCount: {{$totalNodes}} + Replicas: {{$scaleReplicas}} {{end}} - labelSelector: cri-resource-consume = true - timeout: 1m - refreshInterval: 5s + {{else}} + Replicas: {{$replicas}} + {{end}} + Group: resource-consumer + {{if eq $osType "windows"}} + Memory: {{$memory}} + {{else}} + Memory: {{$testMemoryKi}}K + {{end}} + CPU: --millicores={{$cpu}} + MemoryRequest: {{$memoryKi}} + CPURequest: {{$cpu}}m + LoadType: {{$loadType}} + Provider: {{$provider}} + RegistryEndpoint: {{$registryEndpoint}} + OSType: {{$osType}} + HostNetwork: {{$hostNetwork}} + + - name: Wait for resource consumption + measurements: + - Identifier: Sleep + Method: Sleep + Params: + duration: 30s {{end}} {{if $scrapeKubelets}} @@ -158,6 +191,12 @@ steps: action: gather {{end}} + - module: + path: /node-measurements.yaml + params: + action: gather + node: {{$nodeToMeasure}} + {{range $j := Loop $steps}} - name: Deleting deployments {{$j}} phases: @@ -169,18 +208,18 @@ steps: objectBundle: - basename: resource-consumer-{{$j}} objectTemplatePath: deployment_template.yaml - - - name: Waiting for latency pods to be deleted - measurements: - - Identifier: WaitForRunningLatencyDeployments - Method: WaitForControlledPodsRunning - Params: - action: gather {{end}} {{end}} - name: Collect measurements measurements: + - Identifier: TestMetrics + 
Method: TestMetrics + Params: + action: gather + systemPodMetricsEnabled: true + clusterOOMsTrackerEnabled: true + enableRestartCountCheck: true - Identifier: ResourceUsageSummary Method: ResourceUsageSummary Params: @@ -189,3 +228,7 @@ steps: Method: PodStartupLatency Params: action: gather + - Identifier: SchedulingThroughput + Method: SchedulingThroughput + Params: + action: gather diff --git a/modules/python/clusterloader2/cri/config/deployment_template.yaml b/modules/python/clusterloader2/cri/config/deployment_template.yaml index cdb7efe99a..7e4454a5cd 100644 --- a/modules/python/clusterloader2/cri/config/deployment_template.yaml +++ b/modules/python/clusterloader2/cri/config/deployment_template.yaml @@ -55,11 +55,11 @@ spec: - stress args: - --vm - - "1" + - "3" - --vm-bytes - {{$Memory}} - --vm-hang - - "0" + - "30" - --timeout - "3600" {{end}} diff --git a/modules/python/clusterloader2/cri/config/node-measurements.yaml b/modules/python/clusterloader2/cri/config/node-measurements.yaml new file mode 100644 index 0000000000..039a6e1b4e --- /dev/null +++ b/modules/python/clusterloader2/cri/config/node-measurements.yaml @@ -0,0 +1,45 @@ +{{$action := .action}} # start, gather +{{$node := .node}} + +steps: + - name: {{$action}} Node Resource Measurements + measurements: + - Identifier: ResourceMetrics + Method: GenericPrometheusQuery + Params: + action: {{$action}} + metricName: NodeResourceMetrics + metricVersion: v1 + unit: mixed + queries: + # Node Level Summary + - name: NodeInfo + query: kube_node_info{node = "{{$node}}"} + - name: NodeMemoryAllocatableGiB + query: sum(kube_node_status_allocatable{resource="memory", node = "{{$node}}"}) / 1073741824 + - name: NodeMemoryCapacityGiB + query: sum(kube_node_status_capacity{resource="memory", node = "{{$node}}"}) / 1073741824 + - name: NodesReady + query: changes(kube_node_status_condition{condition="Ready",status="true", node = "{{$node}}"}[5m]) + - name: NodePressureStatus + query: 
changes(kube_node_status_condition{condition="MemoryPressure", status="true", node = "{{$node}}"}[5m]) + # Node Memory Usage Stats (from kubelet/cAdvisor - container metrics aggregated by node) + - name: NodeMemoryUsageTotalGiB + query: sum(container_memory_working_set_bytes{instance="{{$node}}"}) / 1073741824 + - name: NodeMemoryRequestsTotalGiB + query: sum(kube_pod_container_resource_requests{resource="memory", node = "{{$node}}"}) / 1073741824 + - name: NodeMemoryCommitmentGiB + query: (sum(kube_pod_container_resource_limits{resource="memory", node = "{{$node}}"}) - sum(kube_node_status_allocatable{resource="memory", node = "{{$node}}"})) / 1073741824 + - name: NodeMemoryRequestToAllocatableRatio + query: sum(kube_pod_container_resource_requests{resource="memory", node = "{{$node}}"}) / sum(kube_node_status_allocatable{resource="memory", node = "{{$node}}"}) + - name: NodeMemoryUsageToAllocatableRatio + query: sum(container_memory_working_set_bytes{instance="{{$node}}"}) / sum(kube_node_status_allocatable{resource="memory", node = "{{$node}}"}) + - name: NodeMemoryUsageToLimitsRatio + query: sum(container_memory_working_set_bytes{instance="{{$node}}"}) / sum(kube_pod_container_resource_limits{resource="memory", node = "{{$node}}"}) + # Container Level Summary + - name: ContainerRuntimes + query: count(container_runtime_version{node = "{{$node}}"}) + - name: ContainerMemoryFailures + query: increase(container_memory_failcnt{node = "{{$node}}"}[5m]) + - name: ContainersNearMemoryLimit + query: count((container_memory_working_set_bytes{instance="{{$node}}"} / container_spec_memory_limit_bytes{instance="{{$node}}"} > 0.8)) \ No newline at end of file diff --git a/modules/python/clusterloader2/cri/cri.py b/modules/python/clusterloader2/cri/cri.py index 889d04c95d..4231a350d6 100644 --- a/modules/python/clusterloader2/cri/cri.py +++ b/modules/python/clusterloader2/cri/cri.py @@ -12,14 +12,13 @@ setup_logging() logger = get_logger(__name__) -MEMORY_SCALE_FACTOR = 0.95 
# 95% of the total allocatable memory to account for error margin - # TODO: Refactor to use a config dataclass to reduce number of arguments # Reference: modules/python/clusterloader2/job_controller/job_controller.py def override_config_clusterloader2( - node_count, node_per_step, max_pods, repeats, operation_timeout, + node_count, node_to_measure, node_per_step, max_pods, repeats, operation_timeout, load_type, scale_enabled, pod_startup_latency_threshold, provider, - registry_endpoint, os_type, scrape_kubelets, scrape_containerd, containerd_scrape_interval, host_network, override_file): + registry_endpoint, os_type, scrape_kubelets, scrape_containerd, containerd_scrape_interval, host_network, override_file, use_custom_kubelet = False): + MEMORY_SCALE_FACTOR = 1.0 client = KubernetesClient(os.path.expanduser("~/.kube/config")) nodes = client.get_nodes(label_selector="cri-resource-consume=true") if len(nodes) == 0: @@ -51,7 +50,7 @@ def override_config_clusterloader2( # Calculate request cpu and memory for each pod daemonset_count = client.get_daemonsets_pods_count("kube-system", node.metadata.name) logger.info(f"Node {node.metadata.name} has {daemonset_count} daemonset pods") - pod_count = max_pods - daemonset_count + pod_count = max_pods cpu_request = cpu_value // pod_count memory_request_in_ki = math.ceil(memory_value * MEMORY_SCALE_FACTOR // pod_count) memory_request_in_k = int(memory_request_in_ki // 1.024) @@ -75,11 +74,13 @@ def override_config_clusterloader2( file.write(f"CL2_DEPLOYMENT_SIZE: {pod_count}\n") file.write(f"CL2_RESOURCE_CONSUME_MEMORY: {memory_request}\n") file.write(f"CL2_RESOURCE_CONSUME_MEMORY_KI: {memory_request_in_ki}Ki\n") + file.write(f"CL2_TEST_RESOURCE_CONSUME_MEMORY_KI: {5*memory_request_in_ki}Ki\n") file.write(f"CL2_RESOURCE_CONSUME_CPU: {cpu_request}\n") file.write(f"CL2_REPEATS: {repeats}\n") file.write(f"CL2_NODE_COUNT: {node_count}\n") file.write(f"CL2_NODE_PER_STEP: {node_per_step}\n") file.write(f"CL2_STEPS: {steps}\n") + 
file.write(f"CL2_NODE_TO_MEASURE: {node_to_measure}\n") file.write(f"CL2_OPERATION_TIMEOUT: {operation_timeout}\n") file.write(f"CL2_LOAD_TYPE: {load_type}\n") file.write(f"CL2_SCALE_ENABLED: {str(scale_enabled).lower()}\n") @@ -139,6 +140,17 @@ def verify_measurement(): except k8s_client.ApiException as e: logger.error(f"Error fetching metrics: {e}") +def parse_node_reader_report(file_path, template): + with open(file_path, 'r', encoding='utf-8') as file: + logger.info(f"Processing node_reader metrics report") + + template["group"] = "self" + template["measurement"] = "NodeReader" + + node_reader_data = json.loads(file.read()) + template["data"] = node_reader_data + return template + def collect_clusterloader2( node_count, max_pods, @@ -190,6 +202,9 @@ def collect_clusterloader2( for f in os.listdir(cl2_report_dir): file_path = os.path.join(cl2_report_dir, f) with open(file_path, 'r', encoding='utf-8') as file: + if file_path.endswith("node_reader.json"): + content += json.dumps(parse_node_reader_report(file_path, template)) + "\n" + continue measurement, group_name = get_measurement(file_path) if not measurement: continue @@ -216,6 +231,12 @@ def collect_clusterloader2( template["percentile"] = "dataItems" template["data"] = item content += json.dumps(template) + "\n" + else: + result = template.copy() + result["group"] = group_name + result["measurement"] = measurement + result["data"] = data + content += json.dumps(result) + "\n" os.makedirs(os.path.dirname(result_file), exist_ok=True) with open(result_file, 'w', encoding='utf-8') as file: @@ -228,6 +249,9 @@ def main(): # Sub-command for override_config_clusterloader2 parser_override = subparsers.add_parser("override", help="Override CL2 config file") parser_override.add_argument("--node_count", type=int, help="Number of nodes") + parser_override.add_argument( + "--node_to_measure", type=str, default="", help="Name of the node to gather detailed measurements from" + ) parser_override.add_argument( 
"--node_per_step", type=int, help="Number of nodes to scale per step" ) @@ -382,6 +406,7 @@ def main(): if args.command == "override": override_config_clusterloader2( args.node_count, + args.node_to_measure, args.node_per_step, args.max_pods, args.repeats, diff --git a/modules/python/clusterloader2/utils.py b/modules/python/clusterloader2/utils.py index 8212b5ae7f..4df30fa2b8 100644 --- a/modules/python/clusterloader2/utils.py +++ b/modules/python/clusterloader2/utils.py @@ -17,6 +17,7 @@ "InClusterNetworkLatency", "NetworkProgrammingLatency"] PROM_QUERY_PREFIX = "GenericPrometheusQuery" RESOURCE_USAGE_SUMMARY_PREFIX = "ResourceUsageSummary" +NODE_METRIC_SUMMARY_PREFIX = "NodeResourceMetrics" NETWORK_POLICY_SOAK_MEASUREMENT_PREFIX = "NetworkPolicySoakMeasurement" JOB_LIFECYCLE_LATENCY_PREFIX = "JobLifecycleLatency" SCHEDULING_THROUGHPUT_PROMETHEUS_PREFIX = "SchedulingThroughputPrometheus" @@ -24,8 +25,8 @@ def run_cl2_command(kubeconfig, cl2_image, cl2_config_dir, cl2_report_dir, provider, cl2_config_file="config.yaml", overrides=False, enable_prometheus=False, tear_down_prometheus=True, - enable_exec_service=False, scrape_kubelets=False, - scrape_containerd=False, scrape_ksm=False, scrape_metrics_server=False): + enable_exec_service=False, scrape_kubelets=True, + scrape_containerd=True, scrape_ksm=True, scrape_metrics_server=True): docker_client = DockerClient() command = f"""--provider={provider} --v=2 @@ -108,6 +109,9 @@ def get_measurement(file_path): if file_name.startswith(SCHEDULING_THROUGHPUT_PREFIX): group_name = file_name.split("_")[1] return SCHEDULING_THROUGHPUT_PREFIX, group_name + if file_name.startswith(NODE_METRIC_SUMMARY_PREFIX): + group_name = file_name.split("_")[1] + return NODE_METRIC_SUMMARY_PREFIX, group_name return None, None def process_cl2_reports(cl2_report_dir, template): diff --git a/pipelines/perf-eval/CRI Benchmark/k8s-resource-pressure.yml b/pipelines/perf-eval/CRI Benchmark/k8s-resource-pressure.yml index 1b601dc09f..0587471c7d 
100644 --- a/pipelines/perf-eval/CRI Benchmark/k8s-resource-pressure.yml +++ b/pipelines/perf-eval/CRI Benchmark/k8s-resource-pressure.yml @@ -1,64 +1,306 @@ trigger: none +parameters: + - name: max_pods + displayName: "Max Pods per Node" + type: number + default: 60 + - name: custom_kubelet_config + displayName: "Custom Kubelet Config (JSON String)" + type: string # The parameter type is string + default: "" + - name: custom_metadata + displayName: "Custom Metadata for Test Run" + type: string + default: "" + variables: SCENARIO_TYPE: perf-eval SCENARIO_NAME: k8s-node-stress + CUSTOM_RUN_ID: "${{ parameters.custom_metadata }}" stages: - - stage: azure_westus2_stress_baseline_1_34 + - stage: swedencentral_stress dependsOn: [] jobs: - - template: /jobs/competitive-test.yml - parameters: - cloud: azure - regions: - - westus2 - terraform_input_file_mapping: - - westus2: "scenarios/perf-eval/k8s-node-stress/terraform-inputs/azure-ubuntu2204.tfvars" - topology: k8s-resource-pressure - engine: clusterloader2 - engine_input: - image: "ghcr.io/azure/clusterloader2:v20250513" - matrix: - n1-p60-memory-ephemeral: - node_count: 1 - max_pods: 60 - repeats: 1 - operation_timeout: 3m - load_type: memory - pod_startup_latency_threshold: 23s - kubernetes_version: "1.34" - scrape_kubelets: True - n1-p60-memory-managed: - node_count: 1 - max_pods: 60 - repeats: 1 - operation_timeout: 3m - load_type: memory - pod_startup_latency_threshold: 23s - kubernetes_version: "1.34" - k8s_os_disk_type: Managed - scrape_kubelets: True - n1-p60-cpu-ephemeral: - node_count: 1 - max_pods: 60 - repeats: 1 - operation_timeout: 3m - load_type: cpu - pod_startup_latency_threshold: 23s - kubernetes_version: "1.34" - scrape_kubelets: True - n1-p60-cpu-managed: - node_count: 1 - max_pods: 60 - repeats: 1 - operation_timeout: 3m - load_type: cpu - pod_startup_latency_threshold: 23s - kubernetes_version: "1.34" - k8s_os_disk_type: Managed - scrape_kubelets: True - max_parallel: 1 - timeout_in_minutes: 120 - 
credential_type: service_connection - ssh_key_enabled: false + - template: /jobs/competitive-test.yml + parameters: + cloud: azure + regions: + - swedencentral + terraform_input_file_mapping: + - swedencentral: "scenarios/perf-eval/k8s-node-stress/terraform-inputs/azure.tfvars" + topology: k8s-resource-pressure + engine: clusterloader2 + engine_input: + image: "ghcr.io/azure/clusterloader2:v20250513" + run_id: ${{ variables.CUSTOM_RUN_ID }} + matrix: + base-100-u2204-d2v3: + matrix_key: base-100-u2204-d2v3 + node_count: 1 + max_pods: ${{ parameters.max_pods }} + repeats: 1 + operation_timeout: 3m + load_type: memory + pod_startup_latency_threshold: 35s + kubernetes_version: "1.34" + scrape_kubelets: True + scrape_containerd: True + scrape_ksm: True + scrape_metrics_server: True + use_custom_kubelet: false + os_sku: Ubuntu2204 + vm_size: Standard_D2_v3 + base-100-u2204-d4v3: + matrix_key: base-100-u2204-d4v3 + node_count: 1 + max_pods: ${{ parameters.max_pods }} + repeats: 1 + operation_timeout: 3m + load_type: memory + pod_startup_latency_threshold: 35s + kubernetes_version: "1.34" + scrape_kubelets: True + scrape_containerd: True + scrape_ksm: True + scrape_metrics_server: True + use_custom_kubelet: false + os_sku: Ubuntu2204 + vm_size: Standard_D4_v3 + base-100-u2204-d8v3: + matrix_key: base-100-u2204-d8v3 + node_count: 1 + max_pods: ${{ parameters.max_pods }} + repeats: 1 + operation_timeout: 3m + load_type: memory + pod_startup_latency_threshold: 35s + kubernetes_version: "1.34" + scrape_kubelets: True + scrape_containerd: True + scrape_ksm: True + scrape_metrics_server: True + use_custom_kubelet: false + os_sku: Ubuntu2204 + vm_size: Standard_D8_v3 + base-100-u2204-d16v3: + matrix_key: base-100-u2204-d16v3 + node_count: 1 + max_pods: ${{ parameters.max_pods }} + repeats: 1 + operation_timeout: 3m + load_type: memory + pod_startup_latency_threshold: 35s + kubernetes_version: "1.34" + scrape_kubelets: True + scrape_containerd: True + scrape_ksm: True + 
scrape_metrics_server: True + use_custom_kubelet: false + os_sku: Ubuntu2204 + vm_size: Standard_D16_v3 + base-100-u2404-d2v3: + matrix_key: base-100-u2404-d2v3 + node_count: 1 + max_pods: ${{ parameters.max_pods }} + repeats: 1 + operation_timeout: 3m + load_type: memory + pod_startup_latency_threshold: 35s + kubernetes_version: "1.34" + scrape_kubelets: True + scrape_containerd: True + scrape_ksm: True + scrape_metrics_server: True + use_custom_kubelet: false + os_sku: Ubuntu2404 + vm_size: Standard_D2_v3 + base-100-u2404-d4v3: + matrix_key: base-100-u2404-d4v3 + node_count: 1 + max_pods: ${{ parameters.max_pods }} + repeats: 1 + operation_timeout: 3m + load_type: memory + pod_startup_latency_threshold: 35s + kubernetes_version: "1.34" + scrape_kubelets: True + scrape_containerd: True + scrape_ksm: True + scrape_metrics_server: True + use_custom_kubelet: false + os_sku: Ubuntu2404 + vm_size: Standard_D4_v3 + base-100-u2404-d8v3: + matrix_key: base-100-u2404-d8v3 + node_count: 1 + max_pods: ${{ parameters.max_pods }} + repeats: 1 + operation_timeout: 3m + load_type: memory + pod_startup_latency_threshold: 35s + kubernetes_version: "1.34" + scrape_kubelets: True + scrape_containerd: True + scrape_ksm: True + scrape_metrics_server: True + use_custom_kubelet: false + os_sku: Ubuntu2404 + vm_size: Standard_D8_v3 + base-100-u2404-d16v3: + matrix_key: base-100-u2404-d16v3 + node_count: 1 + max_pods: ${{ parameters.max_pods }} + repeats: 1 + operation_timeout: 3m + load_type: memory + pod_startup_latency_threshold: 35s + kubernetes_version: "1.34" + scrape_kubelets: True + scrape_containerd: True + scrape_ksm: True + scrape_metrics_server: True + use_custom_kubelet: false + os_sku: Ubuntu2404 + vm_size: Standard_D16_v3 + u2204-d2v3: + matrix_key: u2204-d2v3 + node_count: 1 + max_pods: ${{ parameters.max_pods }} + repeats: 1 + operation_timeout: 3m + load_type: memory + pod_startup_latency_threshold: 35s + kubernetes_version: "1.34" + scrape_kubelets: True +
scrape_containerd: True + scrape_ksm: True + scrape_metrics_server: True + use_custom_kubelet: True + custom_kubelet_config: ${{ parameters.custom_kubelet_config }} + os_sku: Ubuntu2204 + vm_size: Standard_D2_v3 + u2204-d4v3: + matrix_key: u2204-d4v3 + node_count: 1 + max_pods: ${{ parameters.max_pods }} + repeats: 1 + operation_timeout: 3m + load_type: memory + pod_startup_latency_threshold: 35s + kubernetes_version: "1.34" + scrape_kubelets: True + scrape_containerd: True + scrape_ksm: True + scrape_metrics_server: True + use_custom_kubelet: True + custom_kubelet_config: ${{ parameters.custom_kubelet_config }} + os_sku: Ubuntu2204 + vm_size: Standard_D4_v3 + u2204-d8v3: + matrix_key: u2204-d8v3 + node_count: 1 + max_pods: ${{ parameters.max_pods }} + repeats: 1 + operation_timeout: 3m + load_type: memory + pod_startup_latency_threshold: 35s + kubernetes_version: "1.34" + scrape_kubelets: True + scrape_containerd: True + scrape_ksm: True + scrape_metrics_server: True + use_custom_kubelet: True + custom_kubelet_config: ${{ parameters.custom_kubelet_config }} + os_sku: Ubuntu2204 + vm_size: Standard_D8_v3 + u2204-d16v3: + matrix_key: u2204-d16v3 + node_count: 1 + max_pods: ${{ parameters.max_pods }} + repeats: 1 + operation_timeout: 3m + load_type: memory + pod_startup_latency_threshold: 35s + kubernetes_version: "1.34" + scrape_kubelets: True + scrape_containerd: True + scrape_ksm: True + scrape_metrics_server: True + use_custom_kubelet: True + custom_kubelet_config: ${{ parameters.custom_kubelet_config }} + os_sku: Ubuntu2204 + vm_size: Standard_D16_v3 + u2404-d2v3: + matrix_key: u2404-d2v3 + node_count: 1 + max_pods: ${{ parameters.max_pods }} + repeats: 1 + operation_timeout: 3m + load_type: memory + pod_startup_latency_threshold: 35s + kubernetes_version: "1.34" + scrape_kubelets: True + scrape_containerd: True + scrape_ksm: True + scrape_metrics_server: True + use_custom_kubelet: True + custom_kubelet_config: ${{ parameters.custom_kubelet_config }} + os_sku: 
Ubuntu2404 + vm_size: Standard_D2_v3 + u2404-d4v3: + matrix_key: u2404-d4v3 + node_count: 1 + max_pods: ${{ parameters.max_pods }} + repeats: 1 + operation_timeout: 3m + load_type: memory + pod_startup_latency_threshold: 35s + kubernetes_version: "1.34" + scrape_kubelets: True + scrape_containerd: True + scrape_ksm: True + scrape_metrics_server: True + use_custom_kubelet: True + custom_kubelet_config: ${{ parameters.custom_kubelet_config }} + os_sku: Ubuntu2404 + vm_size: Standard_D4_v3 + u2404-d8v3: + matrix_key: u2404-d8v3 + node_count: 1 + max_pods: ${{ parameters.max_pods }} + repeats: 1 + operation_timeout: 3m + load_type: memory + pod_startup_latency_threshold: 35s + kubernetes_version: "1.34" + scrape_kubelets: True + scrape_containerd: True + scrape_ksm: True + scrape_metrics_server: True + use_custom_kubelet: True + custom_kubelet_config: ${{ parameters.custom_kubelet_config }} + os_sku: Ubuntu2404 + vm_size: Standard_D8_v3 + u2404-d16v3: + matrix_key: u2404-d16v3 + node_count: 1 + max_pods: ${{ parameters.max_pods }} + repeats: 1 + operation_timeout: 3m + load_type: memory + pod_startup_latency_threshold: 35s + kubernetes_version: "1.34" + scrape_kubelets: True + scrape_containerd: True + scrape_ksm: True + scrape_metrics_server: True + use_custom_kubelet: True + custom_kubelet_config: ${{ parameters.custom_kubelet_config }} + os_sku: Ubuntu2404 + vm_size: Standard_D16_v3 + max_parallel: 4 + timeout_in_minutes: 120 + credential_type: service_connection + ssh_key_enabled: false diff --git a/scenarios/perf-eval/k8s-node-stress/terraform-inputs/azure-ubuntu2204.tfvars b/scenarios/perf-eval/k8s-node-stress/terraform-inputs/azure.tfvars similarity index 100% rename from scenarios/perf-eval/k8s-node-stress/terraform-inputs/azure-ubuntu2204.tfvars rename to scenarios/perf-eval/k8s-node-stress/terraform-inputs/azure.tfvars diff --git a/scenarios/perf-eval/k8s-node-stress/terraform-test-inputs/azure-ubuntu2204.json 
b/scenarios/perf-eval/k8s-node-stress/terraform-test-inputs/azure.json similarity index 50% rename from scenarios/perf-eval/k8s-node-stress/terraform-test-inputs/azure-ubuntu2204.json rename to scenarios/perf-eval/k8s-node-stress/terraform-test-inputs/azure.json index fe80d2d474..16128b0253 100644 --- a/scenarios/perf-eval/k8s-node-stress/terraform-test-inputs/azure-ubuntu2204.json +++ b/scenarios/perf-eval/k8s-node-stress/terraform-test-inputs/azure.json @@ -1,4 +1,4 @@ { "run_id" : "123456789", - "region" : "westus2" + "region" : "swedencentral" } diff --git a/steps/engine/clusterloader2/cri/execute.yml b/steps/engine/clusterloader2/cri/execute.yml index dd133c6bdb..61141efa4a 100644 --- a/steps/engine/clusterloader2/cri/execute.yml +++ b/steps/engine/clusterloader2/cri/execute.yml @@ -12,8 +12,17 @@ steps: - script: | set -eo pipefail + # Create and set permissions on results directory BEFORE Docker runs + mkdir -p $(Pipeline.Workspace)/s/modules/python/clusterloader2/cri/results + chmod 755 $(Pipeline.Workspace)/s/modules/python/clusterloader2/cri/results + + kubectl get nodes -o wide + + NODE_TO_MEASURE=$(kubectl get nodes | grep userpool | head -n 1 | cut -d" " -f1) + PYTHONPATH=$PYTHONPATH:$(pwd) python3 $PYTHON_SCRIPT_FILE override \ --node_count $NODE_COUNT \ + --node_to_measure "$NODE_TO_MEASURE" \ --node_per_step ${NODE_PER_STEP:-$NODE_COUNT} \ --max_pods $MAX_PODS \ --repeats $REPEATS \ diff --git a/steps/setup-tests.yml b/steps/setup-tests.yml index e938ef2eff..a19f9798f1 100644 --- a/steps/setup-tests.yml +++ b/steps/setup-tests.yml @@ -20,7 +20,12 @@ parameters: steps: - script: | if [ -n "$RUN_ID" ]; then - run_id=$RUN_ID + scope=$(echo "$MATRIX_KEY" | cut -d '-' -f1) + if [ "$scope" = "base" ]; then + run_id=$MATRIX_KEY-$(Build.BuildId) + else + run_id=$RUN_ID-$MATRIX_KEY-$(Build.BuildId) + fi else run_id=$(Build.BuildId)-$(System.JobId) fi diff --git a/steps/topology/k8s-resource-pressure/chart/Chart.yaml b/steps/topology/k8s-resource-pressure/chart/Chart.yaml
new file mode 100644 index 0000000000..b6fed55a84 --- /dev/null +++ b/steps/topology/k8s-resource-pressure/chart/Chart.yaml @@ -0,0 +1,6 @@ +apiVersion: v2 +name: kubelet-config-updater +description: Deploy a DaemonSet to update kubelet flags on nodes +type: application +version: 0.1.0 +appVersion: "1.0" diff --git a/steps/topology/k8s-resource-pressure/chart/templates/daemonset.yaml b/steps/topology/k8s-resource-pressure/chart/templates/daemonset.yaml new file mode 100644 index 0000000000..290cd2781d --- /dev/null +++ b/steps/topology/k8s-resource-pressure/chart/templates/daemonset.yaml @@ -0,0 +1,76 @@ +apiVersion: apps/v1 +kind: DaemonSet +metadata: + name: kubelet-config-updater + namespace: kube-system + labels: + app: kubelet-config-updater +spec: + selector: + matchLabels: + app: kubelet-config-updater + template: + metadata: + labels: + app: kubelet-config-updater + spec: + hostPID: true + nodeSelector: + cri-resource-consume: "true" + tolerations: + - key: cri-resource-consume + operator: "Equal" + value: "true" + effect: "NoSchedule" + - key: cri-resource-consume + operator: "Equal" + value: "true" + effect: "NoExecute" + containers: + - name: kubelet-config-updater + image: ghcr.io/containerd/busybox:1.36 + securityContext: + privileged: true + command: + - /bin/sh + - -c + - | + echo "Updating kubelet configuration..." 
+ export CUSTOM_EVICTION_FLAGS="{{ .Values.kubeletFlags }}" + export kubelet_flags_eof_key="--node-ip" + for kvp in $CUSTOM_EVICTION_FLAGS; do + key=${kvp%%=*} + value=${kvp#*=} + esc_key=$(printf '%s' "${key}" | sed -e 's/[][\/.^$*+?()|]/\\&/g') + esc_value=$(printf '%s' "${value}" | sed -e 's/[][\/.^$*+?()|]/\\&/g') + echo "Processing key: ${esc_key} with value: ${esc_value}" + if grep -q -E "(^|[[:space:]])${esc_key}=[^[:space:]]+" /etc/default/kubelet; then + echo "Found ${esc_key} in /etc/default/kubelet" + sed -i -r -E "s/(^|[[:space:]])(${esc_key})=[^[:space:]]*/\1\2=${esc_value}/g" "/etc/default/kubelet" + else + echo "Adding ${esc_key}=${esc_value} to /etc/default/kubelet" + repl=$(printf ' %s=%s' "${esc_key}" "${esc_value}") + sed -i -r -E "s/(${kubelet_flags_eof_key}=[^[:space:]]+)/\1${repl}/g" "/etc/default/kubelet" + export kubelet_flags_eof_key="${esc_key}" + fi + done + echo "Checking Updated kubelet configuration:" + cat /etc/default/kubelet + echo "Restarting kubelet..." + nsenter --mount=/proc/1/ns/mnt -- systemctl restart kubelet || true + echo "Done. Sleeping indefinitely to keep the pod running." 
+ sleep infinity + volumeMounts: + - name: systemd + mountPath: /run/systemd + - name: kubelet-config + mountPath: /etc/default + volumes: + - name: kubelet-config + hostPath: + path: /etc/default + type: Directory + - name: systemd + hostPath: + path: /run/systemd + restartPolicy: Always
diff --git a/steps/topology/k8s-resource-pressure/chart/values.yaml b/steps/topology/k8s-resource-pressure/chart/values.yaml
new file mode 100644
index 0000000000..b85302e7d9
--- /dev/null
+++ b/steps/topology/k8s-resource-pressure/chart/values.yaml
@@ -0,0 +1,3 @@
+# Provide a single string with kubelet flags to add, example:
+# "--eviction-hard=memory.available<100Mi --system-reserved=cpu=100m,memory=200Mi"
+kubeletFlags: ""
diff --git a/steps/topology/k8s-resource-pressure/collect-clusterloader2.yml b/steps/topology/k8s-resource-pressure/collect-clusterloader2.yml
index ee0c8a1bb1..760eece48b 100644
--- a/steps/topology/k8s-resource-pressure/collect-clusterloader2.yml
+++ b/steps/topology/k8s-resource-pressure/collect-clusterloader2.yml
@@ -10,8 +10,32 @@ parameters:
   default: {}
 
 steps:
+- script: |
+    set -euo pipefail
+    echo "Copying Node internal data from DaemonSet pods..."
+    # "-o name" prints one line per pod, so with several node-reader pods the
+    # old value was multi-line and kubectl cp failed. Keep only the first pod.
+    # NOTE(review): confirm which node's data the collector expects.
+    POD=$(kubectl get pods -n kube-system -l app=node-reader -o name | awk -F/ 'NR==1 {print $2}')
+    if [ -z "$POD" ]; then
+      echo "No node-reader pod found in kube-system" >&2
+      exit 1
+    fi
+    kubectl cp "kube-system/$POD:/app/config/node_reader.json" "$(Pipeline.Workspace)/s/modules/python/clusterloader2/cri/results/node_reader.json"
+    echo "NodeReader data copied successfully"
+  displayName: 'Copy Node internal data from DaemonSet'
 - template: /steps/engine/clusterloader2/cri/collect.yml
   parameters:
     cloud: ${{ parameters.cloud }}
     engine_input: ${{ parameters.engine_input }}
     region: ${{ parameters.regions[0] }}
+- task: PublishPipelineArtifact@1
+  inputs:
+    targetPath: '$(Pipeline.Workspace)/s/modules/python/clusterloader2/cri/config'
+    artifactName: 'cri-config-$(System.JobName)'
+  displayName: "Publish Test Config"
+- task: PublishPipelineArtifact@1
+  inputs:
+    targetPath: '$(Pipeline.Workspace)/s/modules/python/clusterloader2/cri/results'
+    artifactName: 'cri-results-$(System.JobName)'
+  displayName: "Publish CRI Benchmark Results"
diff --git a/steps/topology/k8s-resource-pressure/node-reader-daemonset.yml b/steps/topology/k8s-resource-pressure/node-reader-daemonset.yml
new file mode 100644
index 0000000000..7eded386ac
--- /dev/null
+++ b/steps/topology/k8s-resource-pressure/node-reader-daemonset.yml
@@ -0,0 +1,102 @@
+apiVersion: apps/v1
+kind: DaemonSet
+metadata:
+  name: node-reader
+  namespace: kube-system
+  labels:
+    app: node-reader
+spec:
+  selector:
+    matchLabels:
+      app: node-reader
+  template:
+    metadata:
+      labels:
+        app: node-reader
+    spec:
+      hostPID: true
+      nodeSelector:
+        cri-resource-consume: "true"
+      tolerations:
+      - key: cri-resource-consume
+        operator: "Equal"
+        value: "true"
+        effect: "NoSchedule"
+      - key: cri-resource-consume
+        operator: "Equal"
+        value: "true"
+        effect: "NoExecute"
+      containers:
+      - name: node-reader
+        # Pinned tag instead of :latest so every run pulls the same image.
+        image: alpine:3.20
+        securityContext:
+          privileged: true
+        command:
+        - /bin/sh
+        - -c
+        - |
+          # Samples cgroup memory counters once per second and writes them to
+          # /app/config/node_reader.json. Exit if jq cannot be installed.
+          apk add --no-cache jq || exit 1
+
+          out=/app/config/node_reader.json
+          # The temp file sits next to the target so mv is a rename within one
+          # directory and readers do not see a partially written file.
+          tmp="$out.tmp"
+
+          jq -n '{
+            "sys_oom_kill": 0,
+            "pod_oom_kill": 0,
+            "sys_pgscan": 0,
+            "pod_pgscan": 0
+          }' > "$out"
+
+          mem_cap=$(grep MemTotal /proc/meminfo | awk '{print $2}')
+
+          while true; do
+            sys_oom_kill=$(grep -w oom_kill /sys/fs/cgroup/system.slice/memory.events | awk '{print $2}')
+            pod_oom_kill=$(grep -w oom_kill /sys/fs/cgroup/kubepods.slice/memory.events | awk '{print $2}')
+            sys_pgscan=$(grep -w pgscan /sys/fs/cgroup/system.slice/memory.stat | awk '{print $2}')
+            pod_pgscan=$(grep -w pgscan /sys/fs/cgroup/kubepods.slice/memory.stat | awk '{print $2}')
+
+            # A counter that could not be read falls back to 0 so one missing
+            # cgroup file does not stop the other values from being recorded.
+            jq -n \
+              --argjson sys_oom_kill "${sys_oom_kill:-0}" \
+              --argjson pod_oom_kill "${pod_oom_kill:-0}" \
+              --argjson sys_pgscan "${sys_pgscan:-0}" \
+              --argjson pod_pgscan "${pod_pgscan:-0}" \
+              --argjson mem_cap "$mem_cap" \
+              '{
+                sys_oom_kill: $sys_oom_kill,
+                pod_oom_kill: $pod_oom_kill,
+                sys_pgscan: ($sys_pgscan / $mem_cap),
+                pod_pgscan: ($pod_pgscan / $mem_cap)
+              }' > "$tmp" && mv "$tmp" "$out"
+
+            sleep 1
+          done
+        volumeMounts:
+        - name: proc
+          mountPath: /proc
+          readOnly: true
+        - name: systemd
+          mountPath: /run/systemd
+          readOnly: true
+        - name: node-reader-config
+          mountPath: /app/config
+      volumes:
+      - name: proc
+        hostPath:
+          path: /proc
+          type: Directory
+      - name: systemd
+        hostPath:
+          path: /run/systemd
+          type: Directory
+      - name: node-reader-config
+        hostPath:
+          path: /tmp/telescope-node-reader-data
+          type: DirectoryOrCreate
+      restartPolicy: Always
diff --git a/steps/topology/k8s-resource-pressure/validate-resources.yml b/steps/topology/k8s-resource-pressure/validate-resources.yml
index e0efb27b4f..9f4074788f 100644
--- a/steps/topology/k8s-resource-pressure/validate-resources.yml
+++ b/steps/topology/k8s-resource-pressure/validate-resources.yml
@@ -14,3 +14,55 @@ steps:
 - template: /steps/engine/clusterloader2/large-cluster/validate.yml
   parameters:
     desired_nodes: 3
+- script: |
+    set -euo pipefail
+    set -x
+
+    # Quoted, with an empty default, so an unset or empty variable skips the
+    # branch instead of tripping "set -u" or breaking the [ test.
+    if [ "${USE_CUSTOM_KUBELET:-}" = "True" ]; then
+      echo "Applying custom kubelet configuration via Helm chart..."
+      flags_string=$(echo "$CUSTOM_EVICTION_FLAGS" | jq -r '
+        def format_value(k; v):
+          if (v|type) == "object" then
+            (if (k == "eviction-hard") or (k == "eviction-soft") then "<"
+            else "=" end) as $delim
+            | (v | to_entries | map(.key + $delim + (.value|tostring)) | join(","))
+          elif (v|type) == "array" then
+            (v | map(tostring) | join(","))
+          else
+            (v | tostring)
+          end;
+
+        to_entries
+        | map("--" + .key + "=" + (format_value(.key; .value)))
+        | join(" ")
+      ')
+
+      printf '%s' "$flags_string" > "kubelet_flags.txt"
+
+      helm upgrade --install kubelet-config-updater ./steps/topology/k8s-resource-pressure/chart \
+        --namespace kube-system --create-namespace \
+        --set-file kubeletFlags="kubelet_flags.txt"
+
+      sleep 5
+    fi
+
+    kubectl get pods -A -o wide
+
+  env:
+    CUSTOM_EVICTION_FLAGS: $(CUSTOM_KUBELET_CONFIG)
+
+  displayName: "Validate Kubelet Custom Config Applied"
+
+- script: |
+    set -euo pipefail
+    set -x
+
+    kubectl apply -f ./steps/topology/k8s-resource-pressure/node-reader-daemonset.yml
+
+    # Wait for the DaemonSet rollout rather than sleeping a fixed 5 seconds.
+    kubectl rollout status daemonset/node-reader -n kube-system --timeout=120s
+    kubectl get pods -A -o wide
+
+  displayName: "Start Node Reader"