Skip to content

Commit c3753fe

Browse files
authored
feat: allow for configuring auto_scaler_profile (#278)
* feat: allow for configuring auto_scaler_profile Signed-off-by: David van der Spek <vanderspek.david@gmail.com>
1 parent fc9337b commit c3753fe

6 files changed

Lines changed: 288 additions & 98 deletions

File tree

README.md

Lines changed: 116 additions & 98 deletions
Large diffs are not rendered by default.

locals.tf

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,6 @@
11
locals {
2+
# If auto_scaler_profile_scale_down_delay_after_delete is not set (null), fall back to the value of auto_scaler_profile_scan_interval.
3+
auto_scaler_profile_scale_down_delay_after_delete = var.auto_scaler_profile_scale_down_delay_after_delete == null ? var.auto_scaler_profile_scan_interval : var.auto_scaler_profile_scale_down_delay_after_delete
24
# automatic upgrades are either:
35
# - null
46
# - patch, but then the kubernetes_version must not specify a patch number and orchestrator_version must be null

main.tf

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -97,6 +97,29 @@ resource "azurerm_kubernetes_cluster" "main" {
9797
subnet_name = var.aci_connector_linux_subnet_name
9898
}
9999
}
100+
dynamic "auto_scaler_profile" {
101+
for_each = var.auto_scaler_profile_enabled ? ["default_auto_scaler_profile"] : []
102+
103+
content {
104+
balance_similar_node_groups = var.auto_scaler_profile_balance_similar_node_groups
105+
empty_bulk_delete_max = var.auto_scaler_profile_empty_bulk_delete_max
106+
expander = var.auto_scaler_profile_expander
107+
max_graceful_termination_sec = var.auto_scaler_profile_max_graceful_termination_sec
108+
max_node_provisioning_time = var.auto_scaler_profile_max_node_provisioning_time
109+
max_unready_nodes = var.auto_scaler_profile_max_unready_nodes
110+
max_unready_percentage = var.auto_scaler_profile_max_unready_percentage
111+
new_pod_scale_up_delay = var.auto_scaler_profile_new_pod_scale_up_delay
112+
scale_down_delay_after_add = var.auto_scaler_profile_scale_down_delay_after_add
113+
scale_down_delay_after_delete = local.auto_scaler_profile_scale_down_delay_after_delete
114+
scale_down_delay_after_failure = var.auto_scaler_profile_scale_down_delay_after_failure
115+
scale_down_unneeded = var.auto_scaler_profile_scale_down_unneeded
116+
scale_down_unready = var.auto_scaler_profile_scale_down_unready
117+
scale_down_utilization_threshold = var.auto_scaler_profile_scale_down_utilization_threshold
118+
scan_interval = var.auto_scaler_profile_scan_interval
119+
skip_nodes_with_local_storage = var.auto_scaler_profile_skip_nodes_with_local_storage
120+
skip_nodes_with_system_pods = var.auto_scaler_profile_skip_nodes_with_system_pods
121+
}
122+
}
100123
dynamic "azure_active_directory_role_based_access_control" {
101124
for_each = var.role_based_access_control_enabled && var.rbac_aad && var.rbac_aad_managed ? ["rbac"] : []
102125

test/unit/unit_test.go

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -199,6 +199,32 @@ func TestInvalidVarsForAutomaticUpgrades(t *testing.T) {
199199
}
200200
}
201201

202+
func TestScaleDownDelayAfterDeleteNotSetShouldUseScanInterval(t *testing.T) {
203+
test_helper.RunE2ETest(t, "../../", "unit-test-fixture", terraform.Options{
204+
Upgrade: false,
205+
Vars: dummyRequiredVariables(),
206+
}, func(t *testing.T, output test_helper.TerraformOutput) {
207+
scaleDownDelayAfterDelete, ok := output["auto_scaler_profile_scale_down_delay_after_delete"].(string)
208+
assert.True(t, ok)
209+
scanInterval, ok := output["auto_scaler_profile_scan_interval"].(string)
210+
assert.True(t, ok)
211+
assert.Equal(t, scanInterval, scaleDownDelayAfterDelete)
212+
})
213+
}
214+
215+
func TestScaleDownDelayAfterDeleteSetShouldUseVar(t *testing.T) {
216+
vars := dummyRequiredVariables()
217+
vars["auto_scaler_profile_scale_down_delay_after_delete"] = "15s"
218+
test_helper.RunE2ETest(t, "../../", "unit-test-fixture", terraform.Options{
219+
Upgrade: false,
220+
Vars: vars,
221+
}, func(t *testing.T, output test_helper.TerraformOutput) {
222+
scaleDownDelayAfterDelete, ok := output["auto_scaler_profile_scale_down_delay_after_delete"].(string)
223+
assert.True(t, ok)
224+
assert.Equal(t, "15s", scaleDownDelayAfterDelete)
225+
})
226+
}
227+
202228
func dummyRequiredVariables() map[string]interface{} {
203229
return map[string]interface{}{
204230
"prefix": "foo",

unit-test-fixture/outputs.tf

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,3 +13,11 @@ output "log_analytics_workspace" {
1313
output "automatic_channel_upgrade_check" {
1414
value = local.automatic_channel_upgrade_check
1515
}
16+
17+
output "auto_scaler_profile_scale_down_delay_after_delete" {
18+
value = local.auto_scaler_profile_scale_down_delay_after_delete
19+
}
20+
21+
output "auto_scaler_profile_scan_interval" {
22+
value = var.auto_scaler_profile_scan_interval
23+
}

variables.tf

Lines changed: 113 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -93,6 +93,119 @@ variable "api_server_authorized_ip_ranges" {
9393
default = null
9494
}
9595

96+
variable "auto_scaler_profile_balance_similar_node_groups" {
97+
description = "Detect similar node groups and balance the number of nodes between them. Defaults to `false`."
98+
type = bool
99+
default = false
100+
}
101+
102+
variable "auto_scaler_profile_empty_bulk_delete_max" {
103+
description = "Maximum number of empty nodes that can be deleted at the same time. Defaults to `10`."
104+
type = number
105+
default = 10
106+
}
107+
108+
variable "auto_scaler_profile_enabled" {
109+
type = bool
110+
description = "Enable configuring the auto scaler profile"
111+
default = false
112+
nullable = false
113+
}
114+
115+
variable "auto_scaler_profile_expander" {
116+
description = "Expander to use. Possible values are `least-waste`, `priority`, `most-pods` and `random`. Defaults to `random`."
117+
type = string
118+
default = "random"
119+
validation {
120+
condition = contains(["least-waste", "most-pods", "priority", "random"], var.auto_scaler_profile_expander)
121+
error_message = "Must be either `least-waste`, `most-pods`, `priority` or `random`."
122+
}
123+
}
124+
125+
variable "auto_scaler_profile_max_graceful_termination_sec" {
126+
description = "Maximum number of seconds the cluster autoscaler waits for pod termination when trying to scale down a node. Defaults to `600`."
127+
type = string
128+
default = "600"
129+
}
130+
131+
variable "auto_scaler_profile_max_node_provisioning_time" {
132+
description = "Maximum time the autoscaler waits for a node to be provisioned. Defaults to `15m`."
133+
type = string
134+
default = "15m"
135+
}
136+
137+
variable "auto_scaler_profile_max_unready_nodes" {
138+
description = "Maximum Number of allowed unready nodes. Defaults to `3`."
139+
type = number
140+
default = 3
141+
}
142+
143+
variable "auto_scaler_profile_max_unready_percentage" {
144+
description = "Maximum percentage of unready nodes the cluster autoscaler will stop if the percentage is exceeded. Defaults to `45`."
145+
type = number
146+
default = 45
147+
}
148+
149+
variable "auto_scaler_profile_new_pod_scale_up_delay" {
150+
description = "For scenarios like burst/batch scale where you don't want CA to act before the kubernetes scheduler could schedule all the pods, you can tell CA to ignore unscheduled pods before they're a certain age. Defaults to `10s`."
151+
type = string
152+
default = "10s"
153+
}
154+
155+
variable "auto_scaler_profile_scale_down_delay_after_add" {
156+
description = "How long after the scale up of AKS nodes the scale down evaluation resumes. Defaults to `10m`."
157+
type = string
158+
default = "10m"
159+
}
160+
161+
variable "auto_scaler_profile_scale_down_delay_after_delete" {
162+
description = "How long after node deletion that scale down evaluation resumes. Defaults to the value used for `scan_interval`."
163+
type = string
164+
default = null
165+
}
166+
167+
variable "auto_scaler_profile_scale_down_delay_after_failure" {
168+
description = "How long after scale down failure that scale down evaluation resumes. Defaults to `3m`."
169+
type = string
170+
default = "3m"
171+
}
172+
173+
variable "auto_scaler_profile_scale_down_unneeded" {
174+
description = "How long a node should be unneeded before it is eligible for scale down. Defaults to `10m`."
175+
type = string
176+
default = "10m"
177+
}
178+
179+
variable "auto_scaler_profile_scale_down_unready" {
180+
description = "How long an unready node should be unneeded before it is eligible for scale down. Defaults to `20m`."
181+
type = string
182+
default = "20m"
183+
}
184+
185+
variable "auto_scaler_profile_scale_down_utilization_threshold" {
186+
description = "Node utilization level, defined as sum of requested resources divided by capacity, below which a node can be considered for scale down. Defaults to `0.5`."
187+
type = string
188+
default = "0.5"
189+
}
190+
191+
variable "auto_scaler_profile_scan_interval" {
192+
description = "How often the AKS Cluster should be re-evaluated for scale up/down. Defaults to `10s`."
193+
type = string
194+
default = "10s"
195+
}
196+
197+
variable "auto_scaler_profile_skip_nodes_with_local_storage" {
198+
description = "If `true` cluster autoscaler will never delete nodes with pods with local storage, for example, EmptyDir or HostPath. Defaults to `true`."
199+
type = bool
200+
default = true
201+
}
202+
203+
variable "auto_scaler_profile_skip_nodes_with_system_pods" {
204+
description = "If `true` cluster autoscaler will never delete nodes with pods from kube-system (except for DaemonSet or mirror pods). Defaults to `true`."
205+
type = bool
206+
default = true
207+
}
208+
96209
variable "automatic_channel_upgrade" {
97210
type = string
98211
default = null

0 commit comments

Comments
 (0)