Hello,
I’m trying to change the distribution of pod replicas on a k8s infrastructure we manage through Terraform.
At the moment they are all on the same node, which I can see by getting the pod in question:
kubectl get pod auth-iam-7cf5cfbfc5-q4djz -o wide --sort-by=.spec.nodeName
NAME                        READY   STATUS    RESTARTS   AGE     IP              NODE                                             NOMINATED NODE   READINESS GATES
auth-iam-7cf5cfbfc5-q4djz   4/4     Running   0          5h28m   10.XYZ.XYZ.XX   ip-10-XYZ-XYX-YY.eu-central-1.compute.internal   <none>           <none>
kubectl get pod auth-iam-7cf5cfbfc5-q4djz -o wide --sort-by=.status.nodeName
NAME                        READY   STATUS    RESTARTS   AGE     IP              NODE                                             NOMINATED NODE   READINESS GATES
auth-iam-7cf5cfbfc5-q4djz   4/4     Running   0          5h26m   10.XYZ.XYZ.XX   ip-10-XYZ-XYX-YY.eu-central-1.compute.internal   <none>           <none>
I want to distribute the replicas of this pod across nodes, so I modified the manifest through Terraform, inserting a podAntiAffinity block in the spec of the template:
# affinity rule requires distribution of IAM pod replicas across nodes with different hostnames
affinity {
  pod_anti_affinity {
    required_during_scheduling_ignored_during_execution {
      label_selector {
        match_expressions {
          key      = "component"
          operator = "In"
          values   = ["iam"]
        }
      }
      topology_key = "kubernetes.io/hostname"
    }
  }
}
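For clarity, this selector only repels pods that actually carry a component=iam label, so the pod template's labels have to include it. Our local.deployment_labels is defined elsewhere and not reproduced in full here; a hypothetical example of a value the selector would match:

locals {
  # hypothetical example only -- placeholder for the real deployment_labels local
  deployment_labels = {
    app       = "auth-iam"
    component = "iam" # the label the pod_anti_affinity match_expressions selects on
  }
}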
It builds correctly and deploys. Upon deployment I can see the new pod spinning up, and as soon as it reaches the desired number of replicas, the old pod is shut down. But all of the replicas are again on the same node, so
kubectl get pod auth-iam-7cf5cfbfc5-q4djz -o wide --sort-by=.status.nodeName
and
kubectl get pod auth-iam-7cf5cfbfc5-q4djz -o wide --sort-by=.spec.nodeName
still show all of the pod replicas on one node.
For context and ease of reproduction of the issue, here is the code for the deployment resource where the template for the pod is defined:
resource "kubernetes_deployment" "iam" {
metadata {
name = var.service_name
namespace = var.namespace
labels = local.deployment_labels
}
spec {
replicas = 1
selector {
match_labels = local.match_labels
}
strategy {
type = var.pod.strategy.type
rolling_update {
max_surge = var.pod.strategy.max_surge
max_unavailable = var.pod.strategy.max_unavailable
}
}
template {
metadata {
labels = local.deployment_labels
annotations = var.pod.annotations
}
spec {
service_account_name = kubernetes_service_account.iam.metadata.0.name
automount_service_account_token = true
image_pull_secrets {
name = var.docker_pull_secret_name
}
init_container {
name = "${var.service_name}-init"
image = var.containers.init.image
command = var.containers.init.command
args = var.containers.init.args
dynamic "volume_mount" {
for_each = local.init_volumes
content {
name = volume_mount.key
mount_path = volume_mount.value
}
}
}
# affinity rule requires distribution of IAM pod replicas among nodes with different hostname
affinity {
pod_anti_affinity {
required_during_scheduling_ignored_during_execution {
label_selector {
match_expressions {
key = "component"
operator = "In"
values = ["iam"]
}
}
topology_key = "kubernetes.io/hostname"
}
}
}
# IAM
container {
name = var.service_name
image = var.containers.iam.image
command = var.containers.iam.command
args = var.containers.iam.args
security_context {
run_as_user = var.containers.iam.security_context_user_id
}
dynamic "env" {
for_each = local.local_env_vars
content {
name = env.key
value = env.value
}
}
env_from {
config_map_ref {
name = var.containers.iam.env_config_map_name
}
}
port {
container_port = var.containers.iam.port
}
resources {
limits = var.containers.iam.resources.limits
requests = var.containers.iam.resources.requests
}
liveness_probe {
http_get {
path = var.containers.iam.liveness.path
port = var.containers.iam.liveness.port
}
initial_delay_seconds = var.containers.iam.liveness.delay
}
readiness_probe {
http_get {
path = var.containers.iam.readiness.path
port = var.containers.iam.readiness.port
}
initial_delay_seconds = var.containers.iam.readiness.delay
}
dynamic "volume_mount" {
for_each = local.iam_volumes
content {
name = volume_mount.key
mount_path = volume_mount.value
}
}
}
# Promtail
container {
name = var.containers.promtail.name
image = var.containers.promtail.image
command = var.containers.promtail.command
args = var.containers.promtail.args
resources {
limits = var.containers.promtail.resources.limits
requests = var.containers.promtail.resources.requests
}
dynamic "volume_mount" {
for_each = local.promtail_volumes
content {
name = volume_mount.key
mount_path = volume_mount.value
}
}
}
# configmap volumes
dynamic "volume" {
for_each = var.volumes.configmaps
content {
name = volume.key
config_map {
name = volume.value.configmap
}
}
}
}
}
}
}
locals {
  # IAM container volume mounts
  iam_volume_list = flatten([
    for volume_type, volume_obj in var.volumes : [
      for volume_name, volume_detail in volume_obj : {
        name        = volume_name
        mount_point = lookup(volume_detail.mount_points, "iam", "")
      }
    ]
  ])
  iam_volumes = {
    for v in local.iam_volume_list : v.name => v.mount_point if v.mount_point != ""
  }
  # init container volume mounts
  init_volume_list = flatten([
    for volume_type, volume_obj in var.volumes : [
      for volume_name, volume_detail in volume_obj : {
        name        = volume_name
        mount_point = lookup(volume_detail.mount_points, "init", "")
      }
    ]
  ])
  init_volumes = {
    for v in local.init_volume_list : v.name => v.mount_point if v.mount_point != ""
  }
  # promtail container volume mounts
  promtail_volume_list = flatten([
    for volume_type, volume_obj in var.volumes : [
      for volume_name, volume_detail in volume_obj : {
        name        = volume_name
        mount_point = lookup(volume_detail.mount_points, "promtail", "")
      }
    ]
  ])
  promtail_volumes = {
    for v in local.promtail_volume_list : v.name => v.mount_point if v.mount_point != ""
  }
}
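For completeness, the locals above iterate over var.volumes, a map of volume types ("configmaps", etc.) to maps of volume definitions, each carrying a configmap name and per-container mount points. Our real variable definition is not shown; a hypothetical minimal value with the expected shape (names and paths are placeholders):

variable "volumes" {
  # hypothetical shape only -- just enough structure for the locals above to iterate over
  type = any
  default = {
    configmaps = {
      "iam-config" = {
        configmap = "iam-config-cm" # consumed by the dynamic "volume" block
        mount_points = {
          iam  = "/etc/iam/config" # picked up by local.iam_volumes
          init = "/etc/iam/config" # picked up by local.init_volumes
        }
      }
    }
  }
}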
Cluster information:
Kubernetes version: (server version) 1.21.4-eks
Cloud being used: AWS
Installation method: Terraform
Host OS: ubuntu-focal-20.04-amd64 AMI
CNI and version: don’t have access to this info
CRI and version: docker://20.10.7