Skip to content

Commit e6e3a3d

Browse files
committed
feat: add Velero to EKS
1 parent cafbc7a commit e6e3a3d

File tree

6 files changed

+286
-1
lines changed

6 files changed

+286
-1
lines changed

terraform/aws/aws-eks/README.md

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ In particular it creates:
1010
- Cloud Watch, EFS, CSI Snapshots, Kube Server, Kube State Metrics addons
1111
- K8S metrics server on `system` nodepool
1212
- ExternalDNS
13+
- Velero - for backups: scheduled backups for each configured namespace plus a cluster-wide backup
1314
- Private DNS Route53 zone
1415
- Configure storage, ingress, node pool classes
1516
- Configure app namespaces (quota, limits, networkpolicies)
@@ -99,3 +100,34 @@ k9s
99100
```bash
100101
make list-pod-identity-associations [ENV=dev]
101102
```
103+
104+
### Backups
105+
106+
```bash
107+
# create instant backup from scheduled backup
108+
velero backup create cluster-daily-backup1 --from-schedule cluster-daily-backup
109+
velero backup create learning-backup1 --from-schedule learning-daily-backup
110+
111+
# list backups
112+
velero get backups
113+
velero backup get
114+
115+
# list schedules
116+
velero get schedules
117+
velero schedule get
118+
119+
# describe particular backup
120+
velero backup describe learning-backup1
121+
# get the list of backed-up objects, details about CSI snapshots, etc.
122+
velero backup describe learning-backup1 --details
123+
124+
# get the logs produced during the backup
125+
velero backup log learning-backup1
126+
127+
128+
# delete particular backup
129+
velero backup delete learning-backup1 --confirm
130+
131+
# delete all backups
132+
velero backup delete --all --confirm
133+
```
Lines changed: 102 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,102 @@
1+
# S3 bucket that stores the Velero backups.
# NOTE(review): force_destroy = true lets `terraform destroy` delete the bucket
# together with every backup stored in it - confirm this is intended outside dev.
resource "aws_s3_bucket" "backup" {
2+
bucket = "${local.account_id}-${local.prefix}-velero-backups"
3+
force_destroy = true
4+
object_lock_enabled = false
5+
}
6+
7+
8+
# Default server-side encryption (SSE-S3 / AES256) for the backup bucket.
# NOTE(review): S3 Bucket Keys are documented for SSE-KMS; with AES256 this flag
# presumably has no effect - confirm whether it is needed.
resource "aws_s3_bucket_server_side_encryption_configuration" "backup_enc" {
9+
bucket = aws_s3_bucket.backup.id
10+
11+
rule {
12+
apply_server_side_encryption_by_default {
13+
sse_algorithm = "AES256"
14+
}
15+
16+
bucket_key_enabled = true
17+
}
18+
}
19+
20+
21+
# Keeps object versioning disabled on the backup bucket.
# NOTE(review): per the AWS provider docs "Disabled" is only meant for buckets that
# have never been versioned - confirm this bucket is always created fresh.
resource "aws_s3_bucket_versioning" "backup_versioning" {
22+
bucket = aws_s3_bucket.backup.id
23+
versioning_configuration {
24+
status = "Disabled"
25+
}
26+
}
27+
28+
29+
# Trust policy for the Velero IAM role (IRSA): allows sts:AssumeRoleWithWebIdentity
# through the cluster OIDC provider, restricted to the `velero-server` service
# account in the `velero` namespace.
# NOTE(review): the `<issuer>:sub` condition key must not contain the `https://`
# scheme - confirm the provider's `url` attribute is exported in that form.
data "aws_iam_policy_document" "backup_irsa_trust_policy" {
30+
statement {
31+
effect = "Allow"
32+
actions = ["sts:AssumeRoleWithWebIdentity"]
33+
principals {
34+
type = "Federated"
35+
identifiers = [aws_iam_openid_connect_provider.oidc_provider.arn]
36+
}
37+
condition {
38+
test = "StringEquals"
39+
variable = "${aws_iam_openid_connect_provider.oidc_provider.url}:sub"
40+
values = ["system:serviceaccount:velero:velero-server"]
41+
}
42+
}
43+
}
44+
45+
46+
# IAM role assumed by the Velero service account; its trust relationship is the
# backup_irsa_trust_policy document.
resource "aws_iam_role" "backup_irsa" {
47+
name = "${local.prefix}-velero-irsa"
48+
assume_role_policy = data.aws_iam_policy_document.backup_irsa_trust_policy.json
49+
}
50+
51+
# Attaches the velero-policy permissions to the Velero IAM role.
resource "aws_iam_role_policy_attachment" "backup_irsa" {
52+
policy_arn = aws_iam_policy.velero-policy.arn
53+
role = aws_iam_role.backup_irsa.name
54+
}
55+
56+
57+
# Permissions granted to Velero:
#   - EC2: describe volumes/snapshots, create tags/volumes/snapshots, delete snapshots
#   - S3: read/write/delete objects and multipart uploads in the backup bucket
#   - S3: list the backup bucket itself
resource "aws_iam_policy" "velero-policy" {
58+
description = "Allow Velero to manage EBS snapshots and objects in the backup S3 bucket"
59+
name = "${local.prefix}-velero-irsa"
60+
61+
policy = <<EOF
62+
{
63+
"Version": "2012-10-17",
64+
"Statement": [
65+
{
66+
"Effect": "Allow",
67+
"Action": [
68+
"ec2:DescribeVolumes",
69+
"ec2:DescribeSnapshots",
70+
"ec2:CreateTags",
71+
"ec2:CreateVolume",
72+
"ec2:CreateSnapshot",
73+
"ec2:DeleteSnapshot"
74+
],
75+
"Resource": "*"
76+
},
77+
{
78+
"Effect": "Allow",
79+
"Action": [
80+
"s3:GetObject",
81+
"s3:DeleteObject",
82+
"s3:PutObject",
83+
"s3:AbortMultipartUpload",
84+
"s3:ListMultipartUploadParts"
85+
],
86+
"Resource": [
87+
"arn:aws:s3:::${aws_s3_bucket.backup.bucket}/*"
88+
]
89+
},
90+
{
91+
"Effect": "Allow",
92+
"Action": [
93+
"s3:ListBucket"
94+
],
95+
"Resource": [
96+
"arn:aws:s3:::${aws_s3_bucket.backup.bucket}"
97+
]
98+
}
99+
]
100+
}
101+
EOF
102+
}

terraform/aws/aws-eks/module/cluster-config-chart/templates/storage-classes.yaml

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,3 +26,16 @@ metadata:
2626
provisioner: efs.csi.aws.com
2727
reclaimPolicy: Delete
2828
volumeBindingMode: Immediate
29+
---
30+
apiVersion: snapshot.storage.k8s.io/v1
31+
kind: VolumeSnapshotClass
32+
metadata:
33+
name: ebs-csi
34+
labels:
35+
velero.io/csi-volumesnapshot-class: "true"
36+
driver: ebs.csi.eks.amazonaws.com
37+
# The deletionPolicy value does not matter here, because per
38+
# the Velero design document
39+
# https://github.com/vmware-tanzu/velero/blob/release-1.16/design/clean_artifacts_in_csi_flow.md
40+
# the VolumeSnapshot and VolumeSnapshotContent are deleted automatically once the Backup is created, either way
41+
deletionPolicy: Retain # or Delete

terraform/aws/aws-eks/module/configure-cluster.sh

Lines changed: 54 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
#!/usr/bin/env bash
22

3-
ACCOUNT_ID="${1:?CLUSTER_NAME is required}"
3+
ACCOUNT_ID="${1:?ACCOUNT_ID is required}"
44
CLUSTER_NAME="${2:?CLUSTER_NAME is required}"
55
REGION="${3:?REGION is required}"
66
NAMESPACES="${4:?NAMESPACES is required}"
@@ -107,6 +107,8 @@ spec:
107107
useClusterChecksRunners: true
108108
orchestratorExplorer:
109109
enabled: true
110+
npm:
111+
enabled: true
110112
usm:
111113
enabled: true
112114
apm:
@@ -168,13 +170,64 @@ EOF
168170
}
169171
}
170172

173+
# Install or upgrade Velero with Helm, then apply the backup schedules:
#   - one daily Schedule per configured namespace (01:00, 7 days retention)
#   - one daily cluster-wide Schedule covering all other namespaces and the
#     cluster-scoped resources (02:00, 7 days retention)
# Uses ACCOUNT_ID, CLUSTER_NAME, REGION, NAMESPACES and DIRNAME from the script scope.
# Every --set argument is quoted as a whole so the `[0]` list indexes are never
# treated as shell glob patterns.
function ensure-backup() {
174+
helm repo add vmware-tanzu https://vmware-tanzu.github.io/helm-charts
175+
helm repo update
176+
helm upgrade --install velero -n velero --create-namespace vmware-tanzu/velero -f "${DIRNAME}/velero.yaml" \
177+
--set "configuration.backupStorageLocation[0].name=default" \
178+
--set "configuration.backupStorageLocation[0].provider=aws" \
179+
--set "configuration.backupStorageLocation[0].bucket=${ACCOUNT_ID}-${CLUSTER_NAME}-velero-backups" \
180+
--set "configuration.backupStorageLocation[0].config.region=${REGION}" \
181+
--set "configuration.volumeSnapshotLocation[0].name=default" \
182+
--set "configuration.volumeSnapshotLocation[0].provider=aws" \
183+
--set "configuration.volumeSnapshotLocation[0].config.region=${REGION}" \
184+
--set "serviceAccount.server.annotations.eks\\.amazonaws\\.com/role-arn=arn:aws:iam::${ACCOUNT_ID}:role/${CLUSTER_NAME}-velero-irsa"
185+
186+
# Read one compact JSON object per line so whitespace inside a value cannot split an entry.
while IFS= read -r NAMESPACE; do
187+
188+
NS="$(echo "${NAMESPACE}" | jq -r ".name")"
189+
190+
cat <<EOF | oc apply -f -
191+
apiVersion: velero.io/v1
192+
kind: Schedule
193+
metadata:
194+
name: ${NS}-daily-backup
195+
namespace: velero
196+
spec:
197+
schedule: "0 1 * * *" # Daily at 1 AM, so RPO is 24h
198+
template:
199+
includedNamespaces:
200+
- ${NS}
201+
ttl: 168h0m0s # 7 days retention
202+
EOF
203+
done < <(echo "${NAMESPACES}" | jq -cr '.[]')
204+
205+
# JSON array of namespace names; it is substituted as a YAML flow sequence below.
NAMESPACES_NAMES="$(echo "${NAMESPACES}" | jq -cr '[.[].name]')"
206+
cat <<EOF | oc apply -f -
207+
apiVersion: velero.io/v1
208+
kind: Schedule
209+
metadata:
210+
name: cluster-daily-backup
211+
namespace: velero
212+
spec:
213+
schedule: "0 2 * * *" # Daily at 2 AM, RPO = 24h
214+
template:
215+
includedNamespaces:
216+
- "*" # all namespaces
217+
excludedNamespaces: ${NAMESPACES_NAMES}
218+
includeClusterResources: true
219+
ttl: 168h # 7 days retention
220+
EOF
221+
222+
}
171223
# Main
172224
login-to-eks
173225
ensure-cluster-config
174226
ensure-datadog-agent
175227
configure-namespaces
176228
ensure-nginx
177229
ensure-externaldns
230+
ensure-backup
178231

179232
# TODO install SecretManager integration
180233
# https://github.com/aws/secrets-store-csi-driver-provider-aws

terraform/aws/aws-eks/module/eks.tf

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -234,5 +234,6 @@ resource "null_resource" "cluster-config" {
234234
aws_eks_access_entry.admin,
235235
aws_eks_access_policy_association.admin,
236236
aws_eks_pod_identity_association.externaldns,
237+
aws_iam_role_policy_attachment.backup_irsa,
237238
]
238239
}
Lines changed: 84 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,84 @@
1+
# helm show values vmware-tanzu/velero
2+
3+
image:
4+
repository: velero/velero
5+
tag: latest
6+
pullPolicy: Always
7+
8+
9+
nodeSelector:
10+
karpenter.sh/nodepool: "system"
11+
12+
tolerations:
13+
- key: "CriticalAddonsOnly"
14+
operator: "Exists"
15+
- effect: "NoExecute"
16+
operator: "Exists"
17+
tolerationSeconds: 300
18+
19+
# For AWS
20+
# configuration:
21+
# backupStorageLocation:
22+
# - bucket: $BUCKET
23+
# provider: aws
24+
# volumeSnapshotLocation:
25+
# - config:
26+
# region: $REGION
27+
# provider: aws
28+
# Init containers to add to the Velero deployment's pod spec. At least one plugin provider image is required.
29+
# If the value is a string then it is evaluated as a template.
30+
initContainers:
31+
- name: velero-plugin-for-aws
32+
image: velero/velero-plugin-for-aws:latest
33+
imagePullPolicy: Always
34+
volumeMounts:
35+
- mountPath: /target
36+
name: plugins
37+
38+
credentials:
39+
# Whether a secret should be used. Set to false if, for examples:
40+
# - using kube2iam or kiam to provide AWS IAM credentials instead of providing the key file. (AWS only)
41+
# - using workload identity instead of providing the key file. (Azure/GCP only)
42+
useSecret: false
43+
44+
# IRSA for Velero
45+
# serviceAccount:
46+
# server:
47+
# annotations:
48+
# eks.amazonaws.com/role-arn: "arn:aws:iam::${ACCOUNT}:role/eks-velero-backup"
49+
50+
# The init container uses the bitnamilegacy image, but it no longer works
51+
kubectl:
52+
image:
53+
repository: docker.io/bitnamilegacy/kubectl
54+
tag: latest
55+
56+
configuration:
57+
# Required: enables CSI support so that volumes can be snapshotted via a VolumeSnapshotClass
58+
features: EnableCSI
59+
# When POD has no annotation like: backup.velero.io/backup-volumes: <<name-of-volume-in-the-pod>>
60+
# then if it is EBS and VolumeSnapshotClass with
61+
# label velero.io/csi-volumesnapshot-class=true exists
62+
# velero will use it to snapshot EBS volumes.
63+
# Otherwise EFS volumes will be skipped in backup.
64+
#
65+
# If defaultVolumesToFsBackup is true then all volumes
66+
# without the annotation will be treated as FS volumes. EBS volumes will not be snapshotted.
67+
defaultVolumesToFsBackup: false
68+
69+
# For FS backups (like EFS)
70+
deployNodeAgent: true
71+
nodeAgent:
72+
resources:
73+
requests:
74+
cpu: 100m
75+
memory: 256Mi
76+
limits:
77+
cpu: 1000m
78+
memory: 512Mi
79+
tolerations:
80+
- key: "CriticalAddonsOnly"
81+
operator: "Exists"
82+
- effect: "NoExecute"
83+
operator: "Exists"
84+
tolerationSeconds: 300

0 commit comments

Comments
 (0)