forked from pingcap/tiflow
-
Notifications
You must be signed in to change notification settings - Fork 0
181 lines (148 loc) · 6.73 KB
/
chaos.yaml
File metadata and controls
181 lines (148 loc) · 6.73 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
name: chaos
on:
schedule:
- cron: '40 16-23 * * *' # run at minute 0 and 40 every hour from 00:00 ~ 07:00 UTC+8
jobs:
pre_job:
runs-on: ubuntu-latest
timeout-minutes: 40
outputs:
should_skip: ${{ steps.skip_check.outputs.should_skip }}
steps:
- id: skip_check
uses: fkirc/skip-duplicate-actions@master
with:
# All of these options are optional, so you can remove them if you are happy with the defaults
# https://github.com/marketplace/actions/skip-duplicate-actions
concurrent_skipping: 'never'
skip_after_successful_duplicate: 'true'
paths_ignore: '["**/README.md"]'
cancel_others: 'true'
do_not_skip: '["workflow_dispatch", "schedule"]' # only skip pull_request
# This workflow contains a single job called "main_job"
main_job:
needs: pre_job
if: ${{ needs.pre_job.outputs.should_skip != 'true' }}
# The type of runner that the job will run on
runs-on: ubuntu-latest
timeout-minutes: 40
strategy:
fail-fast: false
matrix:
chaos-obj: ["pod-failure-cdc", "pod-kill-cdc", "network-partition-cdc", "network-emulation-cdc", "io-chaos-cdc"]
# Steps represent a sequence of tasks that will be executed as part of the job
steps:
- name: Set up Go
uses: actions/setup-go@v2
with:
go-version: '1.16.4'
- name: Print Go version
run: go version
# Checks-out your repository under $GITHUB_WORKSPACE, so your job can access it
- name: Check out code
uses: actions/checkout@v2
- name: Set up K3s cluster
run: |
curl -sfL https://get.k3s.io | INSTALL_K3S_VERSION=v1.18.9+k3s1 sh -s - \
--write-kubeconfig-mode=644 \
"${k3s_disable_command:---disable}" metrics-server \
"${k3s_disable_command:---disable}" traefik \
--flannel-backend=none \
--docker
shell: bash
# this may be failed sometimes, and I want to exit the workflow directly if failed,
# but GitHub Actions doesnt' support early-exit yet, see https://github.com/actions/runner/issues/662.
# so, simply wait for a long time.
- name: Wait for coredns
run: |
kubectl rollout status --watch --timeout 600s deployment/coredns -n kube-system
shell: bash
env:
KUBECONFIG: /etc/rancher/k3s/k3s.yaml
- name: Export KUBECONFIG environment variable
run: |
echo 'KUBECONFIG=/etc/rancher/k3s/k3s.yaml' >> $GITHUB_ENV
shell: bash
- name: Print cluster information
run: |
kubectl config view
kubectl cluster-info
kubectl get nodes
kubectl get pods -n kube-system
kubectl get sc
kubectl version
- name: Install tidb-operator
run: |
echo "install helm 3"
curl -fsSL -o get_helm.sh https://raw.githubusercontent.com/helm/helm/master/scripts/get-helm-3
chmod 700 get_helm.sh
./get_helm.sh
echo "install tidb-operator CRDs"
kubectl apply -f https://raw.githubusercontent.com/pingcap/tidb-operator/v1.1.12/manifests/crd.yaml
kubectl get crd
echo "install tidb-operator into namespace playground"
helm repo add pingcap https://charts.pingcap.org/
kubectl create namespace playground
helm install --namespace playground tidb-operator pingcap/tidb-operator --version v1.1.12
- name: Wait for tidb-operator Ready
run: |
kubectl wait --for=condition=Ready pod -l app.kubernetes.io/instance=tidb-operator -n playground --timeout=300s || true
kubectl get pods --namespace playground -l app.kubernetes.io/instance=tidb-operator
- name: Deploy Upstream / Downstream TiDB Cluster into playground
run: |
kubectl apply -f $GITHUB_WORKSPACE/chaos/manifests/upstream.yaml -n playground
kubectl apply -f $GITHUB_WORKSPACE/chaos/manifests/downstream.yaml -n playground
sleep 100
kubectl wait --for=condition=Ready pod -l app.kubernetes.io/managed-by=tidb-operator -n playground --timeout=300s || true
kubectl get all -n playground
- name: Init Upstream / Downstream TiDB Cluster
run: |
kubectl run -i --rm --image=mysql --restart=Never mysql-client -n playground -- mysql -h upstream-tidb -P4000 -e "CREATE DATABASE bank"
kubectl run -i --rm --image=mysql --restart=Never mysql-client -n playground -- mysql -h downstream-tidb -P4000 -e "CREATE DATABASE bank"
- name: Deploy CDC into playground
run: |
docker build -t cdc:chaos $GITHUB_WORKSPACE
kubectl apply -f $GITHUB_WORKSPACE/chaos/manifests/ticdc.yaml -n playground
echo "wait for TiCDC Ready"
kubectl wait --for=condition=Ready pod/cdc-0 -n playground --timeout=300s || true
echo "show pvc"
kubectl get pvc -n playground
- name: Create CDC changefeed
run: |
kubectl exec -i cdc-0 -n playground -- ./cdc cli changefeed create --pd=http://upstream-pd.playground.svc:2379 --sink-uri="mysql://root@downstream-tidb.playground.svc:4000/"
- name: Deploy bank workload into playground
run: |
docker build -f $GITHUB_WORKSPACE/chaos/Dockerfile.chaos -t bank:chaos $GITHUB_WORKSPACE
docker image list
kubectl apply -f $GITHUB_WORKSPACE/chaos/manifests/workload.yaml -n playground
kubectl get -f $GITHUB_WORKSPACE/chaos/manifests/workload.yaml -n playground
kubectl describe -f $GITHUB_WORKSPACE/chaos/manifests/workload.yaml -n playground
- name: Encode chaos-mesh action
run: |
echo CFG_BASE64=$(base64 -w 0 $GITHUB_WORKSPACE/chaos/manifests/${{ matrix.chaos-obj }}.yaml) >> $GITHUB_ENV
- name: Run chaos mesh action
uses: chaos-mesh/chaos-mesh-action@master
env:
CFG_BASE64: ${{ env.CFG_BASE64 }}
CHAOS_MESH_VERSION: v1.0.0
- name: Wait for chaos test case complete
run: |
$GITHUB_WORKSPACE/chaos/checker.sh
- name: Collect logs
if: ${{ always() }}
run: |
mkdir ./logs
sudo cp -r -L /var/log/containers/. ./logs
sudo find /var/ -type f -regex '.*cdc.*.log$' | sudo xargs -i cp {} ./logs || true
sudo chown -R runner ./logs
# Update logs as artifact seems not stable, so we set `continue-on-error: true` here.
- name: Upload logs
continue-on-error: true
uses: actions/upload-artifact@v2
if: ${{ always() }}
with:
name: chaos-base-logs.${{ matrix.chaos-obj }}
path: |
./logs
!./logs/coredns-*
!./logs/local-path-provisioner-*