Mitigate CI flakiness (#70)

* Increase SSH connection timeouts and retries

* Make MetalLB timeouts configurable

* Retry applying MetalLB CRs

* Fix location of MetalLB CRs template

* Make MetalLB wait logic more compact

* Fix typo

* retrigger 1

* retrigger 2

* retrigger 3

* retrigger 4

* retrigger 5
This commit is contained in:
Simon Leiner 2022-09-08 01:47:58 +02:00 committed by GitHub
parent 4365a2a54b
commit 60bc09b085
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 50 additions and 56 deletions

View File

@ -50,6 +50,8 @@ jobs:
run: molecule test --scenario-name ${{ matrix.scenario }}
env:
ANSIBLE_K3S_LOG_DIR: ${{ runner.temp }}/logs/k3s-ansible/${{ matrix.scenario }}
ANSIBLE_SSH_RETRIES: 4
ANSIBLE_TIMEOUT: 60
- name: Upload log files
if: always() # do this even if a step before has failed

View File

@ -0,0 +1,3 @@
---
# Timeout passed as `--timeout` to kubectl when waiting for MetalLB resources and when applying the MetalLB CRs
metal_lb_available_timeout: 60s

View File

@ -26,52 +26,43 @@
with_items: "{{ groups['master'] }}"
run_once: true
- name: Wait for metallb controller to be running
- name: Wait for MetalLB resources
command: >-
k3s kubectl wait deployment -n metallb-system controller --for condition=Available=True --timeout=60s
k3s kubectl wait {{ item.resource }}
--namespace='metallb-system'
{% if item.name | default(False) -%} {{ item.name }} {%- endif %}
{% if item.selector | default(False) -%} --selector='{{ item.selector }}' {%- endif %}
{% if item.condition | default(False) -%} {{ item.condition }} {%- endif %}
--timeout='{{ metal_lb_available_timeout }}'
changed_when: false
with_items: "{{ groups['master'] }}"
run_once: true
- name: Wait for metallb webhook service to be running
command: >-
k3s kubectl wait -n metallb-system --for=jsonpath='{.status.phase}'=Running pods \
--selector component=controller --timeout=60s
changed_when: false
with_items: "{{ groups['master'] }}"
run_once: true
- name: Wait for metallb pods in replicasets
command: >-
k3s kubectl wait pods -n metallb-system --for condition=Ready \
--selector component=controller,app=metallb --timeout=60s
changed_when: false
with_items: "{{ groups['master'] }}"
run_once: true
- name: Wait for the metallb controller readyReplicas
command: >-
k3s kubectl wait -n metallb-system --for=jsonpath='{.status.readyReplicas}'=1 replicasets \
--selector component=controller,app=metallb --timeout=60s
changed_when: false
with_items: "{{ groups['master'] }}"
run_once: true
- name: Wait for the metallb controller fullyLabeledReplicas
command: >-
k3s kubectl wait -n metallb-system --for=jsonpath='{.status.fullyLabeledReplicas}'=1 replicasets \
--selector component=controller,app=metallb --timeout=60s
changed_when: false
with_items: "{{ groups['master'] }}"
run_once: true
- name: Wait for the metallb controller availableReplicas
command: >-
k3s kubectl wait -n metallb-system --for=jsonpath='{.status.availableReplicas}'=1 replicasets \
--selector component=controller,app=metallb --timeout=60s
changed_when: false
with_items: "{{ groups['master'] }}"
run_once: true
with_items:
- description: controller
resource: deployment
name: controller
condition: --for condition=Available=True
- description: webhook service
resource: pod
selector: component=controller
condition: --for=jsonpath='{.status.phase}'=Running
- description: pods in replica sets
resource: pod
selector: component=controller,app=metallb
condition: --for condition=Ready
- description: ready replicas of controller
resource: replicaset
selector: component=controller,app=metallb
condition: --for=jsonpath='{.status.readyReplicas}'=1
- description: fully labeled replicas of controller
resource: replicaset
selector: component=controller,app=metallb
condition: --for=jsonpath='{.status.fullyLabeledReplicas}'=1
- description: available replicas of controller
resource: replicaset
selector: component=controller,app=metallb
condition: --for=jsonpath='{.status.availableReplicas}'=1
loop_control:
label: "{{ item.description }}"
- name: Test metallb-system webhook-service endpoint
command: >-
@ -83,25 +74,23 @@
- name: Apply metallb CRs
command: >-
k3s kubectl apply -f /tmp/k3s/metallb-crs.yaml
--timeout='{{ metal_lb_available_timeout }}'
register: this
changed_when: false
with_items: "{{ groups['master'] }}"
run_once: true
until: this.rc == 0
retries: 5
- name: Test metallb-system IPAddressPool
- name: Test metallb-system resources
command: >-
k3s kubectl -n metallb-system get IPAddressPool
k3s kubectl -n metallb-system get {{ item }}
changed_when: false
with_items: "{{ groups['master'] }}"
run_once: true
with_items:
- IPAddressPool
- L2Advertisement
- name: Test metallb-system L2Advertisement
command: >-
k3s kubectl -n metallb-system get L2Advertisement
changed_when: false
with_items: "{{ groups['master'] }}"
run_once: true
- name: Remove tmp director used for manifests
- name: Remove tmp directory used for manifests
file:
path: /tmp/k3s
state: absent