Add Cilium CNI option (#435)

* Add Cilium CNI option

* Tweak version checks and add BGP resource verify

* Update metallb detection for kube-vip feat compat
sholdee 2024-01-29 19:29:13 -06:00 committed by GitHub
parent bcd37a6904
commit 6ffc25dfe5
10 changed files with 383 additions and 10 deletions

View File

@@ -13,6 +13,7 @@ jobs:
 - ipv6
 - single_node
 - calico
+- cilium
 - kube-vip
 fail-fast: false
 env:

View File

@@ -13,9 +13,27 @@ flannel_iface: "eth0"
 # uncomment calico_iface to use tigera operator/calico cni instead of flannel https://docs.tigera.io/calico/latest/about
 # calico_iface: "eth0"
 calico_ebpf: false # use eBPF dataplane instead of iptables
-calico_cidr: "10.52.0.0/16" # calico cluster pod cidr pool
 calico_tag: "v3.27.0" # calico version tag
+# uncomment cilium_iface to use cilium cni instead of flannel or calico
+# ensure v4.19.57, v5.1.16, v5.2.0 or more recent kernel
+# cilium_iface: "eth0"
+cilium_mode: "native" # native when nodes on same subnet or using bgp, else set routed
+cilium_tag: "v1.14.6" # cilium version tag
+cilium_hubble: true # enable hubble observability relay and ui
+# if using calico or cilium, you may specify the cluster pod cidr pool
+cluster_cidr: "10.52.0.0/16"
+# enable cilium bgp control plane for lb services and pod cidrs. disables metallb.
+cilium_bgp: false
+# bgp parameters for cilium cni. only active when cilium_iface is defined and cilium_bgp is true.
+cilium_bgp_my_asn: "64513"
+cilium_bgp_peer_asn: "64512"
+cilium_bgp_peer_address: "192.168.30.1"
+cilium_bgp_lb_cidr: "192.168.31.0/24" # cidr for cilium loadbalancer ipam
 # apiserver_endpoint is virtual ip-address which will be configured on each master
 apiserver_endpoint: "192.168.30.222"
@@ -26,25 +44,25 @@ k3s_token: "some-SUPER-DEDEUPER-secret-password"
 # The IP on which the node is reachable in the cluster.
 # Here, a sensible default is provided, you can still override
 # it for each of your hosts, though.
-k3s_node_ip: "{{ ansible_facts[(calico_iface | default(flannel_iface))]['ipv4']['address'] }}"
+k3s_node_ip: "{{ ansible_facts[(cilium_iface | default(calico_iface | default(flannel_iface)))]['ipv4']['address'] }}"
 # Disable the taint manually by setting: k3s_master_taint = false
 k3s_master_taint: "{{ true if groups['node'] | default([]) | length >= 1 else false }}"
 # these arguments are recommended for servers as well as agents:
 extra_args: >-
-  {{ '--flannel-iface=' + flannel_iface if calico_iface is not defined else '' }}
+  {{ '--flannel-iface=' + flannel_iface if calico_iface is not defined and cilium_iface is not defined else '' }}
   --node-ip={{ k3s_node_ip }}
 # change these to your liking, the only required are: --disable servicelb, --tls-san {{ apiserver_endpoint }}
-# the contents of the if block is also required if using calico
+# the contents of the if block is also required if using calico or cilium
 extra_server_args: >-
   {{ extra_args }}
   {{ '--node-taint node-role.kubernetes.io/master=true:NoSchedule' if k3s_master_taint else '' }}
-  {% if calico_iface is defined %}
+  {% if calico_iface is defined or cilium_iface is defined %}
   --flannel-backend=none
   --disable-network-policy
-  --cluster-cidr={{ calico_cidr | default('10.52.0.0/16') }}
+  --cluster-cidr={{ cluster_cidr | default('10.52.0.0/16') }}
   {% endif %}
   --tls-san {{ apiserver_endpoint }}
   --disable servicelb
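
Taken together, the new variables mean switching a cluster to Cilium is just an inventory change. A minimal sketch of a group_vars fragment (file path and values are illustrative, not part of the commit):

# inventory/my-cluster/group_vars/all.yml -- hypothetical fragment
cilium_iface: "eth0"          # defining this selects cilium over flannel/calico
cilium_mode: "native"         # nodes share an L2 subnet, so native routing works
cluster_cidr: "10.52.0.0/16"  # passed to k3s as --cluster-cidr
cilium_bgp: false             # keep metallb for LoadBalancer services

With cilium_iface defined, extra_server_args renders --flannel-backend=none --disable-network-policy --cluster-cidr=10.52.0.0/16, so k3s starts without its bundled CNI and waits for Cilium to be deployed.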

View File

@@ -15,6 +15,8 @@ We have these scenarios:
   Very similar to the default scenario, but uses only a single node for all cluster functionality.
 - **calico**:
   The same as single node, but uses calico cni instead of flannel.
+- **cilium**:
+  The same as single node, but uses cilium cni instead of flannel.
 - **kube-vip**
   The same as single node, but uses kube-vip as service loadbalancer instead of MetalLB

View File

@@ -0,0 +1,49 @@
---
dependency:
  name: galaxy
driver:
  name: vagrant
platforms:
  - name: control1
    box: generic/ubuntu2204
    memory: 4096
    cpus: 4
    config_options:
      # We currently can not use public-key based authentication on Ubuntu 22.04,
      # see: https://github.com/chef/bento/issues/1405
      ssh.username: "vagrant"
      ssh.password: "vagrant"
    groups:
      - k3s_cluster
      - master
    interfaces:
      - network_name: private_network
        ip: 192.168.30.63
provisioner:
  name: ansible
  env:
    ANSIBLE_VERBOSITY: 1
  playbooks:
    converge: ../resources/converge.yml
    side_effect: ../resources/reset.yml
    verify: ../resources/verify.yml
  inventory:
    links:
      group_vars: ../../inventory/sample/group_vars
scenario:
  test_sequence:
    - dependency
    - cleanup
    - destroy
    - syntax
    - create
    - prepare
    - converge
    # idempotence is not possible with the playbook in its current form.
    - verify
    # We are repurposing side_effect here to test the reset playbook.
    # This is why we do not run it before verify (which tests the cluster),
    # but after the verify step.
    - side_effect
    - cleanup
    - destroy

View File

@@ -0,0 +1,16 @@
---
- name: Apply overrides
  hosts: all
  tasks:
    - name: Override host variables
      ansible.builtin.set_fact:
        # See:
        # https://github.com/flannel-io/flannel/blob/67d603aaf45ef80f5dd39f43714fc5e6f8a637eb/Documentation/troubleshooting.md#Vagrant
        cilium_iface: eth1
        # The test VMs might be a bit slow, so we give them more time to join the cluster:
        retry_count: 45
        # Make sure that our IP ranges do not collide with those of the other scenarios
        apiserver_endpoint: "192.168.30.225"
        metal_lb_ip_range: "192.168.30.110-192.168.30.119"

View File

@@ -29,7 +29,7 @@
 - name: Deploy metallb manifest
   include_tasks: metallb.yml
   tags: metallb
-  when: kube_vip_lb_ip_range is not defined
+  when: kube_vip_lb_ip_range is not defined and (not cilium_bgp or cilium_iface is not defined)
 - name: Deploy kube-vip manifest
   include_tasks: kube-vip.yml
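
The compound guard is doing two jobs: skip MetalLB when kube-vip owns the LoadBalancer range, and skip it when Cilium's BGP control plane will advertise service IPs instead. A behaviour-equivalent sketch of the same condition as an AND-list (not how the commit writes it, shown only to unpack the logic):

- name: Deploy metallb manifest
  include_tasks: metallb.yml
  tags: metallb
  when:
    - kube_vip_lb_ip_range is not defined
    - not (cilium_bgp and cilium_iface is defined)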

View File

@@ -0,0 +1,253 @@
---
- name: Prepare Cilium CLI on first master and deploy CNI
  when: ansible_hostname == hostvars[groups[group_name_master | default('master')][0]]['ansible_hostname']
  run_once: true
  block:
    - name: Create tmp directory on first master
      file:
        path: /tmp/k3s
        state: directory
        owner: root
        group: root
        mode: 0755
    - name: Check if Cilium CLI is installed
      ansible.builtin.command: cilium version
      register: cilium_cli_installed
      failed_when: false
      changed_when: false
      ignore_errors: true
    - name: Check for Cilium CLI version in command output
      set_fact:
        installed_cli_version: >-
          {{
            cilium_cli_installed.stdout_lines
            | join(' ')
            | regex_findall('cilium-cli: (v\d+\.\d+\.\d+)')
            | first
            | default('unknown')
          }}
      when: cilium_cli_installed.rc == 0
    - name: Get latest stable Cilium CLI version file
      ansible.builtin.get_url:
        url: "https://raw.githubusercontent.com/cilium/cilium-cli/main/stable.txt"
        dest: "/tmp/k3s/cilium-cli-stable.txt"
        owner: root
        group: root
        mode: 0755
    - name: Read Cilium CLI stable version from file
      ansible.builtin.command: cat /tmp/k3s/cilium-cli-stable.txt
      register: cli_ver
      changed_when: false
    - name: Log installed Cilium CLI version
      ansible.builtin.debug:
        msg: "Installed Cilium CLI version: {{ installed_cli_version | default('Not installed') }}"
    - name: Log latest stable Cilium CLI version
      ansible.builtin.debug:
        msg: "Latest Cilium CLI version: {{ cli_ver.stdout }}"
    - name: Determine if Cilium CLI needs installation or update
      set_fact:
        cilium_cli_needs_update: >-
          {{
            cilium_cli_installed.rc != 0 or
            (cilium_cli_installed.rc == 0 and
             installed_cli_version != cli_ver.stdout)
          }}
    - name: Install or update Cilium CLI
      when: cilium_cli_needs_update
      block:
        - name: Set architecture variable
          ansible.builtin.set_fact:
            cli_arch: "{{ 'arm64' if ansible_architecture == 'aarch64' else 'amd64' }}"
        - name: Download Cilium CLI and checksum
          ansible.builtin.get_url:
            url: "{{ cilium_base_url }}/cilium-linux-{{ cli_arch }}{{ item }}"
            dest: "/tmp/k3s/cilium-linux-{{ cli_arch }}{{ item }}"
            owner: root
            group: root
            mode: 0755
          loop:
            - ".tar.gz"
            - ".tar.gz.sha256sum"
          vars:
            cilium_base_url: "https://github.com/cilium/cilium-cli/releases/download/{{ cli_ver.stdout }}"
        - name: Verify the downloaded tarball
          ansible.builtin.shell: |
            cd /tmp/k3s && sha256sum --check cilium-linux-{{ cli_arch }}.tar.gz.sha256sum
          args:
            executable: /bin/bash
          changed_when: false
        - name: Extract Cilium CLI to /usr/local/bin
          ansible.builtin.unarchive:
            src: "/tmp/k3s/cilium-linux-{{ cli_arch }}.tar.gz"
            dest: /usr/local/bin
            remote_src: true
        - name: Remove downloaded tarball and checksum file
          ansible.builtin.file:
            path: "{{ item }}"
            state: absent
          loop:
            - "/tmp/k3s/cilium-linux-{{ cli_arch }}.tar.gz"
            - "/tmp/k3s/cilium-linux-{{ cli_arch }}.tar.gz.sha256sum"
    - name: Wait for connectivity to kube VIP
      ansible.builtin.command: ping -c 1 {{ apiserver_endpoint }}
      register: ping_result
      until: ping_result.rc == 0
      retries: 21
      delay: 1
      ignore_errors: true
      changed_when: false
    - name: Fail if kube VIP not reachable
      ansible.builtin.fail:
        msg: "API endpoint {{ apiserver_endpoint }} is not reachable"
      when: ping_result.rc != 0
    - name: Test for existing Cilium install
      ansible.builtin.command: k3s kubectl -n kube-system get daemonsets cilium
      register: cilium_installed
      failed_when: false
      changed_when: false
      ignore_errors: true
    - name: Check existing Cilium install
      when: cilium_installed.rc == 0
      block:
        - name: Check Cilium version
          ansible.builtin.command: cilium version
          register: cilium_version
          failed_when: false
          changed_when: false
          ignore_errors: true
        - name: Parse installed Cilium version
          set_fact:
            installed_cilium_version: >-
              {{
                cilium_version.stdout_lines
                | join(' ')
                | regex_findall('cilium image.+(\d+\.\d+\.\d+)')
                | first
                | default('unknown')
              }}
        - name: Determine if Cilium needs update
          set_fact:
            cilium_needs_update: >-
              {{ 'v' + installed_cilium_version != cilium_tag }}
        - name: Log result
          ansible.builtin.debug:
            msg: >
              Installed Cilium version: {{ installed_cilium_version }},
              Target Cilium version: {{ cilium_tag }},
              Update needed: {{ cilium_needs_update }}
    - name: Install Cilium
      ansible.builtin.command: >-
        {% if cilium_installed.rc != 0 %}
        cilium install
        {% else %}
        cilium upgrade
        {% endif %}
        --version "{{ cilium_tag }}"
        --helm-set operator.replicas="1"
        {{ '--helm-set devices=' + cilium_iface if cilium_iface != 'auto' else '' }}
        --helm-set ipam.operator.clusterPoolIPv4PodCIDRList={{ cluster_cidr }}
        {% if cilium_mode == "native" or (cilium_bgp and cilium_exportPodCIDR != 'false') %}
        --helm-set ipv4NativeRoutingCIDR={{ cluster_cidr }}
        {% endif %}
        --helm-set k8sServiceHost={{ apiserver_endpoint }}
        --helm-set k8sServicePort="6443"
        --helm-set routingMode={{ cilium_mode | default("native") }}
        --helm-set autoDirectNodeRoutes={{ "true" if cilium_mode == "native" else "false" }}
        --helm-set kubeProxyReplacement={{ kube_proxy_replacement | default("true") }}
        --helm-set bpf.masquerade={{ enable_bpf_masquerade | default("true") }}
        --helm-set bgpControlPlane.enabled={{ cilium_bgp | default("false") }}
        --helm-set hubble.enabled={{ "true" if cilium_hubble else "false" }}
        --helm-set hubble.relay.enabled={{ "true" if cilium_hubble else "false" }}
        --helm-set hubble.ui.enabled={{ "true" if cilium_hubble else "false" }}
        {% if kube_proxy_replacement is not false %}
        --helm-set bpf.loadBalancer.algorithm={{ bpf_lb_algorithm | default("maglev") }}
        --helm-set bpf.loadBalancer.mode={{ bpf_lb_mode | default("hybrid") }}
        {% endif %}
      environment:
        KUBECONFIG: /home/{{ ansible_user }}/.kube/config
      register: cilium_install_result
      changed_when: cilium_install_result.rc == 0
      when: cilium_installed.rc != 0 or cilium_needs_update
    - name: Wait for Cilium resources
      command: >-
        {% if item.type == 'daemonset' %}
        k3s kubectl wait pods
        --namespace=kube-system
        --selector='k8s-app=cilium'
        --for=condition=Ready
        {% else %}
        k3s kubectl wait {{ item.type }}/{{ item.name }}
        --namespace=kube-system
        --for=condition=Available
        {% endif %}
        --timeout=7s
      register: cr_result
      changed_when: false
      until: cr_result is succeeded
      retries: 30
      delay: 7
      with_items:
        - {name: cilium-operator, type: deployment}
        - {name: cilium, type: daemonset, selector: 'k8s-app=cilium'}
        - {name: hubble-relay, type: deployment, check_hubble: true}
        - {name: hubble-ui, type: deployment, check_hubble: true}
      loop_control:
        label: "{{ item.type }}/{{ item.name }}"
      when: >-
        not item.check_hubble | default(false) or (item.check_hubble | default(false) and cilium_hubble)
    - name: Configure Cilium BGP
      when: cilium_bgp
      block:
        - name: Copy BGP manifests to first master
          ansible.builtin.template:
            src: "cilium.crs.j2"
            dest: /tmp/k3s/cilium-bgp.yaml
            owner: root
            group: root
            mode: 0755
        - name: Apply BGP manifests
          ansible.builtin.command:
            cmd: kubectl apply -f /tmp/k3s/cilium-bgp.yaml
          register: apply_cr
          changed_when: "'configured' in apply_cr.stdout or 'created' in apply_cr.stdout"
          failed_when: "'is invalid' in apply_cr.stderr"
          ignore_errors: true
        - name: Print error message if BGP manifests application fails
          ansible.builtin.debug:
            msg: "{{ apply_cr.stderr }}"
          when: "'is invalid' in apply_cr.stderr"
        - name: Test for BGP config resources
          ansible.builtin.command: "{{ item }}"
          loop:
            - k3s kubectl get CiliumBGPPeeringPolicy.cilium.io
            - k3s kubectl get CiliumLoadBalancerIPPool.cilium.io
          changed_when: false
          loop_control:
            label: "{{ item }}"
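
The wait loop above polls kubectl wait until the operator, the agent daemonset, and (optionally) Hubble come up. The Cilium CLI installed earlier offers a stronger end-to-end check; a sketch of an optional extra task (not in the commit) using the CLI's own readiness flag:

- name: Verify Cilium health with the CLI
  ansible.builtin.command: cilium status --wait
  environment:
    KUBECONFIG: /home/{{ ansible_user }}/.kube/config
  register: cilium_status
  changed_when: false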

View File

@@ -2,12 +2,17 @@
 - name: Deploy calico
   include_tasks: calico.yml
   tags: calico
-  when: calico_iface is defined
+  when: calico_iface is defined and cilium_iface is not defined
+- name: Deploy cilium
+  include_tasks: cilium.yml
+  tags: cilium
+  when: cilium_iface is defined
 - name: Deploy metallb pool
   include_tasks: metallb.yml
   tags: metallb
-  when: kube_vip_lb_ip_range is not defined
+  when: kube_vip_lb_ip_range is not defined and (not cilium_bgp or cilium_iface is not defined)
 - name: Remove tmp directory used for manifests
   file:

View File

@@ -10,7 +10,7 @@ spec:
   # Note: The ipPools section cannot be modified post-install.
   ipPools:
     - blockSize: {{ calico_blockSize | default('26') }}
-      cidr: {{ calico_cidr | default('10.52.0.0/16') }}
+      cidr: {{ cluster_cidr | default('10.52.0.0/16') }}
       encapsulation: {{ calico_encapsulation | default('VXLANCrossSubnet') }}
       natOutgoing: {{ calico_natOutgoing | default('Enabled') }}
       nodeSelector: {{ calico_nodeSelector | default('all()') }}
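
Note the rename this hunk completes: calico_cidr is gone and both CNIs now read cluster_cidr. An existing Calico inventory would need a one-line rename, roughly:

# before this commit
calico_cidr: "10.52.0.0/16"
# after this commit
cluster_cidr: "10.52.0.0/16"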

View File

@@ -0,0 +1,29 @@
apiVersion: "cilium.io/v2alpha1"
kind: CiliumBGPPeeringPolicy
metadata:
  name: 01-bgp-peering-policy
spec: # CiliumBGPPeeringPolicySpec
  virtualRouters: # []CiliumBGPVirtualRouter
    - localASN: {{ cilium_bgp_my_asn }}
      exportPodCIDR: {{ cilium_exportPodCIDR | default('true') }}
      neighbors: # []CiliumBGPNeighbor
        - peerAddress: '{{ cilium_bgp_peer_address + "/32" }}'
          peerASN: {{ cilium_bgp_peer_asn }}
          eBGPMultihopTTL: 10
          connectRetryTimeSeconds: 120
          holdTimeSeconds: 90
          keepAliveTimeSeconds: 30
          gracefulRestart:
            enabled: true
            restartTimeSeconds: 120
      serviceSelector:
        matchExpressions:
          - {key: somekey, operator: NotIn, values: ['never-used-value']}
---
apiVersion: "cilium.io/v2alpha1"
kind: CiliumLoadBalancerIPPool
metadata:
  name: "01-lb-pool"
spec:
  cidrs:
    - cidr: "{{ cilium_bgp_lb_cidr }}"
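
Because the serviceSelector above uses the NotIn/never-used-value idiom, it matches every Service, so any LoadBalancer Service receives an address from cilium_bgp_lb_cidr and is advertised to the BGP peer. A hypothetical Service to exercise the pool (name, selector, and ports are illustrative):

apiVersion: v1
kind: Service
metadata:
  name: bgp-lb-demo
spec:
  type: LoadBalancer   # address comes from the CiliumLoadBalancerIPPool above
  selector:
    app: bgp-lb-demo
  ports:
    - port: 80
      targetPort: 8080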