diff --git a/.ansible-lint b/.ansible-lint index 01d5c43..940e20e 100644 --- a/.ansible-lint +++ b/.ansible-lint @@ -9,8 +9,9 @@ exclude_paths: # The "converge" and "reset" playbooks use import_playbook in # conjunction with the "env" lookup plugin, which lets the # syntax check of ansible-lint fail. - - '**/converge.yml' - - '**/reset.yml' + - 'molecule/**/converge.yml' + - 'molecule/**/prepare.yml' + - 'molecule/**/reset.yml' skip_list: - 'fqcn-builtins' diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index ba15fe8..f0b4662 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -15,6 +15,7 @@ jobs: matrix: scenario: - default + - ipv6 - single_node fail-fast: false env: @@ -25,16 +26,19 @@ jobs: uses: actions/checkout@2541b1294d2704b0964813337f33b291d3f8596b # 3.0.2 - name: Configure VirtualBox - run: >- - sudo mkdir -p /etc/vbox && - echo "* 192.168.30.0/24" | sudo tee -a /etc/vbox/networks.conf > /dev/null + run: |- + sudo mkdir -p /etc/vbox + cat <<EOF | sudo tee -a /etc/vbox/networks.conf > /dev/null + * 192.168.30.0/24 + * fdad:bad:ba55::/64 + EOF - name: Cache Vagrant boxes uses: actions/cache@fd5de65bc895cf536527842281bea11763fefd77 # 3.0.8 with: path: | ~/.vagrant.d/boxes - key: vagrant-boxes-${{ hashFiles('**/Vagrantfile') }} + key: vagrant-boxes-${{ hashFiles('**/molecule.yml') }} restore-keys: | vagrant-boxes diff --git a/README.md b/README.md index 3ce05a1..88f174f 100644 --- a/README.md +++ b/README.md @@ -29,6 +29,7 @@ on processor architecture: ## ✅ System requirements - Deployment environment must have Ansible 2.4.0+. If you need a quick primer on Ansible [you can check out my docs and setting up Ansible](https://docs.technotim.live/posts/ansible-automation/). + Furthermore, the [`netaddr` package](https://pypi.org/project/netaddr/) must be available to Ansible. If you have installed Ansible via apt, this is already taken care of. If you have installed Ansible via `pip`, make sure to install `netaddr` into the respective virtual environment. 
- `server` and `agent` nodes should have passwordless SSH access, if not you can supply arguments to provide credentials `--ask-pass --ask-become-pass` to each command. ## 🚀 Getting Started diff --git a/collections/requirements.yml b/collections/requirements.yml index 1a8f389..0d176b4 100644 --- a/collections/requirements.yml +++ b/collections/requirements.yml @@ -1,5 +1,6 @@ --- collections: + - name: ansible.utils - name: community.general - name: ansible.posix - name: kubernetes.core diff --git a/example/service.yml b/example/service.yml index 2ba6c7d..a309465 100644 --- a/example/service.yml +++ b/example/service.yml @@ -4,6 +4,7 @@ kind: Service metadata: name: nginx spec: + ipFamilyPolicy: PreferDualStack selector: app: nginx ports: diff --git a/molecule/README.md b/molecule/README.md index e14e902..25cfdad 100644 --- a/molecule/README.md +++ b/molecule/README.md @@ -8,6 +8,9 @@ We have these scenarios: - **default**: A 3 control + 2 worker node cluster based very closely on the [sample inventory](../inventory/sample/). +- **ipv6**: + A cluster that is externally accessible via IPv6 ([more information](ipv6/README.md)) + To save a bit of test time, this cluster is _not_ highly available, it consists of only one control and one worker node. - **single_node**: Very similar to the default scenario, but uses only a single node for all cluster functionality. @@ -32,6 +35,7 @@ To set the subnet up for use with VirtualBox, please make sure that `/etc/vbox/n ``` * 192.168.30.0/24 +* fdad:bad:ba55::/64 ``` ### Install Python dependencies diff --git a/molecule/ipv6/README.md b/molecule/ipv6/README.md new file mode 100644 index 0000000..eaaeeab --- /dev/null +++ b/molecule/ipv6/README.md @@ -0,0 +1,35 @@ +# Sample IPv6 configuration for `k3s-ansible` + +This scenario contains a cluster configuration which is _IPv6 first_, but still supports dual-stack networking with IPv4 for most things. +This means: + +- The API server VIP is an IPv6 address. 
+- The MetalLB pool consists of both IPv4 and IPv6 addresses. +- Nodes as well as cluster-internal resources (pods and services) are accessible via IPv4 as well as IPv6. + +## Network design + +All IPv6 addresses used in this scenario share a single `/48` prefix: `fdad:bad:ba55`. +The following subnets are used: + +- `fdad:bad:ba55:`**`0`**`::/64` is the subnet which contains the cluster components meant for external access. + That includes: + + - The VIP for the Kubernetes API server: `fdad:bad:ba55::333` + - Services load-balanced by MetalLB: `fdad:bad:ba55::1b:0/112` + - Cluster nodes: `fdad:bad:ba55::de:0/112` + - The host executing Vagrant: `fdad:bad:ba55::1` + + In a home lab setup, this might be your LAN. + +- `fdad:bad:ba55:`**`4200`**`::/56` is used internally by the cluster for pods. + +- `fdad:bad:ba55:`**`4300`**`::/108` is used internally by the cluster for services. + +IPv4 networking is also available: + +- The nodes have addresses inside `192.168.123.0/24`. + MetalLB also has a bit of address space in this range: `192.168.123.80-192.168.123.90` +- For pods and services, the k3s defaults (`10.42.0.0/16` and `10.43.0.0/16`) are used. + +Note that the host running Vagrant is not part of any of these IPv4 networks. 
diff --git a/molecule/ipv6/host_vars/control1.yml b/molecule/ipv6/host_vars/control1.yml new file mode 100644 index 0000000..aa675db --- /dev/null +++ b/molecule/ipv6/host_vars/control1.yml @@ -0,0 +1,3 @@ +--- +node_ipv4: 192.168.123.11 +node_ipv6: fdad:bad:ba55::de:11 diff --git a/molecule/ipv6/host_vars/node1.yml b/molecule/ipv6/host_vars/node1.yml new file mode 100644 index 0000000..57ba927 --- /dev/null +++ b/molecule/ipv6/host_vars/node1.yml @@ -0,0 +1,3 @@ +--- +node_ipv4: 192.168.123.21 +node_ipv6: fdad:bad:ba55::de:21 diff --git a/molecule/ipv6/molecule.yml b/molecule/ipv6/molecule.yml new file mode 100644 index 0000000..760f944 --- /dev/null +++ b/molecule/ipv6/molecule.yml @@ -0,0 +1,57 @@ +--- +dependency: + name: galaxy +driver: + name: vagrant +platforms: + - &control + name: control1 + box: generic/ubuntu2204 + memory: 2048 + cpus: 2 + config_options: + # We currently can not use public-key based authentication on Ubuntu 22.04, + # see: https://github.com/chef/bento/issues/1405 + ssh.username: "vagrant" + ssh.password: "vagrant" + groups: + - k3s_cluster + - master + interfaces: + - network_name: private_network + ip: fdad:bad:ba55::de:11 + - <<: *control + name: node1 + groups: + - k3s_cluster + - node + interfaces: + - network_name: private_network + ip: fdad:bad:ba55::de:21 +provisioner: + name: ansible + playbooks: + converge: ../resources/converge.yml + side_effect: ../resources/reset.yml + verify: ../resources/verify.yml + inventory: + links: + group_vars: ../../inventory/sample/group_vars +scenario: + test_sequence: + - dependency + - lint + - cleanup + - destroy + - syntax + - create + - prepare + - converge + # idempotence is not possible with the playbook in its current form. + - verify + # We are repurposing side_effect here to test the reset playbook. + # This is why we do not run it before verify (which tests the cluster), + # but after the verify step. 
+ - side_effect + - cleanup + - destroy diff --git a/molecule/ipv6/overrides.yml b/molecule/ipv6/overrides.yml new file mode 100644 index 0000000..ae1717a --- /dev/null +++ b/molecule/ipv6/overrides.yml @@ -0,0 +1,43 @@ +--- +- name: Apply overrides + hosts: all + tasks: + - name: Override host variables (1/2) + ansible.builtin.set_fact: + # See: https://github.com/flannel-io/flannel/blob/67d603aaf45ef80f5dd39f43714fc5e6f8a637eb/Documentation/troubleshooting.md#Vagrant # noqa yaml[line-length] + flannel_iface: eth1 + + # The test VMs might be a bit slow, so we give them more time to join the cluster: + retry_count: 45 + + # IPv6 configuration + # ###################################################################### + + # The API server will be reachable on IPv6 only + apiserver_endpoint: fdad:bad:ba55::333 + + # We give MetalLB address space for both IPv4 and IPv6 + metal_lb_ip_range: + - fdad:bad:ba55::1b:0/112 + - 192.168.123.80-192.168.123.90 + + # k3s_node_ip is by default set to the IPv4 address of flannel_iface. + # We want IPv6 addresses here of course, so we just specify them + # manually below. + k3s_node_ip: "{{ node_ipv4 }},{{ node_ipv6 }}" + + - name: Override host variables (2/2) + # Since "extra_args" depends on "k3s_node_ip" and "flannel_iface" we have + # to set this AFTER overriding both of them. + ansible.builtin.set_fact: + # A few extra server args are necessary: + # - the network policy needs to be disabled. + # - we need to manually specify the subnets for services and pods, as + # the default has IPv4 ranges only. 
+ extra_server_args: >- + {{ extra_args }} + --disable servicelb + --disable traefik + --disable-network-policy + --cluster-cidr=10.42.0.0/16,fdad:bad:ba55:4200::/56 + --service-cidr=10.43.0.0/16,fdad:bad:ba55:4300::/108 diff --git a/molecule/ipv6/prepare.yml b/molecule/ipv6/prepare.yml new file mode 100644 index 0000000..cea50d8 --- /dev/null +++ b/molecule/ipv6/prepare.yml @@ -0,0 +1,51 @@ +--- +- name: Apply overrides + ansible.builtin.import_playbook: >- + {{ lookup("ansible.builtin.env", "MOLECULE_SCENARIO_DIRECTORY") }}/overrides.yml + +- name: Configure dual-stack networking + hosts: all + become: true + + # Unfortunately, as of 2022-09, Vagrant does not support the configuration + # of both IPv4 and IPv6 addresses for a single network adapter. So we have + # to configure that ourselves. + # Moreover, we have to explicitly enable IPv6 for the loopback interface. + + tasks: + - name: Enable IPv6 for network interfaces + ansible.posix.sysctl: + name: net.ipv6.conf.{{ item }}.disable_ipv6 + value: "0" + with_items: + - all + - default + - lo + + - name: Disable duplicate address detection + # Duplicate address detection did repeatedly fail within the virtual + # network. But since this setup does not use SLAAC anyway, we can safely + # disable it. + ansible.posix.sysctl: + name: net.ipv6.conf.{{ item }}.accept_dad + value: "0" + with_items: + - "{{ flannel_iface }}" + + - name: Write IPv4 configuration + ansible.builtin.template: + src: 55-flannel-ipv4.yaml.j2 + dest: /etc/netplan/55-flannel-ipv4.yaml + owner: root + group: root + mode: "0644" + register: netplan_template + + - name: Apply netplan configuration + # Conceptually, this should be a handler rather than a task. + # However, we are currently not in a role context - creating + # one just for this seemed overkill. 
+ when: netplan_template.changed + ansible.builtin.command: + cmd: netplan apply + changed_when: true diff --git a/molecule/ipv6/templates/55-flannel-ipv4.yaml.j2 b/molecule/ipv6/templates/55-flannel-ipv4.yaml.j2 new file mode 100644 index 0000000..6f68777 --- /dev/null +++ b/molecule/ipv6/templates/55-flannel-ipv4.yaml.j2 @@ -0,0 +1,8 @@ +--- +network: + version: 2 + renderer: networkd + ethernets: + {{ flannel_iface }}: + addresses: + - {{ node_ipv4 }}/24 diff --git a/molecule/resources/verify/from_outside/tasks/test/deploy-example.yml b/molecule/resources/verify/from_outside/tasks/test/deploy-example.yml index 5c8b148..d150c44 100644 --- a/molecule/resources/verify/from_outside/tasks/test/deploy-example.yml +++ b/molecule/resources/verify/from_outside/tasks/test/deploy-example.yml @@ -34,7 +34,7 @@ - name: Assert that the nginx welcome page is available ansible.builtin.uri: - url: http://{{ ip }}:{{ port }}/ + url: http://{{ ip | ansible.utils.ipwrap }}:{{ port }}/ return_content: yes register: result failed_when: "'Welcome to nginx!' 
not in result.content" diff --git a/requirements.txt b/requirements.txt index b849848..9fe5fcb 100644 --- a/requirements.txt +++ b/requirements.txt @@ -4,4 +4,5 @@ jsonpatch kubernetes>=12.0.0 molecule-vagrant>=1.0.0 molecule>=4.0.1 +netaddr>=0.8.0 pyyaml>=3.11 diff --git a/roles/k3s/master/tasks/main.yml b/roles/k3s/master/tasks/main.yml index 6668194..7e6ecf1 100644 --- a/roles/k3s/master/tasks/main.yml +++ b/roles/k3s/master/tasks/main.yml @@ -152,10 +152,10 @@ owner: "{{ ansible_user }}" mode: "u=rw,g=,o=" -- name: Configure kubectl cluster to https://{{ apiserver_endpoint }}:6443 +- name: Configure kubectl cluster to https://{{ apiserver_endpoint | ansible.utils.ipwrap }}:6443 command: >- k3s kubectl config set-cluster default - --server=https://{{ apiserver_endpoint }}:6443 + --server=https://{{ apiserver_endpoint | ansible.utils.ipwrap }}:6443 --kubeconfig ~{{ ansible_user }}/.kube/config changed_when: true diff --git a/roles/k3s/master/templates/vip.yaml.j2 b/roles/k3s/master/templates/vip.yaml.j2 index 6f2db68..2629398 100644 --- a/roles/k3s/master/templates/vip.yaml.j2 +++ b/roles/k3s/master/templates/vip.yaml.j2 @@ -33,7 +33,7 @@ spec: - name: vip_interface value: {{ flannel_iface }} - name: vip_cidr - value: "32" + value: "{{ apiserver_endpoint | ansible.utils.ipsubnet | ansible.utils.ipaddr('prefix') }}" - name: cp_enable value: "true" - name: cp_namespace diff --git a/roles/k3s/node/templates/k3s.service.j2 b/roles/k3s/node/templates/k3s.service.j2 index 01baa64..67abadb 100644 --- a/roles/k3s/node/templates/k3s.service.j2 +++ b/roles/k3s/node/templates/k3s.service.j2 @@ -7,7 +7,7 @@ After=network-online.target Type=notify ExecStartPre=-/sbin/modprobe br_netfilter ExecStartPre=-/sbin/modprobe overlay -ExecStart=/usr/local/bin/k3s agent --server https://{{ apiserver_endpoint }}:6443 --token {{ hostvars[groups['master'][0]]['token'] | default(k3s_token) }} {{ extra_agent_args | default("") }} +ExecStart=/usr/local/bin/k3s agent --server https://{{ 
apiserver_endpoint | ansible.utils.ipwrap }}:6443 --token {{ hostvars[groups['master'][0]]['token'] | default(k3s_token) }} {{ extra_agent_args | default("") }} KillMode=process Delegate=yes # Having non-zero Limit*s causes performance problems due to accounting overhead diff --git a/roles/k3s/post/templates/metallb.crs.j2 b/roles/k3s/post/templates/metallb.crs.j2 index 6b9d93d..6b6b0af 100644 --- a/roles/k3s/post/templates/metallb.crs.j2 +++ b/roles/k3s/post/templates/metallb.crs.j2 @@ -5,7 +5,14 @@ metadata: namespace: metallb-system spec: addresses: - - {{ metal_lb_ip_range }} +{% if metal_lb_ip_range is string %} +{# metal_lb_ip_range was used in the legacy way: single string instead of a list #} +{# => transform to list with single element #} +{% set metal_lb_ip_range = [metal_lb_ip_range] %} +{% endif %} +{% for range in metal_lb_ip_range %} + - {{ range }} +{% endfor %} --- apiVersion: metallb.io/v1beta1 kind: L2Advertisement diff --git a/roles/prereq/tasks/main.yml b/roles/prereq/tasks/main.yml index a6ddeb5..dcab613 100644 --- a/roles/prereq/tasks/main.yml +++ b/roles/prereq/tasks/main.yml @@ -23,6 +23,13 @@ state: present reload: yes +- name: Enable IPv6 router advertisements + sysctl: + name: net.ipv6.conf.all.accept_ra + value: "2" + state: present + reload: yes + - name: Add br_netfilter to /etc/modules-load.d/ copy: content: "br_netfilter" diff --git a/roles/reset/tasks/main.yml b/roles/reset/tasks/main.yml index 19c3e9b..ae0388c 100644 --- a/roles/reset/tasks/main.yml +++ b/roles/reset/tasks/main.yml @@ -44,7 +44,6 @@ - /var/lib/kubelet - /var/lib/rancher/k3s - /var/lib/rancher/ - - /usr/local/bin/k3s - /var/lib/cni/ - name: Reload daemon_reload