Commit 7cc70ad6 authored by Nicholas Malcolm, committed by Derek Nola
Browse files

HA fixes and code tweaks



- Reboot playbook updated for HA testing
- Small adjustment made after testing HA with playbook
- Fix reset playbook failing in some cases

Signed-off-by: Derek Nola <derek.nola@suse.com>
parent cd725780
Loading
Loading
Loading
Loading
+12 −2
Original line number Diff line number Diff line
---
# Reboots the hosts in the `server` group one at a time (serial: 1).
# Each reboot is only considered complete once `kubectl get nodes`
# succeeds on the rebooted host.
- name: Reboot cluster servers staggered
  hosts: server
  become: true
  gather_facts: true
  serial: 1
  tasks:
    - name: Reboot
      ansible.builtin.reboot:
        # Run on the host after it comes back up to confirm readiness.
        test_command: kubectl get nodes

- name: Reboot cluster agents staggered
  hosts: agent
  become: true
  gather_facts: true
  serial: 1
+1 −1
Original line number Diff line number Diff line
@@ -7,7 +7,7 @@ After=network-online.target
Type=notify
ExecStartPre=-/sbin/modprobe br_netfilter
ExecStartPre=-/sbin/modprobe overlay
# Single start command for the agent. extra_agent_args is optional, so it
# falls back to an empty string when the variable is not defined.
ExecStart=/usr/local/bin/k3s agent --data-dir {{ k3s_server_location }} --server https://{{ api_endpoint }}:{{ api_port }} --token {{ hostvars[groups['server'][0]]['token'] }} {{ extra_agent_args | default("") }}
KillMode=process
Delegate=yes
# Having non-zero Limit*s causes performance problems due to accounting overhead
+15 −23
Original line number Diff line number Diff line
---
# Best-effort cleanup of the k3s-init unit's failed state; the result is
# never reported as failed or changed.
- name: Clean previous runs of k3s-init
  ansible.builtin.command:
    cmd: systemctl reset-failed k3s-init
  changed_when: false
  failed_when: false

- name: Init first server node
  block:
    # Runs only when the `server` group has more than one host: starts a
    # transient k3s-init unit with --cluster-init. The command is skipped
    # when the file given in `creates` already exists.
    - name: Start temporary service for HA cluster
      ansible.builtin.command:
        cmd: >
          systemd-run -p RestartSec=2 -p Restart=on-failure --unit=k3s-init k3s server
          --cluster-init --tls-san {{ api_endpoint }} --data-dir {{ k3s_server_location }}
          {{ extra_server_args | default('') }}
        creates: "{{ systemd_dir }}/k3s-init.service"
      when: groups['server'] | length > 1

    # Runs only when the `server` group has exactly one host: starts a
    # transient k3s-init unit without --cluster-init. The command is skipped
    # when the file given in `creates` already exists.
    - name: Start temporary service for single server cluster
      ansible.builtin.command:
        # Continuation lines share one indent so the folded scalar joins
        # them with single spaces instead of keeping line breaks.
        cmd: >
          systemd-run -p RestartSec=2 -p Restart=on-failure --unit=k3s-init k3s server
          --tls-san {{ api_endpoint }} --data-dir {{ k3s_server_location }}
          {{ extra_server_args | default('') }}
        creates: "{{ systemd_dir }}/k3s-init.service"
      when: groups['server'] | length == 1

@@ -80,12 +75,12 @@
        flat: true
  when: ansible_hostname == groups['server'][0]

# Runs on every host whose hostname is not the first entry of the `server`
# group. The node joins via https://<api_endpoint>:<api_port> using the
# token read from the first server's hostvars. The command is skipped when
# the file given in `creates` already exists.
- name: Init additional server nodes if any
  ansible.builtin.command:
    cmd: >
      systemd-run -p RestartSec=2 -p Restart=on-failure --unit=k3s-init k3s server
      --token "{{ hostvars[groups['server'][0]]['token'] }}"
      --server https://{{ api_endpoint }}:{{ api_port }}
      --tls-san {{ api_endpoint }} --data-dir {{ k3s_server_location }}
      {{ extra_server_args | default('') }}
    creates: "{{ systemd_dir }}/k3s-init.service"
  when: ansible_hostname != groups['server'][0]

@@ -107,13 +102,13 @@
      failed_when: false

# Renders the k3s-server unit template into the systemd directory and
# records the result in `k3s_service`.
- name: Copy K3s service file
  ansible.builtin.template:
    src: "k3s-server.service.j2"
    dest: "{{ systemd_dir }}/k3s-server.service"
    owner: root
    group: root
    # Quoted so YAML keeps the octal mode as a string, not an integer.
    mode: "0644"
  register: k3s_service

- name: Enable and check K3s service
  ansible.builtin.systemd:
@@ -122,14 +117,11 @@
    state: restarted
    enabled: true

# Creates /usr/local/bin/kubectl and /usr/local/bin/crictl as symlinks to
# the k3s binary, one link per loop item.
- name: Create symlinks
  ansible.builtin.file:
    src: /usr/local/bin/k3s
    dest: "/usr/local/bin/{{ item }}"
    state: link
  with_items:
    - kubectl
    - crictl
+1 −1
Original line number Diff line number Diff line
@@ -7,7 +7,7 @@ After=network-online.target
Type=notify
ExecStartPre=-/sbin/modprobe br_netfilter
ExecStartPre=-/sbin/modprobe overlay
# Single start command for the server. extra_server_args is optional, so it
# falls back to an empty string when the variable is not defined.
ExecStart=/usr/local/bin/k3s server --data-dir {{ k3s_server_location }} {{ extra_server_args | default("") }}
KillMode=process
Delegate=yes
# Having non-zero Limit*s causes performance problems due to accounting overhead
+9 −2
Original line number Diff line number Diff line
---
# The systemd module has no reset-failed support, so systemctl is invoked
# directly. The result is never reported as failed or changed.
- name: Clean previous failed runs of k3s-init
  ansible.builtin.command:
    cmd: systemctl reset-failed k3s-init
  changed_when: false
  failed_when: false

- name: Disable services
  ansible.builtin.systemd:
    name: "{{ item }}"
@@ -6,12 +12,13 @@
    enabled: false
  failed_when: false
  with_items:
    - k3s-init
    - k3s-server
    - k3s-agent

# Sends SIGKILL to processes whose full command line matches the k3s
# containerd-shim-runc path pattern.
- name: Kill container shim
  ansible.builtin.command: pkill -9 -f "k3s/data/[^/]+/bin/containerd-shim-runc"
  register: pkill_containerd_shim_runc
  # Report "changed" only when pkill exits with status 0.
  changed_when: "pkill_containerd_shim_runc.rc == 0"
  # A non-zero exit from pkill must not fail the play.
  failed_when: false

@@ -25,7 +32,7 @@
  loop_control:
    loop_var: mounted_fs

- name: Remove service files, binaries and data
- name: Remove service files, binaries, and data
  ansible.builtin.file:
    name: "{{ item }}"
    state: absent