This commit is contained in:
havelight-ee
2023-05-30 14:44:26 +09:00
parent 9a3174deef
commit 4c32a7239d
2598 changed files with 164595 additions and 487 deletions

View File

@@ -0,0 +1,23 @@
---
# Verify that every control-plane node answers on the Kubernetes API port
# and, when kube_version is defined, that the reported gitVersion matches it.
- hosts: kube_control_plane
  tasks:
    - name: Check the API servers are responding
      uri:
        url: "https://{{ access_ip | default(ansible_default_ipv4.address) }}:{{ kube_apiserver_port | default(6443) }}/version"
        validate_certs: false  # test clusters serve self-signed certificates
        status_code: 200
      register: apiserver_response
      retries: 12
      delay: 5
      until: apiserver_response is success

    - name: Show API server /version response
      debug:
        msg: "{{ apiserver_response.json }}"

    - name: Check API servers version
      assert:
        that:
          - apiserver_response.json.gitVersion == kube_version
        fail_msg: "apiserver version different than expected {{ kube_version }}"
      when: kube_version is defined

View File

@@ -0,0 +1,35 @@
---
# Verify that every node registered in the cluster reaches the Ready state.
- hosts: kube_control_plane[0]
  tasks:
    - name: Force binaries directory for Flatcar Container Linux by Kinvolk
      set_fact:
        bin_dir: "/opt/bin"
      when: ansible_os_family in ["Flatcar", "Flatcar Container Linux by Kinvolk"]

    - name: Force binaries directory for other hosts
      set_fact:
        bin_dir: "/usr/local/bin"
      when: not ansible_os_family in ["Flatcar", "Flatcar Container Linux by Kinvolk"]

    - name: Collect a cluster dump for debugging
      import_role:
        name: cluster-dump

    - name: Check kubectl output
      command: "{{ bin_dir }}/kubectl get nodes"
      changed_when: false
      register: get_nodes
      no_log: true

    - name: Show node list
      debug:
        msg: "{{ get_nodes.stdout.split('\n') }}"

    - name: Check that all nodes are running and ready
      command: "{{ bin_dir }}/kubectl get nodes --no-headers -o yaml"
      changed_when: false
      register: get_nodes_yaml
      until:
        # Check that all nodes are Status=Ready
        - '(get_nodes_yaml.stdout | from_yaml)["items"] | map(attribute = "status.conditions") | map("items2dict", key_name="type", value_name="status") | map(attribute="Ready") | list | min'
      retries: 30
      delay: 10

View File

@@ -0,0 +1,49 @@
---
# Verify that every pod in every namespace reaches Running and Ready.
- hosts: kube_control_plane[0]
  tasks:
    - name: Force binaries directory for Flatcar Container Linux by Kinvolk
      set_fact:
        bin_dir: "/opt/bin"
      when: ansible_os_family in ["Flatcar", "Flatcar Container Linux by Kinvolk"]

    - name: Force binaries directory for other hosts
      set_fact:
        bin_dir: "/usr/local/bin"
      when: not ansible_os_family in ["Flatcar", "Flatcar Container Linux by Kinvolk"]

    - name: Collect a cluster dump for debugging
      import_role:
        name: cluster-dump

    - name: Check kubectl output
      command: "{{ bin_dir }}/kubectl get pods --all-namespaces -owide"
      changed_when: false
      register: get_pods
      no_log: true

    - name: Show pod list
      debug:
        msg: "{{ get_pods.stdout.split('\n') }}"

    - name: Check that all pods are running and ready
      command: "{{ bin_dir }}/kubectl get pods --all-namespaces --no-headers -o yaml"
      changed_when: false
      register: run_pods_log
      until:
        # Check that all pods are running
        - '(run_pods_log.stdout | from_yaml)["items"] | map(attribute = "status.phase") | unique | list == ["Running"]'
        # Check that all pods are ready
        - '(run_pods_log.stdout | from_yaml)["items"] | map(attribute = "status.containerStatuses") | map("map", attribute = "ready") | map("min") | min'
      retries: 30
      delay: 10
      failed_when: false
      no_log: true

    # Re-fetch the pod list after the wait loop so a failure shows the
    # final state, not the state before the retries started.
    - name: Check kubectl output after wait
      command: "{{ bin_dir }}/kubectl get pods --all-namespaces -owide"
      changed_when: false
      register: get_pods
      no_log: true

    - name: Fail if any pod did not become running and ready
      debug:
        msg: "{{ get_pods.stdout.split('\n') }}"
      failed_when: not run_pods_log is success

View File

@@ -0,0 +1,171 @@
---
# Deploy two agnhost pods in a dedicated "test" namespace and verify pod
# networking: pod IPs fall inside the cluster pod subnet and pods can curl
# each other (cluster-network and hostNetwork pods checked separately).
- hosts: kube_control_plane[0]
  vars:
    test_image_repo: registry.k8s.io/e2e-test-images/agnhost
    test_image_tag: "2.40"
  tasks:
    - name: Force binaries directory for Flatcar Container Linux by Kinvolk
      set_fact:
        bin_dir: "/opt/bin"
      when: ansible_os_family in ["Flatcar", "Flatcar Container Linux by Kinvolk"]

    - name: Force binaries directory for other hosts
      set_fact:
        bin_dir: "/usr/local/bin"
      when: not ansible_os_family in ["Flatcar", "Flatcar Container Linux by Kinvolk"]

    - name: Approve kubelet serving certificates
      block:
        - name: Get certificate signing requests
          command: "{{ bin_dir }}/kubectl get csr -o name"
          register: get_csr
          changed_when: false

        - name: Check there are csrs
          assert:
            that: get_csr.stdout_lines | length > 0
            fail_msg: kubelet_rotate_server_certificates is {{ kubelet_rotate_server_certificates }} but no csr's found

        - name: Approve certificates
          command: "{{ bin_dir }}/kubectl certificate approve {{ get_csr.stdout_lines | join(' ') }}"
          register: certificate_approve
          when: get_csr.stdout_lines | length > 0
          changed_when: certificate_approve.stdout

        - name: Show certificate approval output
          debug:
            msg: "{{ certificate_approve.stdout.split('\n') }}"
      when: kubelet_rotate_server_certificates | default(false)

    - name: Create test namespace
      command: "{{ bin_dir }}/kubectl create namespace test"
      changed_when: false

    - name: Wait for API token of test namespace
      shell: "set -o pipefail && {{ bin_dir }}/kubectl describe serviceaccounts default --namespace test | grep Tokens | awk '{print $2}'"
      args:
        executable: /bin/bash
      changed_when: false
      register: default_token
      until: default_token.stdout | length > 0
      retries: 5
      delay: 5

    - name: Run 2 agnhost pods in test ns
      shell:
        cmd: |
          cat <<EOF | {{ bin_dir }}/kubectl apply -f -
          apiVersion: v1
          kind: Pod
          metadata:
            name: {{ item }}
            namespace: test
          spec:
            containers:
            - name: agnhost
              image: {{ test_image_repo }}:{{ test_image_tag }}
              command: ['/agnhost', 'netexec', '--http-port=8080']
              securityContext:
                allowPrivilegeEscalation: false
                capabilities:
                  drop: ['ALL']
                runAsUser: 1000
                runAsNonRoot: true
                seccompProfile:
                  type: RuntimeDefault
          EOF
      changed_when: false
      loop:
        - agnhost1
        - agnhost2

    - name: Collect a cluster dump for debugging
      import_role:
        name: cluster-dump

    - name: Check that all pods are running and ready
      command: "{{ bin_dir }}/kubectl get pods --namespace test --no-headers -o yaml"
      changed_when: false
      register: run_pods_log
      until:
        # Check that all pods are running
        - '(run_pods_log.stdout | from_yaml)["items"] | map(attribute = "status.phase") | unique | list == ["Running"]'
        # Check that all pods are ready
        - '(run_pods_log.stdout | from_yaml)["items"] | map(attribute = "status.containerStatuses") | map("map", attribute = "ready") | map("min") | min'
      retries: 18
      delay: 10
      failed_when: false
      no_log: true

    - name: Get pod names
      command: "{{ bin_dir }}/kubectl get pods -n test -o json"
      changed_when: false
      register: pods
      no_log: true

    - name: Fail if the test pods did not become running and ready
      debug:
        msg: "{{ pods.stdout.split('\n') }}"
      failed_when: not run_pods_log is success

    - name: Get hostnet pods
      command: "{{ bin_dir }}/kubectl get pods -n test -o
               jsonpath='{range .items[?(.spec.hostNetwork)]}{.metadata.name} {.status.podIP} {.status.containerStatuses} {end}'"
      changed_when: false
      register: hostnet_pods
      ignore_errors: true  # noqa ignore-errors
      no_log: true

    - name: Get running pods
      command: "{{ bin_dir }}/kubectl get pods -n test -o
               jsonpath='{range .items[?(.status.phase==\"Running\")]}{.metadata.name} {.status.podIP} {.status.containerStatuses} {end}'"
      changed_when: false
      register: running_pods
      no_log: true

    - name: Check kubectl output
      command: "{{ bin_dir }}/kubectl get pods --all-namespaces -owide"
      changed_when: false
      register: get_pods
      no_log: true

    - name: Show pod list
      debug:
        msg: "{{ get_pods.stdout.split('\n') }}"

    - name: Set networking facts
      set_fact:
        kube_pods_subnet: 10.233.64.0/18
        pod_names: "{{ (pods.stdout | from_json)['items'] | map(attribute = 'metadata.name') | list }}"
        pod_ips: "{{ (pods.stdout | from_json)['items'] | selectattr('status.podIP', 'defined') | map(attribute = 'status.podIP') | list }}"
        pods_hostnet: |
          {% set list = hostnet_pods.stdout.split(" ") %}
          {{ list }}
        pods_running: |
          {% set list = running_pods.stdout.split(" ") %}
          {{ list }}

    - name: Check pods IP are in correct network
      assert:
        that: item | ipaddr(kube_pods_subnet)
      when:
        - not item in pods_hostnet
        - item in pods_running
      with_items: "{{ pod_ips }}"

    - name: Curl between pods is working
      command: "{{ bin_dir }}/kubectl -n test exec {{ item[0] }} -- curl {{ item[1] }}:8080"
      when:
        - not item[0] in pods_hostnet
        - not item[1] in pods_hostnet
      with_nested:
        - "{{ pod_names }}"
        - "{{ pod_ips }}"

    - name: Curl between hostnet pods is working
      command: "{{ bin_dir }}/kubectl -n test exec {{ item[0] }} -- curl {{ item[1] }}:8080"
      when:
        - item[0] in pods_hostnet
        - item[1] in pods_hostnet
      with_nested:
        - "{{ pod_names }}"
        - "{{ pod_ips }}"

View File

@@ -0,0 +1,235 @@
---
# Network plugin checks: calico tunl0 route sanity on every node, then a
# netchecker server/agent connectivity verification across the cluster,
# plus an optional multus macvlan secondary-interface test.
- hosts: kube_node
  tasks:
    - name: Test tunl0 routes
      # NOTE: the default() filter must live inside the Jinja braces; the
      # previous form grep'd for the literal text "| default(26)" and never
      # matched a real route prefix.
      shell: "set -o pipefail && ! /sbin/ip ro | grep '/{{ calico_pool_blocksize | default(26) }} via' | grep -v tunl0"
      args:
        executable: /bin/bash
      changed_when: false
      when:
        - (calico_ipip_mode is defined and calico_ipip_mode != 'Never' or cloud_provider is defined)
        - kube_network_plugin|default('calico') == 'calico'

- hosts: k8s_cluster
  vars:
    agent_report_interval: 10
    netcheck_namespace: default
    netchecker_port: 31081
  tasks:
    - name: Force binaries directory for Container Linux by CoreOS and Flatcar
      set_fact:
        bin_dir: "/opt/bin"
      when: ansible_os_family in ["Flatcar", "Flatcar Container Linux by Kinvolk"]

    - name: Force binaries directory on other hosts
      set_fact:
        bin_dir: "/usr/local/bin"
      when: not ansible_os_family in ["Flatcar", "Flatcar Container Linux by Kinvolk"]

    - name: Collect a cluster dump for debugging
      import_role:
        name: cluster-dump

    - name: Wait for netchecker server
      shell: "set -o pipefail && {{ bin_dir }}/kubectl get pods -o wide --namespace {{ netcheck_namespace }} | grep ^netchecker-server"
      args:
        executable: /bin/bash
      register: ncs_pod
      until: ncs_pod.stdout.find('Running') != -1
      retries: 3
      delay: 10
      when: inventory_hostname == groups['kube_control_plane'][0]

    - name: Wait for netchecker agents
      shell: "set -o pipefail && {{ bin_dir }}/kubectl get pods -o wide --namespace {{ netcheck_namespace }} | grep '^netchecker-agent-.*Running'"
      args:
        executable: /bin/bash
      register: nca_pod
      # two agents per node: one pod-network, one hostNetwork
      until: nca_pod.stdout_lines|length >= groups['k8s_cluster']|intersect(ansible_play_hosts)|length * 2
      retries: 3
      delay: 10
      failed_when: false
      when: inventory_hostname == groups['kube_control_plane'][0]

    - name: Get netchecker pods
      command: "{{ bin_dir }}/kubectl -n {{ netcheck_namespace }} describe pod -l app={{ item }}"
      run_once: true
      delegate_to: "{{ groups['kube_control_plane'][0] }}"
      no_log: false
      with_items:
        - netchecker-agent
        - netchecker-agent-hostnet
      when: not nca_pod is success

    - name: Show netchecker agent wait output
      debug:
        var: nca_pod.stdout_lines
      when: inventory_hostname == groups['kube_control_plane'][0]

    - name: Get netchecker agents
      uri:
        url: "http://{{ ansible_default_ipv4.address }}:{{ netchecker_port }}/api/v1/agents/"
        return_content: true
      run_once: true
      delegate_to: "{{ groups['kube_control_plane'][0] }}"
      register: agents
      retries: 18
      delay: "{{ agent_report_interval }}"
      until: agents.content|length > 0 and
             agents.content[0] == '{' and
             agents.content|from_json|length >= groups['k8s_cluster']|intersect(ansible_play_hosts)|length * 2
      failed_when: false
      no_log: false

    - name: Check netchecker status
      uri:
        url: "http://{{ ansible_default_ipv4.address }}:{{ netchecker_port }}/api/v1/connectivity_check"
        status_code: 200
        return_content: true
      delegate_to: "{{ groups['kube_control_plane'][0] }}"
      run_once: true
      register: connectivity_check
      retries: 3
      delay: "{{ agent_report_interval }}"
      until: connectivity_check.content|length > 0 and
             connectivity_check.content[0] == '{'
      no_log: false
      failed_when: false
      when:
        - agents.content != '{}'

    - name: Show netchecker server pod status
      debug:
        var: ncs_pod
      run_once: true

    - name: Get kube-proxy logs
      command: "{{ bin_dir }}/kubectl -n kube-system logs -l k8s-app=kube-proxy"
      no_log: false
      when:
        - inventory_hostname == groups['kube_control_plane'][0]
        - not connectivity_check is success

    - name: Get logs from other apps
      command: "{{ bin_dir }}/kubectl -n kube-system logs -l k8s-app={{ item }} --all-containers"
      when:
        - inventory_hostname == groups['kube_control_plane'][0]
        - not connectivity_check is success
      no_log: false
      with_items:
        - kube-router
        - flannel
        - canal-node
        - calico-node
        - cilium

    - name: Parse agents list
      set_fact:
        agents_check_result: "{{ agents.content | from_json }}"
      delegate_to: "{{ groups['kube_control_plane'][0] }}"
      run_once: true
      when:
        - agents is success
        - agents.content is defined
        - agents.content[0] == '{'

    - name: Show parsed agents list
      debug:
        var: agents_check_result
      delegate_to: "{{ groups['kube_control_plane'][0] }}"
      run_once: true
      when:
        - agents_check_result is defined

    - name: Parse connectivity check
      set_fact:
        connectivity_check_result: "{{ connectivity_check.content | from_json }}"
      delegate_to: "{{ groups['kube_control_plane'][0] }}"
      run_once: true
      when:
        - connectivity_check is success
        - connectivity_check.content is defined
        - connectivity_check.content[0] == '{'

    - name: Show parsed connectivity check
      debug:
        var: connectivity_check_result
      delegate_to: "{{ groups['kube_control_plane'][0] }}"
      run_once: true
      when:
        - connectivity_check_result is defined

    - name: Check connectivity with all netchecker agents
      assert:
        that:
          - agents_check_result is defined
          - connectivity_check_result is defined
          - agents_check_result.keys() | length > 0
          - not connectivity_check_result.Absent
          - not connectivity_check_result.Outdated
        msg: "Connectivity check to netchecker agents failed"
      delegate_to: "{{ groups['kube_control_plane'][0] }}"
      run_once: true

    - name: Create macvlan network conf
      # We cannot use only shell: below because Ansible will render the text
      # with leading spaces, which means the shell will never find the string
      # EOF at the beginning of a line. We can avoid Ansible's unhelpful
      # heuristics by using the cmd parameter like this:
      shell:
        cmd: |
          cat <<EOF | {{ bin_dir }}/kubectl create -f -
          apiVersion: "k8s.cni.cncf.io/v1"
          kind: NetworkAttachmentDefinition
          metadata:
            name: macvlan-conf
          spec:
            config: '{
              "cniVersion": "0.4.0",
              "type": "macvlan",
              "master": "eth0",
              "mode": "bridge",
              "ipam": {
                "type": "host-local",
                "subnet": "192.168.1.0/24",
                "rangeStart": "192.168.1.200",
                "rangeEnd": "192.168.1.216",
                "routes": [
                  { "dst": "0.0.0.0/0" }
                ],
                "gateway": "192.168.1.1"
              }
            }'
          EOF
      when:
        - inventory_hostname == groups['kube_control_plane'][0]
        - kube_network_plugin_multus|default(false)|bool

    - name: Annotate pod with macvlan network
      # Same heredoc/indentation caveat as "Create macvlan network conf".
      shell:
        cmd: |
          cat <<EOF | {{ bin_dir }}/kubectl create -f -
          apiVersion: v1
          kind: Pod
          metadata:
            name: samplepod
            annotations:
              k8s.v1.cni.cncf.io/networks: macvlan-conf
          spec:
            containers:
            - name: samplepod
              command: ["/bin/bash", "-c", "sleep 2000000000000"]
              image: dougbtv/centos-network
          EOF
      when:
        - inventory_hostname == groups['kube_control_plane'][0]
        - kube_network_plugin_multus|default(false)|bool

    - name: Check secondary macvlan interface
      command: "{{ bin_dir }}/kubectl exec samplepod -- ip addr show dev net1"
      register: output
      until: output.rc == 0
      retries: 90
      changed_when: false
      when:
        - inventory_hostname == groups['kube_control_plane'][0]
        - kube_network_plugin_multus|default(false)|bool

View File

@@ -0,0 +1,36 @@
---
# Optionally run the sonobuoy conformance test suite against the cluster.
- hosts: kube_control_plane[0]
  vars:
    sonobuoy_version: 0.56.11
    sonobuoy_arch: amd64
    sonobuoy_parallel: 30
    sonobuoy_path: /usr/local/bin/sonobuoy
    sonobuoy_mode: Quick
  tasks:
    - name: Run sonobuoy tests
      when:
        - sonobuoy_enabled is defined
        - sonobuoy_enabled
      block:
        - name: Download sonobuoy
          get_url:
            url: "https://github.com/heptio/sonobuoy/releases/download/v{{ sonobuoy_version }}/sonobuoy_{{ sonobuoy_version }}_linux_{{ sonobuoy_arch }}.tar.gz"
            dest: /tmp/sonobuoy.tar.gz
            mode: "0644"

        - name: Extract sonobuoy
          unarchive:
            src: /tmp/sonobuoy.tar.gz
            dest: /usr/local/bin/
            # the archive already sits on the target host; 'copy: no' is the
            # deprecated spelling of this option
            remote_src: true

        # The block-level when already guarantees sonobuoy_enabled here, so
        # no per-task condition is needed.
        - name: Run sonobuoy
          command: "{{ sonobuoy_path }} run --mode {{ sonobuoy_mode }} --e2e-parallel {{ sonobuoy_parallel }} --wait"

        - name: Run sonobuoy retrieve
          command: "{{ sonobuoy_path }} retrieve"
          register: sonobuoy_retrieve

        - name: Run inspect results
          command: "{{ sonobuoy_path }} results {{ sonobuoy_retrieve.stdout }} --plugin e2e --mode report"

View File

@@ -0,0 +1,19 @@
---
# cluster-dump role: collect `kubectl cluster-info dump` output from the
# control-plane nodes, compress it, and fetch it into the CI workspace.
- name: Generate dump folder
  command: "{{ bin_dir }}/kubectl cluster-info dump --all-namespaces --output-directory /tmp/cluster-dump"
  no_log: true
  when: inventory_hostname in groups['kube_control_plane']

- name: Compress directory cluster-dump
  archive:
    path: /tmp/cluster-dump
    dest: /tmp/cluster-dump.tgz
    # quoted: a bare 0644 is parsed by YAML 1.1 as the octal integer 420
    mode: "0644"
  when: inventory_hostname in groups['kube_control_plane']

- name: Fetch dump file
  fetch:
    src: /tmp/cluster-dump.tgz
    dest: "{{ lookup('env', 'CI_PROJECT_DIR') }}/cluster-dump/{{ inventory_hostname }}.tgz"
    flat: true
  when: inventory_hostname in groups['kube_control_plane']