[TOC]

0x01 在 kubernetes 集群机器中的运维应用

Q:什么是ansible?
答:它是一个Linux系统上的“自动化运维工具”,类似一个“配置管理工具”;

Step 1.只在其中一台 Master 节点安装 Ansible 批量运维工具(正常情况下应该有独立的主机来进行管理)

1
2
3
4
5
6
7
8
9
10
11
12
13
# (1)安装&配置:
sudo apt update && sudo apt install ansible
ansible --version
# ansible 2.9.6
# config file = /etc/ansible/ansible.cfg
# configured module search path = ['/root/.ansible/plugins/modules', '/usr/share/ansible/plugins/modules']
# ansible python module location = /usr/lib/python3/dist-packages/ansible
# executable location = /usr/bin/ansible
# python version = 3.8.5 (default, Jul 28 2020, 12:59:40) [GCC 9.3.0]

# 配置文件
# /etc/ansible/hosts
# /etc/ansible/ansible.cfg


Step 2.选择该Master节点作为ssh 密钥登陆其它主机的入口(注意:在正式环境中建议单独建立一台ssh公密钥认证主机,并且一定要保护好该密钥)

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
# 1.避免首次登录sshd服务时候需要输入yes,此时我们可以将主节点其中一台主机的ssh_config配置中的StrictHostKeyChecking修改为no
sed -i 's/^# StrictHostKeyChecking ask/StrictHostKeyChecking no/g' /etc/ssh/ssh_config

# 2.Master 节点公密钥生成(此处为了简单未设置密钥密码)
ssh-keygen -t ed25519 -C "weiyigeek-IT"
# Generating public/private ed25519 key pair.
ls /root/.ssh/
authorized_keys id_ed25519 id_ed25519.pub

# 3.用上面生成的公钥和密钥进行配置公钥登录(建议修改默认的22端口)
for ip in {223..226};do
ssh-copy-id -p 20211 -i ~/.ssh/id_ed25519.pub weiyigeek@192.168.1.${ip}
done
# **************WARNING**************
# Authorized only. All activity will be monitored and reported.
# id_ed25519 100% 464 307.9KB/s 00:00
# id_ed25519.pub 100% 102 150.8KB/s 00:00


Step 3.主机资源清单配置

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
sudo tee -a /etc/ansible/hosts <<'EOF'
# Dev & Test Kubernetes
[dtmaster]
weiyigeek-107 ansible_host=127.0.0.1
weiyigeek-108 ansible_host=192.168.1.108
weiyigeek-109 ansible_host=192.168.1.109

[dtnode]
weiyigeek-223 ansible_host=192.168.1.223
weiyigeek-224 ansible_host=192.168.1.224
weiyigeek-225 ansible_host=192.168.1.225
weiyigeek-226 ansible_host=192.168.1.226

[dtk8s:children]
dtmaster
dtnode

[dtk8s:vars]
ansible_port=20211
ansible_user=weiyigeek
# 在使用 sudo 命令时不需要输入密码
ansible_become=true
ansible_become_method=sudo
ansible_become_pass='weiyigeek#2020'

[all:vars]
ansible_python_interpreter=/usr/bin/python3
EOF


Step 4.结果验证:收到各主机的“pong”回复后,这意味着您已准备好在这些服务器上运行Ansible命令和剧本。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
ansible all -m ping
# weiyigeek-224 | SUCCESS => {
# "changed": false,
# "ping": "pong"
# }
# weiyigeek-109 | SUCCESS => {
# "changed": false,
# "ping": "pong"
# }
# weiyigeek-107 | SUCCESS => {
# "changed": false,
# "ping": "pong"
# }
# weiyigeek-108 | SUCCESS => {
# "changed": false,
# "ping": "pong"
# }
# weiyigeek-223 | SUCCESS => {
# "changed": false,
# "ping": "pong"
# }
# weiyigeek-225 | SUCCESS => {
# "changed": false,
# "ping": "pong"
# }
# weiyigeek-226 | SUCCESS => {
# "changed": false,
# "ping": "pong"
# }


Step 5.命令执行测试

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
# 命令执行
> ansible all -a "whoami" # 缺省模块为 command (不支持管道符、重定向等 shell 特性)
> ansible all -m shell -a "whoami" # 支持 管道符
# weiyigeek-224 | CHANGED | rc=0 >>
# root
# weiyigeek-223 | CHANGED | rc=0 >>
# root
# weiyigeek-107 | CHANGED | rc=0 >>
# root
# weiyigeek-109 | CHANGED | rc=0 >>
# root
# weiyigeek-108 | CHANGED | rc=0 >>
# root
# weiyigeek-225 | CHANGED | rc=0 >>
# root
# weiyigeek-226 | CHANGED | rc=0 >>
# root

# 软件更新查看 (在配置文件中配置了ansible_become则支持该sudo命令)
> ansible all -a "sudo apt list --upgradable"


Step 6.K8s工作节点环境验证 ( environment verify )

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
# 1.节点之中不可以有重复的主机名、MAC 地址或 product_uuid 
# IP 地址
> ansible dtk8s -a "ifconfig -a" | egrep "inet 192.168" -C 1
# ens160: flags=4163<UP,BROADCAST,RUNNING,MULTICAST> mtu 1500
# inet 192.168.1.107 netmask 255.255.255.0 broadcast 192.168.1.255
# inet6 fe80::250:56ff:fe8a:e8db prefixlen 64 scopeid 0x20<link>
# --
# ens160: flags=4163<UP,BROADCAST,RUNNING,MULTICAST> mtu 1500
# inet 192.168.1.108 netmask 255.255.255.0 broadcast 192.168.1.255
# inet6 fe80::250:56ff:fe8a:ca7d prefixlen 64 scopeid 0x20<link>
# --
# ens160: flags=4163<UP,BROADCAST,RUNNING,MULTICAST> mtu 1500
# inet 192.168.1.109 netmask 255.255.255.0 broadcast 192.168.1.255
# inet6 fe80::250:56ff:fe8a:c363 prefixlen 64 scopeid 0x20<link>
# --
# eno1: flags=4163<UP,BROADCAST,RUNNING,MULTICAST> mtu 1500
# inet 192.168.1.223 netmask 255.255.255.0 broadcast 192.168.1.255
# inet6 fe80::7a2b:cbff:fe2e:9d16 prefixlen 64 scopeid 0x20<link>
# --
# eno1: flags=4163<UP,BROADCAST,RUNNING,MULTICAST> mtu 1500
# inet 192.168.1.224 netmask 255.255.255.0 broadcast 192.168.1.255
# inet6 fe80::d6ae:52ff:fed2:b8ef prefixlen 64 scopeid 0x20<link>
# --
# eno1: flags=4163<UP,BROADCAST,RUNNING,MULTICAST> mtu 1500
# inet 192.168.1.225 netmask 255.255.255.0 broadcast 192.168.1.255
# inet6 fe80::862b:2bff:fe5c:8781 prefixlen 64 scopeid 0x20<link>
# --
# eno1: flags=4163<UP,BROADCAST,RUNNING,MULTICAST> mtu 1500
# inet 192.168.1.226 netmask 255.255.255.0 broadcast 192.168.1.255
# inet6 fe80::d6ae:52ff:fe82:8aa prefixlen 64 scopeid 0x20<link>


# 2.主机名称及其UUID
> ansible dtk8s -a "sudo cat /sys/class/dmi/id/product_uuid"
# [WARNING]: Consider using 'become', 'become_method', and 'become_user' rather than running sudo
# weiyigeek-224 | CHANGED | rc=0 >>
# 4c4c4544-0034-3710-8036-c2c04f473032
# weiyigeek-109 | CHANGED | rc=0 >>
# 33ce0a42-267a-701a-86ab-fede021587c3
# weiyigeek-108 | CHANGED | rc=0 >>
# f4240a42-e010-e17a-b8c7-ed047755351c
# weiyigeek-107 | CHANGED | rc=0 >>
# a9200a42-9a5c-3c64-3ad6-d7debcb0d793
# weiyigeek-223 | CHANGED | rc=0 >>
# 4c4c4544-0052-4810-8042-c8c04f353358
# weiyigeek-225 | CHANGED | rc=0 >>
# 4c4c4544-004e-4c10-8052-c7c04f5a3258
# weiyigeek-226 | CHANGED | rc=0 >>
# 4c4c4544-004c-3110-8050-b5c04f443358


#3.内核版本验证
# PS: 3.10.x 内核存在一些 Bugs,会导致运行的 Docker、Kubernetes 不稳定,建议采用 4.18 及以上的内核版本
~$ ansible dtk8s -m shell -a "uname -r"
weiyigeek-224 | CHANGED | rc=0 >>
5.4.0-42-generic
weiyigeek-107 | CHANGED | rc=0 >>
5.4.0-60-generic
weiyigeek-108 | CHANGED | rc=0 >>
5.4.0-60-generic
weiyigeek-109 | CHANGED | rc=0 >>
5.4.0-60-generic
weiyigeek-223 | CHANGED | rc=0 >>
5.4.0-42-generic
weiyigeek-226 | CHANGED | rc=0 >>
5.4.0-42-generic
weiyigeek-225 | CHANGED | rc=0 >>
5.4.0-42-generic


Step 7.将 Ansible 主机中的文件拷贝到组内各主机的指定目录,包括初始化机器的 init.sh 脚本以及 hosts 文件

1
2
3
4
5
6
# 初始化以及依赖包下载
ansible dtnode -m copy -a 'src=~/init.sh dest=~/'
ansible dtnode -m shell -a "chmod a+x ./init.sh && sudo ./init.sh"

# 主机 hosts 绑定
ansible dtk8s -m copy -a 'src=~/hosts dest=/etc/hosts'


Step 8.拷贝导出的k8s集群的镜像到master、worker工作节点并导入到本地仓库中

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
ansible dtnode -m copy -a 'src=~/v1.19.6.tar dest=/home/weiyigeek/'
~$ ansible dtnode -m shell -a "docker load -i /home/weiyigeek/v1.19.6.tar"
weiyigeek-226 | CHANGED | rc=0 >>
....
weiyigeek-225 | CHANGED | rc=0 >>
....
weiyigeek-224 | CHANGED | rc=0 >>
.....
weiyigeek-223 | CHANGED | rc=0 >>
Loaded image: registry.cn-hangzhou.aliyuncs.com/google_containers/kube-scheduler:v1.19.6
Loaded image: calico/node:v3.17.1
Loaded image: registry.cn-hangzhou.aliyuncs.com/google_containers/etcd:3.4.13-0
Loaded image: calico/cni:v3.17.1
Loaded image: calico/kube-controllers:v3.17.1
Loaded image: registry.cn-hangzhou.aliyuncs.com/google_containers/coredns:1.7.0
Loaded image: registry.cn-hangzhou.aliyuncs.com/google_containers/pause:3.2
Loaded image: registry.cn-hangzhou.aliyuncs.com/google_containers/kube-proxy:v1.19.6
Loaded image: registry.cn-hangzhou.aliyuncs.com/google_containers/kube-apiserver:v1.19.6
Loaded image: registry.cn-hangzhou.aliyuncs.com/google_containers/kube-controller-manager:v1.19.6
Loaded image: calico/pod2daemon-flexvol:v3.17.1


Step 9.将master与负载机器加入到k8s集群之中

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
# master-slave (其余 master 节点以控制平面身份加入集群)
ansible dtmaster -m shell -a "sudo kubeadm join weiyigeek-lb-vip.k8s:16443 --token 20w21w.httpweiyigeektop --discovery-token-ca-cert-hash sha256:7ea900ef214c98aef6d7daf1380320d0a43f666f2d4b6b7469077bd51790118e --control-plane --certificate-key 8327482265975b7a60f3549222f1093353ecaa148a3404cd10c605d4111566fc"

# worker
ansible dtnode -m shell -a "sudo kubeadm join weiyigeek-lb-vip.k8s:16443 --token 20w21w.httpweiyigeektop --discovery-token-ca-cert-hash sha256:7ea900ef214c98aef6d7daf1380320d0a43f666f2d4b6b7469077bd51790118e"
weiyigeek-226 | CHANGED | rc=0 >>
.... 同下
weiyigeek-223 | CHANGED | rc=0 >>
.... 同下
weiyigeek-224 | CHANGED | rc=0 >>
.... 同下
weiyigeek-225 | CHANGED | rc=0 >>
[preflight] Running pre-flight checks
[preflight] Reading configuration from the cluster...
[preflight] FYI: You can look at this config file with 'kubectl -n kube-system get cm kubeadm-config -oyaml'
[kubelet-start] Writing kubelet configuration to file "/var/lib/kubelet/config.yaml"
[kubelet-start] Writing kubelet environment file with flags to file "/var/lib/kubelet/kubeadm-flags.env"
[kubelet-start] Starting the kubelet
[kubelet-start] Waiting for the kubelet to perform the TLS Bootstrap...

# This node has joined the cluster: # 表示成功加入到集群
Run 'kubectl get nodes' on the control-plane to see this node join the cluster.


Step 10.查看集群node节点信息

1
2
3
4
5
6
7
8
9
~$ kubectl get nodes -o wide
NAME STATUS ROLES AGE VERSION INTERNAL-IP EXTERNAL-IP OS-IMAGE KERNEL-VERSION CONTAINER-RUNTIME
weiyigeek-107 Ready master 8h v1.19.6 192.168.1.107 <none> Ubuntu 20.04.1 LTS 5.4.0-60-generic docker://19.3.14
weiyigeek-108 Ready master 8h v1.19.6 192.168.1.108 <none> Ubuntu 20.04.1 LTS 5.4.0-60-generic docker://19.3.14
weiyigeek-109 Ready master 8h v1.19.6 192.168.1.109 <none> Ubuntu 20.04.1 LTS 5.4.0-60-generic docker://19.3.14
weiyigeek-223 Ready <none> 29s v1.19.6 192.168.1.223 <none> Ubuntu 20.04.1 LTS 5.4.0-42-generic docker://19.3.14
weiyigeek-224 Ready <none> 29s v1.19.6 192.168.1.224 <none> Ubuntu 20.04.1 LTS 5.4.0-42-generic docker://19.3.14
weiyigeek-225 Ready <none> 29s v1.19.6 192.168.1.225 <none> Ubuntu 20.04.1 LTS 5.4.0-42-generic docker://19.3.14
weiyigeek-226 Ready <none> 30s v1.19.6 192.168.1.226 <none> Ubuntu 20.04.1 LTS 5.4.0-42-generic docker://19.3.14