[root@master yaml]# kubectl get svc,pods -o wide
NAME TYPE CLUSTER-IP EXTERNAL-IP PORT(S) AGE SELECTOR
service/db NodePort 10.105.233.179 <none> 3306:31306/TCP 12m io.kompose.service=db
service/webserver-tcp NodePort 10.98.187.153 <none> 8888:30080/TCP,10443:30443/TCP 13m app=web-server
On worker1, the mysql ClusterIP (the db service above) is unreachable not only from inside pods, but also from the node itself; even ping from the node fails:
[root@worker1 vagrant]# ping 10.111.182.38
PING 10.111.182.38 (10.111.182.38) 56(84) bytes of data.
^C
--- 10.111.182.38 ping statistics ---
2 packets transmitted, 0 received, 100% packet loss, time 999ms
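For reference, the same failure can be reproduced from inside a throwaway pod instead of from the node. This is only a quick sketch: the pod name is arbitrary, and it assumes the web-server backend of webserver-tcp answers plain HTTP on service port 8888 (values taken from the listing above):

# Hypothetical check: run a temporary busybox pod and fetch the webserver-tcp ClusterIP.
kubectl run nettest -it --rm --image=busybox --restart=Never -- \
  wget -qO- -T 2 http://10.98.187.153:8888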
By default, the kube-proxy we deployed falls back to iptables; its log contains a line like: Flag proxy-mode="" unknown, assuming iptables proxy
[root@master vagrant]# kubectl get pod -n kube-system
NAME READY STATUS RESTARTS AGE
coredns-58cc8c89f4-9b548 1/1 Running 8 4d
coredns-58cc8c89f4-g59jz 1/1 Running 3 4d
etcd-master.localdomain 1/1 Running 8 4d
kube-apiserver-master.localdomain 1/1 Running 8 4d
kube-controller-manager-master.localdomain 1/1 Running 8 4d
kube-flannel-ds-ck228 1/1 Running 3 4d
kube-flannel-ds-cxb7k 1/1 Running 3 4d
kube-flannel-ds-jf9l6 1/1 Running 8 4d
kube-proxy-hv5q4 1/1 Running 0 3h15m
kube-proxy-xfww6 1/1 Running 0 3h15m
kube-proxy-xmjc2 1/1 Running 0 3h15m
kube-scheduler-master.localdomain 1/1 Running 8 4d
The kube-proxy pod running on worker1 shows the following errors:
[root@master vagrant]# kubectl logs kube-proxy-xmjc2 -n kube-system
W0128 06:01:38.386276 1 proxier.go:597] Failed to load kernel module ip_vs with modprobe. You can ignore this message when kube-proxy is running inside container without mounting /lib/modules
W0128 06:01:38.386968 1 proxier.go:597] Failed to load kernel module ip_vs_rr with modprobe. You can ignore this message when kube-proxy is running inside container without mounting /lib/modules
W0128 06:01:38.387558 1 proxier.go:597] Failed to load kernel module ip_vs_wrr with modprobe. You can ignore this message when kube-proxy is running inside container without mounting /lib/modules
W0128 06:01:38.388179 1 proxier.go:597] Failed to load kernel module ip_vs_sh with modprobe. You can ignore this message when kube-proxy is running inside container without mounting /lib/modules
W0128 06:01:38.391067 1 proxier.go:597] Failed to load kernel module ip_vs with modprobe. You can ignore this message when kube-proxy is running inside container without mounting /lib/modules
W0128 06:01:38.391651 1 proxier.go:597] Failed to load kernel module ip_vs_rr with modprobe. You can ignore this message when kube-proxy is running inside container without mounting /lib/modules
W0128 06:01:38.392229 1 proxier.go:597] Failed to load kernel module ip_vs_wrr with modprobe. You can ignore this message when kube-proxy is running inside container without mounting /lib/modules
W0128 06:01:38.392789 1 proxier.go:597] Failed to load kernel module ip_vs_sh with modprobe. You can ignore this message when kube-proxy is running inside container without mounting /lib/modules
E0128 06:01:38.393462 1 server_others.go:339] can't determine whether to use ipvs proxy, error: IPVS proxier will not be used because the following required kernel modules are not loaded: [ip_vs ip_vs_rr ip_vs_wrr ip_vs_sh]
I0128 06:01:38.399444 1 node.go:135] Successfully retrieved node IP: 192.168.56.10
I0128 06:01:38.399469 1 server_others.go:149] **Using iptables Proxier.**
I0128 06:01:38.399654 1 server.go:529] Version: v1.16.0
I0128 06:01:38.400358 1 conntrack.go:52] Setting nf_conntrack_max to 131072
I0128 06:01:38.401607 1 config.go:313] Starting service config controller
I0128 06:01:38.401623 1 shared_informer.go:197] Waiting for caches to sync for service config
I0128 06:01:38.402022 1 config.go:131] Starting endpoints config controller
I0128 06:01:38.402046 1 shared_informer.go:197] Waiting for caches to sync for endpoints config
I0128 06:01:38.503432 1 shared_informer.go:204] Caches are synced for endpoints config
I0128 06:01:38.503460 1 shared_informer.go:204] Caches are synced for service config
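A quick way to confirm which proxier a kube-proxy instance is actually using is its metrics endpoint (bound to 127.0.0.1:10249 by default); run this on the node in question:

# Ask kube-proxy which proxier it is running; prints "iptables" or "ipvs".
curl -s 127.0.0.1:10249/proxyMode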
Root cause:
kube-proxy is not running in ipvs mode: the required IPVS kernel modules (ip_vs, ip_vs_rr, ip_vs_wrr, ip_vs_sh) are not loaded, so it silently falls back to the iptables proxier.
Solution:
- On the master, edit the kube-proxy ConfigMap and set mode to "ipvs":
[root@k8s-master ~]# kubectl edit cm kube-proxy -n kube-system
ipvs:
  excludeCIDRs: null
  minSyncPeriod: 0s
  scheduler: ""
  strictARP: false
  syncPeriod: 30s
kind: KubeProxyConfiguration
metricsBindAddress: 127.0.0.1:10249
mode: "ipvs"
On the master and worker1, and on every other node where these modules are missing, load the IPVS kernel modules:
cat > /etc/sysconfig/modules/ipvs.modules <<EOF
#!/bin/bash
modprobe -- ip_vs
modprobe -- ip_vs_rr
modprobe -- ip_vs_wrr
modprobe -- ip_vs_sh
modprobe -- nf_conntrack_ipv4
EOF
chmod 755 /etc/sysconfig/modules/ipvs.modules && bash /etc/sysconfig/modules/ipvs.modules && lsmod | grep -e ip_vs -e nf_conntrack_ipv4
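As an alternative way to keep the modules loaded across reboots, they can also be listed in systemd's module-load directory. Note that on kernels 4.19 and newer nf_conntrack_ipv4 has been merged into nf_conntrack, so use that name instead:

# Load the IPVS modules at boot via systemd-modules-load.
cat > /etc/modules-load.d/ipvs.conf <<EOF
ip_vs
ip_vs_rr
ip_vs_wrr
ip_vs_sh
nf_conntrack_ipv4
EOF
systemctl restart systemd-modules-load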
Delete the existing kube-proxy pods; the DaemonSet will recreate them with the new configuration:
[root@k8s-master ~]# kubectl get pod -n kube-system | grep kube-proxy |awk '{system("kubectl delete pod "$1" -n kube-system")}'
They can also be deleted one at a time:
[root@master vagrant]# kubectl delete pod kube-proxy-xmjc2 -n kube-system
pod "kube-proxy-xmjc2" deleted
Check the newly created pod:
[root@master vagrant]# kubectl get pod -n kube-system -o wide
NAME READY STATUS RESTARTS AGE IP NODE NOMINATED NODE READINESS GATES
coredns-58cc8c89f4-9b548 1/1 Running 8 4d 10.244.0.30 master.localdomain <none> <none>
coredns-58cc8c89f4-g59jz 1/1 Running 3 4d 10.244.1.21 worker2 <none> <none>
etcd-master.localdomain 1/1 Running 8 4d 192.168.56.9 master.localdomain <none> <none>
kube-apiserver-master.localdomain 1/1 Running 8 4d 192.168.56.9 master.localdomain <none> <none>
kube-controller-manager-master.localdomain 1/1 Running 8 4d 192.168.56.9 master.localdomain <none> <none>
kube-flannel-ds-ck228 1/1 Running 3 4d 192.168.56.10 worker1.localdomain <none> <none>
kube-flannel-ds-cxb7k 1/1 Running 3 4d 192.168.56.11 worker2 <none> <none>
kube-flannel-ds-jf9l6 1/1 Running 8 4d 192.168.56.9 master.localdomain <none> <none>
kube-proxy-hv5q4 1/1 Running 0 3h17m 192.168.56.9 master.localdomain <none> <none>
kube-proxy-v76kd 1/1 Running 0 3s 192.168.56.10 worker1.localdomain <none> <none>
kube-proxy-xfww6 1/1 Running 0 3h17m 192.168.56.11 worker2 <none> <none>
kube-scheduler-master.localdomain 1/1 Running 8 4d 192.168.56.9 master.localdomain <none> <none>
The new kube-proxy pod on worker1 now uses the ipvs proxier:
[root@master vagrant]# kubectl logs kube-proxy-v76kd -n kube-system
I0128 09:18:58.379053 1 node.go:135] Successfully retrieved node IP: 192.168.56.10
I0128 09:18:58.379121 1 server_others.go:176] Using ipvs Proxier.
W0128 09:18:58.379269 1 proxier.go:420] IPVS scheduler not specified, use rr by default
I0128 09:18:58.379421 1 server.go:529] Version: v1.16.0
I0128 09:18:58.380057 1 conntrack.go:52] Setting nf_conntrack_max to 131072
I0128 09:18:58.380506 1 config.go:313] Starting service config controller
I0128 09:18:58.380526 1 shared_informer.go:197] Waiting for caches to sync for service config
I0128 09:18:58.380868 1 config.go:131] Starting endpoints config controller
I0128 09:18:58.380894 1 shared_informer.go:197] Waiting for caches to sync for endpoints config
I0128 09:18:58.483264 1 shared_informer.go:204] Caches are synced for service config
I0128 09:18:58.483655 1 shared_informer.go:204] Caches are synced for endpoints config
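You can also inspect the resulting IPVS rules directly on the node; ipvsadm may need to be installed first:

yum install -y ipvsadm     # install the IPVS admin tool if it is missing
ipvsadm -Ln                # list virtual servers (ClusterIP:port) and their backends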
Test again from worker1:
[root@worker1 vagrant]# ping 10.98.187.153
PING 10.98.187.153 (10.98.187.153) 56(84) bytes of data.
64 bytes from 10.98.187.153: icmp_seq=1 ttl=64 time=0.092 ms
64 bytes from 10.98.187.153: icmp_seq=2 ttl=64 time=0.037 ms
64 bytes from 10.98.187.153: icmp_seq=3 ttl=64 time=0.024 ms
64 bytes from 10.98.187.153: icmp_seq=4 ttl=64 time=0.089 ms
64 bytes from 10.98.187.153: icmp_seq=5 ttl=64 time=0.097 ms
64 bytes from 10.98.187.153: icmp_seq=6 ttl=64 time=0.090 ms
64 bytes from 10.98.187.153: icmp_seq=7 ttl=64 time=0.090 ms
The ClusterIP is now reachable. (In ipvs mode the service ClusterIPs are bound to the kube-ipvs0 dummy interface on every node, which is why ping now succeeds; in iptables mode a ClusterIP normally does not answer ICMP even when the service itself works.) Problem solved, at last.