Bootstrap

ubuntu k8s 1.31

ubuntu 系统 设置

更新源(注意顺序:先更新软件包索引,再升级已安装软件)

apt update
apt-get update

apt upgrade
apt-get upgrade

释放root

sudo passwd root

密码

su -
密码

设置root可以登录

cd /etc/ssh/sshd_config.d && vi ssh.conf
PermitRootLogin yes
PasswordAuthentication yes

:wq 保存退出

systemctl restart ssh

系统初始化设置

关闭防火墙

ufw disable

systemctl disable --now ufw

修改 hostname

# 修改 hostname 
hostnamectl set-hostname k8s-01
# 查看修改结果
hostnamectl status
# 设置 hostname 解析
echo "127.0.0.1   $(hostname)" >> /etc/hosts
如果集群有多个节点,为每个节点各添加一条解析记录
echo "192.168.1.64 k8s-01" >> /etc/hosts
echo "192.168.1.65 k8s-02" >> /etc/hosts
echo "192.168.1.66 k8s-03" >> /etc/hosts

关闭 swap

# Disable swap for the current boot.
# (Older kubelets required swap to be off; recent kubelet versions can
# tolerate swap, so on new releases this step is optional.)
swapoff -a
# Permanently disable swap: comment out the swap entry in /etc/fstab
# so it is not remounted after a reboot.
sed -ri 's/.*swap.*/#&/' /etc/fstab

关闭 selinux

有selinux 可以关闭一下

修改limit

ulimit -SHn 65535
vi /etc/security/limits.conf

末尾添加如下内容

* soft nofile 655360
* hard nofile 131072
* soft nproc 655350
* hard nproc 655350
* soft memlock unlimited
* hard memlock unlimited

:wq 保存退出

时区设置

timedatectl set-timezone Asia/Shanghai

systemctl restart systemd-timesyncd.service


检查设置

timedatectl status
# 或者
date

开启流量转发

设置所需的 sysctl 参数,参数在重新启动后保持不变

修改内核参数(首先确认你的系统已经加载了 br_netfilter 模块,默认是没有该模块的,需要你先安装 bridge-utils)
apt-get install -y bridge-utils
modprobe br_netfilter
lsmod | grep br_netfilter
如果报错找不到包,需要先更新 apt-get update -y

# Kernel modules needed by containerd/Kubernetes networking; systemd
# loads every module listed here at boot (one bare module name per line).
cat <<'EOF' > /etc/modules-load.d/k8s.conf
overlay
br_netfilter
EOF

# Sysctl parameters required by Kubernetes networking; files under
# /etc/sysctl.d/ are re-applied automatically on every boot.
sudo tee /etc/sysctl.d/k8s.conf <<'EOF'
net.bridge.bridge-nf-call-iptables  = 1
net.bridge.bridge-nf-call-ip6tables = 1
net.ipv4.ip_forward                 = 1
EOF

应用 sysctl 参数而不重新启动

sysctl --system


sysctl net.ipv4.ip_forward

lsmod | grep br_netfilter
lsmod | grep overlay

ipvs设置

安装ipvs

apt install -y  ipset ipvsadm 
# Persist the IPVS kernel modules so they are loaded on every boot.
# BUG FIX: systemd modules-load.d files must contain bare module names,
# one per line — NOT "modprobe" command lines (those are silently invalid).
cat > /etc/modules-load.d/ipvs.conf << EOF
ip_vs
ip_vs_rr
ip_vs_wrr
ip_vs_sh
nf_conntrack
EOF
# Also load all of them immediately for the current boot
# (the original only loaded ip_vs).
for mod in ip_vs ip_vs_rr ip_vs_wrr ip_vs_sh nf_conntrack; do
    sudo modprobe "$mod"
done

lsmod | grep ip_vs

lsmod | grep -e ip_vs -e nf_conntrack

安装容器运行时

安装基础环境


# Base packages needed to fetch repositories over HTTPS.
apt -y install apt-transport-https ca-certificates curl software-properties-common

# Ensure the keyring directory exists — it is NOT created by default on
# Ubuntu <= 20.04, and "gpg --dearmor -o" fails if it is missing.
sudo mkdir -p /etc/apt/keyrings
curl -fsSL https://mirrors.aliyun.com/kubernetes/apt/doc/apt-key.gpg | sudo gpg --dearmor -o /etc/apt/keyrings/kubernetes-archive-keyring.gpg

安装 containerd

https://www.ghproxy.cn/https://github.com/containerd/containerd/releases/download/v1.7.24/containerd-1.7.24-linux-amd64.tar.gz

tar Cxzvf /usr/local containerd-1.7.24-linux-amd64.tar.gz

https://gh.llkk.cc/https://github.com/containerd/containerd/releases/download/v1.7.24/cri-containerd-1.7.24-linux-amd64.tar.gz

sudo tar -C / -xzf cri-containerd-1.7.24-linux-amd64.tar.gz
export PATH=$PATH:/usr/local/bin:/usr/local/sbin
source ~/.bashrc
mkdir /etc/containerd

配置 containerd

#1.重新初始化containerd的配置文件
containerd config default | tee /etc/containerd/config.toml 

修改SystemdCgroup

grep SystemdCgroup /etc/containerd/config.toml

#2 修改Cgroup的管理者为systemd组件
sed -ri 's#(SystemdCgroup = )false#\1true#' /etc/containerd/config.toml 

grep SystemdCgroup /etc/containerd/config.toml

修改 pause 的基础镜像版本

grep sandbox_image /etc/containerd/config.toml


#3 修改pause的基础镜像名称
sed -i 's#registry.k8s.io/pause:3.8#registry.aliyuncs.com/google_containers/pause:3.10#' /etc/containerd/config.toml


grep sandbox_image /etc/containerd/config.toml

启动containerd

systemctl daemon-reload
systemctl enable --now containerd
systemctl status containerd
systemctl restart containerd

安装kubelet kubeadm kubectl

添加阿里云k8s源

sudo apt-get update

sudo apt-get install -y apt-transport-https ca-certificates curl gpg

# Ensure the keyring directory exists before writing the key into it
# (missing by default on older Ubuntu releases), and run gpg/tee with
# sudo since /etc/apt is root-owned.
sudo mkdir -p /etc/apt/keyrings
curl -fsSL https://mirrors.aliyun.com/kubernetes-new/core/stable/v1.31/deb/Release.key | sudo gpg --dearmor -o /etc/apt/keyrings/kubernetes-apt-keyring.gpg

# Register the Aliyun mirror of the Kubernetes v1.31 package repository.
echo "deb [signed-by=/etc/apt/keyrings/kubernetes-apt-keyring.gpg] https://mirrors.aliyun.com/kubernetes-new/core/stable/v1.31/deb/ /" | sudo tee /etc/apt/sources.list.d/kubernetes.list


apt-get update

#查看kubectl可用版本
apt-cache madison kubectl

高可用集群【可选】

## 安装keepalived haproxy
apt install -y keepalived haproxy

配置keepalived配置文件

配置keepalived配置文件

 vim /etc/keepalived/keepalived.conf

示例配置文件

MASTER

! /etc/keepalived/keepalived.conf
! Configuration File for keepalived (MASTER node)
global_defs {
    router_id LVS_DEVEL
}
! Health-check script: demotes this node if the apiserver endpoint stops answering
vrrp_script check_apiserver {
  script "/etc/keepalived/check_apiserver.sh"
  interval 3
  weight -2
  fall 10
  rise 2
}

vrrp_instance VI_1 {
    # Role: MASTER on the primary node, BACKUP on standby nodes
    state MASTER
    # Change to the name of your network interface
    interface eth0
    virtual_router_id 51
    # Use 101 on the MASTER, 100 on each BACKUP
    priority 101
    authentication {
        auth_type PASS
        # Shared secret — must be identical on every keepalived node
        auth_pass k8s_test
    }
    virtual_ipaddress {
        # The virtual (floating) IP address
        192.168.1.67/24
    }
    track_script {
        check_apiserver
    }
}

BACKUP

! /etc/keepalived/keepalived.conf
! Configuration File for keepalived (BACKUP node)
global_defs {
    router_id LVS_DEVEL
}
! Health-check script: demotes this node if the apiserver endpoint stops answering
vrrp_script check_apiserver {
  script "/etc/keepalived/check_apiserver.sh"
  interval 3
  weight -2
  fall 10
  rise 2
}

vrrp_instance VI_1 {
    # Role: BACKUP on standby nodes (the original file wrongly said MASTER
    # here — two MASTERs fight over the VIP)
    state BACKUP
    # Change to the name of your network interface
    interface eth0
    virtual_router_id 51
    # Use 101 on the MASTER, 100 on each BACKUP
    priority 100
    authentication {
        auth_type PASS
        # Shared secret — must be identical on every keepalived node
        auth_pass k8s_test
    }
    virtual_ipaddress {
        # The virtual (floating) IP address
        192.168.1.67/24
    }
    track_script {
        check_apiserver
    }
}

配置keepalived配置文件 脚本

编辑keepalived配置文件 脚本


  vim /etc/keepalived/check_apiserver.sh

示例配置文件


#!/bin/sh
# keepalived health probe: exits 0 only while the local load-balanced
# apiserver endpoint (haproxy on :7443) answers /healthz within 2s.

errorExit() {
    echo "*** $*" 1>&2
    exit 1
}

curl -sfk --max-time 2 https://localhost:7443/healthz -o /dev/null \
    || errorExit "Error GET https://localhost:7443/healthz"

给脚本执行权限

 chmod +x /etc/keepalived/check_apiserver.sh

编辑haproxy配置文件

cp    /etc/haproxy/haproxy.cfg /etc/haproxy/haproxy.cfg_bak
 vim    /etc/haproxy/haproxy.cfg

haproxy示例文件

# /etc/haproxy/haproxy.cfg
#---------------------------------------------------------------------
# Global settings
#---------------------------------------------------------------------
global
    log /dev/log local0
    log /dev/log local1 notice
    daemon

#---------------------------------------------------------------------
# common defaults that all the 'listen' and 'backend' sections will
# use if not designated in their block
#---------------------------------------------------------------------
defaults
    mode                    http
    log                     global
    option                  httplog
    option                  dontlognull
    option http-server-close
    option forwardfor       except 127.0.0.0/24
    option                  redispatch
    retries                 30
    timeout http-request    10s
    timeout queue           20s
    timeout connect         5s
    timeout client          35s
    timeout server          35s
    timeout http-keep-alive 10s
    timeout check           10s

#---------------------------------------------------------------------
# apiserver frontend which proxys to the control plane nodes
#---------------------------------------------------------------------
frontend apiserver
    # NOTE: this listen port must match the port used in the keepalived
    # health-check script (7443)
    bind *:7443
    mode tcp
    option tcplog
    default_backend apiserver

#---------------------------------------------------------------------
# round robin balancing for apiserver
#---------------------------------------------------------------------
backend apiserver
    
    option httpchk GET /healthz
    http-check expect status 200
    mode tcp
    option ssl-hello-chk
    balance     roundrobin
    
    # One entry per control-plane node (apiserver on :6443)
    server k8s-master-01 192.168.1.64:6443 check
    server k8s-master-02 192.168.1.65:6443 check
    server k8s-master-03 192.168.1.66:6443 check

启动keepalived haproxy

 systemctl enable haproxy --now
systemctl enable keepalived --now


systemctl start haproxy
systemctl start keepalived 

systemctl status haproxy
systemctl status keepalived 

如果配置文件更改 ,快速重启keepalived,haproxy

systemctl restart keepalived 

systemctl restart haproxy

安装kubeadm kubectl kubelet

安装kubeadm kubectl kubelet

sudo apt update && \
sudo apt install -y kubelet kubectl kubeadm && \
sudo apt-mark hold kubelet kubeadm kubectl

查看kubeadm版本

kubeadm version

开机启动kubelet

systemctl enable kubelet && systemctl restart kubelet

初始化k8s

输出配置文件

kubeadm config print init-defaults  > kubeadm-config.yaml

修改配置文件

apiVersion: kubeadm.k8s.io/v1beta4
bootstrapTokens:
- groups:
  - system:bootstrappers:kubeadm:default-node-token
  token: abcdef.0123456789abcdef
  ttl: 24h0m0s
  usages:
  - signing
  - authentication
kind: InitConfiguration
localAPIEndpoint:
  # API server advertise address — set to this node's own IP
  advertiseAddress: 192.168.1.81
  # Keep the default port
  bindPort: 6443
nodeRegistration:
  criSocket: unix:///var/run/containerd/containerd.sock
  imagePullPolicy: IfNotPresent
  imagePullSerial: true
  # Node name — change per node
  name: k8s-01
  taints: null
timeouts:
  controlPlaneComponentHealthCheck: 4m0s
  discovery: 5m0s
  etcdAPICall: 2m0s
  kubeletHealthCheck: 4m0s
  kubernetesAPICall: 1m0s
  tlsBootstrap: 5m0s
  upgradeManifests: 5m0s
---
apiServer: {}
apiVersion: kubeadm.k8s.io/v1beta4
# CA / leaf certificate validity, extended from the defaults
caCertificateValidityPeriod: 876000h0m0s
certificateValidityPeriod: 876000h0m0s
certificatesDir: /etc/kubernetes/pki
clusterName: kubernetes
# Control-plane endpoint: keepalived VIP + haproxy port.
# A single-node cluster does not need controlPlaneEndpoint.
controlPlaneEndpoint: "192.168.1.67:7443"
controllerManager: {}
dns: {}
encryptionAlgorithm: RSA-2048
etcd:
  local:
    dataDir: /var/lib/etcd
# Pull control-plane images from the Aliyun mirror
imageRepository: registry.aliyuncs.com/google_containers
kind: ClusterConfiguration
# Must match the installed kubelet/kubeadm minor version — the apt repo
# configured earlier is stable/v1.31, so 1.32.0 would fail; use 1.31.x.
kubernetesVersion: 1.31.0
networking:
  dnsDomain: cluster.local
  # Service network CIDR
  serviceSubnet: 10.96.0.0/12
  # Pod network CIDR (must match the CNI plugin's IP pool)
  podSubnet: 10.60.0.0/16
proxy: {}
scheduler: {}
---
# https://kubernetes.io/zh-cn/docs/tasks/administer-cluster/kubeadm/configure-cgroup-driver/
kind: KubeletConfiguration
apiVersion: kubelet.config.k8s.io/v1beta1
cgroupDriver: systemd
---
apiVersion: kubeproxy.config.k8s.io/v1alpha1
kind: KubeProxyConfiguration
# Run kube-proxy in IPVS mode
mode: ipvs

拉取镜像 初始化集群

#拉取镜像
kubeadm config images pull --config kubeadm-config.yaml

#尝试运行 如果无错误,执行下面的初始化
kubeadm init --config kubeadm-config.yaml --dry-run

#初始化k8s集群 单节点不需要--upload-certs
kubeadm init --config kubeadm-config.yaml --upload-certs

初始化成功执行

  mkdir -p $HOME/.kube
  sudo cp -i /etc/kubernetes/admin.conf $HOME/.kube/config
  sudo chown $(id -u):$(id -g) $HOME/.kube/config

  export KUBECONFIG=/etc/kubernetes/admin.conf

其他master节点加入集群

 kubeadm join 192.168.1.67:7443 --token abcdef.0123456789abcdef \
	--discovery-token-ca-cert-hash sha256:006f34ef31937c42d822f3a635368d4861ff5bd8211538d629e0e3c282461349 \
	--control-plane --certificate-key 62eeb97fff1258c0f3eb79e46d68050ee6da66f6256eb74c5e41e98aa55aa6bb

work节点加入集群

kubeadm join 192.168.146.200:6443 --token abcdef.0123456789abcdef \
	--discovery-token-ca-cert-hash sha256:9d783c63bf9a20ce9634788b96905f369668d9f439e444e271907561455b8779 

安装网络插件

curl https://raw.githubusercontent.com/projectcalico/calico/v3.29.1/manifests/calico-typha.yaml -o calico.yaml

wget https://github.com/projectcalico/calico/releases/download/v3.29.1/release-v3.29.1.tgz

tar -xvf release-v3.29.1.tgz
cd release-v3.29.1/images
ctr -n k8s.io images import calico-cni.tar 
ctr -n k8s.io images import calico-flannel-migration-controller.tar 
ctr -n k8s.io images import calico-node.tar 
ctr -n k8s.io images import calico-typha.tar 
ctr -n k8s.io images import calico-dikastes.tar 
ctr -n k8s.io images import calico-kube-controllers.tar 
ctr -n k8s.io images import calico-pod2daemon.tar 


#修改其中的 pod网络范围  CALICO_IPV4POOL_CIDR  默认值应该是 192.168.0.0/16

kubectl apply -f  calico-typha.yaml


初始化失败处理

重置 kubeadm(下面的命令带 -f 参数,会强制执行,无需再确认)


kubeadm reset -f
iptables -F && iptables -t nat -F && iptables -t mangle -F && iptables -X

删除上次 init 生成的文件


# Remove CNI config, etcd data, ipvs rules and kubeconfig left by a
# previous "kubeadm init".
sudo rm -rf /etc/cni/net.d
sudo rm -rf /var/lib/etcd
ipvsadm --clear
# Quote $HOME so the path survives whitespace in the home directory.
rm -rf "$HOME/.kube/config"
rm -rf /etc/kubernetes

证书续期

证书有效期查看

kubeadm  certs check-expiration   

证书续期

# Back up the existing certificates before renewing
cp -r /etc/kubernetes /etc/kubernetes.bak
# Renew all kubeadm-managed certificates
kubeadm certs renew all

# Restart the static control-plane pods so the new certificates take
# effect (kubectl variant — pod names assume the node is called "master")
# kubectl delete pod etcd-master -n kube-system
# kubectl delete pod kube-apiserver-master -n kube-system
# kubectl delete pod kube-controller-manager-master -n kube-system
# kubectl delete pod kube-scheduler-master -n kube-system

# Same restart done directly through the CRI, works even when the
# apiserver is unreachable because of the expired certs
crictl pods --namespace kube-system --name 'kube-scheduler-*|kube-controller-manager-*|kube-apiserver-*|etcd-*' -q | xargs crictl rmp -f 

# Refresh the admin kubeconfig (it embeds a renewed client certificate)
cp -i /etc/kubernetes/admin.conf $HOME/.kube/config
chown $(id -u):$(id -g) $HOME/.kube/config


去掉污点

# List the taints on a node
kubectl describe nodes k8s-uat |grep Taints
# Remove the not-ready taint (example)
kubectl taint node k8s-01 node.kubernetes.io/not-ready:NoSchedule-
# Remove the control-plane taint (example) — the original comment wrongly
# said "view taint"; this command removes it. (Stray trailing ';' dropped.)
kubectl taint node k8s-01 node-role.kubernetes.io/control-plane:NoSchedule-