Bootstrap

Installing Kubernetes (k8s) with kubeadm

1. Role planning

# CentOS Linux release 7.9.2009 (Core) 
master01        10.202.30.22	# 4C8G
node01          10.202.30.30	# 4C8G
node02          10.202.30.31	# 4C8G

2. Environment preparation

2.1. Configure hostname resolution via /etc/hosts

# vim /etc/hosts
10.202.30.22    master01
10.202.30.30    node01
10.202.30.31    node02

2.2. Configure passwordless SSH between nodes

# Generate a key pair (non-interactive; -f skips the path prompt)
ssh-keygen -t rsa -q -N '' -f ~/.ssh/id_rsa
# Distribute the public key to every node
for i in master01 node01 node02;do ssh-copy-id -i ~/.ssh/id_rsa.pub $i;done
# Distribute the hosts file to the other nodes
for i in node01 node02;do scp /etc/hosts root@$i:/etc/;done
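
A quick check that the keys actually landed (a small sketch; BatchMode makes ssh fail instead of prompting, so each command should just print the remote hostname):
for i in master01 node01 node02;do ssh -o BatchMode=yes $i hostname;done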

2.3. Configure time synchronization

# Install chrony
yum install chrony -y
# Start the service and enable it at boot
systemctl start chronyd.service
systemctl enable chronyd.service
# Check which time sources chrony is using
chronyc sources
# Verify the clocks agree across the nodes
date
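
If the date outputs disagree, the nodes may simply be in different timezones; a sketch to align them and inspect sync state (Asia/Shanghai is an assumption, substitute your own zone):
# Set one timezone on every node
timedatectl set-timezone Asia/Shanghai
# Show the offset from the selected NTP source
chronyc tracking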

2.4. Disable the firewall and SELinux

# Disable SELinux
## Temporarily (current boot)
setenforce 0
## Permanently (takes effect after a reboot)
sed -i 's/enforcing/disabled/' /etc/selinux/config

# Disable the firewall (--now stops and disables it in one step)
systemctl disable --now firewalld

2.5. Disable swap and NetworkManager

# Disable swap
sed -ri 's/.*swap.*/#&/' /etc/fstab
swapoff -a
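
To confirm swap is fully off (the Swap row should read 0 across the board):
free -h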

# Disable NetworkManager
systemctl stop NetworkManager
systemctl disable NetworkManager

2.6. Configure the CentOS base and EPEL repositories

# Configure the Aliyun CentOS 7 base repo
wget -O /etc/yum.repos.d/CentOS-Base.repo https://mirrors.aliyun.com/repo/Centos-7.repo
# Configure the Aliyun EPEL repo
wget -O /etc/yum.repos.d/epel.repo https://mirrors.aliyun.com/repo/epel-7.repo

2.7. Configure the Aliyun Kubernetes repo

# Aliyun Kubernetes repo, legacy layout (all nodes; only carries packages up to v1.28)
cat >/etc/yum.repos.d/kubernetes.repo<< EOF
[kubernetes]
name=Kubernetes
baseurl=https://mirrors.aliyun.com/kubernetes/yum/repos/kubernetes-el7-x86_64/
enabled=1
gpgcheck=0
repo_gpgcheck=0
gpgkey=https://mirrors.aliyun.com/kubernetes/yum/doc/yum-key.gpg https://mirrors.aliyun.com/kubernetes/yum/doc/rpm-package-key.gpg
EOF

# Aliyun Kubernetes repo, new community layout (all nodes; used here for v1.30)
cat <<EOF | tee /etc/yum.repos.d/kubernetes.repo
[kubernetes]
name=Kubernetes
baseurl=https://mirrors.aliyun.com/kubernetes-new/core/stable/v1.30/rpm/
enabled=1
gpgcheck=1
gpgkey=https://mirrors.aliyun.com/kubernetes-new/core/stable/v1.30/rpm/repodata/repomd.xml.key
EOF
## Note: upstream does not expose a way to sync the signing metadata, so the repo index
## GPG check may fail; in that case install with: yum install -y --nogpgcheck kubelet kubeadm kubectl

# Distribute the Kubernetes repo to the other nodes
for i in node01 node02;do scp /etc/yum.repos.d/kubernetes.repo root@$i:/etc/yum.repos.d/;done
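
Before installing, it can help to confirm the repo is usable and the target version is actually published (a sketch; the versions listed depend on which repo variant you configured):
# Rebuild the yum cache and show the kubeadm versions the repo offers
yum makecache
yum list --showduplicates kubeadm | tail -n 5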

2.8. Kernel upgrade

# Import the ELRepo signing key
rpm --import https://www.elrepo.org/RPM-GPG-KEY-elrepo.org
# Install the ELRepo release package
rpm -Uvh http://www.elrepo.org/elrepo-release-7.0-3.el7.elrepo.noarch.rpm
# Load the elrepo-kernel repo metadata
yum --disablerepo=\* --enablerepo=elrepo-kernel repolist
# List the available kernel packages
yum --disablerepo=\* --enablerepo=elrepo-kernel list kernel*
# Install the long-term-support kernel
yum --disablerepo=\* --enablerepo=elrepo-kernel install -y kernel-lt.x86_64
# Remove the old kernel tools packages
yum remove kernel-tools-libs.x86_64 kernel-tools.x86_64 -y
# Install the matching new tools packages
yum --disablerepo=\* --enablerepo=elrepo-kernel install -y kernel-lt-tools.x86_64

# Inspect the boot menu entries
awk -F\' '$1=="menuentry " {print $2}' /etc/grub2.cfg
CentOS Linux (4.4.183-1.el7.elrepo.x86_64) 7 (Core)
CentOS Linux (3.10.0-327.10.1.el7.x86_64) 7 (Core)
CentOS Linux (0-rescue-c52097a1078c403da03b8eddeac5080b) 7 (Core)
# Entries are indexed from 0 and the new kernel is inserted at the top (the new 4.4
# kernel is now index 0 and the old 3.10 kernel index 1), so select entry 0
grub2-set-default 0
# Reboot and check
reboot
# Alternatively, install IPVS and Docker first and reboot only once afterwards
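
After the reboot, a quick check that the new kernel is the one running (the 4.4.x version string is just the example from this guide):
uname -r            # should print the elrepo kernel-lt version, e.g. 4.4.x
grub2-editenv list  # shows the saved default boot entry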

3. Install and configure IPVS and Docker

3.1. Install IPVS

# Install the IPVS userspace tools
yum install -y conntrack-tools ipvsadm ipset conntrack libseccomp
# Load the IPVS kernel modules
cat > /etc/sysconfig/modules/ipvs.modules <<EOF
#!/bin/bash
ipvs_modules="ip_vs ip_vs_lc ip_vs_wlc ip_vs_rr ip_vs_wrr ip_vs_lblc ip_vs_lblcr
ip_vs_dh ip_vs_sh ip_vs_fo ip_vs_nq ip_vs_sed ip_vs_ftp nf_conntrack"
for kernel_module in \${ipvs_modules}; do
  # Only load modules that exist for the running kernel (the original [ \$? -eq 0 ]
  # test used an unescaped \$?, which was expanded when the file was written)
  if /sbin/modinfo -F filename \${kernel_module} > /dev/null 2>&1; then
    /sbin/modprobe \${kernel_module}
  fi
done
EOF
chmod 755 /etc/sysconfig/modules/ipvs.modules && bash /etc/sysconfig/modules/ipvs.modules && lsmod | grep ip_vs
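
The script above only loads the modules for the current boot; to have systemd reload them on every boot you can also register them with systemd-modules-load (a sketch listing the core set; extend it with the scheduler modules you actually use):
cat > /etc/modules-load.d/ipvs.conf <<EOF
ip_vs
ip_vs_rr
ip_vs_wrr
ip_vs_sh
nf_conntrack
EOF
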
# Add the Kubernetes sysctl settings and apply them (all nodes)
## /etc/sysctl.d/k8s.conf
cat > /etc/sysctl.d/k8s.conf << EOF
net.ipv4.ip_forward = 1
net.bridge.bridge-nf-call-iptables = 1
net.bridge.bridge-nf-call-ip6tables = 1
fs.may_detach_mounts = 1
vm.overcommit_memory=1
vm.panic_on_oom=0
fs.inotify.max_user_watches=89100
fs.file-max=52706963
fs.nr_open=52706963
net.ipv4.tcp_keepalive_time = 600
net.ipv4.tcp_keepalive_probes = 3
net.ipv4.tcp_keepalive_intvl = 15
net.ipv4.tcp_max_tw_buckets = 36000
net.ipv4.tcp_tw_reuse = 1
net.ipv4.tcp_max_orphans = 327680
net.ipv4.tcp_orphan_retries = 3
net.ipv4.tcp_syncookies = 1
net.ipv4.tcp_max_syn_backlog = 16384
net.netfilter.nf_conntrack_max = 65536
net.ipv4.tcp_timestamps = 0
net.core.somaxconn = 16384
EOF

# Apply immediately
sysctl --system
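
The net.bridge.* keys above only exist while the br_netfilter module is loaded; if sysctl --system complains that they are unknown, load and persist the module first:
modprobe br_netfilter
echo br_netfilter > /etc/modules-load.d/br_netfilter.conf
sysctl --system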

3.2. Install Docker

# Install docker-ce from the Aliyun mirror
# Step 1: install prerequisite tools
sudo yum install -y yum-utils device-mapper-persistent-data lvm2
# Step 2: add the docker-ce repo
sudo yum-config-manager --add-repo https://mirrors.aliyun.com/docker-ce/linux/centos/docker-ce.repo
# Step 3: point the repo at the Aliyun mirror
sudo sed -i 's+download.docker.com+mirrors.aliyun.com/docker-ce+' /etc/yum.repos.d/docker-ce.repo
# Step 4: refresh the cache and install docker-ce
sudo yum makecache fast
sudo yum -y install docker-ce
# Step 5: start and enable the Docker service
systemctl start docker && systemctl enable docker && sudo systemctl status docker
# Step 6: configure an Aliyun registry mirror (Aliyun console -> Container Registry -> image tools);
# also add "exec-opts": ["native.cgroupdriver=systemd"] so Docker uses the systemd cgroup driver
sudo mkdir -p /etc/docker
sudo tee /etc/docker/daemon.json <<-'EOF'
{
  "registry-mirrors": ["https://d6mtathr.mirror.aliyuncs.com"],
  "exec-opts": ["native.cgroupdriver=systemd"]
}
EOF
# Restart the Docker service
sudo systemctl daemon-reload && sudo systemctl restart docker && sudo systemctl status docker

# Adjust the containerd config (all nodes)
# Back up the original file, then regenerate the default config
cp /etc/containerd/config.toml /etc/containerd/config.toml.bak
containerd config default > /etc/containerd/config.toml
# vim /etc/containerd/config.toml
1. Find the line SystemdCgroup = false and change false to true.
2. Find the line containing sandbox_image and change the image to registry.cn-guangzhou.aliyuncs.com/my_aliyund/pause:3.9 (the tag must match the pause image pulled in section 4.1); both edits can be scripted, as sketched below.
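# A sed sketch of the two edits, assuming the stock layout produced by `containerd config default`:
sudo sed -i 's/SystemdCgroup = false/SystemdCgroup = true/' /etc/containerd/config.toml
sudo sed -i 's#sandbox_image = ".*"#sandbox_image = "registry.cn-guangzhou.aliyuncs.com/my_aliyund/pause:3.9"#' /etc/containerd/config.toml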
# Restart containerd after the edits
sudo systemctl restart containerd && sudo systemctl status containerd && sudo systemctl enable containerd
###### The kernel upgrade still requires a reboot
# Reboot and check

4. Install Kubernetes

4.1. kubeadm initialization

# Install on all nodes:
yum install -y kubelet-1.30.2 kubeadm-1.30.2 kubectl-1.30.2
# Enable kubelet at boot on all nodes:
systemctl enable kubelet.service
# Print the default initialization parameters:
kubeadm config print init-defaults
# Print the images and versions the cluster needs (the official registry is hard to reach, so pull the required versions ahead of time):
kubeadm config images list
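
kubeadm can also pre-pull everything itself through the configured runtime, which avoids the per-image docker pulls below (a sketch using the same mirror repository):
kubeadm config images pull \
  --image-repository registry.cn-guangzhou.aliyuncs.com/my_aliyund \
  --kubernetes-version v1.30.2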

# Run on the master01 node
# Pull the required images locally
docker pull registry.cn-guangzhou.aliyuncs.com/my_aliyund/kube-apiserver:v1.30.2
docker pull registry.cn-guangzhou.aliyuncs.com/my_aliyund/kube-controller-manager:v1.30.2
docker pull registry.cn-guangzhou.aliyuncs.com/my_aliyund/kube-scheduler:v1.30.2
docker pull registry.cn-guangzhou.aliyuncs.com/my_aliyund/kube-proxy:v1.30.2
docker pull registry.cn-guangzhou.aliyuncs.com/my_aliyund/pause:3.9
docker pull registry.cn-guangzhou.aliyuncs.com/my_aliyund/etcd:3.5.12-0
docker pull registry.cn-guangzhou.aliyuncs.com/my_aliyund/coredns:v1.11.1

# You can either re-tag the pulled images back to their registry.k8s.io names (sketched below) or point kubeadm at the mirror repository; the init command below takes the mirror route
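A sketch of the re-tag route (target names as printed by `kubeadm config images list`; note coredns lives under a sub-path on registry.k8s.io):
for img in kube-apiserver:v1.30.2 kube-controller-manager:v1.30.2 kube-scheduler:v1.30.2 kube-proxy:v1.30.2 pause:3.9 etcd:3.5.12-0; do
  docker tag registry.cn-guangzhou.aliyuncs.com/my_aliyund/$img registry.k8s.io/$img
done
docker tag registry.cn-guangzhou.aliyuncs.com/my_aliyund/coredns:v1.11.1 registry.k8s.io/coredns/coredns:v1.11.1
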
# kubeadm init
kubeadm init \
--apiserver-advertise-address=10.202.30.22 \
--control-plane-endpoint=master01 \
--image-repository registry.cn-guangzhou.aliyuncs.com/my_aliyund \
--kubernetes-version v1.30.2  \
--service-cidr=10.10.0.0/12  \
--pod-network-cidr=10.254.0.0/16

# init prints a join command with a token for adding nodes; save it
kubeadm join master01:6443 --token m9dhz2.u0annuoi45g4azer \
    --discovery-token-ca-cert-hash sha256:7f98d15fa8a053931dec6e062e97e00f3cdb66c77bd041ca429ce089f0fc8cac
# If you did not save it, this regenerates the join command
kubeadm token create --print-join-command

4.2. kubeadm init errors

[root@master01 ~]# kubeadm init \
> --apiserver-advertise-address=10.202.99.128 \
> --control-plane-endpoint=master01 \
> --image-repository registry.cn-guangzhou.aliyuncs.com/my_aliyund \
> --kubernetes-version v1.30.2  \
> --service-cidr=10.10.0.0/12  \
> --pod-network-cidr=10.254.0.0/16
[init] Using Kubernetes version: v1.30.2
[preflight] Running pre-flight checks
error execution phase preflight: [preflight] Some fatal errors occurred:
        [ERROR CRI]: container runtime is not running: output: time="2024-07-18T03:14:29-04:00" level=fatal msg="validate service connection: validate CRI v1 runtime API for endpoint \"unix:///var/run/containerd/containerd.sock\": rpc error: code = Unimplemented desc = unknown service runtime.v1.RuntimeService"
, error: exit status 1
[preflight] If you know what you are doing, you can make a check non-fatal with `--ignore-preflight-errors=...`
To see the stack trace of this error execute with --v=5 or higher
#####################
# Fix
# vim /etc/containerd/config.toml
# Set SystemdCgroup to true
SystemdCgroup = true
# Set every runtime_type to io.containerd.runtime.v1.linux
runtime_type = "io.containerd.runtime.v1.linux"
# Restart containerd
systemctl restart containerd

## Article on resolving the containerd initialization error
https://zhuanlan.zhihu.com/p/618551600

4.3. Successful initialization output

Your Kubernetes control-plane has initialized successfully!

To start using your cluster, you need to run the following as a regular user:

  mkdir -p $HOME/.kube
  sudo cp -i /etc/kubernetes/admin.conf $HOME/.kube/config
  sudo chown $(id -u):$(id -g) $HOME/.kube/config

Alternatively, if you are the root user, you can run:

  export KUBECONFIG=/etc/kubernetes/admin.conf

You should now deploy a pod network to the cluster.
Run "kubectl apply -f [podnetwork].yaml" with one of the options listed at:
  https://kubernetes.io/docs/concepts/cluster-administration/addons/

You can now join any number of control-plane nodes by copying certificate authorities
and service account keys on each node and then running the following as root:

  kubeadm join master01:6443 --token rdn0pu.r6kxla7vzf4bcftt \
        --discovery-token-ca-cert-hash sha256:611744ae7304f5c18cf46ca3bba42d5b6f5aa671173249fa1a17088ab37308ee \
        --control-plane

Then you can join any number of worker nodes by running the following on each as root:

kubeadm join master01:6443 --token rdn0pu.r6kxla7vzf4bcftt \
        --discovery-token-ca-cert-hash sha256:611744ae7304f5c18cf46ca3bba42d5b6f5aa671173249fa1a17088ab37308ee

4.4. Join nodes

# If you hit this error:
# E0401 02:25:24.207258    6245 memcache.go:265] couldn't get current server API group list: Get "http://localhost:8080/api?timeout=32s": dial tcp [::1]:8080: connect: connection refused
# Temporary fix
export KUBECONFIG=/etc/kubernetes/admin.conf
# Permanent fix
mkdir -p ~/.kube
cp /etc/kubernetes/admin.conf ~/.kube/config

# Run on the node that should join the cluster
# Join as a control-plane node (this also requires the certificates to be copied or uploaded first, as noted in the init output above)
kubeadm join master01:6443 --token rdn0pu.r6kxla7vzf4bcftt \
        --discovery-token-ca-cert-hash sha256:611744ae7304f5c18cf46ca3bba42d5b6f5aa671173249fa1a17088ab37308ee \
        --control-plane
# Join as a worker node
kubeadm join master01:6443 --token rdn0pu.r6kxla7vzf4bcftt \
        --discovery-token-ca-cert-hash sha256:611744ae7304f5c18cf46ca3bba42d5b6f5aa671173249fa1a17088ab37308ee

4.5. Install the network plugin (flannel)

# Check cluster state
kubectl get nodes
# The nodes show NotReady: coredns cannot start until a network plugin is installed
# Pull the flannel images (other network plugins work as well)
docker pull registry.cn-guangzhou.aliyuncs.com/my_aliyund/flannel-cni-plugin:v1.1.2
docker pull registry.cn-guangzhou.aliyuncs.com/my_aliyund/flannel:v0.21.5
# Apply the manifest from section 4.5.1 (its net-conf.json Network must match the --pod-network-cidr passed to kubeadm init, 10.254.0.0/16 here)
kubectl apply -f /data/kube-flannel.yaml
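After applying the manifest, watching the rollout is the quickest way to confirm networking comes up (pod names are generated, so select by namespace):
# Flannel pods should reach Running on every node
kubectl -n kube-flannel get pods -o wide
# coredns should then start, and the nodes flip to Ready
kubectl -n kube-system get pods
kubectl get nodes
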
4.5.1. kube-flannel.yaml
---
kind: Namespace
apiVersion: v1
metadata:
  name: kube-flannel
  labels:
    pod-security.kubernetes.io/enforce: privileged
---
kind: ClusterRole
apiVersion: rbac.authorization.k8s.io/v1
metadata:
  name: flannel
rules:
- apiGroups:
  - ""
  resources:
  - pods
  verbs:
  - get
- apiGroups:
  - ""
  resources:
  - nodes
  verbs:
  - list
  - watch
- apiGroups:
  - ""
  resources:
  - nodes/status
  verbs:
  - patch
---
kind: ClusterRoleBinding
apiVersion: rbac.authorization.k8s.io/v1
metadata:
  name: flannel
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: flannel
subjects:
- kind: ServiceAccount
  name: flannel
  namespace: kube-flannel
---
apiVersion: v1
kind: ServiceAccount
metadata:
  name: flannel
  namespace: kube-flannel
---
kind: ConfigMap
apiVersion: v1
metadata:
  name: kube-flannel-cfg
  namespace: kube-flannel
  labels:
    tier: node
    app: flannel
data:
  cni-conf.json: |
    {
      "name": "cbr0",
      "cniVersion": "0.3.1",
      "plugins": [
        {
          "type": "flannel",
          "delegate": {
            "hairpinMode": true,
            "isDefaultGateway": true
          }
        },
        {
          "type": "portmap",
          "capabilities": {
            "portMappings": true
          }
        }
      ]
    }
  net-conf.json: |
    {
      "Network": "10.244.0.0/16",
      "Backend": {
        "Type": "vxlan"
      }
    }
---
apiVersion: apps/v1
kind: DaemonSet
metadata:
  name: kube-flannel-ds
  namespace: kube-flannel
  labels:
    tier: node
    app: flannel
spec:
  selector:
    matchLabels:
      app: flannel
  template:
    metadata:
      labels:
        tier: node
        app: flannel
    spec:
      affinity:
        nodeAffinity:
          requiredDuringSchedulingIgnoredDuringExecution:
            nodeSelectorTerms:
            - matchExpressions:
              - key: kubernetes.io/os
                operator: In
                values:
                - linux
      hostNetwork: true
      priorityClassName: system-node-critical
      tolerations:
      - operator: Exists
        effect: NoSchedule
      serviceAccountName: flannel
      initContainers:
      - name: install-cni-plugin
       #image: flannelcni/flannel-cni-plugin:v1.1.0 for ppc64le and mips64le (dockerhub limitations may apply)
        image: registry.cn-guangzhou.aliyuncs.com/my_aliyund/flannel-cni-plugin:v1.1.2
        command:
        - cp
        args:
        - -f
        - /flannel
        - /opt/cni/bin/flannel
        volumeMounts:
        - name: cni-plugin
          mountPath: /opt/cni/bin
      - name: install-cni
       #image: flannelcni/flannel:v0.20.1 for ppc64le and mips64le (dockerhub limitations may apply)
        image: registry.cn-guangzhou.aliyuncs.com/my_aliyund/flannel:v0.21.5
        command:
        - cp
        args:
        - -f
        - /etc/kube-flannel/cni-conf.json
        - /etc/cni/net.d/10-flannel.conflist
        volumeMounts:
        - name: cni
          mountPath: /etc/cni/net.d
        - name: flannel-cfg
          mountPath: /etc/kube-flannel/
      containers:
      - name: kube-flannel
       #image: flannelcni/flannel:v0.20.1 for ppc64le and mips64le (dockerhub limitations may apply)
        image: registry.cn-guangzhou.aliyuncs.com/my_aliyund/flannel:v0.21.5
        command:
        - /opt/bin/flanneld
        args:
        - --ip-masq
        - --kube-subnet-mgr
        resources:
          requests:
            cpu: "100m"
            memory: "50Mi"
          limits:
            cpu: "100m"
            memory: "50Mi"
        securityContext:
          privileged: false
          capabilities:
            add: ["NET_ADMIN", "NET_RAW"]
        env:
        - name: POD_NAME
          valueFrom:
            fieldRef:
              fieldPath: metadata.name
        - name: POD_NAMESPACE
          valueFrom:
            fieldRef:
              fieldPath: metadata.namespace
        - name: EVENT_QUEUE_DEPTH
          value: "5000"
        volumeMounts:
        - name: run
          mountPath: /run/flannel
        - name: flannel-cfg
          mountPath: /etc/kube-flannel/
        - name: xtables-lock
          mountPath: /run/xtables.lock
      volumes:
      - name: run
        hostPath:
          path: /run/flannel
      - name: cni-plugin
        hostPath:
          path: /opt/cni/bin
      - name: cni
        hostPath:
          path: /etc/cni/net.d
      - name: flannel-cfg
        configMap:
          name: kube-flannel-cfg
      - name: xtables-lock
        hostPath:
          path: /run/xtables.lock
          type: FileOrCreate

4.6. Configure bash-completion for kubectl

yum install -y bash-completion

# Load completion in the current shell
source /usr/share/bash-completion/bash_completion
source <(kubectl completion bash)
# Persist it for future logins
kubectl completion bash > ~/.kube/completion.bash.inc
echo "source '$HOME/.kube/completion.bash.inc'" >> $HOME/.bash_profile
source $HOME/.bash_profile
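
If you also use the common k alias, completion can be wired to it as well (the __start_kubectl function is defined by the completion script loaded above):
echo 'alias k=kubectl' >> $HOME/.bash_profile
echo 'complete -o default -F __start_kubectl k' >> $HOME/.bash_profile
source $HOME/.bash_profile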

4.7. Check cluster status

# Show the control-plane endpoints
kubectl cluster-info
# Show pod status across all namespaces
kubectl get pod -A