跳至主要內容

Keepalived脑裂的解决和预防

知识库集成配置NginxNginx大约 2 分钟

Keepalived脑裂的解决和预防

#!/bin/bash
export PATH=$PATH:/usr/sbin
## 脑裂检查及控制:第三方仲裁机制,使用ping网关ip方式
## 循环次数
CHECK_TIME=3
## 虚拟ip
VIP=$1
## 网关ip(根据实际环境修改)
GATEWAY=192.168.1.8
## 本机网卡
eth=enp2s0
## 服务器和网关通信状态  0=失败,1=成功
keepalived_communication_status=1
## 是否获取vip状态 0=失败,1=成功
get_vip_status=1
## keepalived服务状态 0=未运行,1=运行中
keepalived_service_status=1
## 服务状态运行中字符串
active_status_str='active (running)'
echo "开始执行脚本 check_gateway.sh $VIP;时间:"
date
## 查看是否获取vip状态
function check_get_vip_status() {
  ## 通过ip add命令查看ip信息,搜索$VIP,统计行数,是否等于1
  if [ $(ip add | grep "$VIP" | wc -l) -eq 1 ]; then
    get_vip_status=1
  else
    get_vip_status=0
  fi
  return $get_vip_status
}
 
## 检查通信状态
function check_keepalived_status() {
  ## 检测$VIP 能否ping通$GATEWAY:使用$eth网络设备(-I $eth),发送数据包5(-c 5),源地址$VIP询问目的地[vip] $GATEWAY [网关地址 公用参考ip](-s $VIP $GATEWAY) 日志不保存 >/dev/null 2>&1
  /sbin/arping -I $eth -c 5 -s $VIP $GATEWAY >/dev/null 2>&1
  ## 判断上一步执行结果 等于0成功
  if [ $? = 0 ]; then
    keepalived_communication_status=1
  else
    keepalived_communication_status=0
  fi
  return $keepalived_communication_status
}
 
## 检查keepalived服务状态
function check_keepalived_service_status() {
  ## 通过systemctl status keepalived.service命令查看keepalived服务状态,搜索$active_status_str,统计行数,是否等于1
  if [ $(systemctl status keepalived.service | grep "$active_status_str" | wc -l) -eq 1 ]; then
    keepalived_service_status=1
  else
    keepalived_service_status=0
  fi
  return $keepalived_service_status
}
 
## 循环执行
## 判断$CHECK_TIME 不等于 0
while [ $CHECK_TIME -ne 0 ]; do
  ## 执行check_get_vip_status获取get_vip_status
  check_get_vip_status
  ## 未获取vip
  if [ $get_vip_status = 0 ]; then
    ## 修改CHECK_TIME值 结束循环
    CHECK_TIME=0
    ## 检查服务状态 执行check_keepalived_service_status获取keepalived_service_status
    if [ $keepalived_service_status = 0 ]; then
      echo "执行脚本 check_gateway.sh $VIP;启动keepalived服务"
      systemctl start keepalived.service
    fi
    echo "执行脚本 check_gateway.sh $VIP;执行结果:未获取vip,无需处理,脚本执行结束,时间:"
    date
    ## 正常运行程序并退出程序
    exit 0
  fi
  ## $CHECK_TIME  = $CHECK_TIME-1
  let "CHECK_TIME -= 1"
  ## 执行check_keepalived_status获取keepalived_communication_status
  check_keepalived_status
  ## 判断 $keepalived_communication_status = 1 通信成功
  if [ $keepalived_communication_status = 1 ]; then
    ## 修改CHECK_TIME值 结束循环
    CHECK_TIME=0
    ## 检查服务状态 执行check_keepalived_service_status获取keepalived_service_status
    check_keepalived_service_status
    if [ $keepalived_service_status = 0 ]; then
      echo "执行脚本 check_gateway.sh $VIP;启动keepalived服务"
      systemctl start keepalived.service
    fi
 
    echo "执行脚本 check_gateway.sh $VIP;GATEWAY=$GATEWAY,执行结果:通信正常,无需处理,脚本执行结束,时间:"
    date
    ## 正常运行程序并退出程序
    exit 0
  fi
  ## 通信失败&&连续3次
  if [ $keepalived_communication_status -eq 0 ] && [ $CHECK_TIME -eq 0 ]; then
    ## 关闭keepalived
    echo "执行脚本 check_gateway.sh $VIP;关闭keepalived服务"
    systemctl stop keepalived.service
    echo "执行脚本 check_gateway.sh $VIP;GATEWAY=$GATEWAY,执行结果:通信失败&&连续3次 关闭keepalived,脚本执行结束,时间:"
    date
    ## 非正常运行程序并退出程序
    exit 1
  fi
  sleep 3
done