k8s业务pod端口检测和日志采集bash脚本
- 2025-04-03 12:07:08
- 脚本
- 20
- shevechco
在k8s集群中,我们偶尔需要采集业务pod的日志进行分析。这些日志分散在各个组件pod的指定位置,因此我们编写了一个脚本来统一收集。该脚本仅用于临时的日志分析和业务状态检测;日常运维中,集群一般都已部署各种监控和日志采集系统。
脚本内容如下:
#!/bin/bash
#set -x
# Absolute directory that contains this script; it is appended to PATH so
# helper binaries shipped next to the script can be found.
current_dir="$(readlink -f -- "$(dirname -- "$0")")"
export PATH="${PATH}:${current_dir}"
# Kubernetes namespace whose pods are inspected.
namespace="aiops"
# One output directory per day, created next to the script.
backup_dir="${current_dir}/log_$(date +'%Y%m%d')"
# Every later step redirects into this directory, so stop early if it
# cannot be created.
if ! mkdir -p -- "${backup_dir}"; then
  printf '无法创建日志目录: %s\n' "${backup_dir}" >&2
  exit 1
fi
# Print an informational message to stdout, prefixed with a coloured
# "[INFO]" badge.
# Arguments: $@ - message words; they are joined into a single line
function INFO(){
  local -r badge='\e[104m\e[97m[INFO]\e[49m\e[39m'
  /bin/echo -e "${badge} ${*}"
}
# Print a warning message to stderr, prefixed with a coloured
# "[WARNING]" badge.
# Arguments: $@ - message words; they are joined into a single line
function WARNING(){
  local -r badge='\e[101m\e[97m[WARNING]\e[49m\e[39m'
  /bin/echo -e "${badge} ${*}" >&2
}
# Print an error message to stderr, prefixed with a coloured "[ERROR]"
# badge. It only reports; it does not terminate the script.
# Arguments: $@ - message words; they are joined into a single line
function ERROR(){
  local -r badge='\e[101m\e[97m[ERROR]\e[49m\e[39m'
  /bin/echo -e "${badge} ${*}" >&2
}
# Resolve the addresses probed by the port checks further down.
# The values are read from the API objects via JSONPath rather than cut out
# of the human-readable "-o wide" table, whose column positions shift when a
# field contains spaces (for example a RESTARTS value such as "3 (5m ago)").
# NOTE(review): controller_ip and controller_ha_ip are used later in this
# script but no assignment is visible here — confirm they are provided by
# the environment, or add the matching lookups.
mongodb_ip="$(kubectl get pod -n "${namespace}" controller-mongodb-0 -o jsonpath='{.status.podIP}')"
gui_ip="$(kubectl get svc -n "${namespace}" web-gui -o jsonpath='{.spec.clusterIP}')"
log_server_ip="$(kubectl get svc -n "${namespace}" log-server -o jsonpath='{.spec.clusterIP}')"
# Dump the current logs of one pod into a file under ${backup_dir}.
# Globals:   namespace (read), backup_dir (read)
# Arguments: $1 - pod name
#            $2 - optional name of one container inside the pod; when given,
#                 the log file is named after the container, not the pod
# Outputs:   ${backup_dir}/<pod>.log or ${backup_dir}/<container>.log
# Returns:   0 on success, 1 when kubectl fails; exits 1 when called with a
#            wrong number of arguments
function realtime_collection(){
  local -a target
  local label log_file

  if [ $# -eq 1 ]; then
    target=("$1")
    label="$1"
    log_file="${backup_dir}/$1.log"
  elif [ $# -eq 2 ]; then
    target=("$1" -c "$2")
    label="$1/$2"
    log_file="${backup_dir}/$2.log"
  else
    ERROR "参数错误...!!! "
    exit 1
  fi

  INFO "---正在导出组件: ${label} 日志到 ${log_file}!"
  # The redirect target is quoted so a backup path containing spaces works,
  # and a kubectl failure is reported instead of being logged as a success.
  if ! kubectl logs -n "${namespace}" "${target[@]}" > "${log_file}"; then
    ERROR "---导出组件: ${label} 日志失败,请检查...!!!"
    return 1
  fi
}
# Collect the logs of every pod whose name matches a component pattern.
# Globals:   namespace (read)
# Arguments: $1 - component name, used as a grep pattern against pod names
# Returns:   1 when the pattern is empty; otherwise the status of the last
#            realtime_collection call (0 when no pod matched)
function loop_read_comp(){
  local comp_name=$1
  local pod_name

  # An empty pattern would match every pod in the namespace.
  if [[ -z "${comp_name}" ]]; then
    ERROR "参数错误...!!! "
    return 1
  fi

  # --no-headers keeps the "NAME" title row out of the candidate list.
  kubectl get pod -n "${namespace}" --no-headers \
    | awk '{print $1}' \
    | grep -e "${comp_name}" \
    | while IFS= read -r pod_name; do
        realtime_collection "${pod_name}"
      done
}
# Verify that every pod in the namespace reports STATUS "Running".
# Globals:   namespace (read)
# Outputs:   an INFO line when all pods are Running, an ERROR line otherwise
# Exits:     1 (terminating the script) when any pod is not Running, or when
#            the pod list is empty or cannot be fetched
function pod_status_check(){
  local pod_list abnormal_comp_num

  pod_list="$(kubectl get pod -n "${namespace}" --no-headers)" || pod_list=""

  # Compare the STATUS column (3rd field) exactly instead of searching the
  # whole row for "Running", so a failed pod whose name happens to contain
  # that word is still counted. NF skips blank lines.
  abnormal_comp_num="$(awk 'NF && $3 != "Running" { n++ } END { print n + 0 }' <<<"${pod_list}")"

  if [[ -n "${pod_list}" && "${abnormal_comp_num}" -eq 0 ]]; then
    INFO "---${namespace}命名空间下所有组件状态正常! "
  else
    ERROR "---有状态异常的组件,请检查...!!!"
    exit 1
  fi
}
# Query the controller's /licenses endpoint and print one INFO line per
# license with its expiry time, id and status.
# Globals:   controller_ip (read)
# Requires:  curl, jq
# Returns:   0 on success, 1 when the HTTP request fails or the response
#            cannot be parsed
function license_status_check(){
  local license_remaining license_rows
  local expire_time license_id license_status

  # -f makes curl fail on HTTP 4xx/5xx instead of handing an error page to jq.
  if ! license_remaining="$(curl -sf -X GET "http://${controller_ip}:8080/licenses")"; then
    ERROR "---无法获取license信息,请检查...!!!"
    return 1
  fi

  # A single jq pass emits one tab-separated row per license. tojson keeps
  # each value in its JSON spelling (strings stay quoted), which is how a
  # plain jq invocation prints them.
  if ! license_rows="$(jq -r '.data[] | [.ExpireTime, .Id, .Status] | map(tojson) | join("\t")' <<<"${license_remaining}")"; then
    ERROR "---无法获取license信息,请检查...!!!"
    return 1
  fi

  while IFS=$'\t' read -r expire_time license_id license_status; do
    # The here-string yields one empty line when there are no licenses.
    [[ -n "${expire_time}" ]] || continue
    INFO "---license 到期时间: ${expire_time}, license_id: ${license_id}, 当前状态: ${license_status}"
  done <<<"${license_rows}"
}
# Probe a TCP port of a component with `nc -z` and report the result.
# Arguments: $1 - component name (used only in messages)
#            $2 - host or IP address to probe
#            $3 - TCP port
# Returns:   0 when the port accepts a connection, 1 otherwise. A failure is
#            only reported; it never terminates the script.
function comp_port_check(){
  local comp=$1 host=$2 port=$3

  # An empty address must not reach nc: the port would be taken as the host.
  if [[ -z "${host}" || -z "${port}" ]]; then
    ERROR "---${comp}组件地址或端口为空,无法检测端口,请检查...!!!"
    return 1
  fi

  if nc -z "${host}" "${port}"; then
    INFO "---${comp}组件${port}端口正常!"
  else
    ERROR "---${comp}组件${port}端口异常,请检查...!!!"
    #exit 1
    return 1
  fi
}
# ---- Main flow: inventory, health checks, then log collection ----
# NOTE(review): controller_ip and controller_ha_ip are read below but no
# assignment is visible in this script — confirm they are provided by the
# environment, or add lookups next to the other *_ip variables.
# Every address is passed quoted so an empty value stays in its own argument
# slot instead of shifting the port into the host position.
INFO "采集节点清单..."
kubectl get node -o wide > "${backup_dir}/node.log"
INFO "采集${namespace}命名空间下POD清单..."
kubectl get pod -n "${namespace}" -o wide > "${backup_dir}/pod.log"

# pod_status_check exits the script when a pod is not Running.
INFO "检测所有POD组件状态..."
pod_status_check

# Port probes need nc; they are skipped when it is not installed.
INFO "检测所有组件端口..."
if ! command -v "nc" >/dev/null 2>&1; then
  WARNING "未检测到nc工具,跳过端口检测!"
else
  comp_port_check "mongodb" "${mongodb_ip}" "27017"
  comp_port_check "controller" "${controller_ip}" "8080"
  comp_port_check "gui" "${gui_ip}" "80"
  comp_port_check "etcd" "${controller_ha_ip}" "2379"
  comp_port_check "prometheus" "${gui_ip}" "9090"
  comp_port_check "log-server" "${log_server_ip}" "3100"
fi

# The license check parses JSON with jq; it is skipped when jq is missing.
if ! command -v "jq" >/dev/null 2>&1; then
  WARNING "未安装JQ工具, 跳过license、vdevice检测!"
else
  INFO "检测license到期时间..."
  license_status_check
fi

# Log collection, one component group at a time.
INFO " 开始采集etcd日志..."
loop_read_comp "etcd"
INFO "开始采集mongodb日志..."
loop_read_comp "controller-mongodb"
INFO "开始采集GUI日志..."
realtime_collection "web-gui-0"
INFO "开始采集log-server日志..."
loop_read_comp "log-server"
INFO "开始采集exporter日志..."
loop_read_comp "exporter"
INFO "所有日志已采集到${backup_dir}目录下...!"
脚本使用了jq(用于license检测)和nc(用于端口检测),请提前安装这两个工具;未安装时,脚本会给出警告并跳过对应的检测。
内容版权声明:除非注明,否则皆为本站原创文章。