openclaw-sync/scripts/server-health-report.sh

218 lines
6.4 KiB
Bash
Executable File
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env bash
# Builds a server health report (CPU, memory, disk, load, temperature, busiest
# processes and partitions) and posts it to a Telegram chat as an HTML message.
#
# Strict mode: -e exit on unhandled failure, -u error on unset variables,
# -o pipefail fail a pipeline when any stage fails, -E let ERR traps be
# inherited by functions and subshells.
set -Eeuo pipefail
# --- Configuration ---
# CHAT_ID / TOPIC_ID: sent to the Telegram sendMessage call as chat_id and
# message_thread_id.
CHAT_ID="-1003834524994"
TOPIC_ID="4"
# Bot token is taken from the environment; an empty value makes send_telegram
# refuse to send.
BOT_TOKEN="${OPENCLAW_TELEGRAM_BOT_TOKEN:-}"
# Number shown in the "CPU Top N" / "MEM Top N" report headings; overridable
# from the environment, defaults to 5.
TOP_N="${TOP_N:-5}"
# --------------
# Reports whether a command (binary, builtin, function or alias) is available.
# Arguments: $1 - command name
# Returns:   0 when the command can be resolved, 1 otherwise; prints nothing
have() {
  if command -v "$1" &>/dev/null; then
    return 0
  fi
  return 1
}
# Stream filter that escapes the three HTML-significant characters.
# Inputs:  raw text on stdin
# Outputs: text on stdout with & < > replaced by &amp; &lt; &gt;
#          (& is handled first so the entities themselves are not re-escaped)
escape_html() {
  sed 's/&/\&amp;/g; s/</\&lt;/g; s/>/\&gt;/g'
}
# Shortens a string to a maximum length, marking truncation with an ellipsis.
# Arguments: $1 - maximum length (characters, ellipsis included)
#            $2 - text to shorten
# Outputs:   the text unchanged when it fits, otherwise its first max-1
#            characters followed by "…"; no trailing newline
trim_text() {
  local limit="$1" input="$2"

  # Short enough already: pass it through untouched.
  if (( ${#input} <= limit )); then
    printf '%s' "$input"
    return
  fi

  printf '%s…' "${input:0:limit-1}"
}
# Renders a percentage as a fixed-width 10-cell gauge.
# Arguments: $1 - percentage (integer); values outside 0..100 are clamped and
#                 anything that is not an integer is treated as 0
# Outputs:   10 characters on stdout: "■" for filled cells, "□" for the rest;
#            no trailing newline
draw_bar() {
  local percent="$1"
  local width=10
  local filled empty i
  local bar=""

  # A non-numeric value (e.g. "-" or an empty string) would otherwise abort
  # the arithmetic below.
  [[ "$percent" =~ ^-?[0-9]+$ ]] || percent=0

  filled=$(( percent * width / 100 ))
  # Clamp both ends so the bar is always exactly $width cells wide.
  if (( filled > width )); then filled=$width; fi
  if (( filled < 0 )); then filled=0; fi
  empty=$(( width - filled ))

  for (( i = 0; i < filled; i++ )); do bar+="■"; done
  for (( i = 0; i < empty; i++ )); do bar+="□"; done
  printf '%s' "$bar"
}
# Measures overall CPU utilisation by sampling the aggregate "cpu" line of
# /proc/stat twice and comparing the deltas.
# Arguments: $1 - (optional) seconds to wait between the two samples; default 1
# Outputs:   busy percentage as a rounded integer on stdout, no trailing
#            newline; "0" when no ticks elapsed between the samples
get_cpu_usage() {
  local interval="${1:-1}"
  local user nice system idle iowait irq softirq steal
  local total_a idle_a total_b idle_b usage

  # First line of /proc/stat: "cpu user nice system idle iowait irq softirq steal ...".
  read -r _ user nice system idle iowait irq softirq steal _ < /proc/stat
  total_a=$(( user + nice + system + idle + iowait + irq + softirq + steal ))
  idle_a=$(( idle + iowait ))

  sleep "$interval"

  read -r _ user nice system idle iowait irq softirq steal _ < /proc/stat
  total_b=$(( user + nice + system + idle + iowait + irq + softirq + steal ))
  idle_b=$(( idle + iowait ))

  # busy = total delta minus idle delta (iowait counted as idle). Guard the
  # division and never report a negative percentage.
  usage=$(awk -v ta="$total_a" -v tb="$total_b" -v ia="$idle_a" -v ib="$idle_b" '
    BEGIN {
      dt = tb - ta
      di = ib - ia
      if (dt <= 0) { print "0"; exit }
      busy = dt - di
      if (busy < 0) busy = 0
      printf "%.0f", busy * 100 / dt
    }')
  printf '%s' "$usage"
}
# Reports RAM usage as parsed from `free -m`.
# Outputs: one line "used_mb total_mb used_percent" (integers) on stdout
get_mem_info() {
  # Force the C locale: free translates the "Mem:" row label in other
  # locales, which would make the pattern below match nothing.
  LC_ALL=C free -m \
    | awk '/^Mem:/ { printf "%d %d %d\n", $3, $2, ($2 > 0 ? $3 * 100 / $2 : 0) }'
}
# Reports swap usage as parsed from `free -m`.
# Outputs: one line "used_mb total_mb used_percent" (integers) on stdout;
#          the percentage is 0 when no swap is configured
get_swap_info() {
  # Force the C locale: free translates the "Swap:" row label in other
  # locales, which would make the pattern below match nothing.
  LC_ALL=C free -m \
    | awk '/^Swap:/ { printf "%d %d %d\n", $3, $2, ($2 > 0 ? $3 * 100 / $2 : 0) }'
}
# Reports space usage of the root filesystem.
# Outputs: one line "used total used_percent" on stdout; used/total carry
#          df's human-readable unit suffix, the percentage has its "%" removed
get_root_disk_info() {
  # -P keeps every filesystem on a single line (as get_top_partitions already
  # relies on); without it a long device name can wrap and shift the data off
  # line 2.
  df -hP / \
    | awk 'NR == 2 { gsub(/%/, "", $5); printf "%s %s %s\n", $3, $2, $5 }'
}
# Reports inode usage of the root filesystem.
# Outputs: one line "used total used_percent" on stdout, taken from columns
#          3, 2 and 5 of `df -i`, with the "%" removed from the percentage
get_root_inode_info() {
  # -P keeps every filesystem on a single line (as get_top_partitions already
  # relies on); without it a long device name can wrap and shift the data off
  # line 2.
  df -iP / \
    | awk 'NR == 2 { gsub(/%/, "", $5); printf "%s %s %s\n", $3, $2, $5 }'
}
# Prints the 1-, 5- and 15-minute load averages from /proc/loadavg.
# Outputs: "<1m> / <5m> / <15m>" followed by a newline on stdout
get_loadavg() {
  awk '{ printf "%s / %s / %s\n", $1, $2, $3 }' /proc/loadavg
}
# Prints the pretty-printed uptime without its leading "up " word.
# Outputs: the output of `uptime -p` with a leading "up " removed, on stdout
get_uptime() {
  uptime -p | awk '{ sub(/^up /, ""); print }'
}
# Prints a short temperature summary.
# Tries, in order: up to three matching lines from `sensors` (Package id 0,
# Tctl, Tdie, Composite), then /sys/class/thermal/thermal_zone0/temp, then
# the literal "N/A".
# Outputs: e.g. "Package id 0: 45.0°C; Composite: 38.9°C" on stdout,
#          no trailing newline
get_temp() {
  local readings=""

  if have sensors; then
    # Split on ":" so multi-word labels such as "Package id 0" stay intact.
    # The value is the first token after the colon with its leading "+"
    # removed. awk reads the whole input (no early exit) so sensors never
    # gets SIGPIPE. "|| true" keeps a failing sensors from aborting the
    # script; an empty result falls through to the next source instead.
    readings=$(sensors 2>/dev/null | awk -F':' '
      /Package id 0:|Tctl:|Tdie:|Composite:/ && shown < 3 {
        label = $1
        value = $2
        sub(/^[ \t]+/, "", label)
        sub(/^[ \t]+/, "", value)
        sub(/[ \t].*$/, "", value)
        sub(/^\+/, "", value)
        joined = joined (shown++ ? "; " : "") label ": " value
      }
      END { if (shown) printf "%s", joined }
    ') || true
  fi

  if [[ -n "$readings" ]]; then
    printf '%s' "$readings"
  elif [[ -r /sys/class/thermal/thermal_zone0/temp ]]; then
    # The kernel reports millidegrees Celsius here.
    awk '{printf "thermal_zone0 %.1f°C", $1/1000}' /sys/class/thermal/thermal_zone0/temp
  else
    printf 'N/A'
  fi
}
# Lists the five fullest mounted filesystems, excluding tmpfs and devtmpfs.
# Outputs: up to five lines "<use>% <mountpoint> (<used>/<size>)" on stdout,
#          ordered from fullest to emptiest
get_top_partitions() {
  df -hP -x tmpfs -x devtmpfs \
    | awk 'NR > 1 { gsub(/%/, "", $5); print $5 "\t" $6 "\t" $3 "/" $2 }' \
    | sort -rn \
    | head -n 5 \
    | while IFS=$'\t' read -r pct mnt usage; do
        printf '%s%% %s (%s)\n' "$pct" "$mnt" "$usage"
      done
}
# Lists the processes using the most CPU.
# Globals:   TOP_N (read) - default row count; 5 when unset
# Arguments: $1 - (optional) number of rows, overrides TOP_N
# Outputs:   one tab-separated line per process on stdout:
#            pid, user, %cpu, %mem, elapsed time, full command line
get_top_cpu_processes() {
  # Follow TOP_N so the row count matches the "CPU Top N" report heading.
  local limit="${1:-${TOP_N:-5}}"

  ps -eo pid,user,%cpu,%mem,etime,args --sort=-%cpu \
    | awk -v limit="$limit" 'NR > 1 && NR <= limit + 1 {
        # Fields 6..NF are the command line; re-join them with single spaces.
        cmd = ""
        for (i = 6; i <= NF; i++) cmd = cmd $i (i < NF ? " " : "")
        printf "%s\t%s\t%s\t%s\t%s\t%s\n", $1, $2, $3, $4, $5, cmd
      }'
}
# Lists the processes using the most memory.
# Globals:   TOP_N (read) - default row count; 5 when unset
# Arguments: $1 - (optional) number of rows, overrides TOP_N
# Outputs:   one tab-separated line per process on stdout:
#            pid, user, %cpu, %mem, elapsed time, full command line
get_top_mem_processes() {
  # Follow TOP_N so the row count matches the "MEM Top N" report heading.
  local limit="${1:-${TOP_N:-5}}"

  ps -eo pid,user,%cpu,%mem,etime,args --sort=-%mem \
    | awk -v limit="$limit" 'NR > 1 && NR <= limit + 1 {
        # Fields 6..NF are the command line; re-join them with single spaces.
        cmd = ""
        for (i = 6; i <= NF; i++) cmd = cmd $i (i < NF ? " " : "")
        printf "%s\t%s\t%s\t%s\t%s\t%s\n", $1, $2, $3, $4, $5, cmd
      }'
}
# Formats tab-separated process rows into aligned, fixed-width text lines.
# Arguments: $1 - rows of "pid<TAB>user<TAB>%cpu<TAB>%mem<TAB>etime<TAB>command",
#                 one per line; blank lines are skipped
# Outputs:   one padded line per row on stdout, with the command shortened to
#            42 characters by trim_text
format_proc_lines() {
  local rows="$1"
  # Keep the read targets local so generic names such as "pid" or "user" in
  # the caller are not overwritten.
  local pid user pcpu pmem etime cmd

  while IFS=$'\t' read -r pid user pcpu pmem etime cmd; do
    if [[ -z "${pid:-}" ]]; then
      continue
    fi
    cmd=$(trim_text 42 "$cmd")
    printf '%-6s %-8s %5s%% %5s%% %-10s %s\n' \
      "$pid" "$user" "$pcpu" "$pmem" "$etime" "$cmd"
  done <<< "$rows"
}
# Builds the one-line alert summary from the headline metrics.
# Arguments: $1 - CPU usage %      (alert at >= 85)
#            $2 - memory usage %   (alert at >= 85)
#            $3 - root disk usage % (alert at >= 90)
#            $4 - swap usage %     (alert at >= 30)
#            $5 - 1-minute load average (may be fractional)
#            $6 - number of CPU cores (alert when load exceeds it)
# Outputs:   "✅ ..." when nothing crossed a threshold, otherwise "⚠️ "
#            followed by every triggered alert joined with "; ";
#            no trailing newline
build_alerts() {
  local cpu_p="$1" mem_p="$2" disk_p="$3" swap_p="$4" load1="$5" cores="$6"
  local -a alerts=()
  local joined="" item

  if (( cpu_p >= 85 )); then alerts+=("CPU ${cpu_p}% 偏高"); fi
  if (( mem_p >= 85 )); then alerts+=("内存 ${mem_p}% 偏高"); fi
  if (( disk_p >= 90 )); then alerts+=("根分区 ${disk_p}% 偏高"); fi
  if (( swap_p >= 30 )); then alerts+=("Swap ${swap_p}% 已使用"); fi
  # The load average is fractional, so the comparison is delegated to awk.
  if awk -v l1="$load1" -v c="$cores" 'BEGIN {exit !(l1>c)}'; then
    alerts+=("1 分钟负载 ${load1} 高于核心数 ${cores}")
  fi

  if (( ${#alerts[@]} == 0 )); then
    printf '✅ 未见明显异常'
    return 0
  fi

  # Join with a visible separator; concatenating the alerts directly would
  # run them together into one unreadable string.
  for item in "${alerts[@]}"; do
    joined+="${joined:+; }${item}"
  done
  printf '⚠️ %s' "$joined"
}
# Collects every metric, derives the overall status and prints the complete
# report as Telegram HTML markup.
# Globals:   TOP_N (read) - shown in the "CPU Top N" / "MEM Top N" headings
# Arguments: none
# Outputs:   the report text on stdout
build_report() {
local cpu_p mem_used mem_total mem_p swap_used swap_total swap_p
local disk_used disk_total disk_p inode_used inode_total inode_p
local host up load temp status alert_summary cores load1
local top_cpu top_mem top_parts
# --- Gather metrics ---
cpu_p=$(get_cpu_usage)
# Each helper prints "used total percent" on one line; split it into three variables.
read -r mem_used mem_total mem_p < <(get_mem_info)
read -r swap_used swap_total swap_p < <(get_swap_info)
read -r disk_used disk_total disk_p < <(get_root_disk_info)
read -r inode_used inode_total inode_p < <(get_root_inode_info)
# load is the formatted 1/5/15 string for display; load1 is the bare
# 1-minute value used for the alert comparison.
load=$(get_loadavg)
load1=$(awk '{print $1}' /proc/loadavg)
up=$(get_uptime)
host=$(hostname)
temp=$(get_temp)
cores=$(nproc)
top_cpu=$(format_proc_lines "$(get_top_cpu_processes)")
top_mem=$(format_proc_lines "$(get_top_mem_processes)")
top_parts=$(get_top_partitions)
# --- Derive the overall status ---
# Green when build_alerts reported nothing (its output starts with the check
# mark), red when CPU, memory or root disk is at or above 95%, yellow for any
# other alert.
alert_summary=$(build_alerts "$cpu_p" "$mem_p" "$disk_p" "$swap_p" "$load1" "$cores")
if [[ "$alert_summary" == ✅* ]]; then
status='🟢 正常'
elif (( cpu_p >= 95 || mem_p >= 95 || disk_p >= 95 )); then
status='🔴 告警'
else
status='🟡 关注'
fi
# --- Escape free-form text before embedding it in HTML ---
# Only these five values are escaped; host, up, load and the numeric fields
# are inserted into the template as-is.
temp=$(printf '%s' "$temp" | escape_html)
top_cpu=$(printf '%s' "$top_cpu" | escape_html)
top_mem=$(printf '%s' "$top_mem" | escape_html)
top_parts=$(printf '%s' "$top_parts" | escape_html)
alert_summary=$(printf '%s' "$alert_summary" | escape_html)
# --- Render the message template ---
# Unquoted heredoc delimiter: variables and $(...) calls below are expanded.
cat <<EOF
<b>🖥 服务器巡检 · ${host}</b>
状态:<b>${status}</b>
${alert_summary}
━━━━━━━━━━━━━━━━━━
<b>核心指标</b>
CPU <code>$(draw_bar "$cpu_p")</code> ${cpu_p}%
RAM <code>$(draw_bar "$mem_p")</code> ${mem_p}%
DSK <code>$(draw_bar "$disk_p")</code> ${disk_p}%
<b>概览</b>
• 运行时间:<code>${up}</code>
• 负载(1/5/15)<code>${load}</code>
• 内存:<code>${mem_used}MB / ${mem_total}MB</code>
• Swap<code>${swap_used}MB / ${swap_total}MB (${swap_p}%)</code>
• 根分区:<code>${disk_used} / ${disk_total}</code>
• Inode<code>${inode_used} / ${inode_total} (${inode_p}%)</code>
• 温度:<code>${temp}</code>
<b>最忙分区 Top 5</b>
<pre>${top_parts}</pre>
<b>CPU Top ${TOP_N}</b>
<pre>PID USER CPU MEM ELAPSED CMD
${top_cpu}</pre>
<b>MEM Top ${TOP_N}</b>
<pre>PID USER CPU MEM ELAPSED CMD
${top_mem}</pre>
🕒 <i>$(date '+%Y-%m-%d %H:%M:%S')</i>
EOF
}
# Posts a message to the configured Telegram chat topic via the Bot API.
# Globals:   BOT_TOKEN, CHAT_ID, TOPIC_ID (read)
# Arguments: $1 - message text (Telegram HTML markup)
# Outputs:   nothing on stdout; an error message on stderr when the token
#            is missing
# Returns:   1 when BOT_TOKEN is empty, otherwise curl's exit status
send_telegram() {
  local text="$1"

  if [[ -z "$BOT_TOKEN" ]]; then
    echo "Error: BOT_TOKEN is empty." >&2
    return 1
  fi

  # The URL embeds the bot token, so it is passed through a curl config read
  # from stdin rather than argv, where any local user could see it in the
  # process list. The timeouts keep an unattended run from hanging on a
  # stalled connection.
  curl -fsS -X POST \
    --connect-timeout 10 --max-time 30 \
    -d "chat_id=${CHAT_ID}" \
    -d "message_thread_id=${TOPIC_ID}" \
    -d "parse_mode=HTML" \
    --data-urlencode "text=${text}" \
    --config - >/dev/null \
    <<< "url = \"https://api.telegram.org/bot${BOT_TOKEN}/sendMessage\""
}
# Entry point: build the report once, deliver it to Telegram, then echo the
# same text to stdout.
main() {
  local report
  report=$(build_report)
  send_telegram "$report"
  printf '%s\n' "$report"
}

main "$@"