#!/usr/bin/env bash
#
# server-health-report.sh (repository path: scripts/server-health-report.sh)
#
# Collects a snapshot of host health (uptime, load, CPU, memory, disk,
# top processes, temperatures, fans), stores it in a local text file and
# posts it to a Telegram chat topic through the Bot API `sendMessage` call.
#
# Usage:
#   OPENCLAW_TELEGRAM_BOT_TOKEN=<token> ./server-health-report.sh
#
# Environment:
#   OPENCLAW_TELEGRAM_BOT_TOKEN  Bot token; sending fails when it is empty.
#
# The script reads /proc and /sys directly, so it expects a Linux host.
set -Eeuo pipefail

readonly CHAT_ID="-1003834524994"
readonly TOPIC_ID="4"
readonly BOT_TOKEN="${OPENCLAW_TELEGRAM_BOT_TOKEN:-}"
# Reports longer than this many characters are truncated before sending.
readonly MAX_LEN=3800
# Local copy of the most recent (untruncated) report.
readonly REPORT_FILE="/home/sinlee/.openclaw/workspace/tmp/server-health-latest.txt"

# Returns 0 when the command named by $1 is available.
have() {
  command -v "$1" >/dev/null 2>&1
}

# Prints `uptime -p` output; prints nothing (and still succeeds) on failure.
human_uptime() {
  uptime -p 2>/dev/null || true
}

# Prints the three load-average fields of /proc/loadavg, or "N/A".
load_avg() {
  awk '{print $1, $2, $3}' /proc/loadavg 2>/dev/null || echo "N/A"
}

# Prints overall CPU utilisation (one decimal place) measured over a
# one-second window, using the first line of /proc/stat sampled twice.
# idle + iowait are counted as idle time.
cpu_usage_percent() {
  local u1 n1 s1 i1 w1 irq1 sirq1 st1
  local u2 n2 s2 i2 w2 irq2 sirq2 st2
  local total_a idle_a total_b idle_b usage

  read -r _ u1 n1 s1 i1 w1 irq1 sirq1 st1 _ < /proc/stat
  total_a=$((u1 + n1 + s1 + i1 + w1 + irq1 + sirq1 + st1))
  idle_a=$((i1 + w1))

  sleep 1

  read -r _ u2 n2 s2 i2 w2 irq2 sirq2 st2 _ < /proc/stat
  total_b=$((u2 + n2 + s2 + i2 + w2 + irq2 + sirq2 + st2))
  idle_b=$((i2 + w2))

  usage=$(awk -v ta="$total_a" -v tb="$total_b" -v ia="$idle_a" -v ib="$idle_b" \
    'BEGIN { dt = tb - ta; di = ib - ia; if (dt <= 0) print "0.0"; else printf "%.1f", (dt - di) * 100 / dt }')
  printf '%s\n' "$usage"
}

# Prints the per-core labels found in /proc/stat as "cpu0, cpu1, ...".
# The join is done in awk: `paste -d ', '` treats its argument as a list
# of delimiters to cycle through, which produced "cpu0,cpu1 cpu2,...".
per_core_overview() {
  if [[ ! -r /proc/stat ]]; then
    echo "核心: N/A"
    return 0
  fi
  awk '
    /^cpu[0-9]+ / { list = (count++ ? list ", " : "") $1 }
    END { print "核心: " (count ? list : "N/A") }
  ' < /proc/stat
}

# Prints RAM and swap usage in MB with a percentage.
# LC_ALL=C keeps the "Mem:"/"Swap:" row labels untranslated so the awk
# patterns still match on hosts with a non-English locale.
mem_summary() {
  LC_ALL=C free -m 2>/dev/null | awk '
    /^Mem:/ {
      pct = ($2 > 0) ? $3 * 100 / $2 : 0
      printf "内存: %d/%d MB (%.0f%%)\n", $3, $2, pct
    }
    /^Swap:/ {
      if ($2 == 0) printf "Swap: 0/0 MB (未启用)\n"
      else printf "Swap: %d/%d MB (%.0f%%)\n", $3, $2, $3 * 100 / $2
    }
  '
}

# Prints space and inode usage of the root filesystem.
disk_summary() {
  df -h / 2>/dev/null \
    | awk 'NR==2 {printf "磁盘 /: %s/%s (%s), 可用 %s\n", $3, $2, $5, $4}'
  df -ih / 2>/dev/null \
    | awk 'NR==2 {printf "Inode /: %s/%s (%s), 可用 %s\n", $3, $2, $5, $4}'
}

# Prints the host name, or "unknown" when `hostname` fails.
hostname_line() {
  printf "主机: %s\n" "$(hostname 2>/dev/null || echo unknown)"
}

# Prints the current local date, time and time zone.
date_line() {
  printf "时间: %s\n" "$(date '+%F %T %Z' 2>/dev/null || echo unknown)"
}

# Prints the ten processes ranking highest on the ps sort key $1
# (for example %cpu or %mem), one "- PID=... CMD=..." line each.
# awk selects the rows itself, so no `head` closes the pipe while ps is
# still writing, and fields 5..NF are re-joined so that command names
# containing spaces are not cut off.
_top_processes() {
  local sort_key="$1"
  ps -eo pid,user,%cpu,%mem,comm --sort="-${sort_key}" 2>/dev/null \
    | awk 'NR > 1 && NR <= 11 {
        cmd = $5
        for (i = 6; i <= NF; i++) cmd = cmd " " $i
        printf "- PID=%s USER=%s CPU=%s%% MEM=%s%% CMD=%s\n", $1, $2, $3, $4, cmd
      }'
}

# Top ten processes by CPU usage.
top_cpu() {
  _top_processes '%cpu'
}

# Top ten processes by memory usage.
top_mem() {
  _top_processes '%mem'
}

# Prints temperature readings: up to 20 non-empty lines of `sensors`
# output when available, otherwise up to 10 thermal zones from sysfs
# (raw value divided by 1000), otherwise a "no data" line.
temps_block() {
  local out="" zone kind raw
  local count=0

  if have sensors; then
    out=$(sensors 2>/dev/null | awk 'length($0) > 0 && ++n <= 20' || true)
  fi

  if [[ -z "$out" ]]; then
    for zone in /sys/class/thermal/thermal_zone*; do
      [[ -r "$zone/temp" ]] || continue
      kind=$(cat "$zone/type" 2>/dev/null || echo "unknown")
      raw=$(cat "$zone/temp" 2>/dev/null || true)
      # Skip empty or non-numeric readings instead of printing 0.0°C.
      [[ "$raw" =~ ^-?[0-9]+$ ]] || continue
      out+=$(awk -v type="$kind" -v raw="$raw" \
        'BEGIN { printf "- %s: %.1f°C", type, raw / 1000 }')$'\n'
      count=$((count + 1))
      if ((count >= 10)); then
        break
      fi
    done
    out=${out%$'\n'}
  fi

  if [[ -n "$out" ]]; then
    printf "%s\n" "$out"
  else
    echo "- 未获取到温度/传感器数据"
  fi
}

# Prints one line per readable hwmon fan sensor, or a "no data" line.
fans_block() {
  local found=0
  local chip sensor name label rpm

  for chip in /sys/class/hwmon/hwmon*; do
    [[ -d "$chip" ]] || continue
    name=$(cat "$chip/name" 2>/dev/null || echo "hwmon")
    for sensor in "$chip"/fan*_input; do
      [[ -r "$sensor" ]] || continue
      found=1
      # fan1_input -> fan1. The earlier `basename "$i" .*` passed an
      # unquoted glob as the suffix, so it never removed "_input".
      label=${sensor##*/}
      label=${label%_input}
      rpm=$(cat "$sensor" 2>/dev/null || echo "N/A")
      printf -- "- %s %s=%s RPM\n" "$name" "$label" "$rpm"
    done
  done

  if ((found == 0)); then
    echo "- 未获取到风扇转速数据"
  fi
}

# Prints a short assessment: a warning for CPU usage >= 85% and/or memory
# usage >= 85%, followed by a fixed list of things to watch.
# Both warnings are reported when both thresholds are exceeded.
risk_block() {
  local cpu mem
  local verdict=""

  cpu=$(cpu_usage_percent)
  mem=$(LC_ALL=C free -m 2>/dev/null \
    | awk '/^Mem:/ && $2 > 0 {printf "%.0f", $3 * 100 / $2}' || echo 0)

  # "v + 0" forces a numeric comparison even if the value is not a number.
  if awk -v v="$cpu" 'BEGIN { exit !(v + 0 >= 85) }'; then
    verdict="CPU 占用偏高,需关注高占用进程"
  fi
  if awk -v v="$mem" 'BEGIN { exit !(v + 0 >= 85) }'; then
    verdict+="${verdict:+;}内存占用偏高,需关注内存泄漏或异常进程"
  fi

  echo "- 当前判断: ${verdict:-无明显异常}"
  echo "- 建议关注: 持续高 CPU / 高内存、陌生命令名、异常常驻进程、占用异常升高的解释不清进程"
}

# Prints the complete report on stdout.
# NOTE(review): the here-document of this function is missing from the
# reviewed source (everything between `cat <` and `>&2; return 1; }` was
# lost). The layout and section headings below are a reconstruction that
# prints every section helper defined in this file — confirm them against
# the original script before merging.
build_report() {
  cat <<EOF
$(hostname_line)
$(date_line)
运行时间: $(human_uptime)
负载: $(load_avg)
CPU 使用率: $(cpu_usage_percent)%
$(per_core_overview)
$(mem_summary)
$(disk_summary)

CPU Top 10:
$(top_cpu)

内存 Top 10:
$(top_mem)

温度/传感器:
$(temps_block)

风扇:
$(fans_block)

风险评估:
$(risk_block)
EOF
}

# Sends $1 as a message to CHAT_ID / TOPIC_ID via the Telegram Bot API.
# Returns 1 without calling the API when BOT_TOKEN is empty; otherwise
# returns curl's exit status (-f makes HTTP errors non-zero).
# NOTE(review): the first lines of this function are missing from the
# reviewed source; only `>&2; return 1; }` and the curl call survive.
# The `local text` assignment, the token check and its message text are
# reconstructed — confirm against the original script.
send_telegram() {
  local text="$1"
  [[ -n "$BOT_TOKEN" ]] || {
    echo "OPENCLAW_TELEGRAM_BOT_TOKEN is not set" >&2
    return 1
  }
  curl -fsS -X POST "https://api.telegram.org/bot${BOT_TOKEN}/sendMessage" \
    -d "chat_id=${CHAT_ID}" \
    -d "message_thread_id=${TOPIC_ID}" \
    --data-urlencode "text=${text}" \
    -d "disable_web_page_preview=true"
}

# Builds the report, saves the full text to REPORT_FILE, truncates it to
# MAX_LEN characters when necessary and sends it to Telegram.
main() {
  local report
  report=$(build_report)

  # Create the target directory so the redirection below cannot fail on
  # a fresh machine.
  mkdir -p -- "${REPORT_FILE%/*}"
  printf '%s\n' "$report" > "$REPORT_FILE"

  if ((${#report} > MAX_LEN)); then
    # $'...' yields real newlines; "\n" inside double quotes would be
    # sent to Telegram as a literal backslash followed by "n".
    report="${report:0:MAX_LEN}"$'\n...\n'"(内容过长,已截断;完整内容见本机 tmp/server-health-latest.txt)"
  fi

  send_telegram "$report"
}

main "$@"