以下是一套自动化巡检脚本的实现方案,包含常见的系统检查项和可扩展的框架,使用 Python 和 Shell 脚本实现:
#!/usr/bin/env python3
# system_inspector.py
import os
import sys
import subprocess
import psutil
import datetime
import socket
class SystemInspector:
    """Run basic host health checks and collect the results in a text report.

    Every ``check_*`` method records its findings through :meth:`log`, which
    appends timestamped entries to ``self.report``.  :meth:`generate_report`
    then appends the collected entries to ``self.log_file``.

    Args:
        log_file: Path of the file the report is appended to.
        services: Names of the systemd units queried by :meth:`check_services`.
        threshold: Usage percentage above which CPU, memory and disk checks
            emit a ``WARNING`` entry.
        command_timeout: Seconds to wait for each ``systemctl`` call.
    """

    def __init__(
        self,
        log_file="/var/log/system_inspector.log",
        services=("nginx", "mysql", "redis"),
        threshold=90,
        command_timeout=10,
    ):
        self.report = []
        self.log_file = log_file
        # Copy so that later changes to the caller's sequence do not leak in.
        self.services = list(services)
        self.threshold = threshold
        self.command_timeout = command_timeout

    def log(self, message, level="INFO"):
        """Append ``message`` to the in-memory report with a timestamp and level."""
        timestamp = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        log_entry = f"[{timestamp}] [{level}] {message}"
        self.report.append(log_entry)

    @staticmethod
    def _load_average():
        """Return ``os.getloadavg()``, or ``"N/A"`` when it is unavailable."""
        try:
            return os.getloadavg()
        except (AttributeError, OSError):
            # AttributeError: the function does not exist on this platform.
            # OSError: the load average could not be obtained.
            return "N/A"

    def check_cpu(self):
        """Log CPU usage (sampled over one second) and the load average."""
        try:
            usage = psutil.cpu_percent(interval=1)
            # A missing load average must not discard the usage reading.
            load_avg = self._load_average()
            self.log(f"CPU Usage: {usage}% | Load Average: {load_avg}")
            if usage > self.threshold:
                self.log(f"CPU Warning: Usage over {self.threshold}%!", "WARNING")
        except Exception as e:
            self.log(f"CPU Check Error: {str(e)}", "ERROR")

    def check_memory(self):
        """Log virtual memory usage and the available amount in MB."""
        try:
            mem = psutil.virtual_memory()
            self.log(
                f"Memory Usage: {mem.percent}% | "
                f"Available: {mem.available/1024/1024:.2f} MB"
            )
            if mem.percent > self.threshold:
                self.log(f"Memory Warning: Usage over {self.threshold}%!", "WARNING")
        except Exception as e:
            self.log(f"Memory Check Error: {str(e)}", "ERROR")

    def check_disk(self):
        """Log the usage of every partition reported by psutil.

        A partition whose usage cannot be read is logged as an error and
        skipped, so the remaining partitions are still reported.
        """
        try:
            disks = []
            for part in psutil.disk_partitions():
                try:
                    usage = psutil.disk_usage(part.mountpoint)
                except OSError as e:
                    # e.g. PermissionError on a mount point we may not stat.
                    self.log(f"Disk Check Error: {part.mountpoint}: {e}", "ERROR")
                    continue
                disks.append(f"{part.mountpoint} {usage.percent}%")
                if usage.percent > self.threshold:
                    self.log(
                        f"Disk Warning: {part.mountpoint} over {self.threshold}%!",
                        "WARNING",
                    )
            self.log("Disk Usage: " + " | ".join(disks))
        except Exception as e:
            self.log(f"Disk Check Error: {str(e)}", "ERROR")

    def check_services(self):
        """Log the ``systemctl is-active`` state of each configured service.

        A timed-out query is logged and the next service is still checked.
        Any other failure (for example ``systemctl`` not being installed)
        is logged once and ends the check.
        """
        try:
            for service in self.services:
                try:
                    result = subprocess.run(
                        ["systemctl", "is-active", service],
                        capture_output=True,
                        text=True,
                        timeout=self.command_timeout,
                    )
                except subprocess.TimeoutExpired:
                    self.log(
                        f"Service Check Error: {service} query timed out "
                        f"after {self.command_timeout}s",
                        "ERROR",
                    )
                    continue
                # systemctl may print nothing on stdout when it fails outright.
                status = result.stdout.strip() or "unknown"
                self.log(f"Service {service}: {status}")
                if status != "active":
                    self.log(f"Service {service} is not active!", "WARNING")
        except Exception as e:
            self.log(f"Service Check Error: {str(e)}", "ERROR")

    def generate_report(self):
        """Append the collected report to ``self.log_file``.

        Returns:
            ``True`` when the report was written to the log file.  ``False``
            when the file could not be written; in that case the report is
            printed to stdout instead so the results are not lost.
        """
        content = "\n".join(self.report) + "\n\n"
        try:
            with open(self.log_file, "a", encoding="utf-8") as f:
                f.write(content)
        except OSError as e:
            print(
                f"Could not write report to {self.log_file}: {e}",
                file=sys.stderr,
            )
            print(content, end="")
            return False
        print(f"Inspection completed. Report saved to {self.log_file}")
        return True
def main():
    """Run every inspection in order, then write out the collected report."""
    inspector = SystemInspector()
    inspector.log("=== Starting System Inspection ===")
    inspector.check_cpu()
    inspector.check_memory()
    inspector.check_disk()
    inspector.check_services()
    inspector.generate_report()


# Only run the inspection when executed as a script, not when imported.
if __name__ == "__main__":
    main()
配套的 Shell 脚本(用于基础检查):
#!/bin/bash
# basic_check.sh
# Prints a plain-text snapshot of basic host state to stdout, with one
# "=====" header line per section.
# Basic system information: hostname, uptime and kernel release
echo "===== System Info ====="
echo "Hostname: $(hostname)"
echo "Uptime: $(uptime)"
echo "Kernel: $(uname -r)"
# Memory check: human-readable output of free
echo -e "\n===== Memory Usage ====="
free -h
# Disk check: human-readable df output with lines containing "tmpfs" removed
echo -e "\n===== Disk Usage ====="
df -h | grep -v tmpfs
# Process check: ps sorted by CPU usage (descending); head keeps the header
# line plus the first five processes
echo -e "\n===== Top Processes ====="
ps -eo pid,ppid,cmd,%mem,%cpu --sort=-%cpu | head -n 6
# Network check
echo -e "\n===== Network Connections ====="
netstat -ant | awk '{print $6}' | grep -v '^