本文目录导读:

基础Shell脚本(适用于Linux)
#!/bin/bash
# Disk usage monitoring script.
# Prints one warning line for every filesystem whose usage is at or above
# THRESHOLD.

# Alert threshold (percent)
THRESHOLD=80

#######################################
# Scan `df` output and report filesystems at or above the threshold.
# Globals:   THRESHOLD (read)
# Arguments: none
# Outputs:   one warning line per offending filesystem on stdout
#######################################
check_disk_usage() {
  local partition usage
  # -P keeps every filesystem on a single line even when the device name is
  # long; LC_ALL=C keeps the header in English so the grep filter matches it.
  LC_ALL=C df -hP \
    | grep -vE '^Filesystem|tmpfs|cdrom' \
    | while read -r partition _ _ _ usage _; do
        usage=${usage%\%}
        # Skip rows whose usage column is not a plain integer (e.g. "-");
        # feeding them to the numeric test below would raise an error.
        case "$usage" in
          '' | *[!0-9]*) continue ;;
        esac
        if [ "$usage" -ge "$THRESHOLD" ]; then
          echo "警告: 分区 $partition 使用率已达到 $usage%"
          # A mail command can be added here, for example:
          # echo "磁盘空间不足: $partition ($usage%)" | mail -s "磁盘告警" admin@example.com
        fi
      done
}

check_disk_usage
增强版脚本(带日志和邮件告警)
#!/bin/bash
# Configuration parameters.
# Each value may be overridden from the environment; the defaults below are
# used when the corresponding variable is unset or empty.
THRESHOLD="${THRESHOLD:-80}"   # alert threshold (percent)
LOG_FILE="${LOG_FILE:-/var/log/disk_monitor.log}"
ADMIN_EMAIL="${ADMIN_EMAIL:-admin@example.com}"
# Bash normally presets HOSTNAME; fall back to hostname(1) only when empty.
HOSTNAME="${HOSTNAME:-$(hostname)}"
# Logging helper
#######################################
# Append a timestamped message to the log file.
# Globals:   LOG_FILE (read)
# Arguments: $1 - message text
# Outputs:   appends "[YYYY-MM-DD HH:MM:SS] <message>" to LOG_FILE
#######################################
log_message() {
  # The redirect target is quoted so a LOG_FILE path containing spaces is
  # not treated as an ambiguous redirect.
  printf '[%s] %s\n' "$(date '+%Y-%m-%d %H:%M:%S')" "$1" >> "$LOG_FILE"
}
# Send an e-mail alert
#######################################
# Mail the given text to the administrator address(es).
# Globals:   HOSTNAME (read), ADMIN_EMAIL (read)
# Arguments: $1 - message body
# Returns:   1 when no `mail` command is available, otherwise the status
#            of the mail pipeline
#######################################
send_alert() {
  local subject="磁盘告警 - $HOSTNAME"
  local body="$1"
  local -a recipients=()

  if ! command -v mail > /dev/null 2>&1; then
    printf 'send_alert: mail command not found; alert not sent\n' >&2
    return 1
  fi

  # Split ADMIN_EMAIL on whitespace on purpose (several recipients remain
  # possible) but without exposing the value to pathname expansion.
  read -r -a recipients <<< "$ADMIN_EMAIL"
  printf '%s\n' "$body" | mail -s "$subject" "${recipients[@]}"
}
# Main monitoring logic
#######################################
# Check every filesystem reported by `df` and log or alert on its usage.
# Globals:   THRESHOLD (read)
# Arguments: none
# Outputs:   one log entry per checked filesystem via log_message, plus an
#            alert via send_alert for each one at or above THRESHOLD
#######################################
monitor_disk() {
  # Lines matching this pattern anywhere are ignored (header, tmpfs, cdrom
  # and loop devices).
  local -r skip_re='^Filesystem|tmpfs|cdrom|loop'
  local line total used available usage partition alert_msg

  log_message "开始磁盘使用率检查..."

  # -P keeps each filesystem on one line even for long device names;
  # LC_ALL=C keeps the header in English so skip_re can match it.
  while IFS= read -r line; do
    if [[ "$line" =~ $skip_re ]]; then
      continue
    fi

    # The last variable receives the rest of the line, so a mount point
    # that contains spaces stays intact.
    read -r _ total used available usage partition <<< "$line"
    usage=${usage%\%}

    # Rows without a plain integer in the usage column (e.g. "-") cannot be
    # compared numerically; skip them instead of raising a test error.
    if [[ ! "$usage" =~ ^[0-9]+$ ]]; then
      continue
    fi

    if [ "$usage" -ge "$THRESHOLD" ]; then
      alert_msg="警告: 分区 $partition 使用率已达 $usage%"$'\n'"总计: $total | 已用: $used | 可用: $available"
      log_message "$alert_msg"
      send_alert "$alert_msg"
    else
      log_message "正常: $partition 使用率 $usage%"
    fi
  done < <(LC_ALL=C df -hP)
}
# Run the monitoring routine
monitor_disk
Python脚本(更灵活)
#!/usr/bin/env python3
import logging
import os
import smtplib
import socket
import subprocess
from datetime import datetime
from email.mime.text import MIMEText
class DiskMonitor:
    """Check filesystem usage through ``df`` and report high usage.

    Args:
        threshold: Usage percentage at or above which a filesystem is
            reported as an alert.
        log_file: Path of the log file passed to ``logging.basicConfig``.
    """

    def __init__(self, threshold=80, log_file='/var/log/disk_monitor.log'):
        self.threshold = threshold
        self.log_file = log_file
        self.setup_logging()

    def setup_logging(self):
        """Configure root logging to write to ``self.log_file``.

        When the log file cannot be opened (missing directory, no
        permission) logging falls back to the default stream handler
        instead of letting the constructor fail.
        """
        log_format = '%(asctime)s - %(levelname)s - %(message)s'
        try:
            logging.basicConfig(
                filename=self.log_file,
                level=logging.INFO,
                format=log_format
            )
        except OSError as e:
            logging.basicConfig(level=logging.INFO, format=log_format)
            logging.warning(f"无法写入日志文件 {self.log_file}: {e}")

    @staticmethod
    def _parse_df_output(output):
        """Convert ``df -hP`` text into a list of dicts, one per ``/dev/`` filesystem.

        The first line (header) is ignored. Rows with fewer than six
        columns, rows whose filesystem does not start with ``/dev/`` and
        rows whose usage column is not an integer are skipped.
        """
        disk_info = []
        for line in output.splitlines()[1:]:
            # Split into at most six fields so a mount point that contains
            # spaces is kept whole in the last field.
            parts = line.split(None, 5)
            if len(parts) < 6 or not parts[0].startswith('/dev/'):
                continue
            try:
                usage_percent = int(parts[4].rstrip('%'))
            except ValueError:
                # One unparsable row must not discard the remaining rows.
                logging.warning(f"无法解析磁盘使用率: {line}")
                continue
            disk_info.append({
                'filesystem': parts[0],
                'total': parts[1],
                'used': parts[2],
                'available': parts[3],
                'usage_percent': usage_percent,
                'mount_point': parts[5]
            })
        return disk_info

    def get_disk_usage(self):
        """Return usage information for every ``/dev/`` filesystem.

        Returns:
            A list of dicts with the keys ``filesystem``, ``total``,
            ``used``, ``available``, ``usage_percent`` (int) and
            ``mount_point``. An empty list is returned, and the error is
            logged, when ``df`` cannot be run.
        """
        try:
            # -P keeps every filesystem on one output line even when the
            # device name is long.
            result = subprocess.run(['df', '-hP'], capture_output=True, text=True)
        except Exception as e:
            logging.error(f"获取磁盘信息失败: {e}")
            return []
        if result.returncode != 0:
            # The output is still parsed: whatever rows df printed are used.
            logging.warning(f"df 返回非零状态 {result.returncode}: {result.stderr.strip()}")
        return self._parse_df_output(result.stdout)

    def check_and_alert(self):
        """Compare each filesystem against the threshold.

        Every filesystem is logged; those at or above ``self.threshold``
        also produce an alert message.

        Returns:
            A list of alert message strings (empty when nothing is over
            the threshold).
        """
        alerts = []
        for disk in self.get_disk_usage():
            if disk['usage_percent'] >= self.threshold:
                alert_msg = (
                    f"磁盘告警: {disk['mount_point']}\n"
                    f"使用率: {disk['usage_percent']}%\n"
                    f"总计: {disk['total']}\n"
                    f"已用: {disk['used']}\n"
                    f"可用: {disk['available']}"
                )
                alerts.append(alert_msg)
                logging.warning(f"磁盘使用率过高: {disk['mount_point']} - {disk['usage_percent']}%")
            else:
                logging.info(f"正常: {disk['mount_point']} - {disk['usage_percent']}%")
        return alerts

    def send_email_alert(self, alerts, sender='monitor@example.com',
                         recipients=None):
        """Send all alerts in a single e-mail through the SMTP server on localhost.

        Args:
            alerts: List of alert message strings; nothing is sent when empty.
            sender: Address used for the ``From`` header.
            recipients: List of recipient addresses; defaults to
                ``['admin@example.com']`` when omitted.

        Sending errors are logged, not raised.
        """
        if not alerts:
            return
        if recipients is None:
            recipients = ['admin@example.com']
        body = "\n\n".join(alerts)
        msg = MIMEText(body)
        msg['Subject'] = f'[磁盘告警] {socket.gethostname()} - 磁盘使用率过高'
        msg['From'] = sender
        msg['To'] = ', '.join(recipients)
        try:
            with smtplib.SMTP('localhost') as server:
                server.send_message(msg)
        except Exception as e:
            logging.error(f"发送邮件失败: {e}")
# Usage example: run one check and mail whatever alerts it produced
if __name__ == "__main__":
    import socket  # makes the socket module available when run as a script

    disk_monitor = DiskMonitor(threshold=80)
    pending_alerts = disk_monitor.check_and_alert()
    if pending_alerts:
        disk_monitor.send_email_alert(pending_alerts)
定时任务配置(Crontab)
# Check every 2 hours between 08:00 and 18:00 each day
# (8-18/2 restricts the step to that hour range: 8, 10, 12, 14, 16, 18)
0 8-18/2 * * * /usr/local/bin/disk_monitor.sh
# Alternatively, check every 30 minutes
*/30 * * * * /usr/bin/python3 /usr/local/bin/disk_monitor.py
# Run the deep check every day at 00:00
0 0 * * * /usr/local/bin/disk_deep_check.sh
实用监控命令汇总
# Show the five filesystems with the highest usage.
# The header is dropped first and the Use% column is sorted numerically;
# a plain text sort would rank 9% above 80% and 100%.
df -hP | tail -n +2 | sort -k5,5nr | head -5
# Watch disk I/O: 5 extended reports, 1 second apart
iostat -x 1 5
# Find large files (over 100MB).
# "{} +" batches files into few ls calls; stderr is discarded on purpose to
# hide permission-denied noise from unreadable directories.
find / -type f -size +100M -exec ls -lh {} + 2>/dev/null
# Show the ten largest top-level directories
du -sh /* 2>/dev/null | sort -rh | head -10
# Show inode usage
df -i
最佳实践建议
-
设置多个阈值:
- 80%:警告级别
- 90%:严重警告
- 95%:紧急处理
-
自动化处理:
#!/bin/bash
# Automatically clean up stale files in the listed directories.
# NOTE(review): /var/log is in the list, so log files older than 7 days are
# deleted as well — confirm this does not conflict with log rotation.
CLEANUP_DIRS=("/tmp" "/var/tmp" "/var/log")
for dir in "${CLEANUP_DIRS[@]}"; do
  # Skip directories that do not exist on this host.
  [[ -d "$dir" ]] || continue
  # Remove regular files last modified more than 7 days ago.
  find "$dir" -type f -mtime +7 -exec rm -f -- {} +
done
-
集成到监控系统:
- 结合Zabbix、Prometheus等监控工具
- 配置Webhook通知到Slack、钉钉等
选择哪种方案取决于你的需求:
- 简单监控:使用第一个Shell脚本
- 需要日志和邮件:使用增强版Shell脚本
- 需要灵活配置:使用Python脚本
- 集成到现有系统:配合Zabbix等工具