# Monit 系統監控完整教程
# Monit 簡介
Monit 是一個輕量級的系統監控和管理工具,特點:
- 無需數據庫:純文本配置,配置簡單
- 輕量級:占用資源少,適合各種規模系統
- 功能全面:監控進程、服務、端口、檔案、磁盤等
- 自動化:支持自動重啟失敗的服務、執行自定義腳本
- 告警通知:支持郵件、webhook 等多種通知方式
- Web 界面:可選的 M/Monit 提供圖形化管理界面
官網:https://mmonit.com/monit/
# 安裝
# Ubuntu/Debian
| |
| apt-get update && apt-get install -y monit |
| |
| |
| monit -V |
| |
| |
| systemctl start monit |
| systemctl enable monit |
# CentOS/RHEL
| yum install -y monit |
| |
| systemctl start monit |
| systemctl enable monit |
# Docker
| FROM ubuntu:20.04 |
| |
| RUN apt-get update && apt-get install -y monit |
| |
| COPY monitrc /etc/monit/monitrc |
| RUN chmod 600 /etc/monit/monitrc |
| |
| CMD ["monit", "-I"] |
# 基本命令
| |
| monit -t |
| |
| |
| monit reload |
| |
| |
| monit start all |
| |
| |
| monit stop all |
| |
| |
| monit status |
| |
| |
| monit validate |
| |
| |
| monit monitor <service> |
| monit unmonitor <service> |
| |
| |
| monit restart <service> |
# 配置文件詳解
# 配置文件位置
# 基本配置結構
| |
| set daemon 60 |
| with start delay 120 |
| |
| |
| set logfile syslog facility log_daemon |
| |
| |
| set eventqueue |
| basepath /var/monit |
| slots 1000 |
| |
| |
| set httpd port 2812 |
| use address localhost |
| allow localhost |
| allow admin:admin.password |
| |
| |
| set mailserver smtp.gmail.com port 587 |
| username "your-email@gmail.com" password "app-password" |
| using tlsv12 |
| |
| |
| set mail-format { |
| from: Monit Alert <monit@$HOST> |
| subject: [$SERVICE] $EVENT at $DATE |
| message: |
| Event: $EVENT |
| Service: $SERVICE |
| Date: $DATE |
| Host: $HOST |
| Action: $ACTION |
| Description: $DESCRIPTION |
| } |
| |
| |
| set alert admin@example.com { action: alert } |
# 實際應用場景
# 場景 1:監控 SSH 服務並自動重啟
| check process sshd with pidfile /var/run/sshd.pid |
| group networking |
| start program = "/bin/systemctl start ssh" |
| stop program = "/bin/systemctl stop ssh" |
| if does not exist for 2 cycles then restart |
| if cpu usage > 90% for 5 cycles then alert |
| if failed host 127.0.0.1 port 22 protocol SSH then alert |
| if 3 restarts with 5 cycles then set timeout |
| alert admin@example.com |
# 場景 2:監控 Web 服務器
| check process nginx with pidfile /var/run/nginx.pid |
| group webserver |
| start program = "/bin/systemctl start nginx" |
| stop program = "/bin/systemctl stop nginx" |
| depends on nginx_config |
| if does not exist for 1 cycles then restart |
| if failed host 127.0.0.1 port 80 with timeout 5 seconds for 2 cycles then restart |
| if failed host 127.0.0.1 port 443 with timeout 5 seconds for 2 cycles then restart |
| alert admin@example.com |
| |
| |
| check file nginx_config with path /etc/nginx/nginx.conf |
| if failed checksum and timestamp then alert |
| if failed permission 644 then alert |
| alert admin@example.com |
# 場景 3:監控系統資源
| check system localhost |
| if loadavg (1min) > 4.0 for 2 cycles then alert |
| if loadavg (5min) > 2.0 for 5 cycles then alert |
| if memory usage > 80% for 2 cycles then alert |
| if cpu usage (system) > 75% for 3 cycles then alert |
| if cpu usage (user) > 75% for 3 cycles then alert |
| alert admin@example.com |
# 場景 4:監控磁盤空間
| check filesystem root_fs with path / |
| if space usage > 80% for 2 cycles then alert |
| if inode usage > 85% then alert |
| alert admin@example.com |
| |
| check filesystem data_fs with path /data |
| if space usage > 70% for 3 cycles then alert |
| if changed timestamp then alert |
| alert admin@example.com |
# 場景 5:監控數據庫服務
| check process mysql with pidfile /var/run/mysqld/mysqld.pid |
| group database |
| start program = "/bin/systemctl start mysql" |
| stop program = "/bin/systemctl stop mysql" |
| if does not exist for 1 cycles then restart |
| if failed host 127.0.0.1 port 3306 then restart |
| if 3 restarts with 5 cycles then set timeout |
| alert admin@example.com { action: alert, repeat every 5 cycles } |
| |
| check file mysql_sock with path /var/run/mysqld/mysqld.sock |
| if failed permission 660 then alert |
| alert admin@example.com |
# 場景 6:監控 Docker 容器
| check process docker_app with pidfile /var/run/docker/app.pid |
| start program = "/usr/bin/docker start app_container" |
| stop program = "/usr/bin/docker stop app_container" |
| if does not exist for 1 cycles then restart |
| if failed host 127.0.0.1 port 8080 for 2 cycles then restart |
| alert admin@example.com |
# 場景 7:監控文件完整性
| check file application_file with path /opt/app/config.json |
| if changed checksum then exec "/opt/scripts/notify_change.sh" |
| if failed permission 640 then alert |
| if failed uid root then alert |
| if failed gid app then alert |
| alert admin@example.com |
# 場景 8:自定義監控腳本
| check program backup_status with path "/opt/scripts/backup_check.sh" |
| every 1 hour |
| if status != 0 for 1 cycles then alert |
| alert admin@example.com |
# 郵件告警設置
# Gmail 配置(推薦)
| set mailserver smtp.gmail.com port 587 |
| username "your-email@gmail.com" password "XXXX-XXXX-XXXX-XXXX" |
| using tlsv12 certificate verify disable |
設置步驟:
- 開啟 Gmail 兩步驗證
- 生成應用專用密碼(16 位)
- 將密碼填入上方配置
# Office 365 配置
| set mailserver smtp.office365.com port 587 |
| username "your-user@company.com" password "password" |
| using tlsv12 certificate verify disable |
# 自建郵件服務器
| set mailserver mail.example.com port 25 |
| using no certificate |
# Web 監控界面
# 啟用 Web 服務
| set httpd port 2812 |
| use address 0.0.0.0 |
| allow admin:admin.password |
| allow 192.168.1.0/24 |
| with ssl { |
| pem: /etc/monit/monit.pem |
| } |
# 生成 SSL 證書
| openssl req -new -x509 -days 365 -nodes \ |
| -out /etc/monit/monit.pem \ |
| -keyout /etc/monit/monit.pem |
| chmod 600 /etc/monit/monit.pem |
# 訪問 Web 界面
https://ip:2812
用戶名:admin
密碼:admin.password
# M/Monit 集中管理(可選)
如果要集中管理多個 Monit 實例:
| |
| set mmonit https://mmonit-ip:8443/collector |
| with timeout 30 seconds |
| and register without credentials |
| |
| |
| set mmonit https://admin:password@mmonit-ip:8443/collector |
| with timeout 30 seconds |
| and register |
# 故障排查
# 檢查配置語法
| monit -t |
| |
| |
| /etc/monit/monitrc:50: error: parse error |
| syntax OK |
# 查看詳細日誌
| |
| tail -f /var/log/syslog | grep monit |
| |
| |
| cat /var/log/monit.log |
# 調試模式運行
# 常見問題
| 問題 | 解決方案 |
|---|---|
| 郵件無法發送 | 檢查郵箱憑證、防火牆 25/587 端口 |
| Web 界面無法訪問 | 檢查端口是否監聽,防火牆規則 |
| 進程監控不準確 | 驗證 pidfile 路徑是否正確 |
| 頻繁告警 | 調整檢查周期和閾值 |
# 最佳實踐
- 分層監控:先監控關鍵服務,再逐步添加
- 合理調整閾值:避免頻繁誤告警
- 備份配置:定期備份 /etc/monit/ 目錄
- 測試告警:確保郵件、webhook 等通知正常
- 記錄日誌:啟用日誌方便故障排查
- 定期查看:檢查告警頻率,優化配置
- 自動恢復:為重要服務設置自動重啟
- 權限安全:配置文件設置 600 權限
# 總結
Monit 是一個強大而簡潔的監控工具,特別適合:
- 小到中型系統監控
- 對資源占用要求高的環境
- 需要快速部署的場景
- 結合自定義腳本的靈活監控
掌握 Monit 後,你可以構建一個高效的自動化運維體系。