# Monit 系統監控完整教程

# Monit 簡介

Monit 是一個輕量級的系統監控和管理工具,特點:

  • 無需數據庫:純文本配置,配置簡單
  • 輕量級:占用資源少,適合各種規模系統
  • 功能全面:監控進程、服務、端口、檔案、磁盤等
  • 自動化:支持自動重啟失敗的服務、執行自定義腳本
  • 告警通知:支持郵件、webhook 等多種通知方式
  • Web 界面:可選的 M/Monit 提供圖形化管理界面

官網:https://mmonit.com/monit/

# 安裝

# Ubuntu/Debian

# 安裝
apt-get update && apt-get install -y monit
# 驗證安裝
monit -v
# 啟動服務
systemctl start monit
systemctl enable monit

# CentOS/RHEL

yum install -y monit
systemctl start monit
systemctl enable monit

# Docker

FROM ubuntu:20.04
RUN apt-get update && apt-get install -y monit
COPY monitrc /etc/monit/monitrc
RUN chmod 600 /etc/monit/monitrc
CMD ["monit", "-I"]

# 基本命令

# 檢測配置文件語法
monit -t
# 重載配置
monit reload
# 啟動所有受監控的服務(若要啟動 monit 守護進程本身,直接執行 monit)
monit start all
# 停止所有受監控的服務(若要停止 monit 守護進程本身,執行 monit quit)
monit stop all
# 查看狀態
monit status
# 強制檢查所有服務
monit validate
# 啟用 / 禁用對特定服務的監控
monit monitor <service>
monit unmonitor <service>
# 重啟特定服務
monit restart <service>

# 配置文件詳解

# 配置文件位置

/etc/monit/monitrc

# 基本配置結構

# 設置監控周期(秒)
set daemon 60
    with start delay 120  # 啟動後等待 120 秒再開始監控
# 設置日誌
set logfile syslog facility log_daemon
# 設置事件隊列
set eventqueue
    basepath /var/monit
    slots 1000
# Web 控制界面(可選)
set httpd port 2812
    use address localhost
    allow localhost
    allow admin:admin.password@
# 郵件告警
set mailserver smtp.gmail.com port 587
    username "your-email@gmail.com" password "app-password"
    using tlsv12
# 郵件格式
set mail-format {
    from: Monit Alert <monit@$HOST>
    subject: [$SERVICE] $EVENT at $DATE
    message:
    Event: $EVENT
    Service: $SERVICE
    Date: $DATE
    Host: $HOST
    Action: $ACTION
    Description: $DESCRIPTION
}
# 告警接收者
set alert admin@example.com { action: alert }

# 實際應用場景

# 場景 1:監控 SSH 服務並自動重啟

check process sshd with pidfile /var/run/sshd.pid
    group networking
    start program = "/bin/systemctl start ssh"
    stop program = "/bin/systemctl stop ssh"
    if does not exist for 2 cycles then restart
    if cpu usage > 90% for 5 cycles then alert
    if failed host 127.0.0.1 port 22 protocol SSH then alert
    if 3 restarts with 5 cycles then set timeout
    alert admin@example.com

# 場景 2:監控 Web 服務器

check process nginx with pidfile /var/run/nginx.pid
    group webserver
    start program = "/bin/systemctl start nginx"
    stop program = "/bin/systemctl stop nginx"
    depends on nginx_config
    if does not exist for 1 cycles then restart
    if failed host 127.0.0.1 port 80 with timeout 5 seconds for 2 cycles then restart
    if failed host 127.0.0.1 port 443 with timeout 5 seconds for 2 cycles then restart
    alert admin@example.com
# 檢測配置文件完整性
check file nginx_config with path /etc/nginx/nginx.conf
    if failed checksum and timestamp then alert
    if failed permission 644 then alert
    alert admin@example.com

# 場景 3:監控系統資源

check system localhost
    if loadavg (1min) > 4.0 for 2 cycles then alert
    if loadavg (5min) > 2.0 for 5 cycles then alert
    if memory usage > 80% for 2 cycles then alert
    if cpu usage (system) > 75% for 3 cycles then alert
    if cpu usage (user) > 75% for 3 cycles then alert
    alert admin@example.com

# 場景 4:監控磁盤空間

check filesystem root_fs with path /
    if space usage > 80% for 2 cycles then alert
    if inode usage > 85% then alert
    alert admin@example.com
check filesystem data_fs with path /data
    if space usage > 70% for 3 cycles then alert
    if changed timestamp then alert
    alert admin@example.com

# 場景 5:監控數據庫服務

check process mysql with pidfile /var/run/mysqld/mysqld.pid
    group database
    start program = "/bin/systemctl start mysql"
    stop program = "/bin/systemctl stop mysql"
    if does not exist for 1 cycles then restart
    if failed host 127.0.0.1 port 3306 then restart
    if 3 restarts with 5 cycles then set timeout
    alert admin@example.com { action: alert, repeat every 5 cycles }
check file mysql_sock with path /var/run/mysqld/mysqld.sock
    if failed permission 660 then alert
    alert admin@example.com

# 場景 6:監控 Docker 容器

check process docker_app with pidfile /var/run/docker/app.pid
    start program = "docker start app_container"
    stop program = "docker stop app_container"
    if does not exist for 1 cycles then restart
    if failed host 127.0.0.1 port 8080 for 2 cycles then restart
    alert admin@example.com

# 場景 7:監控文件完整性

check file application_file with path /opt/app/config.json
    if changed checksum then exec "/opt/scripts/notify_change.sh"
    if failed permission 640 then alert
    if failed uid root then alert
    if failed gid app then alert
    alert admin@example.com

# 場景 8:自定義監控腳本

check program backup_status with path "/opt/scripts/backup_check.sh"
    every 1 hour
    if status != 0 for 1 cycles then alert
    alert admin@example.com

# 郵件告警設置

# Gmail 配置(推薦)

set mailserver smtp.gmail.com port 587
    username "your-email@gmail.com" password "XXXX-XXXX-XXXX-XXXX"
    using tlsv12 certificate verify disable

設置步驟:

  1. 開啟 Gmail 兩步驗證
  2. 生成應用專用密碼(16 位)
  3. 將密碼填入上方配置

# Office 365 配置

set mailserver smtp.office365.com port 587
    username "your-user@company.com" password "password"
    using tlsv12 certificate verify disable

# 自建郵件服務器

set mailserver mail.example.com port 25
    using no certificate

# Web 監控界面

# 啟用 Web 服務

set httpd port 2812
    use address 0.0.0.0  # 監聽所有網卡
    allow admin:admin.password@
    allow 192.168.1.0/24  # 允許子網訪問
    with ssl {
        pem: /etc/monit/monit.pem
    }

# 生成 SSL 證書

openssl req -new -x509 -days 365 -nodes \
    -out /etc/monit/monit.pem \
    -keyout /etc/monit/monit.pem
chmod 600 /etc/monit/monit.pem

# 訪問 Web 界面

https://ip:2812
用戶名:admin
密碼:admin.password

# M/Monit 集中管理(可選)

如果要集中管理多個 Monit 實例:

# monit 客戶端配置
set mmonit https://mmonit-ip:8443/collector
    with timeout 30 seconds
    and register without credentials
# 或帶認證
set mmonit https://admin:password@mmonit-ip:8443/collector
    with timeout 30 seconds
    and register

# 故障排查

# 檢查配置語法

monit -t
# 輸出示例
/etc/monit/monitrc:50: error: parse error
syntax OK  # 正常時的輸出

# 查看詳細日誌

# 實時日誌
tail -f /var/log/syslog | grep monit
# 或指定日誌文件
cat /var/log/monit.log

# 調試模式運行

# 前台運行,顯示詳細信息
monit -v -I

# 常見問題

| 問題 | 解決方案 |
| --- | --- |
| 郵件無法發送 | 檢查郵箱憑證、防火牆 25/587 端口 |
| Web 界面無法訪問 | 檢查端口是否監聽,防火牆規則 |
| 進程監控不準確 | 驗證 pidfile 路徑是否正確 |
| 頻繁告警 | 調整檢查周期和閾值 |

# 最佳實踐

  1. 分層監控:先監控關鍵服務,再逐步添加
  2. 合理調整閾值:避免頻繁誤告警
  3. 備份配置:定期備份 /etc/monit/ 目錄
  4. 測試告警:確保郵件、webhook 等通知正常
  5. 記錄日誌:啟用日誌方便故障排查
  6. 定期查看:檢查告警頻率,優化配置
  7. 自動恢復:為重要服務設置自動重啟
  8. 權限安全:配置文件設置 600 權限

# 總結

Monit 是一個強大而簡潔的監控工具,特別適合:

  • 小到中型系統監控
  • 對資源占用敏感、要求低開銷的環境
  • 需要快速部署的場景
  • 結合自定義腳本的靈活監控

掌握 Monit 後,你可以構建一個高效的自動化運維體系。

更新於

請我喝咖啡~( ̄▽ ̄)~*