#!/bin/bash
# /usr/local/bin/health-check.sh
#
# Host health check. Sends an e-mail alert when the root filesystem is fuller
# than DISK_THRESHOLD percent, and one alert for every unit in SERVICES that
# systemd does not report as active.
#
# Requires: df, awk, tr, hostname, systemctl and a working `mail` command.

# Alert recipient; can be overridden from the environment.
# NOTE(review): the default below is an e-mail-obfuscation placeholder, not a
# deliverable address -- set a real recipient before deploying.
ALERT_EMAIL="${ALERT_EMAIL:-[email protected]}"
DISK_THRESHOLD=85
SERVICES=(caddy docker)

HOST=$(hostname)

# Check disk usage
# `df -P` keeps each filesystem on a single line, so row 2 / column 5 is
# always the "use%" value for "/".
DISK_USAGE=$(df -P / | awk 'NR==2 {print $5}' | tr -d '%')
if [[ "$DISK_USAGE" =~ ^[0-9]+$ ]]; then
  if (( DISK_USAGE > DISK_THRESHOLD )); then
    echo "ALERT: Disk usage at ${DISK_USAGE}% on ${HOST}" |
      mail -s "Disk Alert: ${HOST}" "$ALERT_EMAIL"
  fi
else
  # A non-numeric value would make the comparison itself fail; report it
  # instead of silently skipping the check.
  echo "health-check: could not parse disk usage from df output" >&2
fi

# Check service status
for SERVICE in "${SERVICES[@]}"; do
  if ! systemctl is-active --quiet "$SERVICE"; then
    echo "ALERT: $SERVICE is not running on ${HOST}" |
      mail -s "Service Down: $SERVICE on ${HOST}" "$ALERT_EMAIL"
  fi
done
# Install the audit daemon and its event-dispatcher plugins
# (the Debian/Ubuntu package name is audispd-plugins).
sudo apt install auditd audispd-plugins

# Enable auditd at boot and start it right away.
sudo systemctl enable --now auditd
# Delete existing rules
-D

# Buffer size
-b 8192

# Failure mode: 1 = log, 2 = panic
-f 1

# Monitor sudo usage
-w /usr/bin/sudo -p x -k sudo_usage

# Monitor /etc/passwd, /etc/shadow and /etc/group changes
-w /etc/passwd -p wa -k identity_changes
-w /etc/shadow -p wa -k identity_changes
-w /etc/group -p wa -k identity_changes

# Monitor SSH authorized_keys changes
-w /root/.ssh/authorized_keys -p wa -k ssh_keys
-w /home -p wa -k home_ssh_keys

# Monitor cron changes
-w /etc/cron.d/ -p wa -k cron_changes
-w /var/spool/cron/crontabs/ -p wa -k cron_changes

# Monitor su usage
-w /bin/su -p x -k su_usage

# Make rules immutable until reboot.
# This must stay the last line: once -e 2 is applied, no further rule
# changes are accepted until the machine is rebooted.
-e 2
# Compile the rule files and load the result into the kernel.
sudo augenrules --load
# See all sudo invocations
sudo ausearch -k sudo_usage

# See authentication report
sudo aureport --auth

# See failed authentication attempts
sudo aureport --auth --failed

# See recent identity-file changes
# (ausearch's "recent" keyword means the last 10 minutes)
sudo ausearch --start recent -k identity_changes
# Last hour of Caddy logs (opens in the pager)
journalctl -u caddy --since "1 hour ago"

# Last hour of Docker logs, written straight to stdout
journalctl -u docker --since "1 hour ago" --no-pager
# Show the 50 most recent SSH login events (accepted, failed, invalid user).
sudo grep -E 'Accepted|Failed|Invalid' /var/log/auth.log | tail -n 50
# Overall fail2ban status (lists the configured jails)
sudo fail2ban-client status

# Details for the sshd jail
sudo fail2ban-client status sshd
# 20 most recent logins
last -n 20

# 20 most recent failed login attempts
# (lastb reads /var/log/btmp, which is normally readable by root only)
sudo lastb -n 20
# Today's sudo invocations
sudo ausearch -k sudo_usage --start today

# Failed authentication attempts
sudo aureport --auth --failed
- fail2ban ban rate — A sudden spike in bans means an active attack is hitting your server. Normal background noise is a handful of bans per day.
- Disk usage — Logs, Docker images, and database dumps grow silently. A full disk causes service failures that are confusing to diagnose.
- Memory and CPU — Sustained high usage often means a runaway process, a memory leak, or an underpowered server for the workload.
- Systemd service health — A service can fail quietly if you are not watching. `systemctl is-active caddy` prints the unit's state (for example "active", "inactive", or "failed") and exits non-zero when the unit is not active — simple to script.
- TLS certificate expiry — Caddy auto-renews certificates, but renewal can fail. A certificate expiring in production takes your site offline.
- SSH login events — Successful logins from unexpected IPs are an immediate investigation trigger. Failed logins are noise; successful ones are not.