# docker-compose.yml
services:
  app:
    image: your-app:latest
    deploy:
      resources:
        limits:
          memory: 512M  # hard ceiling — container gets killed past this
          cpus: '1.0'
        reservations:
          memory: 256M  # guaranteed minimum
    restart: unless-stopped  # this alone prevents most 3 AM incidents

  postgres:
    image: postgres:16
    deploy:
      resources:
        limits:
          memory: 1G
    # tune shared_buffers to ~25% of memory limit; the official image has no
    # POSTGRES_SHARED_BUFFERS variable, so pass the setting as a server flag
    command: ["postgres", "-c", "shared_buffers=256MB"]
    volumes:
      - pgdata:/var/lib/postgresql/data
    restart: unless-stopped

# named volumes must be declared at the top level or Compose rejects the file
volumes:
  pgdata: {}
# docker-compose.yml
services:
  app:
    image: your-app:latest
    deploy:
      resources:
        limits:
          memory: 512M  # hard ceiling — container gets killed past this
          cpus: '1.0'
        reservations:
          memory: 256M  # guaranteed minimum
    restart: unless-stopped  # this alone prevents most 3 AM incidents

  postgres:
    image: postgres:16
    deploy:
      resources:
        limits:
          memory: 1G
    # tune shared_buffers to ~25% of memory limit; the official image has no
    # POSTGRES_SHARED_BUFFERS variable, so pass the setting as a server flag
    command: ["postgres", "-c", "shared_buffers=256MB"]
    volumes:
      - pgdata:/var/lib/postgresql/data
    restart: unless-stopped

# named volumes must be declared at the top level or Compose rejects the file
volumes:
  pgdata: {}
# docker-compose.yml
services:
  app:
    image: your-app:latest
    deploy:
      resources:
        limits:
          memory: 512M  # hard ceiling — container gets killed past this
          cpus: '1.0'
        reservations:
          memory: 256M  # guaranteed minimum
    restart: unless-stopped  # this alone prevents most 3 AM incidents

  postgres:
    image: postgres:16
    deploy:
      resources:
        limits:
          memory: 1G
    # tune shared_buffers to ~25% of memory limit; the official image has no
    # POSTGRES_SHARED_BUFFERS variable, so pass the setting as a server flag
    command: ["postgres", "-c", "shared_buffers=256MB"]
    volumes:
      - pgdata:/var/lib/postgresql/data
    restart: unless-stopped

# named volumes must be declared at the top level or Compose rejects the file
volumes:
  pgdata: {}
// /etc/docker/daemon.json
{
  "log-driver": "json-file",
  "log-opts": {
    "max-size": "10m",
    "max-file": "3"
  }
}
// /etc/docker/daemon.json
{
  "log-driver": "json-file",
  "log-opts": {
    "max-size": "10m",
    "max-file": "3"
  }
}
// /etc/docker/daemon.json
{
  "log-driver": "json-file",
  "log-opts": {
    "max-size": "10m",
    "max-file": "3"
  }
}
#!/bin/bash
# /usr/local/bin/disk-check.sh
# Alert when disk usage of the root filesystem crosses 85%.

THRESHOLD=85

# -P forces POSIX single-line rows, so a long device name cannot wrap the
# output and shift the Use% column that awk picks out.
USAGE=$(df -P / | tail -1 | awk '{print $5}' | sed 's/%//')

if [ "$USAGE" -gt "$THRESHOLD" ]; then
  # swap this for your preferred notification method
  curl -X POST "https://your-webhook-url" \
    -H "Content-Type: application/json" \
    -d "{\"text\": \"Disk usage at ${USAGE}% on $(hostname)\"}"
fi
#!/bin/bash
# /usr/local/bin/disk-check.sh
# Alert when disk usage of the root filesystem crosses 85%.

THRESHOLD=85

# -P forces POSIX single-line rows, so a long device name cannot wrap the
# output and shift the Use% column that awk picks out.
USAGE=$(df -P / | tail -1 | awk '{print $5}' | sed 's/%//')

if [ "$USAGE" -gt "$THRESHOLD" ]; then
  # swap this for your preferred notification method
  curl -X POST "https://your-webhook-url" \
    -H "Content-Type: application/json" \
    -d "{\"text\": \"Disk usage at ${USAGE}% on $(hostname)\"}"
fi
#!/bin/bash
# /usr/local/bin/disk-check.sh
# Alert when disk usage of the root filesystem crosses 85%.

THRESHOLD=85

# -P forces POSIX single-line rows, so a long device name cannot wrap the
# output and shift the Use% column that awk picks out.
USAGE=$(df -P / | tail -1 | awk '{print $5}' | sed 's/%//')

if [ "$USAGE" -gt "$THRESHOLD" ]; then
  # swap this for your preferred notification method
  curl -X POST "https://your-webhook-url" \
    -H "Content-Type: application/json" \
    -d "{\"text\": \"Disk usage at ${USAGE}% on $(hostname)\"}"
fi
# docker-compose.yml
services:
  app:
    image: your-app:latest
    healthcheck:
      # curl -f exits non-zero on HTTP errors, which fails the check;
      # curl must be installed inside the image for this to work
      test: ["CMD", "curl", "-f", "http://localhost:3000/health"]
      interval: 30s
      timeout: 5s
      retries: 3
      start_period: 10s  # grace period for startup
    restart: unless-stopped
# docker-compose.yml
services:
  app:
    image: your-app:latest
    healthcheck:
      # curl -f exits non-zero on HTTP errors, which fails the check;
      # curl must be installed inside the image for this to work
      test: ["CMD", "curl", "-f", "http://localhost:3000/health"]
      interval: 30s
      timeout: 5s
      retries: 3
      start_period: 10s  # grace period for startup
    restart: unless-stopped
# docker-compose.yml
services:
  app:
    image: your-app:latest
    healthcheck:
      # curl -f exits non-zero on HTTP errors, which fails the check;
      # curl must be installed inside the image for this to work
      test: ["CMD", "curl", "-f", "http://localhost:3000/health"]
      interval: 30s
      timeout: 5s
      retries: 3
      start_period: 10s  # grace period for startup
    restart: unless-stopped
// Express health check that actually means something:
// responds 200 only when every dependency it probes answers.
app.get('/health', async (req, res) => {
  try {
    // check database connection
    await db.query('SELECT 1');
    // check redis if you use it
    await redis.ping();
    res.status(200).json({ status: 'ok' });
  } catch (err) {
    // returning 503 makes Docker mark container as unhealthy
    res.status(503).json({ status: 'degraded', error: err.message });
  }
});
// Express health check that actually means something:
// responds 200 only when every dependency it probes answers.
app.get('/health', async (req, res) => {
  try {
    // check database connection
    await db.query('SELECT 1');
    // check redis if you use it
    await redis.ping();
    res.status(200).json({ status: 'ok' });
  } catch (err) {
    // returning 503 makes Docker mark container as unhealthy
    res.status(503).json({ status: 'degraded', error: err.message });
  }
});
// Express health check that actually means something:
// responds 200 only when every dependency it probes answers.
app.get('/health', async (req, res) => {
  try {
    // check database connection
    await db.query('SELECT 1');
    // check redis if you use it
    await redis.ping();
    res.status(200).json({ status: 'ok' });
  } catch (err) {
    // returning 503 makes Docker mark container as unhealthy
    res.status(503).json({ status: 'degraded', error: err.message });
  }
});
# Caddyfile
yourapp.example.com {
    reverse_proxy app:3000 {
        # active health checks — Caddy polls /health every 30s and stops
        # sending traffic to upstreams that fail the probe
        health_uri /health
        health_interval 30s
    }

    # basic rate limiting to prevent abuse
    # NOTE(review): rate_limit is not a stock Caddy directive; it appears to
    # need a build with a rate-limit plugin (e.g. mholt/caddy-ratelimit) —
    # confirm before deploying
    rate_limit {
        zone dynamic {
            key {remote_host}
            events 100
            window 1m
        }
    }

    encode gzip

    log {
        output file /var/log/caddy/access.log {
            roll_size 50mb
            roll_keep 5
        }
    }
}
# Caddyfile
yourapp.example.com {
    reverse_proxy app:3000 {
        # active health checks — Caddy polls /health every 30s and stops
        # sending traffic to upstreams that fail the probe
        health_uri /health
        health_interval 30s
    }

    # basic rate limiting to prevent abuse
    # NOTE(review): rate_limit is not a stock Caddy directive; it appears to
    # need a build with a rate-limit plugin (e.g. mholt/caddy-ratelimit) —
    # confirm before deploying
    rate_limit {
        zone dynamic {
            key {remote_host}
            events 100
            window 1m
        }
    }

    encode gzip

    log {
        output file /var/log/caddy/access.log {
            roll_size 50mb
            roll_keep 5
        }
    }
}
# Caddyfile
yourapp.example.com {
    reverse_proxy app:3000 {
        # active health checks — Caddy polls /health every 30s and stops
        # sending traffic to upstreams that fail the probe
        health_uri /health
        health_interval 30s
    }

    # basic rate limiting to prevent abuse
    # NOTE(review): rate_limit is not a stock Caddy directive; it appears to
    # need a build with a rate-limit plugin (e.g. mholt/caddy-ratelimit) —
    # confirm before deploying
    rate_limit {
        zone dynamic {
            key {remote_host}
            events 100
            window 1m
        }
    }

    encode gzip

    log {
        output file /var/log/caddy/access.log {
            roll_size 50mb
            roll_keep 5
        }
    }
}
#!/bin/bash
# /usr/local/bin/backup-db.sh
# Dump the Postgres database from the running container, then prune old dumps.

# Abort on the first failure so a broken dump never reaches the pruning step.
set -euo pipefail

BACKUP_DIR="/backups/postgres"
TIMESTAMP=$(date +%Y%m%d_%H%M%S)
RETENTION_DAYS=7

mkdir -p "$BACKUP_DIR"

TARGET="${BACKUP_DIR}/app_${TIMESTAMP}.dump"

# dump the database from the running container; write under a temporary name
# first so a failed or partial dump is never mistaken for a good backup
# (the "*.dump" pattern below does not match "*.partial")
docker exec postgres pg_dump -U appuser -Fc appdb > "${TARGET}.partial"
mv "${TARGET}.partial" "$TARGET"

# clean up old backups
find "$BACKUP_DIR" -name "*.dump" -mtime +"$RETENTION_DAYS" -delete

# optional: sync to remote storage
# rclone copy "$BACKUP_DIR" remote:backups/postgres --max-age 24h
#!/bin/bash
# /usr/local/bin/backup-db.sh
# Dump the Postgres database from the running container, then prune old dumps.

# Abort on the first failure so a broken dump never reaches the pruning step.
set -euo pipefail

BACKUP_DIR="/backups/postgres"
TIMESTAMP=$(date +%Y%m%d_%H%M%S)
RETENTION_DAYS=7

mkdir -p "$BACKUP_DIR"

TARGET="${BACKUP_DIR}/app_${TIMESTAMP}.dump"

# dump the database from the running container; write under a temporary name
# first so a failed or partial dump is never mistaken for a good backup
# (the "*.dump" pattern below does not match "*.partial")
docker exec postgres pg_dump -U appuser -Fc appdb > "${TARGET}.partial"
mv "${TARGET}.partial" "$TARGET"

# clean up old backups
find "$BACKUP_DIR" -name "*.dump" -mtime +"$RETENTION_DAYS" -delete

# optional: sync to remote storage
# rclone copy "$BACKUP_DIR" remote:backups/postgres --max-age 24h
#!/bin/bash
# /usr/local/bin/backup-db.sh
# Dump the Postgres database from the running container, then prune old dumps.

# Abort on the first failure so a broken dump never reaches the pruning step.
set -euo pipefail

BACKUP_DIR="/backups/postgres"
TIMESTAMP=$(date +%Y%m%d_%H%M%S)
RETENTION_DAYS=7

mkdir -p "$BACKUP_DIR"

TARGET="${BACKUP_DIR}/app_${TIMESTAMP}.dump"

# dump the database from the running container; write under a temporary name
# first so a failed or partial dump is never mistaken for a good backup
# (the "*.dump" pattern below does not match "*.partial")
docker exec postgres pg_dump -U appuser -Fc appdb > "${TARGET}.partial"
mv "${TARGET}.partial" "$TARGET"

# clean up old backups
find "$BACKUP_DIR" -name "*.dump" -mtime +"$RETENTION_DAYS" -delete

# optional: sync to remote storage
# rclone copy "$BACKUP_DIR" remote:backups/postgres --max-age 24h

- Memory exhaustion — your app (or its database) slowly ate all available RAM
- Disk full — logs or temp files filled the drive
- No automatic recovery — the process crashed and nothing restarted it

- Resource limits set for every container
- Restart policies configured (unless-stopped at minimum)
- Log rotation enabled at the Docker daemon level
- Health checks that verify actual functionality, not just process liveness
- TLS termination via a reverse proxy with automatic cert renewal
- Automated backups with at least one off-server copy
- Disk and memory monitoring with alerts
- Firewall rules — only expose ports 80, 443, and your SSH port
- Unattended security updates enabled on the host OS