App process exits (status: 1 — error) ↓
Process manager receives exit event ↓
Check: is this process configured to restart? ↓
Yes → spawn new process ↓
Wait for process to be ready (health check or port listen) ↓
Resume serving traffic
App process exits (status: 1 — error) ↓
Process manager receives exit event ↓
Check: is this process configured to restart? ↓
Yes → spawn new process ↓
Wait for process to be ready (health check or port listen) ↓
Resume serving traffic
App process exits (status: 1 — error) ↓
Process manager receives exit event ↓
Check: is this process configured to restart? ↓
Yes → spawn new process ↓
Wait for process to be ready (health check or port listen) ↓
Resume serving traffic
# Health check for the api process: the URL to probe and its timing settings.
[processes.api.health_check]
endpoint = "http://localhost:3000/health"
# Key names suggest seconds between probes and seconds before a probe is
# abandoned — TODO confirm against the process manager's documentation.
# NOTE(review): the timeout (5) exceeds the interval (2); confirm that is intended.
interval_secs = 2
timeout_secs = 5
# Health check for the api process: the URL to probe and its timing settings.
[processes.api.health_check]
endpoint = "http://localhost:3000/health"
# Key names suggest seconds between probes and seconds before a probe is
# abandoned — TODO confirm against the process manager's documentation.
# NOTE(review): the timeout (5) exceeds the interval (2); confirm that is intended.
interval_secs = 2
timeout_secs = 5
# Health check for the api process: the URL to probe and its timing settings.
[processes.api.health_check]
endpoint = "http://localhost:3000/health"
# Key names suggest seconds between probes and seconds before a probe is
# abandoned — TODO confirm against the process manager's documentation.
# NOTE(review): the timeout (5) exceeds the interval (2); confirm that is intended.
interval_secs = 2
timeout_secs = 5
# Restart limits for the api process: cap the number of restarts and pause
# briefly before each one.
[processes.api]
max_restarts = 10 # stop trying after 10 crashes
restart_delay_ms = 500 # wait 500 ms before each restart
# Restart limits for the api process: cap the number of restarts and pause
# briefly before each one.
[processes.api]
max_restarts = 10 # stop trying after 10 crashes
restart_delay_ms = 500 # wait 500 ms before each restart
# Restart limits for the api process: cap the number of restarts and pause
# briefly before each one.
[processes.api]
max_restarts = 10 # stop trying after 10 crashes
restart_delay_ms = 500 # wait 500 ms before each restart
# Run several copies of the api process so that one crashing instance does not
# take the whole service down.
[processes.api]
instances = 3 # crash recovery on one instance doesn't affect the other 2
# Run several copies of the api process so that one crashing instance does not
# take the whole service down.
[processes.api]
instances = 3 # crash recovery on one instance doesn't affect the other 2
# Run several copies of the api process so that one crashing instance does not
# take the whole service down.
[processes.api]
instances = 3 # crash recovery on one instance doesn't affect the other 2
# Find your process PID
oxmgr status

# Kill it hard (no graceful shutdown); replace <pid> with the PID found above
kill -9 <pid>

# Measure how long until it responds again.
# Poll in a tight loop instead of using `curl --retry ... --retry-delay 0`:
# a zero retry delay makes curl fall back to its default exponential backoff
# (1s, 2s, 4s, ...), which would round the measurement up to whole seconds.
# Press Ctrl-C if the service never comes back.
time sh -c 'until curl --silent --fail --output /dev/null --max-time 1 http://localhost:3000/health; do sleep 0.05; done'
# Find your process PID
oxmgr status

# Kill it hard (no graceful shutdown); replace <pid> with the PID found above
kill -9 <pid>

# Measure how long until it responds again.
# Poll in a tight loop instead of using `curl --retry ... --retry-delay 0`:
# a zero retry delay makes curl fall back to its default exponential backoff
# (1s, 2s, 4s, ...), which would round the measurement up to whole seconds.
# Press Ctrl-C if the service never comes back.
time sh -c 'until curl --silent --fail --output /dev/null --max-time 1 http://localhost:3000/health; do sleep 0.05; done'
# Find your process PID
oxmgr status

# Kill it hard (no graceful shutdown); replace <pid> with the PID found above
kill -9 <pid>

# Measure how long until it responds again.
# Poll in a tight loop instead of using `curl --retry ... --retry-delay 0`:
# a zero retry delay makes curl fall back to its default exponential backoff
# (1s, 2s, 4s, ...), which would round the measurement up to whole seconds.
# Press Ctrl-C if the service never comes back.
time sh -c 'until curl --silent --fail --output /dev/null --max-time 1 http://localhost:3000/health; do sleep 0.05; done'
# Full configuration for the api process: launch command, restart policy and
# instance count.
[processes.api]
command = "node dist/server.js"
restart_on_exit = true
restart_delay_ms = 0 # restart immediately
max_restarts = 20 # allow 20 restarts before giving up
instances = 2 # run 2 instances for redundancy

# Health-check settings live in their own sub-table. The header must sit on its
# own line; inside a trailing comment it is ignored and the keys below would be
# read as part of [processes.api].
[processes.api.health_check]
endpoint = "http://localhost:3000/health"
interval_secs = 10
timeout_secs = 3
# Full configuration for the api process: launch command, restart policy and
# instance count.
[processes.api]
command = "node dist/server.js"
restart_on_exit = true
restart_delay_ms = 0 # restart immediately
max_restarts = 20 # allow 20 restarts before giving up
instances = 2 # run 2 instances for redundancy

# Health-check settings live in their own sub-table. The header must sit on its
# own line; inside a trailing comment it is ignored and the keys below would be
# read as part of [processes.api].
[processes.api.health_check]
endpoint = "http://localhost:3000/health"
interval_secs = 10
timeout_secs = 3
# Full configuration for the api process: launch command, restart policy and
# instance count.
[processes.api]
command = "node dist/server.js"
restart_on_exit = true
restart_delay_ms = 0 # restart immediately
max_restarts = 20 # allow 20 restarts before giving up
instances = 2 # run 2 instances for redundancy

# Health-check settings live in their own sub-table. The header must sit on its
# own line; inside a trailing comment it is ignored and the keys below would be
# read as part of [processes.api].
[processes.api.health_check]
endpoint = "http://localhost:3000/health"
interval_secs = 10
timeout_secs = 3

- OS process creation: ~1–5ms
- Node.js startup: ~50–200ms (depending on module load time)
- Application initialization: varies

- Crash 1: restart after 100ms
- Crash 2: restart after 200ms
- Crash 3: restart after 400ms