<?xml version="1.0" encoding="UTF-8"?>
<!--
  launchd job definition for the Atlas agent.
  launchd runs the bootstrap script through zsh and captures its output
  under /tmp.
-->
<plist version="1.0">
  <dict>
    <!-- Unique job label; this is the name passed to "launchctl start". -->
    <key>Label</key>
    <string>com.whoffagents.atlas</string>

    <!-- Command line: /bin/zsh -c <bootstrap script>. -->
    <key>ProgramArguments</key>
    <array>
      <string>/bin/zsh</string>
      <string>-c</string>
      <string>/Users/you/Desktop/Agents/Bootstrap/start-atlas.sh</string>
    </array>

    <!--
      Relaunch the job every time it exits, whatever the exit status.
      NOTE(review): start-atlas.sh returns as soon as it has handed the
      command to tmux, so launchd will re-run it repeatedly; confirm
      that is the intended behaviour.
    -->
    <key>KeepAlive</key>
    <true/>

    <!-- Wait at least 10 seconds between two consecutive launches. -->
    <key>ThrottleInterval</key>
    <integer>10</integer>

    <!-- Files that receive the job's stdout and stderr. -->
    <key>StandardOutPath</key>
    <string>/tmp/atlas.log</string>
    <key>StandardErrorPath</key>
    <string>/tmp/atlas-err.log</string>
  </dict>
</plist>
<?xml version="1.0" encoding="UTF-8"?>
<!--
  launchd job definition for the Atlas agent.
  launchd runs the bootstrap script through zsh and captures its output
  under /tmp.
-->
<plist version="1.0">
  <dict>
    <!-- Unique job label; this is the name passed to "launchctl start". -->
    <key>Label</key>
    <string>com.whoffagents.atlas</string>

    <!-- Command line: /bin/zsh -c <bootstrap script>. -->
    <key>ProgramArguments</key>
    <array>
      <string>/bin/zsh</string>
      <string>-c</string>
      <string>/Users/you/Desktop/Agents/Bootstrap/start-atlas.sh</string>
    </array>

    <!--
      Relaunch the job every time it exits, whatever the exit status.
      NOTE(review): start-atlas.sh returns as soon as it has handed the
      command to tmux, so launchd will re-run it repeatedly; confirm
      that is the intended behaviour.
    -->
    <key>KeepAlive</key>
    <true/>

    <!-- Wait at least 10 seconds between two consecutive launches. -->
    <key>ThrottleInterval</key>
    <integer>10</integer>

    <!-- Files that receive the job's stdout and stderr. -->
    <key>StandardOutPath</key>
    <string>/tmp/atlas.log</string>
    <key>StandardErrorPath</key>
    <string>/tmp/atlas-err.log</string>
  </dict>
</plist>
<?xml version="1.0" encoding="UTF-8"?>
<!--
  launchd job definition for the Atlas agent.
  launchd runs the bootstrap script through zsh and captures its output
  under /tmp.
-->
<plist version="1.0">
  <dict>
    <!-- Unique job label; this is the name passed to "launchctl start". -->
    <key>Label</key>
    <string>com.whoffagents.atlas</string>

    <!-- Command line: /bin/zsh -c <bootstrap script>. -->
    <key>ProgramArguments</key>
    <array>
      <string>/bin/zsh</string>
      <string>-c</string>
      <string>/Users/you/Desktop/Agents/Bootstrap/start-atlas.sh</string>
    </array>

    <!--
      Relaunch the job every time it exits, whatever the exit status.
      NOTE(review): start-atlas.sh returns as soon as it has handed the
      command to tmux, so launchd will re-run it repeatedly; confirm
      that is the intended behaviour.
    -->
    <key>KeepAlive</key>
    <true/>

    <!-- Wait at least 10 seconds between two consecutive launches. -->
    <key>ThrottleInterval</key>
    <integer>10</integer>

    <!-- Files that receive the job's stdout and stderr. -->
    <key>StandardOutPath</key>
    <string>/tmp/atlas.log</string>
    <key>StandardErrorPath</key>
    <string>/tmp/atlas-err.log</string>
  </dict>
</plist>
# Register the job definition from the per-user LaunchAgents directory with launchd.
launchctl load ~/Library/LaunchAgents/com.whoffagents.atlas.plist
# Ask launchd to start the job now, addressing it by its Label.
launchctl start com.whoffagents.atlas
# Register the job definition from the per-user LaunchAgents directory with launchd.
launchctl load ~/Library/LaunchAgents/com.whoffagents.atlas.plist
# Ask launchd to start the job now, addressing it by its Label.
launchctl start com.whoffagents.atlas
# Register the job definition from the per-user LaunchAgents directory with launchd.
launchctl load ~/Library/LaunchAgents/com.whoffagents.atlas.plist
# Ask launchd to start the job now, addressing it by its Label.
launchctl start com.whoffagents.atlas
#!/bin/zsh
# start-atlas.sh
#
# Bootstrap for the Atlas agent. Makes sure a detached tmux session named
# "atlas" exists and, if nothing is running in it yet, records the restart,
# reports the last saved wave and types the claude command into the session.
#
# Safe to run repeatedly: when the pane already has a foreground program the
# script exits without touching it. Exits non-zero only when the tmux
# session cannot be created.

# launchd jobs do not inherit the login shell's PATH, so add the usual
# Homebrew locations where tmux and jq are typically installed.
export PATH="/opt/homebrew/bin:/usr/local/bin:$PATH"

STATE_FILE="$HOME/Desktop/Agents/Bootstrap/atlas-state.json"
CRASH_LOG="$HOME/Desktop/Agents/Bootstrap/crash-history.log"
SESSION="atlas"

# Create the session only when it is missing, so that a genuine tmux failure
# (for example tmux not installed) is reported instead of being swallowed.
if ! tmux has-session -t "$SESSION" 2>/dev/null; then
  tmux new-session -d -s "$SESSION" || exit 1
fi

# If the pane's foreground command is not a shell, something is already
# running there; typing the launch command would land in that program's
# input. An empty answer (format not supported) falls through to launching.
FOREGROUND_CMD=$(tmux display-message -p -t "$SESSION" '#{pane_current_command}' 2>/dev/null)
if [[ -n "$FOREGROUND_CMD" && "$FOREGROUND_CMD" != *sh ]]; then
  exit 0
fi

# Paper trail: one UTC timestamped line per actual (re)start.
echo "$(date -u +%Y-%m-%dT%H:%M:%SZ) Atlas restarting" >> "$CRASH_LOG"

if [[ -f "$STATE_FILE" ]]; then
  # Fall back to "unknown" when jq is missing or the state file is not valid JSON.
  LAST_WAVE=$(jq -r '.last_wave // "unknown"' "$STATE_FILE") || LAST_WAVE="unknown"
  echo "Resuming from wave $LAST_WAVE"
fi

tmux send-keys -t "$SESSION" "claude --dangerously-skip-permissions" Enter
#!/bin/zsh
# start-atlas.sh
#
# Bootstrap for the Atlas agent. Makes sure a detached tmux session named
# "atlas" exists and, if nothing is running in it yet, records the restart,
# reports the last saved wave and types the claude command into the session.
#
# Safe to run repeatedly: when the pane already has a foreground program the
# script exits without touching it. Exits non-zero only when the tmux
# session cannot be created.

# launchd jobs do not inherit the login shell's PATH, so add the usual
# Homebrew locations where tmux and jq are typically installed.
export PATH="/opt/homebrew/bin:/usr/local/bin:$PATH"

STATE_FILE="$HOME/Desktop/Agents/Bootstrap/atlas-state.json"
CRASH_LOG="$HOME/Desktop/Agents/Bootstrap/crash-history.log"
SESSION="atlas"

# Create the session only when it is missing, so that a genuine tmux failure
# (for example tmux not installed) is reported instead of being swallowed.
if ! tmux has-session -t "$SESSION" 2>/dev/null; then
  tmux new-session -d -s "$SESSION" || exit 1
fi

# If the pane's foreground command is not a shell, something is already
# running there; typing the launch command would land in that program's
# input. An empty answer (format not supported) falls through to launching.
FOREGROUND_CMD=$(tmux display-message -p -t "$SESSION" '#{pane_current_command}' 2>/dev/null)
if [[ -n "$FOREGROUND_CMD" && "$FOREGROUND_CMD" != *sh ]]; then
  exit 0
fi

# Paper trail: one UTC timestamped line per actual (re)start.
echo "$(date -u +%Y-%m-%dT%H:%M:%SZ) Atlas restarting" >> "$CRASH_LOG"

if [[ -f "$STATE_FILE" ]]; then
  # Fall back to "unknown" when jq is missing or the state file is not valid JSON.
  LAST_WAVE=$(jq -r '.last_wave // "unknown"' "$STATE_FILE") || LAST_WAVE="unknown"
  echo "Resuming from wave $LAST_WAVE"
fi

tmux send-keys -t "$SESSION" "claude --dangerously-skip-permissions" Enter
#!/bin/zsh
# start-atlas.sh
#
# Bootstrap for the Atlas agent. Makes sure a detached tmux session named
# "atlas" exists and, if nothing is running in it yet, records the restart,
# reports the last saved wave and types the claude command into the session.
#
# Safe to run repeatedly: when the pane already has a foreground program the
# script exits without touching it. Exits non-zero only when the tmux
# session cannot be created.

# launchd jobs do not inherit the login shell's PATH, so add the usual
# Homebrew locations where tmux and jq are typically installed.
export PATH="/opt/homebrew/bin:/usr/local/bin:$PATH"

STATE_FILE="$HOME/Desktop/Agents/Bootstrap/atlas-state.json"
CRASH_LOG="$HOME/Desktop/Agents/Bootstrap/crash-history.log"
SESSION="atlas"

# Create the session only when it is missing, so that a genuine tmux failure
# (for example tmux not installed) is reported instead of being swallowed.
if ! tmux has-session -t "$SESSION" 2>/dev/null; then
  tmux new-session -d -s "$SESSION" || exit 1
fi

# If the pane's foreground command is not a shell, something is already
# running there; typing the launch command would land in that program's
# input. An empty answer (format not supported) falls through to launching.
FOREGROUND_CMD=$(tmux display-message -p -t "$SESSION" '#{pane_current_command}' 2>/dev/null)
if [[ -n "$FOREGROUND_CMD" && "$FOREGROUND_CMD" != *sh ]]; then
  exit 0
fi

# Paper trail: one UTC timestamped line per actual (re)start.
echo "$(date -u +%Y-%m-%dT%H:%M:%SZ) Atlas restarting" >> "$CRASH_LOG"

if [[ -f "$STATE_FILE" ]]; then
  # Fall back to "unknown" when jq is missing or the state file is not valid JSON.
  LAST_WAVE=$(jq -r '.last_wave // "unknown"' "$STATE_FILE") || LAST_WAVE="unknown"
  echo "Resuming from wave $LAST_WAVE"
fi

tmux send-keys -t "$SESSION" "claude --dangerously-skip-permissions" Enter
#!/bin/zsh
# memory-guard.sh — cron every 2 minutes
#
# Proactive memory guard for the Atlas agent: looks up the agent process,
# reads its resident set size and sends SIGTERM once it exceeds
# MEM_LIMIT_MB, logging the event to /tmp/atlas-oom.log.
# Exits 0 without doing anything when no matching process is found.

MEM_LIMIT_MB=3500

# First process whose full command line matches the pattern.
# NOTE(review): the pattern needs "claude" followed later by "atlas" in the
# command line — confirm the running agent's arguments actually match.
ATLAS_PID=$(pgrep -f "claude.*atlas" | head -1)
if [[ -z "$ATLAS_PID" ]]; then
  exit 0
fi

# ps reports rss in kilobytes; convert to whole megabytes.
MEM_MB=$(ps -o rss= -p "$ATLAS_PID" | awk '{print int($1/1024)}')

# The process can exit between pgrep and ps, leaving nothing to compare.
if [[ -z "$MEM_MB" ]]; then
  exit 0
fi

if (( MEM_MB > MEM_LIMIT_MB )); then
  echo "$(date) OOM guard: Atlas ${MEM_MB}MB — triggering restart" >> /tmp/atlas-oom.log
  # Only terminate here; the restart itself is left to launchd.
  kill -TERM "$ATLAS_PID"
fi
#!/bin/zsh
# memory-guard.sh — cron every 2 minutes
#
# Proactive memory guard for the Atlas agent: looks up the agent process,
# reads its resident set size and sends SIGTERM once it exceeds
# MEM_LIMIT_MB, logging the event to /tmp/atlas-oom.log.
# Exits 0 without doing anything when no matching process is found.

MEM_LIMIT_MB=3500

# First process whose full command line matches the pattern.
# NOTE(review): the pattern needs "claude" followed later by "atlas" in the
# command line — confirm the running agent's arguments actually match.
ATLAS_PID=$(pgrep -f "claude.*atlas" | head -1)
if [[ -z "$ATLAS_PID" ]]; then
  exit 0
fi

# ps reports rss in kilobytes; convert to whole megabytes.
MEM_MB=$(ps -o rss= -p "$ATLAS_PID" | awk '{print int($1/1024)}')

# The process can exit between pgrep and ps, leaving nothing to compare.
if [[ -z "$MEM_MB" ]]; then
  exit 0
fi

if (( MEM_MB > MEM_LIMIT_MB )); then
  echo "$(date) OOM guard: Atlas ${MEM_MB}MB — triggering restart" >> /tmp/atlas-oom.log
  # Only terminate here; the restart itself is left to launchd.
  kill -TERM "$ATLAS_PID"
fi
#!/bin/zsh
# memory-guard.sh — cron every 2 minutes
#
# Proactive memory guard for the Atlas agent: looks up the agent process,
# reads its resident set size and sends SIGTERM once it exceeds
# MEM_LIMIT_MB, logging the event to /tmp/atlas-oom.log.
# Exits 0 without doing anything when no matching process is found.

MEM_LIMIT_MB=3500

# First process whose full command line matches the pattern.
# NOTE(review): the pattern needs "claude" followed later by "atlas" in the
# command line — confirm the running agent's arguments actually match.
ATLAS_PID=$(pgrep -f "claude.*atlas" | head -1)
if [[ -z "$ATLAS_PID" ]]; then
  exit 0
fi

# ps reports rss in kilobytes; convert to whole megabytes.
MEM_MB=$(ps -o rss= -p "$ATLAS_PID" | awk '{print int($1/1024)}')

# The process can exit between pgrep and ps, leaving nothing to compare.
if [[ -z "$MEM_MB" ]]; then
  exit 0
fi

if (( MEM_MB > MEM_LIMIT_MB )); then
  echo "$(date) OOM guard: Atlas ${MEM_MB}MB — triggering restart" >> /tmp/atlas-oom.log
  # Only terminate here; the restart itself is left to launchd.
  kill -TERM "$ATLAS_PID"
fi
{ "last_wave": 19, "active_gods": ["apollo", "hermes", "peitho"], "last_checkpoint": "2026-04-14T13:45:00Z", "tasks_completed": 47
}
{ "last_wave": 19, "active_gods": ["apollo", "hermes", "peitho"], "last_checkpoint": "2026-04-14T13:45:00Z", "tasks_completed": 47
}
{ "last_wave": 19, "active_gods": ["apollo", "hermes", "peitho"], "last_checkpoint": "2026-04-14T13:45:00Z", "tasks_completed": 47
}

- Auto-restart on crash (exit code != 0)
- OOM protection before the kernel kills the process
- Dashboard recovery so the agent knows where it left off
- Zero human intervention overnight

- 3 simulated crash tests: all recovered in under 15 seconds
- Memory guard fired once during a large codegen task
- Dashboard resumed correctly from wave 14 after forced kill

- launchd handles restart — do not write your own loop
- State files handle recovery — agents must be resumable
- Memory guard handles OOM — proactive beats reactive
- Crash log handles observability — you need a paper trail