┌─────────────────────────────────────────────────────────────────────────────┐
│ SwiftDeploy — Full System Architecture │
├──────────────────┬──────────────────────────────────────┬───────────────────┤
│ ZONE 1 │ ZONE 2 │ ZONE 3 │
│ Operator │ Host Machine / Docker Engine │ Generated Files │
│ │ │ │
│ [Operator] │ ┌─── swiftdeploy-net (bridge) ───┐ │ nginx.conf │
│ │ │ │ │ │ docker-compose │
│ ▼ │ │ [nginx:8080]──────►[app:3000] │ │ history.jsonl │
│ manifest.yaml │ │ PUBLIC INTERNAL │ │ audit_report.md │
│ (source of │ │ │ │ │ │ │
│ truth) │ │ └──[logs vol]─────┘ │ │ │
│ │ │ │ │ │ │
│ ▼ │ │ [opa:8181] │ │ │
│ swiftdeploy │ │ localhost only │ │ │
│ CLI │ │ NOT via nginx ✗ │ │ │
│ ├─ init │ └────────────────────────────────┘ │ │
│ ├─ validate │ │ │
│ ├─ deploy ──────┼──► pre-deploy: OPA infra check │ │
│ ├─ promote ─────┼──► pre-promote: OPA canary check │ │
│ ├─ status ──────┼──► scrapes /metrics every 5s ───────►│ history.jsonl │
│ ├─ audit ───────┼────────────────────────────────────►│ audit_report.md │
│ └─ teardown │ │ │
└──────────────────┴──────────────────────────────────────┴───────────────────┘
┌─────────────────────────────────────────────────────────────────────────────┐
│ SwiftDeploy — Full System Architecture │
├──────────────────┬──────────────────────────────────────┬───────────────────┤
│ ZONE 1 │ ZONE 2 │ ZONE 3 │
│ Operator │ Host Machine / Docker Engine │ Generated Files │
│ │ │ │
│ [Operator] │ ┌─── swiftdeploy-net (bridge) ───┐ │ nginx.conf │
│ │ │ │ │ │ docker-compose │
│ ▼ │ │ [nginx:8080]──────►[app:3000] │ │ history.jsonl │
│ manifest.yaml │ │ PUBLIC INTERNAL │ │ audit_report.md │
│ (source of │ │ │ │ │ │ │
│ truth) │ │ └──[logs vol]─────┘ │ │ │
│ │ │ │ │ │ │
│ ▼ │ │ [opa:8181] │ │ │
│ swiftdeploy │ │ localhost only │ │ │
│ CLI │ │ NOT via nginx ✗ │ │ │
│ ├─ init │ └────────────────────────────────┘ │ │
│ ├─ validate │ │ │
│ ├─ deploy ──────┼──► pre-deploy: OPA infra check │ │
│ ├─ promote ─────┼──► pre-promote: OPA canary check │ │
│ ├─ status ──────┼──► scrapes /metrics every 5s ───────►│ history.jsonl │
│ ├─ audit ───────┼────────────────────────────────────►│ audit_report.md │
│ └─ teardown │ │ │
└──────────────────┴──────────────────────────────────────┴───────────────────┘
┌─────────────────────────────────────────────────────────────────────────────┐
│ SwiftDeploy — Full System Architecture │
├──────────────────┬──────────────────────────────────────┬───────────────────┤
│ ZONE 1 │ ZONE 2 │ ZONE 3 │
│ Operator │ Host Machine / Docker Engine │ Generated Files │
│ │ │ │
│ [Operator] │ ┌─── swiftdeploy-net (bridge) ───┐ │ nginx.conf │
│ │ │ │ │ │ docker-compose │
│ ▼ │ │ [nginx:8080]──────►[app:3000] │ │ history.jsonl │
│ manifest.yaml │ │ PUBLIC INTERNAL │ │ audit_report.md │
│ (source of │ │ │ │ │ │ │
│ truth) │ │ └──[logs vol]─────┘ │ │ │
│ │ │ │ │ │ │
│ ▼ │ │ [opa:8181] │ │ │
│ swiftdeploy │ │ localhost only │ │ │
│ CLI │ │ NOT via nginx ✗ │ │ │
│ ├─ init │ └────────────────────────────────┘ │ │
│ ├─ validate │ │ │
│ ├─ deploy ──────┼──► pre-deploy: OPA infra check │ │
│ ├─ promote ─────┼──► pre-promote: OPA canary check │ │
│ ├─ status ──────┼──► scrapes /metrics every 5s ───────►│ history.jsonl │
│ ├─ audit ───────┼────────────────────────────────────►│ audit_report.md │
│ └─ teardown │ │ │
└──────────────────┴──────────────────────────────────────┴───────────────────┘
swiftdeploy/
├── manifest.yaml ← the ONLY file you edit
├── swiftdeploy ← CLI executable
├── Dockerfile ← app image definition
├── app/
│ └── main.py ← Python HTTP service
├── templates/
│ ├── nginx.conf.tmpl ← nginx template
│ └── docker-compose.yml.tmpl ← compose template
├── policies/ ← Stage 4B addition
│ ├── infrastructure.rego
│ ├── canary.rego
│ └── data.json
├── nginx.conf ← generated (gitignored)
└── docker-compose.yml ← generated (gitignored)
swiftdeploy/
├── manifest.yaml ← the ONLY file you edit
├── swiftdeploy ← CLI executable
├── Dockerfile ← app image definition
├── app/
│ └── main.py ← Python HTTP service
├── templates/
│ ├── nginx.conf.tmpl ← nginx template
│ └── docker-compose.yml.tmpl ← compose template
├── policies/ ← Stage 4B addition
│ ├── infrastructure.rego
│ ├── canary.rego
│ └── data.json
├── nginx.conf ← generated (gitignored)
└── docker-compose.yml ← generated (gitignored)
swiftdeploy/
├── manifest.yaml ← the ONLY file you edit
├── swiftdeploy ← CLI executable
├── Dockerfile ← app image definition
├── app/
│ └── main.py ← Python HTTP service
├── templates/
│ ├── nginx.conf.tmpl ← nginx template
│ └── docker-compose.yml.tmpl ← compose template
├── policies/ ← Stage 4B addition
│ ├── infrastructure.rego
│ ├── canary.rego
│ └── data.json
├── nginx.conf ← generated (gitignored)
└── docker-compose.yml ← generated (gitignored)
services: image: swift-deploy-1-node:latest port: 3000 mode: stable # stable or canary version: "1.0.0" restart_policy: unless-stopped log_volume: swiftdeploy-logs nginx: image: nginx:latest port: 8080 proxy_timeout: 30 opa: image: openpolicyagent/opa:latest-static port: 8181 network: name: swiftdeploy-net driver_type: bridge contact: "[email protected]"
services: image: swift-deploy-1-node:latest port: 3000 mode: stable # stable or canary version: "1.0.0" restart_policy: unless-stopped log_volume: swiftdeploy-logs nginx: image: nginx:latest port: 8080 proxy_timeout: 30 opa: image: openpolicyagent/opa:latest-static port: 8181 network: name: swiftdeploy-net driver_type: bridge contact: "[email protected]"
services: image: swift-deploy-1-node:latest port: 3000 mode: stable # stable or canary version: "1.0.0" restart_policy: unless-stopped log_volume: swiftdeploy-logs nginx: image: nginx:latest port: 8080 proxy_timeout: 30 opa: image: openpolicyagent/opa:latest-static port: 8181 network: name: swiftdeploy-net driver_type: bridge contact: "[email protected]"
MODE = os.environ.get("MODE", "stable")
APP_VERSION = os.environ.get("APP_VERSION", "1.0.0")
APP_PORT = int(os.environ.get("APP_PORT", "3000"))
START_TIME = time.time()
MODE = os.environ.get("MODE", "stable")
APP_VERSION = os.environ.get("APP_VERSION", "1.0.0")
APP_PORT = int(os.environ.get("APP_PORT", "3000"))
START_TIME = time.time()
MODE = os.environ.get("MODE", "stable")
APP_VERSION = os.environ.get("APP_VERSION", "1.0.0")
APP_PORT = int(os.environ.get("APP_PORT", "3000"))
START_TIME = time.time()
# Shared chaos-injection state. All reads and writes go through the two
# accessors below, which serialize on chaos_lock.
chaos_lock = threading.Lock()
chaos_state = {"mode": None, "duration": None, "rate": None}


def get_chaos():
    """Return a snapshot copy of the chaos state; callers can't mutate internals."""
    with chaos_lock:
        return dict(chaos_state)


def set_chaos(state):
    """Merge the given keys into the shared chaos state under the lock."""
    with chaos_lock:
        chaos_state.update(state)
# Shared chaos-injection state. All reads and writes go through the two
# accessors below, which serialize on chaos_lock.
chaos_lock = threading.Lock()
chaos_state = {"mode": None, "duration": None, "rate": None}


def get_chaos():
    """Return a snapshot copy of the chaos state; callers can't mutate internals."""
    with chaos_lock:
        return dict(chaos_state)


def set_chaos(state):
    """Merge the given keys into the shared chaos state under the lock."""
    with chaos_lock:
        chaos_state.update(state)
# Shared chaos-injection state. All reads and writes go through the two
# accessors below, which serialize on chaos_lock.
chaos_lock = threading.Lock()
chaos_state = {"mode": None, "duration": None, "rate": None}


def get_chaos():
    """Return a snapshot copy of the chaos state; callers can't mutate internals."""
    with chaos_lock:
        return dict(chaos_state)


def set_chaos(state):
    """Merge the given keys into the shared chaos state under the lock."""
    with chaos_lock:
        chaos_state.update(state)
self.send_json(200, { "message": "Welcome to SwiftDeploy API", "mode": MODE, "version": APP_VERSION, "timestamp": time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime()),
})
self.send_json(200, { "message": "Welcome to SwiftDeploy API", "mode": MODE, "version": APP_VERSION, "timestamp": time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime()),
})
self.send_json(200, { "message": "Welcome to SwiftDeploy API", "mode": MODE, "version": APP_VERSION, "timestamp": time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime()),
})
uptime = round(time.time() - START_TIME, 2)
self.send_json(200, { "status": "ok", "mode": MODE, "version": APP_VERSION, "uptime_seconds": uptime,
})
uptime = round(time.time() - START_TIME, 2)
self.send_json(200, { "status": "ok", "mode": MODE, "version": APP_VERSION, "uptime_seconds": uptime,
})
uptime = round(time.time() - START_TIME, 2)
self.send_json(200, { "status": "ok", "mode": MODE, "version": APP_VERSION, "uptime_seconds": uptime,
})
if MODE != "canary": self.send_json(403, {"error": "chaos endpoint only available in canary mode"}) return length = int(self.headers.get("Content-Length", 0))
data = json.loads(self.rfile.read(length))
mode = data.get("mode") if mode == "slow": set_chaos({"mode": "slow", "duration": data.get("duration", 2), "rate": None})
elif mode == "error": set_chaos({"mode": "error", "duration": None, "rate": data.get("rate", 0.5)})
elif mode == "recover": set_chaos({"mode": None, "duration": None, "rate": None})
if MODE != "canary": self.send_json(403, {"error": "chaos endpoint only available in canary mode"}) return length = int(self.headers.get("Content-Length", 0))
data = json.loads(self.rfile.read(length))
mode = data.get("mode") if mode == "slow": set_chaos({"mode": "slow", "duration": data.get("duration", 2), "rate": None})
elif mode == "error": set_chaos({"mode": "error", "duration": None, "rate": data.get("rate", 0.5)})
elif mode == "recover": set_chaos({"mode": None, "duration": None, "rate": None})
if MODE != "canary": self.send_json(403, {"error": "chaos endpoint only available in canary mode"}) return length = int(self.headers.get("Content-Length", 0))
data = json.loads(self.rfile.read(length))
mode = data.get("mode") if mode == "slow": set_chaos({"mode": "slow", "duration": data.get("duration", 2), "rate": None})
elif mode == "error": set_chaos({"mode": "error", "duration": None, "rate": data.get("rate", 0.5)})
elif mode == "recover": set_chaos({"mode": None, "duration": None, "rate": None})
# Minimal Python runtime; the service runs as a dedicated non-root user.
FROM python:3.12-alpine

RUN addgroup -S appgroup && adduser -S appuser -G appgroup
WORKDIR /app

COPY app/main.py .

RUN chown -R appuser:appgroup /app
USER appuser

# Defaults; deployments override these via docker-compose environment.
ENV MODE=stable
ENV APP_VERSION=1.0.0
ENV APP_PORT=3000

EXPOSE 3000

# Probe /healthz from inside the container using only the stdlib.
# NOTE(review): the port here is hard-coded to 3000 even though APP_PORT is
# configurable — confirm whether APP_PORT overrides are actually supported.
HEALTHCHECK --interval=10s --timeout=5s --start-period=15s --retries=5 \
    CMD python -c "import urllib.request; urllib.request.urlopen('http://127.0.0.1:3000/healthz', timeout=4)" || exit 1

CMD ["python", "main.py"]
# Minimal Python runtime; the service runs as a dedicated non-root user.
FROM python:3.12-alpine

RUN addgroup -S appgroup && adduser -S appuser -G appgroup
WORKDIR /app

COPY app/main.py .

RUN chown -R appuser:appgroup /app
USER appuser

# Defaults; deployments override these via docker-compose environment.
ENV MODE=stable
ENV APP_VERSION=1.0.0
ENV APP_PORT=3000

EXPOSE 3000

# Probe /healthz from inside the container using only the stdlib.
# NOTE(review): the port here is hard-coded to 3000 even though APP_PORT is
# configurable — confirm whether APP_PORT overrides are actually supported.
HEALTHCHECK --interval=10s --timeout=5s --start-period=15s --retries=5 \
    CMD python -c "import urllib.request; urllib.request.urlopen('http://127.0.0.1:3000/healthz', timeout=4)" || exit 1

CMD ["python", "main.py"]
# Minimal Python runtime; the service runs as a dedicated non-root user.
FROM python:3.12-alpine

RUN addgroup -S appgroup && adduser -S appuser -G appgroup
WORKDIR /app

COPY app/main.py .

RUN chown -R appuser:appgroup /app
USER appuser

# Defaults; deployments override these via docker-compose environment.
ENV MODE=stable
ENV APP_VERSION=1.0.0
ENV APP_PORT=3000

EXPOSE 3000

# Probe /healthz from inside the container using only the stdlib.
# NOTE(review): the port here is hard-coded to 3000 even though APP_PORT is
# configurable — confirm whether APP_PORT overrides are actually supported.
HEALTHCHECK --interval=10s --timeout=5s --start-period=15s --retries=5 \
    CMD python -c "import urllib.request; urllib.request.urlopen('http://127.0.0.1:3000/healthz', timeout=4)" || exit 1

CMD ["python", "main.py"]
upstream app_backend { server app:{{ service_port }}; keepalive 32;
} log_format swiftdeploy '$time_iso8601 | $status | ${request_time}s | $upstream_addr | $request'; server { listen {{ nginx_port }}; proxy_connect_timeout {{ proxy_timeout }}s; proxy_send_timeout {{ proxy_timeout }}s; proxy_read_timeout {{ proxy_timeout }}s; add_header X-Deployed-By swiftdeploy always; proxy_pass_header X-Mode; location @error502 { default_type application/json; return 502 '{"error":"Bad Gateway","code":502,"service":"app","contact":"{{ contact }}"}'; }
}
upstream app_backend { server app:{{ service_port }}; keepalive 32;
} log_format swiftdeploy '$time_iso8601 | $status | ${request_time}s | $upstream_addr | $request'; server { listen {{ nginx_port }}; proxy_connect_timeout {{ proxy_timeout }}s; proxy_send_timeout {{ proxy_timeout }}s; proxy_read_timeout {{ proxy_timeout }}s; add_header X-Deployed-By swiftdeploy always; proxy_pass_header X-Mode; location @error502 { default_type application/json; return 502 '{"error":"Bad Gateway","code":502,"service":"app","contact":"{{ contact }}"}'; }
}
upstream app_backend { server app:{{ service_port }}; keepalive 32;
} log_format swiftdeploy '$time_iso8601 | $status | ${request_time}s | $upstream_addr | $request'; server { listen {{ nginx_port }}; proxy_connect_timeout {{ proxy_timeout }}s; proxy_send_timeout {{ proxy_timeout }}s; proxy_read_timeout {{ proxy_timeout }}s; add_header X-Deployed-By swiftdeploy always; proxy_pass_header X-Mode; location @error502 { default_type application/json; return 502 '{"error":"Bad Gateway","code":502,"service":"app","contact":"{{ contact }}"}'; }
}
app: expose: - "{{ service_port }}" # container-to-container only, NEVER published to host nginx: ports: - "{{ nginx_port }}:{{ nginx_port }}" # only nginx faces the world depends_on: app: condition: service_healthy # nginx waits for app healthcheck to pass
app: expose: - "{{ service_port }}" # container-to-container only, NEVER published to host nginx: ports: - "{{ nginx_port }}:{{ nginx_port }}" # only nginx faces the world depends_on: app: condition: service_healthy # nginx waits for app healthcheck to pass
app: expose: - "{{ service_port }}" # container-to-container only, NEVER published to host nginx: ports: - "{{ nginx_port }}:{{ nginx_port }}" # only nginx faces the world depends_on: app: condition: service_healthy # nginx waits for app healthcheck to pass
def render_template(tmpl_path, context): with open(tmpl_path) as f: content = f.read() for key, val in context.items(): content = content.replace("{{ " + key + " }}", str(val)) return content
def render_template(tmpl_path, context): with open(tmpl_path) as f: content = f.read() for key, val in context.items(): content = content.replace("{{ " + key + " }}", str(val)) return content
def render_template(tmpl_path, context): with open(tmpl_path) as f: content = f.read() for key, val in context.items(): content = content.replace("{{ " + key + " }}", str(val)) return content
def cmd_init(): manifest = load_manifest() ctx = build_context(manifest) nginx_conf = render_template(NGINX_TMPL, ctx) compose_conf = render_template(COMPOSE_TMPL, ctx) with open(NGINX_OUT, "w") as f: f.write(nginx_conf) with open(COMPOSE_OUT, "w") as f: f.write(compose_conf)
def cmd_init(): manifest = load_manifest() ctx = build_context(manifest) nginx_conf = render_template(NGINX_TMPL, ctx) compose_conf = render_template(COMPOSE_TMPL, ctx) with open(NGINX_OUT, "w") as f: f.write(nginx_conf) with open(COMPOSE_OUT, "w") as f: f.write(compose_conf)
def cmd_init(): manifest = load_manifest() ctx = build_context(manifest) nginx_conf = render_template(NGINX_TMPL, ctx) compose_conf = render_template(COMPOSE_TMPL, ctx) with open(NGINX_OUT, "w") as f: f.write(nginx_conf) with open(COMPOSE_OUT, "w") as f: f.write(compose_conf)
# Check 1: manifest.yaml exists and is valid YAML
# Check 2: all required fields present and non-empty
# Check 3: docker image inspect — exits 0 if exists
# Check 4: ss -tlnp | grep :8080 — non-empty means port in use
# Check 5: nginx -t via isolated Docker container
# Check 1: manifest.yaml exists and is valid YAML
# Check 2: all required fields present and non-empty
# Check 3: docker image inspect — exits 0 if exists
# Check 4: ss -tlnp | grep :8080 — non-empty means port in use
# Check 5: nginx -t via isolated Docker container
# Check 1: manifest.yaml exists and is valid YAML
# Check 2: all required fields present and non-empty
# Check 3: docker image inspect — exits 0 if exists
# Check 4: ss -tlnp | grep :8080 — non-empty means port in use
# Check 5: nginx -t via isolated Docker container
test_content = data.replace("server app:", "server 127.0.0.1:")
test_content = data.replace("server app:", "server 127.0.0.1:")
test_content = data.replace("server app:", "server 127.0.0.1:")
deadline = time.time() + 60
while time.time() < deadline: try: with urllib.request.urlopen(f"http://localhost:{nginx_port}/healthz", timeout=3) as resp: if json.loads(resp.read()).get("status") == "ok": healthy = True break except Exception: pass # container still starting — connection refused is normal time.sleep(2)
deadline = time.time() + 60
while time.time() < deadline: try: with urllib.request.urlopen(f"http://localhost:{nginx_port}/healthz", timeout=3) as resp: if json.loads(resp.read()).get("status") == "ok": healthy = True break except Exception: pass # container still starting — connection refused is normal time.sleep(2)
deadline = time.time() + 60
while time.time() < deadline: try: with urllib.request.urlopen(f"http://localhost:{nginx_port}/healthz", timeout=3) as resp: if json.loads(resp.read()).get("status") == "ok": healthy = True break except Exception: pass # container still starting — connection refused is normal time.sleep(2)
# 1. Update manifest in-place
content = re.sub(r"(mode:\s*)(\S+)", f"\\g<1>{target_mode}", content, count=1) # 2. Regenerate docker-compose.yml with new MODE env var # 3. Restart ONLY the app container — nginx stays up
run(compose_cmd("up -d --no-deps app")) # 4. Confirm mode via /healthz
# 1. Update manifest in-place
content = re.sub(r"(mode:\s*)(\S+)", f"\\g<1>{target_mode}", content, count=1) # 2. Regenerate docker-compose.yml with new MODE env var # 3. Restart ONLY the app container — nginx stays up
run(compose_cmd("up -d --no-deps app")) # 4. Confirm mode via /healthz
# 1. Update manifest in-place
content = re.sub(r"(mode:\s*)(\S+)", f"\\g<1>{target_mode}", content, count=1) # 2. Regenerate docker-compose.yml with new MODE env var # 3. Restart ONLY the app container — nginx stays up
run(compose_cmd("up -d --no-deps app")) # 4. Confirm mode via /healthz
if MODE == "canary": self.send_header("X-Mode", "canary") # on EVERY response
if MODE == "canary": self.send_header("X-Mode", "canary") # on EVERY response
if MODE == "canary": self.send_header("X-Mode", "canary") # on EVERY response
# Counter: {(method, path, status_code): count}
request_counts = {}

# Histogram state per path, using the standard Prometheus latency buckets.
HISTOGRAM_BUCKETS = [0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1.0, 2.5, 5.0, 10.0]
request_durations = {}


def record_request(method, path, status_code, duration_seconds):
    """Fold one finished request into the counter and per-path latency histogram.

    status_code is stringified so it can serve directly as a Prometheus label
    value. metrics_lock is presumed to be a module-level threading.Lock defined
    elsewhere in this file — confirm it exists alongside these structures.
    """
    with metrics_lock:
        counter_key = (method, path, str(status_code))
        request_counts[counter_key] = request_counts.get(counter_key, 0) + 1
        path_hist = request_durations.setdefault(path, {
            "buckets": {str(bound): 0 for bound in HISTOGRAM_BUCKETS},
            "sum": 0.0,
            "count": 0,
        })
        path_hist["sum"] += duration_seconds
        path_hist["count"] += 1
        # Prometheus histogram buckets are cumulative: the observation
        # increments every bucket whose upper bound it fits under.
        for bound in HISTOGRAM_BUCKETS:
            if duration_seconds <= bound:
                path_hist["buckets"][str(bound)] += 1
# Counter: {(method, path, status_code): count}
request_counts = {}

# Histogram state per path, using the standard Prometheus latency buckets.
HISTOGRAM_BUCKETS = [0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1.0, 2.5, 5.0, 10.0]
request_durations = {}


def record_request(method, path, status_code, duration_seconds):
    """Fold one finished request into the counter and per-path latency histogram.

    status_code is stringified so it can serve directly as a Prometheus label
    value. metrics_lock is presumed to be a module-level threading.Lock defined
    elsewhere in this file — confirm it exists alongside these structures.
    """
    with metrics_lock:
        counter_key = (method, path, str(status_code))
        request_counts[counter_key] = request_counts.get(counter_key, 0) + 1
        path_hist = request_durations.setdefault(path, {
            "buckets": {str(bound): 0 for bound in HISTOGRAM_BUCKETS},
            "sum": 0.0,
            "count": 0,
        })
        path_hist["sum"] += duration_seconds
        path_hist["count"] += 1
        # Prometheus histogram buckets are cumulative: the observation
        # increments every bucket whose upper bound it fits under.
        for bound in HISTOGRAM_BUCKETS:
            if duration_seconds <= bound:
                path_hist["buckets"][str(bound)] += 1
# Counter: {(method, path, status_code): count}
request_counts = {}

# Histogram state per path, using the standard Prometheus latency buckets.
HISTOGRAM_BUCKETS = [0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1.0, 2.5, 5.0, 10.0]
request_durations = {}


def record_request(method, path, status_code, duration_seconds):
    """Fold one finished request into the counter and per-path latency histogram.

    status_code is stringified so it can serve directly as a Prometheus label
    value. metrics_lock is presumed to be a module-level threading.Lock defined
    elsewhere in this file — confirm it exists alongside these structures.
    """
    with metrics_lock:
        counter_key = (method, path, str(status_code))
        request_counts[counter_key] = request_counts.get(counter_key, 0) + 1
        path_hist = request_durations.setdefault(path, {
            "buckets": {str(bound): 0 for bound in HISTOGRAM_BUCKETS},
            "sum": 0.0,
            "count": 0,
        })
        path_hist["sum"] += duration_seconds
        path_hist["count"] += 1
        # Prometheus histogram buckets are cumulative: the observation
        # increments every bucket whose upper bound it fits under.
        for bound in HISTOGRAM_BUCKETS:
            if duration_seconds <= bound:
                path_hist["buckets"][str(bound)] += 1
def do_GET(self): start = time.time() path = self.path.split("?")[0] status = self._handle_get() record_request("GET", path, status, time.time() - start)
def do_GET(self): start = time.time() path = self.path.split("?")[0] status = self._handle_get() record_request("GET", path, status, time.time() - start)
def do_GET(self): start = time.time() path = self.path.split("?")[0] status = self._handle_get() record_request("GET", path, status, time.time() - start)
# 1. http_requests_total — counter, labels: method, path, status_code
# 2. http_request_duration_seconds — histogram with standard buckets
# 3. app_uptime_seconds — gauge
# 4. app_mode — gauge: 0=stable, 1=canary
# 5. chaos_active — gauge: 0=none, 1=slow, 2=error
# 1. http_requests_total — counter, labels: method, path, status_code
# 2. http_request_duration_seconds — histogram with standard buckets
# 3. app_uptime_seconds — gauge
# 4. app_mode — gauge: 0=stable, 1=canary
# 5. chaos_active — gauge: 0=none, 1=slow, 2=error
# 1. http_requests_total — counter, labels: method, path, status_code
# 2. http_request_duration_seconds — histogram with standard buckets
# 3. app_uptime_seconds — gauge
# 4. app_mode — gauge: 0=stable, 1=canary
# 5. chaos_active — gauge: 0=none, 1=slow, 2=error
# HELP http_requests_total Total number of HTTP requests
# TYPE http_requests_total counter
http_requests_total{method="GET",path="/",status_code="200"} 42
http_requests_total{method="GET",path="/healthz",status_code="200"} 60
http_requests_total{method="GET",path="/",status_code="500"} 38 # HELP http_request_duration_seconds HTTP request latency in seconds
# TYPE http_request_duration_seconds histogram
http_request_duration_seconds_bucket{path="/",le="0.005"} 40
http_request_duration_seconds_bucket{path="/",le="+Inf"} 80
http_request_duration_seconds_sum{path="/"} 0.042381
http_request_duration_seconds_count{path="/"} 80 # HELP app_mode Current deployment mode (0=stable, 1=canary)
# TYPE app_mode gauge
app_mode 1 # HELP chaos_active Current chaos state (0=none, 1=slow, 2=error)
# TYPE chaos_active gauge
chaos_active 2
# HELP http_requests_total Total number of HTTP requests
# TYPE http_requests_total counter
http_requests_total{method="GET",path="/",status_code="200"} 42
http_requests_total{method="GET",path="/healthz",status_code="200"} 60
http_requests_total{method="GET",path="/",status_code="500"} 38 # HELP http_request_duration_seconds HTTP request latency in seconds
# TYPE http_request_duration_seconds histogram
http_request_duration_seconds_bucket{path="/",le="0.005"} 40
http_request_duration_seconds_bucket{path="/",le="+Inf"} 80
http_request_duration_seconds_sum{path="/"} 0.042381
http_request_duration_seconds_count{path="/"} 80 # HELP app_mode Current deployment mode (0=stable, 1=canary)
# TYPE app_mode gauge
app_mode 1 # HELP chaos_active Current chaos state (0=none, 1=slow, 2=error)
# TYPE chaos_active gauge
chaos_active 2
# HELP http_requests_total Total number of HTTP requests
# TYPE http_requests_total counter
http_requests_total{method="GET",path="/",status_code="200"} 42
http_requests_total{method="GET",path="/healthz",status_code="200"} 60
http_requests_total{method="GET",path="/",status_code="500"} 38 # HELP http_request_duration_seconds HTTP request latency in seconds
# TYPE http_request_duration_seconds histogram
http_request_duration_seconds_bucket{path="/",le="0.005"} 40
http_request_duration_seconds_bucket{path="/",le="+Inf"} 80
http_request_duration_seconds_sum{path="/"} 0.042381
http_request_duration_seconds_count{path="/"} 80 # HELP app_mode Current deployment mode (0=stable, 1=canary)
# TYPE app_mode gauge
app_mode 1 # HELP chaos_active Current chaos state (0=none, 1=slow, 2=error)
# TYPE chaos_active gauge
chaos_active 2
{ "thresholds": { "min_disk_free_gb": 10.0, "max_cpu_load": 2.0, "min_mem_free_percent": 10.0, "max_error_rate_percent": 1.0, "max_p99_latency_ms": 500.0 }
}
{ "thresholds": { "min_disk_free_gb": 10.0, "max_cpu_load": 2.0, "min_mem_free_percent": 10.0, "max_error_rate_percent": 1.0, "max_p99_latency_ms": 500.0 }
}
{ "thresholds": { "min_disk_free_gb": 10.0, "max_cpu_load": 2.0, "min_mem_free_percent": 10.0, "max_error_rate_percent": 1.0, "max_p99_latency_ms": 500.0 }
}
package swiftdeploy.infrastructure

import future.keywords.if
import future.keywords.contains

# Deny by default; allow only when every host-resource check passes.
default allow := false

allow if {
    disk_ok
    cpu_ok
    mem_ok
}

disk_ok if { input.disk_free_gb >= data.thresholds.min_disk_free_gb }

cpu_ok if { input.cpu_load_1m <= data.thresholds.max_cpu_load }

mem_ok if { input.mem_free_percent >= data.thresholds.min_mem_free_percent }

# One reasons rule per check, so every denial carries an explanation.
# (Previously only disk_ok produced a message; CPU/memory denials were silent.)
reasons contains msg if {
    not disk_ok
    msg := sprintf(
        "disk_free_gb is %.1f, minimum required is %.1f",
        [input.disk_free_gb, data.thresholds.min_disk_free_gb]
    )
}

reasons contains msg if {
    not cpu_ok
    msg := sprintf(
        "cpu_load_1m is %.2f, maximum allowed is %.2f",
        [input.cpu_load_1m, data.thresholds.max_cpu_load]
    )
}

reasons contains msg if {
    not mem_ok
    msg := sprintf(
        "mem_free_percent is %.1f, minimum required is %.1f",
        [input.mem_free_percent, data.thresholds.min_mem_free_percent]
    )
}

# decision is what the CLI reads — never a bare boolean
decision := {
    "allow": allow,
    "reasons": reasons,
    "domain": "infrastructure",
    "checked": {
        "disk_free_gb": input.disk_free_gb,
        "cpu_load_1m": input.cpu_load_1m,
        "mem_free_percent": input.mem_free_percent,
    },
}
package swiftdeploy.infrastructure

import future.keywords.if
import future.keywords.contains

# Deny by default; allow only when every host-resource check passes.
default allow := false

allow if {
    disk_ok
    cpu_ok
    mem_ok
}

disk_ok if { input.disk_free_gb >= data.thresholds.min_disk_free_gb }

cpu_ok if { input.cpu_load_1m <= data.thresholds.max_cpu_load }

mem_ok if { input.mem_free_percent >= data.thresholds.min_mem_free_percent }

# One reasons rule per check, so every denial carries an explanation.
# (Previously only disk_ok produced a message; CPU/memory denials were silent.)
reasons contains msg if {
    not disk_ok
    msg := sprintf(
        "disk_free_gb is %.1f, minimum required is %.1f",
        [input.disk_free_gb, data.thresholds.min_disk_free_gb]
    )
}

reasons contains msg if {
    not cpu_ok
    msg := sprintf(
        "cpu_load_1m is %.2f, maximum allowed is %.2f",
        [input.cpu_load_1m, data.thresholds.max_cpu_load]
    )
}

reasons contains msg if {
    not mem_ok
    msg := sprintf(
        "mem_free_percent is %.1f, minimum required is %.1f",
        [input.mem_free_percent, data.thresholds.min_mem_free_percent]
    )
}

# decision is what the CLI reads — never a bare boolean
decision := {
    "allow": allow,
    "reasons": reasons,
    "domain": "infrastructure",
    "checked": {
        "disk_free_gb": input.disk_free_gb,
        "cpu_load_1m": input.cpu_load_1m,
        "mem_free_percent": input.mem_free_percent,
    },
}
package swiftdeploy.infrastructure

import future.keywords.if
import future.keywords.contains

# Deny by default; allow only when every host-resource check passes.
default allow := false

allow if {
    disk_ok
    cpu_ok
    mem_ok
}

disk_ok if { input.disk_free_gb >= data.thresholds.min_disk_free_gb }

cpu_ok if { input.cpu_load_1m <= data.thresholds.max_cpu_load }

mem_ok if { input.mem_free_percent >= data.thresholds.min_mem_free_percent }

# One reasons rule per check, so every denial carries an explanation.
# (Previously only disk_ok produced a message; CPU/memory denials were silent.)
reasons contains msg if {
    not disk_ok
    msg := sprintf(
        "disk_free_gb is %.1f, minimum required is %.1f",
        [input.disk_free_gb, data.thresholds.min_disk_free_gb]
    )
}

reasons contains msg if {
    not cpu_ok
    msg := sprintf(
        "cpu_load_1m is %.2f, maximum allowed is %.2f",
        [input.cpu_load_1m, data.thresholds.max_cpu_load]
    )
}

reasons contains msg if {
    not mem_ok
    msg := sprintf(
        "mem_free_percent is %.1f, minimum required is %.1f",
        [input.mem_free_percent, data.thresholds.min_mem_free_percent]
    )
}

# decision is what the CLI reads — never a bare boolean
decision := {
    "allow": allow,
    "reasons": reasons,
    "domain": "infrastructure",
    "checked": {
        "disk_free_gb": input.disk_free_gb,
        "cpu_load_1m": input.cpu_load_1m,
        "mem_free_percent": input.mem_free_percent,
    },
}
package swiftdeploy.canary

import future.keywords.if
import future.keywords.contains

# Deny by default; promotion is allowed only when both SLO checks pass.
default allow := false

allow if {
    error_rate_ok
    latency_ok
}

error_rate_ok if { input.error_rate_percent <= data.thresholds.max_error_rate_percent }

latency_ok if { input.p99_latency_ms <= data.thresholds.max_p99_latency_ms }

# One reasons rule per check, so every denial carries an explanation.
# (Previously only error_rate_ok produced a message; latency denials were silent.)
reasons contains msg if {
    not error_rate_ok
    msg := sprintf(
        "error_rate is %.2f%%, maximum allowed is %.2f%%",
        [input.error_rate_percent, data.thresholds.max_error_rate_percent]
    )
}

reasons contains msg if {
    not latency_ok
    msg := sprintf(
        "p99_latency is %.1fms, maximum allowed is %.1fms",
        [input.p99_latency_ms, data.thresholds.max_p99_latency_ms]
    )
}

decision := {
    "allow": allow,
    "reasons": reasons,
    "domain": "canary",
    "checked": {
        "error_rate_percent": input.error_rate_percent,
        "p99_latency_ms": input.p99_latency_ms,
        "window_seconds": input.window_seconds,
    },
}
package swiftdeploy.canary

import future.keywords.if
import future.keywords.contains

# Deny by default; promotion is allowed only when both SLO checks pass.
default allow := false

allow if {
    error_rate_ok
    latency_ok
}

error_rate_ok if { input.error_rate_percent <= data.thresholds.max_error_rate_percent }

latency_ok if { input.p99_latency_ms <= data.thresholds.max_p99_latency_ms }

# One reasons rule per check, so every denial carries an explanation.
# (Previously only error_rate_ok produced a message; latency denials were silent.)
reasons contains msg if {
    not error_rate_ok
    msg := sprintf(
        "error_rate is %.2f%%, maximum allowed is %.2f%%",
        [input.error_rate_percent, data.thresholds.max_error_rate_percent]
    )
}

reasons contains msg if {
    not latency_ok
    msg := sprintf(
        "p99_latency is %.1fms, maximum allowed is %.1fms",
        [input.p99_latency_ms, data.thresholds.max_p99_latency_ms]
    )
}

decision := {
    "allow": allow,
    "reasons": reasons,
    "domain": "canary",
    "checked": {
        "error_rate_percent": input.error_rate_percent,
        "p99_latency_ms": input.p99_latency_ms,
        "window_seconds": input.window_seconds,
    },
}
package swiftdeploy.canary

import future.keywords.if
import future.keywords.contains

# Deny by default; promotion is allowed only when both SLO checks pass.
default allow := false

allow if {
    error_rate_ok
    latency_ok
}

error_rate_ok if { input.error_rate_percent <= data.thresholds.max_error_rate_percent }

latency_ok if { input.p99_latency_ms <= data.thresholds.max_p99_latency_ms }

# One reasons rule per check, so every denial carries an explanation.
# (Previously only error_rate_ok produced a message; latency denials were silent.)
reasons contains msg if {
    not error_rate_ok
    msg := sprintf(
        "error_rate is %.2f%%, maximum allowed is %.2f%%",
        [input.error_rate_percent, data.thresholds.max_error_rate_percent]
    )
}

reasons contains msg if {
    not latency_ok
    msg := sprintf(
        "p99_latency is %.1fms, maximum allowed is %.1fms",
        [input.p99_latency_ms, data.thresholds.max_p99_latency_ms]
    )
}

decision := {
    "allow": allow,
    "reasons": reasons,
    "domain": "canary",
    "checked": {
        "error_rate_percent": input.error_rate_percent,
        "p99_latency_ms": input.p99_latency_ms,
        "window_seconds": input.window_seconds,
    },
}
opa: ports: - "127.0.0.1:{{ opa_port }}:8181" # localhost only — never 0.0.0.0
opa: ports: - "127.0.0.1:{{ opa_port }}:8181" # localhost only — never 0.0.0.0
opa: ports: - "127.0.0.1:{{ opa_port }}:8181" # localhost only — never 0.0.0.0
def query_opa(manifest, package, input_data): url = f"{opa_url(manifest)}/v1/data/{package.replace('.', '/')}/decision" payload = json.dumps({"input": input_data}).encode() try: req = urllib.request.Request(url, data=payload, headers={"Content-Type": "application/json"}, method="POST") with urllib.request.urlopen(req, timeout=5) as resp: body = json.loads(resp.read()) result = body.get("result") if result is None: return None, "OPA returned empty result — check policy package name" return result, None except urllib.error.URLError as e: return None, f"OPA unreachable: {e.reason}" except Exception as e: return None, f"OPA query failed: {e}"
def query_opa(manifest, package, input_data): url = f"{opa_url(manifest)}/v1/data/{package.replace('.', '/')}/decision" payload = json.dumps({"input": input_data}).encode() try: req = urllib.request.Request(url, data=payload, headers={"Content-Type": "application/json"}, method="POST") with urllib.request.urlopen(req, timeout=5) as resp: body = json.loads(resp.read()) result = body.get("result") if result is None: return None, "OPA returned empty result — check policy package name" return result, None except urllib.error.URLError as e: return None, f"OPA unreachable: {e.reason}" except Exception as e: return None, f"OPA query failed: {e}"
def query_opa(manifest, package, input_data): url = f"{opa_url(manifest)}/v1/data/{package.replace('.', '/')}/decision" payload = json.dumps({"input": input_data}).encode() try: req = urllib.request.Request(url, data=payload, headers={"Content-Type": "application/json"}, method="POST") with urllib.request.urlopen(req, timeout=5) as resp: body = json.loads(resp.read()) result = body.get("result") if result is None: return None, "OPA returned empty result — check policy package name" return result, None except urllib.error.URLError as e: return None, f"OPA unreachable: {e.reason}" except Exception as e: return None, f"OPA query failed: {e}"
def cmd_deploy():
    """Deploy the stack, gated by the OPA infrastructure policy.

    Loads the manifest, collects host statistics, and asks OPA whether the
    "swiftdeploy.infrastructure" policy permits a deploy. A denial is
    recorded in the history log and exits with status 1; on approval the
    compose stack is built and started.
    """
    manifest = load_manifest()
    # Host stats (disk, load, memory) are gathered by get_host_stats() and
    # sent to OPA as the policy input. The original inline recomputation of
    # disk_free_gb / cpu_load_1m / mem_free_percent was removed: the values
    # were never used, and the mem_free_percent line referenced an undefined
    # `meminfo` name, which would raise NameError at runtime.
    host_stats = get_host_stats()
    allowed = enforce_policy(manifest, "swiftdeploy.infrastructure", host_stats, "infrastructure")
    if not allowed:
        append_history({"event": "deploy_blocked", "reason": "infrastructure_policy"})
        sys.exit(1)
    # Only reached when OPA allows the deploy.
    run(compose_cmd("up -d --build"))
✘ Policy [infrastructure] DENIED ! disk_free_gb is 8.2, minimum required is 10.0 Deployment blocked by policy: infrastructure
✘ Policy [infrastructure] DENIED ! disk_free_gb is 8.2, minimum required is 10.0 Deployment blocked by policy: infrastructure
✘ Policy [infrastructure] DENIED ! disk_free_gb is 8.2, minimum required is 10.0 Deployment blocked by policy: infrastructure
def cmd_promote(target_mode):
    """Promote the app to `target_mode`, gating canary promotion with OPA.

    When promoting to "canary", current traffic metrics are scraped and the
    "swiftdeploy.canary" policy is consulted; a denial exits with status 1.
    Other target modes pass through without a policy check.
    """
    # Bug fix: the original referenced `manifest` without ever loading it.
    # cmd_deploy loads the manifest locally, so do the same here.
    manifest = load_manifest()
    if target_mode == "canary":
        # NOTE(review): `nginx_port` is not defined in this function —
        # presumably module-level state derived from the manifest; confirm.
        raw = scrape_metrics(nginx_port)
        metrics = parse_prometheus(raw)
        error_rate = calculate_error_rate(metrics)
        p99_ms = calculate_p99_latency_ms(metrics)
        allowed = enforce_policy(
            manifest,
            "swiftdeploy.canary",
            {"error_rate_percent": error_rate, "p99_latency_ms": p99_ms, "window_seconds": 30},
            "canary safety",
        )
        if not allowed:
            sys.exit(1)
def calculate_p99_latency_ms(metrics, path_filter=None):
    """Estimate p99 latency in milliseconds from Prometheus histogram buckets.

    Reads cumulative `http_request_duration_seconds_bucket` samples, sums
    counts per `le` boundary across label sets, and returns the smallest
    bucket boundary (converted to ms) whose cumulative count covers 99% of
    all observations.

    Args:
        metrics: parsed Prometheus samples; bucket entries carry
            `labels` (with `le`) and `value`.
        path_filter: if given, only entries whose `path` label equals this
            value are counted. NOTE(review): assumes the histogram carries a
            `path` label — confirm against the app's /metrics output.

    Returns:
        The p99 boundary in ms; 0.0 when there are no observations; 10000.0
        when even the largest finite bucket does not cover the p99.
    """
    buckets = {}
    total_count = 0
    for entry in metrics.get("http_request_duration_seconds_bucket", []):
        if path_filter is not None and entry["labels"].get("path") != path_filter:
            continue
        le = entry["labels"].get("le", "")
        if le == "+Inf":
            # Bug fix: sum +Inf counts across label sets. The original used
            # max(), which undercounts the total whenever more than one
            # series is present, while finite buckets were being summed —
            # an inconsistent (and wrong) denominator for the 99% threshold.
            total_count += entry["value"]
            continue
        buckets[float(le)] = buckets.get(float(le), 0) + entry["value"]
    if total_count == 0:
        return 0.0
    p99_threshold = total_count * 0.99
    for le in sorted(buckets):
        if buckets[le] >= p99_threshold:
            return round(le * 1000, 2)  # seconds → milliseconds
    # No finite bucket covers p99: report a sentinel ceiling of 10s.
    return 10000.0
def cmd_status():
    """Live status dashboard: scrape metrics, check policies, render, repeat.

    Loops forever (Ctrl+C to exit), refreshing every 5 seconds. Each cycle
    scrapes /metrics, derives error rate and p99 latency, asks OPA for live
    infrastructure and canary compliance, redraws the terminal dashboard,
    and appends a "status_scrape" record to the history log.

    NOTE(review): `manifest`, `nginx_port`, `mode_str`, and `chaos_str` are
    not defined in the visible body — presumably module-level state and
    values produced by the elided dashboard-rendering section; confirm.
    """
    while True:
        raw = scrape_metrics(nginx_port)
        metrics = parse_prometheus(raw)
        error_rate = calculate_error_rate(metrics)
        p99_ms = calculate_p99_latency_ms(metrics)
        # Query OPA for live compliance
        infra_dec, _ = query_opa(manifest, "swiftdeploy.infrastructure", get_host_stats())
        canary_dec, _ = query_opa(manifest, "swiftdeploy.canary", {"error_rate_percent": error_rate, "p99_latency_ms": p99_ms, "window_seconds": 30})
        os.system("clear")  # wipe the terminal before redrawing the dashboard
        # ... render dashboard ...
        append_history({
            "event": "status_scrape",
            "error_rate_percent": error_rate,
            "p99_latency_ms": p99_ms,
            "mode": mode_str,
            "chaos": chaos_str,
            # query_opa returns None when OPA is unreachable or errored, so
            # these fields record null rather than a pass/fail boolean then.
            "policy_infra_pass": infra_dec.get("allow") if infra_dec else None,
            "policy_canary_pass": canary_dec.get("allow") if canary_dec else None,
        })
        time.sleep(5)  # 5-second cadence, as advertised in the dashboard footer
SwiftDeploy Status Dashboard 2026-05-05T21:00:37Z ──────────────────────────────────────────────── ── Throughput ────────────────────────────────── req/s : 2.4 error rate : 56.45% ← red P99 latency : 5.0ms ── App State ─────────────────────────────────── mode : canary chaos : error ← red uptime : 316s ── Policy Compliance ─────────────────────────── ✔ infrastructure PASS ✘ canary FAIL ! error_rate is 56.45%, maximum allowed is 1.00% Refreshing every 5s — Ctrl+C to exit
SwiftDeploy Status Dashboard 2026-05-05T21:00:37Z ──────────────────────────────────────────────── ── Throughput ────────────────────────────────── req/s : 2.4 error rate : 56.45% ← red P99 latency : 5.0ms ── App State ─────────────────────────────────── mode : canary chaos : error ← red uptime : 316s ── Policy Compliance ─────────────────────────── ✔ infrastructure PASS ✘ canary FAIL ! error_rate is 56.45%, maximum allowed is 1.00% Refreshing every 5s — Ctrl+C to exit
SwiftDeploy Status Dashboard 2026-05-05T21:00:37Z ──────────────────────────────────────────────── ── Throughput ────────────────────────────────── req/s : 2.4 error rate : 56.45% ← red P99 latency : 5.0ms ── App State ─────────────────────────────────── mode : canary chaos : error ← red uptime : 316s ── Policy Compliance ─────────────────────────── ✔ infrastructure PASS ✘ canary FAIL ! error_rate is 56.45%, maximum allowed is 1.00% Refreshing every 5s — Ctrl+C to exit
{"timestamp":"2026-05-05T20:34:51Z","event":"deploy","mode":"stable"}
{"timestamp":"2026-05-05T20:55:22Z","event":"promote","target_mode":"canary"}
{"timestamp":"2026-05-05T20:55:23Z","event":"status_scrape","error_rate_percent":62.5,"chaos":"error","policy_canary_pass":false}
{"timestamp":"2026-05-05T21:01:01Z","event":"promote","target_mode":"stable"}
{"timestamp":"2026-05-05T20:34:51Z","event":"deploy","mode":"stable"}
{"timestamp":"2026-05-05T20:55:22Z","event":"promote","target_mode":"canary"}
{"timestamp":"2026-05-05T20:55:23Z","event":"status_scrape","error_rate_percent":62.5,"chaos":"error","policy_canary_pass":false}
{"timestamp":"2026-05-05T21:01:01Z","event":"promote","target_mode":"stable"}
{"timestamp":"2026-05-05T20:34:51Z","event":"deploy","mode":"stable"}
{"timestamp":"2026-05-05T20:55:22Z","event":"promote","target_mode":"canary"}
{"timestamp":"2026-05-05T20:55:23Z","event":"status_scrape","error_rate_percent":62.5,"chaos":"error","policy_canary_pass":false}
{"timestamp":"2026-05-05T21:01:01Z","event":"promote","target_mode":"stable"}