# Handle INVITE — main call processing
# Order matters: the DID lookup runs before the RTPEngine offer and dispatch.
if (is_method("INVITE")) {
    setflag(FLT_DLG);
    dlg_manage();
    route(NATDETECT);
    route(DID_ROUTING);       # <-- Look up DID first
    route(RTPENGINE_OFFER);
    route(DISPATCH);
    exit;
}
# Handle INVITE — main call processing
# Order matters: the DID lookup runs before the RTPEngine offer and dispatch.
if (is_method("INVITE")) {
    setflag(FLT_DLG);
    dlg_manage();
    route(NATDETECT);
    route(DID_ROUTING);       # <-- Look up DID first
    route(RTPENGINE_OFFER);
    route(DISPATCH);
    exit;
}
# Handle INVITE — main call processing
# Order matters: the DID lookup runs before the RTPEngine offer and dispatch.
if (is_method("INVITE")) {
    setflag(FLT_DLG);
    dlg_manage();
    route(NATDETECT);
    route(DID_ROUTING);       # <-- Look up DID first
    route(RTPENGINE_OFFER);
    route(DISPATCH);
    exit;
}
<configuration name="xml_curl.conf" description="cURL XML Gateway">
  <bindings>
    <binding name="directory">
      <param name="gateway-url" value="http://YOUR_DB1_IP:8080/freeswitch/directory"/>
      <param name="gateway-credentials" value="freeswitch:YOUR_API_PASSWORD"/>
      <param name="auth-scheme" value="basic"/>
      <param name="timeout" value="5"/>
      <param name="disable-100-continue" value="true"/>
      <param name="enable-post-mapping" value="false"/>
    </binding>
  </bindings>
</configuration>
<configuration name="xml_curl.conf" description="cURL XML Gateway">
  <bindings>
    <binding name="directory">
      <param name="gateway-url" value="http://YOUR_DB1_IP:8080/freeswitch/directory"/>
      <param name="gateway-credentials" value="freeswitch:YOUR_API_PASSWORD"/>
      <param name="auth-scheme" value="basic"/>
      <param name="timeout" value="5"/>
      <param name="disable-100-continue" value="true"/>
      <param name="enable-post-mapping" value="false"/>
    </binding>
  </bindings>
</configuration>
<configuration name="xml_curl.conf" description="cURL XML Gateway">
  <bindings>
    <binding name="directory">
      <param name="gateway-url" value="http://YOUR_DB1_IP:8080/freeswitch/directory"/>
      <param name="gateway-credentials" value="freeswitch:YOUR_API_PASSWORD"/>
      <param name="auth-scheme" value="basic"/>
      <param name="timeout" value="5"/>
      <param name="disable-100-continue" value="true"/>
      <param name="enable-post-mapping" value="false"/>
    </binding>
  </bindings>
</configuration>
#!/usr/bin/env python3
"""
freeswitch_directory_api.py
Serves FreeSWITCH user directory from MariaDB
Run with: uvicorn freeswitch_directory_api:app --host 0.0.0.0 --port 8080
"""
import asyncio
from contextlib import closing
from xml.sax.saxutils import quoteattr

from fastapi import FastAPI, Form, Response
import mysql.connector

app = FastAPI()

DB_CONFIG = {
    "host": "YOUR_DB1_IP",
    "user": "freeswitch",
    "password": "YOUR_FS_DB_PASSWORD",
    "database": "kamailio",
}

# Document with an empty directory section, returned whenever no user is served.
NOT_FOUND_XML = (
    '<?xml version="1.0"?><document type="freeswitch/xml">'
    '<section name="directory"></section></document>'
)


def _not_found() -> Response:
    """Build the empty-directory response."""
    return Response(content=NOT_FOUND_XML, media_type="text/xml")


def _lookup_subscriber(user: str, domain: str):
    """Fetch one subscriber row as a dict, or None when there is no match.

    The connection and cursor are closed even if the query raises.
    """
    with closing(mysql.connector.connect(**DB_CONFIG)) as conn:
        with closing(conn.cursor(dictionary=True)) as cursor:
            cursor.execute(
                "SELECT username, password, domain FROM subscriber "
                "WHERE username=%s AND domain=%s",
                (user, domain),
            )
            return cursor.fetchone()


@app.post("/freeswitch/directory")
async def directory(
    section: str = Form(default="directory"),
    key_name: str = Form(default=""),
    key_value: str = Form(default=""),
    user: str = Form(default=""),
    domain: str = Form(default=""),
):
    """Return FreeSWITCH directory XML for a user lookup.

    Args:
        section: Requested section; anything other than "directory" yields
            the empty-directory document.
        key_name: Accepted from the form but not used by the lookup.
        key_value: Accepted from the form but not used by the lookup.
        user: Username to look up in the ``subscriber`` table.
        domain: Domain to look up in the ``subscriber`` table.

    Returns:
        A ``text/xml`` response: the user's directory entry when a row
        matches, otherwise the empty-directory document.

    Raises:
        mysql.connector.Error: Database failures are not swallowed.

    NOTE(review): this handler does not check any credentials itself and it
    returns the stored password in the response body; restrict access to the
    endpoint accordingly.
    """
    if section != "directory" or not user or not domain:
        return _not_found()

    # mysql.connector is blocking; run it in the default executor so one slow
    # query does not stall every other request on the event loop.
    loop = asyncio.get_running_loop()
    row = await loop.run_in_executor(None, _lookup_subscriber, user, domain)
    if not row:
        return _not_found()

    # quoteattr() escapes the value AND adds the surrounding quotes, so
    # request- or DB-supplied text cannot break out of the attribute.
    domain_attr = quoteattr(domain)
    user_attr = quoteattr(str(row["username"]))
    password_attr = quoteattr(str(row["password"]))

    xml = f'''<?xml version="1.0" encoding="UTF-8"?>
<document type="freeswitch/xml">
  <section name="directory">
    <domain name={domain_attr}>
      <user id={user_attr}>
        <params>
          <param name="password" value={password_attr}/>
          <param name="vm-password" value={password_attr}/>
        </params>
        <variables>
          <variable name="accountcode" value={user_attr}/>
          <variable name="user_context" value="from-kamailio"/>
          <variable name="effective_caller_id_name" value={user_attr}/>
          <variable name="effective_caller_id_number" value={user_attr}/>
        </variables>
      </user>
    </domain>
  </section>
</document>'''
    return Response(content=xml, media_type="text/xml")
#!/usr/bin/env python3
"""
freeswitch_directory_api.py
Serves FreeSWITCH user directory from MariaDB
Run with: uvicorn freeswitch_directory_api:app --host 0.0.0.0 --port 8080
"""
import asyncio
from contextlib import closing
from xml.sax.saxutils import quoteattr

from fastapi import FastAPI, Form, Response
import mysql.connector

app = FastAPI()

DB_CONFIG = {
    "host": "YOUR_DB1_IP",
    "user": "freeswitch",
    "password": "YOUR_FS_DB_PASSWORD",
    "database": "kamailio",
}

# Document with an empty directory section, returned whenever no user is served.
NOT_FOUND_XML = (
    '<?xml version="1.0"?><document type="freeswitch/xml">'
    '<section name="directory"></section></document>'
)


def _not_found() -> Response:
    """Build the empty-directory response."""
    return Response(content=NOT_FOUND_XML, media_type="text/xml")


def _lookup_subscriber(user: str, domain: str):
    """Fetch one subscriber row as a dict, or None when there is no match.

    The connection and cursor are closed even if the query raises.
    """
    with closing(mysql.connector.connect(**DB_CONFIG)) as conn:
        with closing(conn.cursor(dictionary=True)) as cursor:
            cursor.execute(
                "SELECT username, password, domain FROM subscriber "
                "WHERE username=%s AND domain=%s",
                (user, domain),
            )
            return cursor.fetchone()


@app.post("/freeswitch/directory")
async def directory(
    section: str = Form(default="directory"),
    key_name: str = Form(default=""),
    key_value: str = Form(default=""),
    user: str = Form(default=""),
    domain: str = Form(default=""),
):
    """Return FreeSWITCH directory XML for a user lookup.

    Args:
        section: Requested section; anything other than "directory" yields
            the empty-directory document.
        key_name: Accepted from the form but not used by the lookup.
        key_value: Accepted from the form but not used by the lookup.
        user: Username to look up in the ``subscriber`` table.
        domain: Domain to look up in the ``subscriber`` table.

    Returns:
        A ``text/xml`` response: the user's directory entry when a row
        matches, otherwise the empty-directory document.

    Raises:
        mysql.connector.Error: Database failures are not swallowed.

    NOTE(review): this handler does not check any credentials itself and it
    returns the stored password in the response body; restrict access to the
    endpoint accordingly.
    """
    if section != "directory" or not user or not domain:
        return _not_found()

    # mysql.connector is blocking; run it in the default executor so one slow
    # query does not stall every other request on the event loop.
    loop = asyncio.get_running_loop()
    row = await loop.run_in_executor(None, _lookup_subscriber, user, domain)
    if not row:
        return _not_found()

    # quoteattr() escapes the value AND adds the surrounding quotes, so
    # request- or DB-supplied text cannot break out of the attribute.
    domain_attr = quoteattr(domain)
    user_attr = quoteattr(str(row["username"]))
    password_attr = quoteattr(str(row["password"]))

    xml = f'''<?xml version="1.0" encoding="UTF-8"?>
<document type="freeswitch/xml">
  <section name="directory">
    <domain name={domain_attr}>
      <user id={user_attr}>
        <params>
          <param name="password" value={password_attr}/>
          <param name="vm-password" value={password_attr}/>
        </params>
        <variables>
          <variable name="accountcode" value={user_attr}/>
          <variable name="user_context" value="from-kamailio"/>
          <variable name="effective_caller_id_name" value={user_attr}/>
          <variable name="effective_caller_id_number" value={user_attr}/>
        </variables>
      </user>
    </domain>
  </section>
</document>'''
    return Response(content=xml, media_type="text/xml")
#!/usr/bin/env python3
"""
freeswitch_directory_api.py
Serves FreeSWITCH user directory from MariaDB
Run with: uvicorn freeswitch_directory_api:app --host 0.0.0.0 --port 8080
"""
import asyncio
from contextlib import closing
from xml.sax.saxutils import quoteattr

from fastapi import FastAPI, Form, Response
import mysql.connector

app = FastAPI()

DB_CONFIG = {
    "host": "YOUR_DB1_IP",
    "user": "freeswitch",
    "password": "YOUR_FS_DB_PASSWORD",
    "database": "kamailio",
}

# Document with an empty directory section, returned whenever no user is served.
NOT_FOUND_XML = (
    '<?xml version="1.0"?><document type="freeswitch/xml">'
    '<section name="directory"></section></document>'
)


def _not_found() -> Response:
    """Build the empty-directory response."""
    return Response(content=NOT_FOUND_XML, media_type="text/xml")


def _lookup_subscriber(user: str, domain: str):
    """Fetch one subscriber row as a dict, or None when there is no match.

    The connection and cursor are closed even if the query raises.
    """
    with closing(mysql.connector.connect(**DB_CONFIG)) as conn:
        with closing(conn.cursor(dictionary=True)) as cursor:
            cursor.execute(
                "SELECT username, password, domain FROM subscriber "
                "WHERE username=%s AND domain=%s",
                (user, domain),
            )
            return cursor.fetchone()


@app.post("/freeswitch/directory")
async def directory(
    section: str = Form(default="directory"),
    key_name: str = Form(default=""),
    key_value: str = Form(default=""),
    user: str = Form(default=""),
    domain: str = Form(default=""),
):
    """Return FreeSWITCH directory XML for a user lookup.

    Args:
        section: Requested section; anything other than "directory" yields
            the empty-directory document.
        key_name: Accepted from the form but not used by the lookup.
        key_value: Accepted from the form but not used by the lookup.
        user: Username to look up in the ``subscriber`` table.
        domain: Domain to look up in the ``subscriber`` table.

    Returns:
        A ``text/xml`` response: the user's directory entry when a row
        matches, otherwise the empty-directory document.

    Raises:
        mysql.connector.Error: Database failures are not swallowed.

    NOTE(review): this handler does not check any credentials itself and it
    returns the stored password in the response body; restrict access to the
    endpoint accordingly.
    """
    if section != "directory" or not user or not domain:
        return _not_found()

    # mysql.connector is blocking; run it in the default executor so one slow
    # query does not stall every other request on the event loop.
    loop = asyncio.get_running_loop()
    row = await loop.run_in_executor(None, _lookup_subscriber, user, domain)
    if not row:
        return _not_found()

    # quoteattr() escapes the value AND adds the surrounding quotes, so
    # request- or DB-supplied text cannot break out of the attribute.
    domain_attr = quoteattr(domain)
    user_attr = quoteattr(str(row["username"]))
    password_attr = quoteattr(str(row["password"]))

    xml = f'''<?xml version="1.0" encoding="UTF-8"?>
<document type="freeswitch/xml">
  <section name="directory">
    <domain name={domain_attr}>
      <user id={user_attr}>
        <params>
          <param name="password" value={password_attr}/>
          <param name="vm-password" value={password_attr}/>
        </params>
        <variables>
          <variable name="accountcode" value={user_attr}/>
          <variable name="user_context" value="from-kamailio"/>
          <variable name="effective_caller_id_name" value={user_attr}/>
          <variable name="effective_caller_id_number" value={user_attr}/>
        </variables>
      </user>
    </domain>
  </section>
</document>'''
    return Response(content=xml, media_type="text/xml")
-- Tenant A: company-a.example.com
INSERT INTO did_routing (did, domain, destination, dest_type) VALUES
('+442012345678', 'company-a.example.com', '2000', 'ivr'),
('+442012345679', 'company-a.example.com', '3001', 'queue');

-- Tenant B: company-b.example.com
INSERT INTO did_routing (did, domain, destination, dest_type) VALUES
('+442087654321', 'company-b.example.com', '2000', 'ivr'),
('+442087654322', 'company-b.example.com', '3001', 'queue');
-- Tenant A: company-a.example.com
INSERT INTO did_routing (did, domain, destination, dest_type) VALUES
('+442012345678', 'company-a.example.com', '2000', 'ivr'),
('+442012345679', 'company-a.example.com', '3001', 'queue');

-- Tenant B: company-b.example.com
INSERT INTO did_routing (did, domain, destination, dest_type) VALUES
('+442087654321', 'company-b.example.com', '2000', 'ivr'),
('+442087654322', 'company-b.example.com', '3001', 'queue');
-- Tenant A: company-a.example.com
INSERT INTO did_routing (did, domain, destination, dest_type) VALUES
('+442012345678', 'company-a.example.com', '2000', 'ivr'),
('+442012345679', 'company-a.example.com', '3001', 'queue');

-- Tenant B: company-b.example.com
INSERT INTO did_routing (did, domain, destination, dest_type) VALUES
('+442087654321', 'company-b.example.com', '2000', 'ivr'),
('+442087654322', 'company-b.example.com', '3001', 'queue');
Browser (WebRTC)                             SIP Trunk
     │                                           │
     │ WSS (SIP over WebSocket)                  │ UDP/TCP SIP
     │ DTLS-SRTP (encrypted media)               │ RTP (unencrypted)
     ▼                                           ▼
┌──────────┐   SIP    ┌──────────┐   SIP    ┌──────────┐
│ Kamailio │◄────────►│ RTPEngine│◄────────►│FreeSWITCH│
│  (WSS)   │          │(DTLS↔RTP)│          │ (media)  │
└──────────┘          └──────────┘          └──────────┘

Kamailio: Terminates WebSocket, handles SIP-over-WS
RTPEngine: Bridges DTLS-SRTP (WebRTC) ↔ plain RTP (FreeSWITCH/trunks)
FreeSWITCH: Processes calls normally (does not know about WebRTC)
Browser (WebRTC)                             SIP Trunk
     │                                           │
     │ WSS (SIP over WebSocket)                  │ UDP/TCP SIP
     │ DTLS-SRTP (encrypted media)               │ RTP (unencrypted)
     ▼                                           ▼
┌──────────┐   SIP    ┌──────────┐   SIP    ┌──────────┐
│ Kamailio │◄────────►│ RTPEngine│◄────────►│FreeSWITCH│
│  (WSS)   │          │(DTLS↔RTP)│          │ (media)  │
└──────────┘          └──────────┘          └──────────┘

Kamailio: Terminates WebSocket, handles SIP-over-WS
RTPEngine: Bridges DTLS-SRTP (WebRTC) ↔ plain RTP (FreeSWITCH/trunks)
FreeSWITCH: Processes calls normally (does not know about WebRTC)
Browser (WebRTC)                             SIP Trunk
     │                                           │
     │ WSS (SIP over WebSocket)                  │ UDP/TCP SIP
     │ DTLS-SRTP (encrypted media)               │ RTP (unencrypted)
     ▼                                           ▼
┌──────────┐   SIP    ┌──────────┐   SIP    ┌──────────┐
│ Kamailio │◄────────►│ RTPEngine│◄────────►│FreeSWITCH│
│  (WSS)   │          │(DTLS↔RTP)│          │ (media)  │
└──────────┘          └──────────┘          └──────────┘

Kamailio: Terminates WebSocket, handles SIP-over-WS
RTPEngine: Bridges DTLS-SRTP (WebRTC) ↔ plain RTP (FreeSWITCH/trunks)
FreeSWITCH: Processes calls normally (does not know about WebRTC)
# Install certbot with DNS plugin (for wildcard certs)
apt-get install -y certbot python3-certbot-dns-cloudflare

# Create credentials file (example for Cloudflare DNS)
mkdir -p /root/.secrets
cat > /root/.secrets/cloudflare.ini << 'EOF'
dns_cloudflare_api_token = YOUR_CLOUDFLARE_API_TOKEN
EOF
chmod 600 /root/.secrets/cloudflare.ini

# Get wildcard certificate
# Each backslash must be the last character on its line to continue the command.
certbot certonly \
    --dns-cloudflare \
    --dns-cloudflare-credentials /root/.secrets/cloudflare.ini \
    -d "*.YOUR_DOMAIN" \
    -d "YOUR_DOMAIN" \
    --agree-tos \
    -m admin@YOUR_DOMAIN

# Link for Kamailio
mkdir -p /etc/kamailio/tls   # make sure the link target directory exists
ln -sf /etc/letsencrypt/live/YOUR_DOMAIN/fullchain.pem /etc/kamailio/tls/server.pem
ln -sf /etc/letsencrypt/live/YOUR_DOMAIN/privkey.pem /etc/kamailio/tls/server.key

# Link for RTPEngine (DTLS)
mkdir -p /etc/rtpengine/tls
ln -sf /etc/letsencrypt/live/YOUR_DOMAIN/fullchain.pem /etc/rtpengine/tls/cert.pem
ln -sf /etc/letsencrypt/live/YOUR_DOMAIN/privkey.pem /etc/rtpengine/tls/key.pem

# Auto-renewal hook (reload services after renewal)
# Failures are deliberately ignored so a missing service does not fail the renewal.
cat > /etc/letsencrypt/renewal-hooks/deploy/reload-voip.sh << 'SCRIPT'
#!/bin/bash
systemctl reload kamailio 2>/dev/null || true
systemctl restart rtpengine 2>/dev/null || true
SCRIPT
chmod +x /etc/letsencrypt/renewal-hooks/deploy/reload-voip.sh
# Install certbot with DNS plugin (for wildcard certs)
apt-get install -y certbot python3-certbot-dns-cloudflare

# Create credentials file (example for Cloudflare DNS)
mkdir -p /root/.secrets
cat > /root/.secrets/cloudflare.ini << 'EOF'
dns_cloudflare_api_token = YOUR_CLOUDFLARE_API_TOKEN
EOF
chmod 600 /root/.secrets/cloudflare.ini

# Get wildcard certificate
# Each backslash must be the last character on its line to continue the command.
certbot certonly \
    --dns-cloudflare \
    --dns-cloudflare-credentials /root/.secrets/cloudflare.ini \
    -d "*.YOUR_DOMAIN" \
    -d "YOUR_DOMAIN" \
    --agree-tos \
    -m admin@YOUR_DOMAIN

# Link for Kamailio
mkdir -p /etc/kamailio/tls   # make sure the link target directory exists
ln -sf /etc/letsencrypt/live/YOUR_DOMAIN/fullchain.pem /etc/kamailio/tls/server.pem
ln -sf /etc/letsencrypt/live/YOUR_DOMAIN/privkey.pem /etc/kamailio/tls/server.key

# Link for RTPEngine (DTLS)
mkdir -p /etc/rtpengine/tls
ln -sf /etc/letsencrypt/live/YOUR_DOMAIN/fullchain.pem /etc/rtpengine/tls/cert.pem
ln -sf /etc/letsencrypt/live/YOUR_DOMAIN/privkey.pem /etc/rtpengine/tls/key.pem

# Auto-renewal hook (reload services after renewal)
# Failures are deliberately ignored so a missing service does not fail the renewal.
cat > /etc/letsencrypt/renewal-hooks/deploy/reload-voip.sh << 'SCRIPT'
#!/bin/bash
systemctl reload kamailio 2>/dev/null || true
systemctl restart rtpengine 2>/dev/null || true
SCRIPT
chmod +x /etc/letsencrypt/renewal-hooks/deploy/reload-voip.sh
# Install certbot with DNS plugin (for wildcard certs)
apt-get install -y certbot python3-certbot-dns-cloudflare

# Create credentials file (example for Cloudflare DNS)
mkdir -p /root/.secrets
cat > /root/.secrets/cloudflare.ini << 'EOF'
dns_cloudflare_api_token = YOUR_CLOUDFLARE_API_TOKEN
EOF
chmod 600 /root/.secrets/cloudflare.ini

# Get wildcard certificate
# Each backslash must be the last character on its line to continue the command.
certbot certonly \
    --dns-cloudflare \
    --dns-cloudflare-credentials /root/.secrets/cloudflare.ini \
    -d "*.YOUR_DOMAIN" \
    -d "YOUR_DOMAIN" \
    --agree-tos \
    -m admin@YOUR_DOMAIN

# Link for Kamailio
mkdir -p /etc/kamailio/tls   # make sure the link target directory exists
ln -sf /etc/letsencrypt/live/YOUR_DOMAIN/fullchain.pem /etc/kamailio/tls/server.pem
ln -sf /etc/letsencrypt/live/YOUR_DOMAIN/privkey.pem /etc/kamailio/tls/server.key

# Link for RTPEngine (DTLS)
mkdir -p /etc/rtpengine/tls
ln -sf /etc/letsencrypt/live/YOUR_DOMAIN/fullchain.pem /etc/rtpengine/tls/cert.pem
ln -sf /etc/letsencrypt/live/YOUR_DOMAIN/privkey.pem /etc/rtpengine/tls/key.pem

# Auto-renewal hook (reload services after renewal)
# Failures are deliberately ignored so a missing service does not fail the renewal.
cat > /etc/letsencrypt/renewal-hooks/deploy/reload-voip.sh << 'SCRIPT'
#!/bin/bash
systemctl reload kamailio 2>/dev/null || true
systemctl restart rtpengine 2>/dev/null || true
SCRIPT
chmod +x /etc/letsencrypt/renewal-hooks/deploy/reload-voip.sh
# Listeners (already defined)
listen=tls:MY_PUBLIC_IP:8443    # WSS direct

# WebSocket module (already loaded)
loadmodule "websocket.so"
loadmodule "xhttp.so"

# xhttp event route handles the WebSocket upgrade (already defined)
event_route[xhttp:request] { ... }
# Listeners (already defined)
listen=tls:MY_PUBLIC_IP:8443    # WSS direct

# WebSocket module (already loaded)
loadmodule "websocket.so"
loadmodule "xhttp.so"

# xhttp event route handles the WebSocket upgrade (already defined)
event_route[xhttp:request] { ... }
# Listeners (already defined)
listen=tls:MY_PUBLIC_IP:8443    # WSS direct

# WebSocket module (already loaded)
loadmodule "websocket.so"
loadmodule "xhttp.so"

# xhttp event route handles the WebSocket upgrade (already defined)
event_route[xhttp:request] { ... }
# ---- WebRTC-specific handling ----
# Applies only to requests that arrived over a WebSocket transport.
if (proto == WS || proto == WSS) {
    # Force record-route with WebSocket transport
    if (is_method("INVITE|SUBSCRIBE")) {
        record_route_preset("MY_PUBLIC_IP:8443;transport=wss");
    }

    # WebRTC clients use SIP Outbound (RFC 5626)
    if (is_method("REGISTER")) {
        # Add Path header so replies find the WebSocket connection
        add_path_received();
    }
}
# ---- WebRTC-specific handling ----
# Applies only to requests that arrived over a WebSocket transport.
if (proto == WS || proto == WSS) {
    # Force record-route with WebSocket transport
    if (is_method("INVITE|SUBSCRIBE")) {
        record_route_preset("MY_PUBLIC_IP:8443;transport=wss");
    }

    # WebRTC clients use SIP Outbound (RFC 5626)
    if (is_method("REGISTER")) {
        # Add Path header so replies find the WebSocket connection
        add_path_received();
    }
}
# ---- WebRTC-specific handling ----
# Applies only to requests that arrived over a WebSocket transport.
if (proto == WS || proto == WSS) {
    # Force record-route with WebSocket transport
    if (is_method("INVITE|SUBSCRIBE")) {
        record_route_preset("MY_PUBLIC_IP:8443;transport=wss");
    }

    # WebRTC clients use SIP Outbound (RFC 5626)
    if (is_method("REGISTER")) {
        # Add Path header so replies find the WebSocket connection
        add_path_received();
    }
}
# Add to [rtpengine] section
# DTLS certificate for WebRTC
dtls-cert = /etc/rtpengine/tls/cert.pem
dtls-key = /etc/rtpengine/tls/key.pem

# Enable DTLS and ICE
ice-lite = true
# Add to [rtpengine] section
# DTLS certificate for WebRTC
dtls-cert = /etc/rtpengine/tls/cert.pem
dtls-key = /etc/rtpengine/tls/key.pem

# Enable DTLS and ICE
ice-lite = true
# Add to [rtpengine] section
# DTLS certificate for WebRTC
dtls-cert = /etc/rtpengine/tls/cert.pem
dtls-key = /etc/rtpengine/tls/key.pem

# Enable DTLS and ICE
ice-lite = true
# /etc/nginx/sites-available/webrtc-gateway
# TLS is terminated here; the WebSocket is proxied to Kamailio unencrypted.
upstream kamailio_wss {
    server YOUR_KAM1_PRIVATE:8080;          # WS (unencrypted) — Nginx handles TLS
    server YOUR_KAM2_PRIVATE:8080 backup;
}

server {
    listen 443 ssl http2;
    server_name webrtc.YOUR_DOMAIN;

    ssl_certificate     /etc/letsencrypt/live/YOUR_DOMAIN/fullchain.pem;
    ssl_certificate_key /etc/letsencrypt/live/YOUR_DOMAIN/privkey.pem;
    ssl_protocols TLSv1.2 TLSv1.3;
    ssl_ciphers HIGH:!aNULL:!MD5;

    # WebSocket proxy to Kamailio
    location /ws {
        proxy_pass http://kamailio_wss;
        proxy_http_version 1.1;
        # Both headers are required for the HTTP -> WebSocket upgrade handshake.
        proxy_set_header Upgrade $http_upgrade;
        proxy_set_header Connection "upgrade";
        proxy_set_header Host $host;
        proxy_set_header X-Real-IP $remote_addr;
        proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
        proxy_set_header X-Forwarded-Proto $scheme;
        # Long timeouts so idle WebSocket connections are not cut by the proxy.
        proxy_read_timeout 3600s;
        proxy_send_timeout 3600s;
    }

    # Serve the WebRTC web client
    location / {
        root /var/www/webrtc;
        index index.html;
    }
}
# /etc/nginx/sites-available/webrtc-gateway
# TLS is terminated here; the WebSocket is proxied to Kamailio unencrypted.
upstream kamailio_wss {
    server YOUR_KAM1_PRIVATE:8080;          # WS (unencrypted) — Nginx handles TLS
    server YOUR_KAM2_PRIVATE:8080 backup;
}

server {
    listen 443 ssl http2;
    server_name webrtc.YOUR_DOMAIN;

    ssl_certificate     /etc/letsencrypt/live/YOUR_DOMAIN/fullchain.pem;
    ssl_certificate_key /etc/letsencrypt/live/YOUR_DOMAIN/privkey.pem;
    ssl_protocols TLSv1.2 TLSv1.3;
    ssl_ciphers HIGH:!aNULL:!MD5;

    # WebSocket proxy to Kamailio
    location /ws {
        proxy_pass http://kamailio_wss;
        proxy_http_version 1.1;
        # Both headers are required for the HTTP -> WebSocket upgrade handshake.
        proxy_set_header Upgrade $http_upgrade;
        proxy_set_header Connection "upgrade";
        proxy_set_header Host $host;
        proxy_set_header X-Real-IP $remote_addr;
        proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
        proxy_set_header X-Forwarded-Proto $scheme;
        # Long timeouts so idle WebSocket connections are not cut by the proxy.
        proxy_read_timeout 3600s;
        proxy_send_timeout 3600s;
    }

    # Serve the WebRTC web client
    location / {
        root /var/www/webrtc;
        index index.html;
    }
}
# /etc/nginx/sites-available/webrtc-gateway
# TLS is terminated here; the WebSocket is proxied to Kamailio unencrypted.
upstream kamailio_wss {
    server YOUR_KAM1_PRIVATE:8080;          # WS (unencrypted) — Nginx handles TLS
    server YOUR_KAM2_PRIVATE:8080 backup;
}

server {
    listen 443 ssl http2;
    server_name webrtc.YOUR_DOMAIN;

    ssl_certificate     /etc/letsencrypt/live/YOUR_DOMAIN/fullchain.pem;
    ssl_certificate_key /etc/letsencrypt/live/YOUR_DOMAIN/privkey.pem;
    ssl_protocols TLSv1.2 TLSv1.3;
    ssl_ciphers HIGH:!aNULL:!MD5;

    # WebSocket proxy to Kamailio
    location /ws {
        proxy_pass http://kamailio_wss;
        proxy_http_version 1.1;
        # Both headers are required for the HTTP -> WebSocket upgrade handshake.
        proxy_set_header Upgrade $http_upgrade;
        proxy_set_header Connection "upgrade";
        proxy_set_header Host $host;
        proxy_set_header X-Real-IP $remote_addr;
        proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
        proxy_set_header X-Forwarded-Proto $scheme;
        # Long timeouts so idle WebSocket connections are not cut by the proxy.
        proxy_read_timeout 3600s;
        proxy_send_timeout 3600s;
    }

    # Serve the WebRTC web client
    location / {
        root /var/www/webrtc;
        index index.html;
    }
}
<!DOCTYPE html>
<html lang="en">
<head>
  <meta charset="UTF-8">
  <meta name="viewport" content="width=device-width, initial-scale=1.0">
  <title>WebRTC Phone</title>
  <!-- NOTE: the SIP.js version in this URL was lost when the page was extracted.
       Replace YOUR_SIPJS_VERSION with the release you have tested, and confirm
       the file exposes the global `SIP` object used below. -->
  <script src="https://cdn.jsdelivr.net/npm/sip.js@YOUR_SIPJS_VERSION/lib/platform/web/sip.js"></script>
  <style>
    body { font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', sans-serif; max-width: 500px; margin: 50px auto; padding: 20px; background: #1a1a2e; color: #e0e0e0; }
    h1 { color: #00d4ff; text-align: center; }
    .status { text-align: center; padding: 10px; margin: 20px 0; border-radius: 8px; background: #16213e; }
    .status.connected { border-left: 4px solid #00ff88; }
    .status.disconnected { border-left: 4px solid #ff4444; }
    .status.calling { border-left: 4px solid #ffaa00; }
    input, button { width: 100%; padding: 12px; margin: 5px 0; border: none; border-radius: 6px; font-size: 16px; box-sizing: border-box; }
    input { background: #16213e; color: #e0e0e0; border: 1px solid #333; }
    button { cursor: pointer; font-weight: bold; }
    .btn-call { background: #00ff88; color: #000; }
    .btn-hangup { background: #ff4444; color: #fff; }
    .btn-answer { background: #00d4ff; color: #000; }
    .btn-register { background: #9b59b6; color: #fff; }
    button:hover { opacity: 0.9; }
    button:disabled { opacity: 0.4; cursor: not-allowed; }
    .controls { margin: 20px 0; }
    audio { display: none; }
  </style>
</head>
<body>
  <h1>WebRTC Phone</h1>
  <div id="status" class="status disconnected">Disconnected</div>

  <div class="controls">
    <input type="text" id="server" placeholder="WSS Server" value="wss://webrtc.YOUR_DOMAIN/ws">
    <input type="text" id="username" placeholder="SIP Username (e.g., 1001)">
    <input type="password" id="password" placeholder="SIP Password">
    <input type="text" id="domain" placeholder="SIP Domain" value="YOUR_DOMAIN">
    <button class="btn-register" onclick="doRegister()">Register</button>
  </div>

  <div class="controls">
    <input type="text" id="target" placeholder="Number to call">
    <button class="btn-call" id="btnCall" onclick="doCall()" disabled>Call</button>
    <button class="btn-answer" id="btnAnswer" onclick="doAnswer()" disabled>Answer</button>
    <button class="btn-hangup" id="btnHangup" onclick="doHangup()" disabled>Hang Up</button>
  </div>

  <audio id="remoteAudio" autoplay></audio>

  <script>
    let userAgent = null;
    let registerer = null;
    let currentSession = null;   // the single active or ringing call, if any

    // Show a status message and switch the banner's colour class.
    function setStatus(text, className) {
      const el = document.getElementById('status');
      el.textContent = text;
      el.className = 'status ' + className;
    }

    // Connect to the WSS server and register with the credentials in the form.
    async function doRegister() {
      const server = document.getElementById('server').value;
      const username = document.getElementById('username').value;
      const password = document.getElementById('password').value;
      const domain = document.getElementById('domain').value;

      // makeURI returns undefined for unparsable input (e.g. empty username).
      const uri = SIP.UserAgent.makeURI(`sip:${username}@${domain}`);
      if (!uri) {
        setStatus('Invalid SIP username or domain', 'disconnected');
        return;
      }

      const transportOptions = { server: server, traceSip: true };

      userAgent = new SIP.UserAgent({
        uri: uri,
        transportOptions: transportOptions,
        authorizationUsername: username,
        authorizationPassword: password,
        displayName: username,
        delegate: {
          onInvite: (invitation) => {
            // Only one call at a time: refuse a second call instead of
            // silently replacing the active session.
            if (currentSession) {
              invitation.reject();
              return;
            }
            currentSession = invitation;
            // Listen from the start so a caller-side cancel resets the UI
            // even if the call is never answered.
            setupSessionListeners(invitation);
            const caller = invitation.remoteIdentity.displayName || invitation.remoteIdentity.uri.user;
            setStatus('Incoming call from ' + caller, 'calling');
            document.getElementById('btnAnswer').disabled = false;
            document.getElementById('btnHangup').disabled = false;
          }
        }
      });

      try {
        await userAgent.start();

        registerer = new SIP.Registerer(userAgent);
        registerer.stateChange.addListener((state) => {
          switch (state) {
            case SIP.RegistererState.Registered:
              setStatus('Registered as ' + username, 'connected');
              document.getElementById('btnCall').disabled = false;
              break;
            case SIP.RegistererState.Unregistered:
              setStatus('Unregistered', 'disconnected');
              document.getElementById('btnCall').disabled = true;
              break;
          }
        });

        await registerer.register();
      } catch (err) {
        // Surface connection/registration failures instead of leaving an
        // unhandled promise rejection and a stale status line.
        console.error('Registration failed', err);
        setStatus('Registration failed', 'disconnected');
      }
    }

    // Place an audio-only call to the number in the "target" field.
    async function doCall() {
      const target = document.getElementById('target').value;
      const domain = document.getElementById('domain').value;
      if (!target || !userAgent) return;

      const targetURI = SIP.UserAgent.makeURI(`sip:${target}@${domain}`);
      if (!targetURI) {
        alert('Invalid target');
        return;
      }

      const inviter = new SIP.Inviter(userAgent, targetURI, {
        sessionDescriptionHandlerOptions: {
          constraints: { audio: true, video: false }
        }
      });

      currentSession = inviter;
      setupSessionListeners(inviter);
      setStatus('Calling ' + target + '...', 'calling');
      document.getElementById('btnHangup').disabled = false;
      document.getElementById('btnCall').disabled = true;

      try {
        await inviter.invite();
      } catch (err) {
        // e.g. microphone permission denied or transport down
        console.error('Call failed', err);
        setStatus('Call failed', 'disconnected');
        resetCallUI();
      }
    }

    // Accept the ringing incoming call (audio only).
    async function doAnswer() {
      if (!currentSession) return;
      document.getElementById('btnAnswer').disabled = true;
      try {
        // State listeners were attached in onInvite; the Established
        // transition updates the status and attaches the remote audio.
        await currentSession.accept({
          sessionDescriptionHandlerOptions: {
            constraints: { audio: true, video: false }
          }
        });
      } catch (err) {
        console.error('Answer failed', err);
        setStatus('Could not answer call', 'disconnected');
        resetCallUI();
      }
    }

    // End the current call using the operation valid for its state.
    function doHangup() {
      if (!currentSession) return;

      switch (currentSession.state) {
        case SIP.SessionState.Initial:
        case SIP.SessionState.Establishing:
          if (currentSession instanceof SIP.Inviter) {
            currentSession.cancel();   // outgoing call not yet answered
          } else {
            currentSession.reject();   // incoming call not yet answered
          }
          break;
        case SIP.SessionState.Established:
          currentSession.bye();
          break;
      }
      resetCallUI();
    }

    // Keep the UI in step with the session's state changes.
    function setupSessionListeners(session) {
      session.stateChange.addListener((state) => {
        switch (state) {
          case SIP.SessionState.Established: {
            setStatus('In call', 'connected');
            // Attach remote audio
            const remoteStream = new MediaStream();
            session.sessionDescriptionHandler.peerConnection
              .getReceivers()
              .forEach((receiver) => {
                if (receiver.track) {
                  remoteStream.addTrack(receiver.track);
                }
              });
            document.getElementById('remoteAudio').srcObject = remoteStream;
            break;
          }
          case SIP.SessionState.Terminated:
            // Ignore a stale session ending after a newer call has started.
            if (currentSession && currentSession !== session) break;
            setStatus('Call ended', 'disconnected');
            resetCallUI();
            break;
        }
      });
    }

    // Return the call controls to their idle state.
    function resetCallUI() {
      currentSession = null;
      document.getElementById('btnCall').disabled = false;
      document.getElementById('btnAnswer').disabled = true;
      document.getElementById('btnHangup').disabled = true;
      document.getElementById('remoteAudio').srcObject = null;
      setTimeout(() => setStatus('Registered', 'connected'), 2000);
    }
  </script>
</body>
</html>
<!DOCTYPE html>
<html lang="en">
<head>
  <meta charset="UTF-8">
  <meta name="viewport" content="width=device-width, initial-scale=1.0">
  <title>WebRTC Phone</title>
  <!-- NOTE: the SIP.js version in this URL was lost when the page was extracted.
       Replace YOUR_SIPJS_VERSION with the release you have tested, and confirm
       the file exposes the global `SIP` object used below. -->
  <script src="https://cdn.jsdelivr.net/npm/sip.js@YOUR_SIPJS_VERSION/lib/platform/web/sip.js"></script>
  <style>
    body { font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', sans-serif; max-width: 500px; margin: 50px auto; padding: 20px; background: #1a1a2e; color: #e0e0e0; }
    h1 { color: #00d4ff; text-align: center; }
    .status { text-align: center; padding: 10px; margin: 20px 0; border-radius: 8px; background: #16213e; }
    .status.connected { border-left: 4px solid #00ff88; }
    .status.disconnected { border-left: 4px solid #ff4444; }
    .status.calling { border-left: 4px solid #ffaa00; }
    input, button { width: 100%; padding: 12px; margin: 5px 0; border: none; border-radius: 6px; font-size: 16px; box-sizing: border-box; }
    input { background: #16213e; color: #e0e0e0; border: 1px solid #333; }
    button { cursor: pointer; font-weight: bold; }
    .btn-call { background: #00ff88; color: #000; }
    .btn-hangup { background: #ff4444; color: #fff; }
    .btn-answer { background: #00d4ff; color: #000; }
    .btn-register { background: #9b59b6; color: #fff; }
    button:hover { opacity: 0.9; }
    button:disabled { opacity: 0.4; cursor: not-allowed; }
    .controls { margin: 20px 0; }
    audio { display: none; }
  </style>
</head>
<body>
  <h1>WebRTC Phone</h1>
  <div id="status" class="status disconnected">Disconnected</div>

  <div class="controls">
    <input type="text" id="server" placeholder="WSS Server" value="wss://webrtc.YOUR_DOMAIN/ws">
    <input type="text" id="username" placeholder="SIP Username (e.g., 1001)">
    <input type="password" id="password" placeholder="SIP Password">
    <input type="text" id="domain" placeholder="SIP Domain" value="YOUR_DOMAIN">
    <button class="btn-register" onclick="doRegister()">Register</button>
  </div>

  <div class="controls">
    <input type="text" id="target" placeholder="Number to call">
    <button class="btn-call" id="btnCall" onclick="doCall()" disabled>Call</button>
    <button class="btn-answer" id="btnAnswer" onclick="doAnswer()" disabled>Answer</button>
    <button class="btn-hangup" id="btnHangup" onclick="doHangup()" disabled>Hang Up</button>
  </div>

  <audio id="remoteAudio" autoplay></audio>

  <script>
    let userAgent = null;
    let registerer = null;
    let currentSession = null;   // the single active or ringing call, if any

    // Show a status message and switch the banner's colour class.
    function setStatus(text, className) {
      const el = document.getElementById('status');
      el.textContent = text;
      el.className = 'status ' + className;
    }

    // Connect to the WSS server and register with the credentials in the form.
    async function doRegister() {
      const server = document.getElementById('server').value;
      const username = document.getElementById('username').value;
      const password = document.getElementById('password').value;
      const domain = document.getElementById('domain').value;

      // makeURI returns undefined for unparsable input (e.g. empty username).
      const uri = SIP.UserAgent.makeURI(`sip:${username}@${domain}`);
      if (!uri) {
        setStatus('Invalid SIP username or domain', 'disconnected');
        return;
      }

      const transportOptions = { server: server, traceSip: true };

      userAgent = new SIP.UserAgent({
        uri: uri,
        transportOptions: transportOptions,
        authorizationUsername: username,
        authorizationPassword: password,
        displayName: username,
        delegate: {
          onInvite: (invitation) => {
            // Only one call at a time: refuse a second call instead of
            // silently replacing the active session.
            if (currentSession) {
              invitation.reject();
              return;
            }
            currentSession = invitation;
            // Listen from the start so a caller-side cancel resets the UI
            // even if the call is never answered.
            setupSessionListeners(invitation);
            const caller = invitation.remoteIdentity.displayName || invitation.remoteIdentity.uri.user;
            setStatus('Incoming call from ' + caller, 'calling');
            document.getElementById('btnAnswer').disabled = false;
            document.getElementById('btnHangup').disabled = false;
          }
        }
      });

      try {
        await userAgent.start();

        registerer = new SIP.Registerer(userAgent);
        registerer.stateChange.addListener((state) => {
          switch (state) {
            case SIP.RegistererState.Registered:
              setStatus('Registered as ' + username, 'connected');
              document.getElementById('btnCall').disabled = false;
              break;
            case SIP.RegistererState.Unregistered:
              setStatus('Unregistered', 'disconnected');
              document.getElementById('btnCall').disabled = true;
              break;
          }
        });

        await registerer.register();
      } catch (err) {
        // Surface connection/registration failures instead of leaving an
        // unhandled promise rejection and a stale status line.
        console.error('Registration failed', err);
        setStatus('Registration failed', 'disconnected');
      }
    }

    // Place an audio-only call to the number in the "target" field.
    async function doCall() {
      const target = document.getElementById('target').value;
      const domain = document.getElementById('domain').value;
      if (!target || !userAgent) return;

      const targetURI = SIP.UserAgent.makeURI(`sip:${target}@${domain}`);
      if (!targetURI) {
        alert('Invalid target');
        return;
      }

      const inviter = new SIP.Inviter(userAgent, targetURI, {
        sessionDescriptionHandlerOptions: {
          constraints: { audio: true, video: false }
        }
      });

      currentSession = inviter;
      setupSessionListeners(inviter);
      setStatus('Calling ' + target + '...', 'calling');
      document.getElementById('btnHangup').disabled = false;
      document.getElementById('btnCall').disabled = true;

      try {
        await inviter.invite();
      } catch (err) {
        // e.g. microphone permission denied or transport down
        console.error('Call failed', err);
        setStatus('Call failed', 'disconnected');
        resetCallUI();
      }
    }

    // Accept the ringing incoming call (audio only).
    async function doAnswer() {
      if (!currentSession) return;
      document.getElementById('btnAnswer').disabled = true;
      try {
        // State listeners were attached in onInvite; the Established
        // transition updates the status and attaches the remote audio.
        await currentSession.accept({
          sessionDescriptionHandlerOptions: {
            constraints: { audio: true, video: false }
          }
        });
      } catch (err) {
        console.error('Answer failed', err);
        setStatus('Could not answer call', 'disconnected');
        resetCallUI();
      }
    }

    // End the current call using the operation valid for its state.
    function doHangup() {
      if (!currentSession) return;

      switch (currentSession.state) {
        case SIP.SessionState.Initial:
        case SIP.SessionState.Establishing:
          if (currentSession instanceof SIP.Inviter) {
            currentSession.cancel();   // outgoing call not yet answered
          } else {
            currentSession.reject();   // incoming call not yet answered
          }
          break;
        case SIP.SessionState.Established:
          currentSession.bye();
          break;
      }
      resetCallUI();
    }

    // Keep the UI in step with the session's state changes.
    function setupSessionListeners(session) {
      session.stateChange.addListener((state) => {
        switch (state) {
          case SIP.SessionState.Established: {
            setStatus('In call', 'connected');
            // Attach remote audio
            const remoteStream = new MediaStream();
            session.sessionDescriptionHandler.peerConnection
              .getReceivers()
              .forEach((receiver) => {
                if (receiver.track) {
                  remoteStream.addTrack(receiver.track);
                }
              });
            document.getElementById('remoteAudio').srcObject = remoteStream;
            break;
          }
          case SIP.SessionState.Terminated:
            // Ignore a stale session ending after a newer call has started.
            if (currentSession && currentSession !== session) break;
            setStatus('Call ended', 'disconnected');
            resetCallUI();
            break;
        }
      });
    }

    // Return the call controls to their idle state.
    function resetCallUI() {
      currentSession = null;
      document.getElementById('btnCall').disabled = false;
      document.getElementById('btnAnswer').disabled = true;
      document.getElementById('btnHangup').disabled = true;
      document.getElementById('remoteAudio').srcObject = null;
      setTimeout(() => setStatus('Registered', 'connected'), 2000);
    }
  </script>
</body>
</html>
<!DOCTYPE html>
<!--
  Minimal browser softphone built on SIP.js: registers over secure WebSocket,
  places and answers audio-only calls, and plays remote audio through a hidden
  <audio> element.
-->
<html lang="en">
<head>
  <meta charset="UTF-8">
  <meta name="viewport" content="width=device-width, initial-scale=1.0">
  <title>WebRTC Phone</title>
  <!-- NOTE(review): the package version was obscured in the text this page was
       recovered from; 0.21.2 is assumed. Confirm the version and bundle path. -->
  <script src="https://cdn.jsdelivr.net/npm/sip.js@0.21.2/lib/platform/web/sip.js"></script>
  <style>
    body {
      font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', sans-serif;
      max-width: 500px;
      margin: 50px auto;
      padding: 20px;
      background: #1a1a2e;
      color: #e0e0e0;
    }
    h1 { color: #00d4ff; text-align: center; }
    .status {
      text-align: center;
      padding: 10px;
      margin: 20px 0;
      border-radius: 8px;
      background: #16213e;
    }
    .status.connected { border-left: 4px solid #00ff88; }
    .status.disconnected { border-left: 4px solid #ff4444; }
    .status.calling { border-left: 4px solid #ffaa00; }
    input, button {
      width: 100%;
      padding: 12px;
      margin: 5px 0;
      border: none;
      border-radius: 6px;
      font-size: 16px;
      box-sizing: border-box;
    }
    input { background: #16213e; color: #e0e0e0; border: 1px solid #333; }
    button { cursor: pointer; font-weight: bold; }
    .btn-call { background: #00ff88; color: #000; }
    .btn-hangup { background: #ff4444; color: #fff; }
    .btn-answer { background: #00d4ff; color: #000; }
    .btn-register { background: #9b59b6; color: #fff; }
    button:hover { opacity: 0.9; }
    button:disabled { opacity: 0.4; cursor: not-allowed; }
    .controls { margin: 20px 0; }
    audio { display: none; }
  </style>
</head>
<body>
  <h1>WebRTC Phone</h1>
  <div id="status" class="status disconnected">Disconnected</div>

  <div class="controls">
    <input type="text" id="server" placeholder="WSS Server" value="wss://webrtc.YOUR_DOMAIN/ws">
    <input type="text" id="username" placeholder="SIP Username (e.g., 1001)">
    <input type="password" id="password" placeholder="SIP Password">
    <input type="text" id="domain" placeholder="SIP Domain" value="YOUR_DOMAIN">
    <button class="btn-register" onclick="doRegister()">Register</button>
  </div>

  <div class="controls">
    <input type="text" id="target" placeholder="Number to call">
    <button class="btn-call" id="btnCall" onclick="doCall()" disabled>Call</button>
    <button class="btn-answer" id="btnAnswer" onclick="doAnswer()" disabled>Answer</button>
    <button class="btn-hangup" id="btnHangup" onclick="doHangup()" disabled>Hang Up</button>
  </div>

  <audio id="remoteAudio" autoplay></audio>

  <script>
    let userAgent = null;       // SIP.UserAgent, created by doRegister()
    let registerer = null;      // SIP.Registerer bound to userAgent
    let currentSession = null;  // Active Inviter (outgoing) or Invitation (incoming)

    // Show `text` in the status bar and switch its state class
    // ('connected', 'disconnected' or 'calling').
    function setStatus(text, className) {
      const el = document.getElementById('status');
      el.textContent = text;
      el.className = 'status ' + className;
    }

    // Create the user agent from the form fields, connect and REGISTER.
    async function doRegister() {
      const server = document.getElementById('server').value;
      const username = document.getElementById('username').value;
      const password = document.getElementById('password').value;
      const domain = document.getElementById('domain').value;

      const uri = SIP.UserAgent.makeURI(`sip:${username}@${domain}`);
      if (!uri) {
        alert('Invalid SIP username or domain');
        return;
      }
      const transportOptions = { server: server, traceSip: true };

      userAgent = new SIP.UserAgent({
        uri: uri,
        transportOptions: transportOptions,
        authorizationUsername: username,
        authorizationPassword: password,
        displayName: username,
        delegate: {
          onInvite: (invitation) => {
            currentSession = invitation;
            // Attach listeners right away so that a caller-side cancel resets
            // the UI and the Established transition is not missed on answer.
            setupSessionListeners(invitation);
            const caller = invitation.remoteIdentity.displayName
              || invitation.remoteIdentity.uri.user;
            setStatus('Incoming call from ' + caller, 'calling');
            document.getElementById('btnAnswer').disabled = false;
            document.getElementById('btnHangup').disabled = false;
          }
        }
      });

      await userAgent.start();

      registerer = new SIP.Registerer(userAgent);
      registerer.stateChange.addListener((state) => {
        switch (state) {
          case SIP.RegistererState.Registered:
            setStatus('Registered as ' + username, 'connected');
            document.getElementById('btnCall').disabled = false;
            break;
          case SIP.RegistererState.Unregistered:
            setStatus('Unregistered', 'disconnected');
            document.getElementById('btnCall').disabled = true;
            break;
        }
      });
      await registerer.register();
    }

    // Place an audio-only call to the number in the "target" field.
    async function doCall() {
      const target = document.getElementById('target').value;
      const domain = document.getElementById('domain').value;
      if (!target || !userAgent) return;

      const targetURI = SIP.UserAgent.makeURI(`sip:${target}@${domain}`);
      if (!targetURI) {
        alert('Invalid target');
        return;
      }

      const inviter = new SIP.Inviter(userAgent, targetURI, {
        sessionDescriptionHandlerOptions: {
          constraints: { audio: true, video: false }
        }
      });
      currentSession = inviter;
      setupSessionListeners(inviter);

      setStatus('Calling ' + target + '...', 'calling');
      document.getElementById('btnHangup').disabled = false;
      document.getElementById('btnCall').disabled = true;

      await inviter.invite();
    }

    // Accept the pending incoming call (audio only).
    // Session listeners were already attached in onInvite.
    async function doAnswer() {
      if (!currentSession) return;
      await currentSession.accept({
        sessionDescriptionHandlerOptions: {
          constraints: { audio: true, video: false }
        }
      });
      setStatus('In call', 'connected');
      document.getElementById('btnAnswer').disabled = true;
    }

    // End the current call with the request that matches its state:
    // CANCEL/reject before it is established, BYE afterwards.
    function doHangup() {
      if (!currentSession) return;
      switch (currentSession.state) {
        case SIP.SessionState.Initial:
        case SIP.SessionState.Establishing:
          if (currentSession instanceof SIP.Inviter) {
            currentSession.cancel();
          } else {
            currentSession.reject();
          }
          break;
        case SIP.SessionState.Established:
          currentSession.bye();
          break;
      }
      resetCallUI();
    }

    // Track session state: attach remote audio once established,
    // reset the UI once terminated.
    function setupSessionListeners(session) {
      session.stateChange.addListener((state) => {
        switch (state) {
          case SIP.SessionState.Established: {
            setStatus('In call', 'connected');
            // Attach remote audio
            const remoteStream = new MediaStream();
            session.sessionDescriptionHandler.peerConnection
              .getReceivers()
              .forEach((receiver) => {
                if (receiver.track) {
                  remoteStream.addTrack(receiver.track);
                }
              });
            document.getElementById('remoteAudio').srcObject = remoteStream;
            break;
          }
          case SIP.SessionState.Terminated:
            setStatus('Call ended', 'disconnected');
            resetCallUI();
            break;
        }
      });
    }

    // Return buttons and audio element to the idle state; the status line
    // goes back to "Registered" after a short delay.
    function resetCallUI() {
      currentSession = null;
      document.getElementById('btnCall').disabled = false;
      document.getElementById('btnAnswer').disabled = true;
      document.getElementById('btnHangup').disabled = true;
      document.getElementById('remoteAudio').srcObject = null;
      setTimeout(() => setStatus('Registered', 'connected'), 2000);
    }
  </script>
</body>
</html>
# On Kamailio — watch WebSocket connections
kamcmd ws.dump

# On RTPEngine — check DTLS sessions
rtpengine-ctl list sessions

# On RTPEngine — verify DTLS is working
# Look for "DTLS" in the session details
rtpengine-ctl list totals

# Browser — check WebRTC internals
# Chrome: chrome://webrtc-internals/
# Firefox: about:webrtc
# On Kamailio — watch WebSocket connections
kamcmd ws.dump

# On RTPEngine — check DTLS sessions
rtpengine-ctl list sessions

# On RTPEngine — verify DTLS is working
# Look for "DTLS" in the session details
rtpengine-ctl list totals

# Browser — check WebRTC internals
# Chrome: chrome://webrtc-internals/
# Firefox: about:webrtc
# On Kamailio — watch WebSocket connections
kamcmd ws.dump

# On RTPEngine — check DTLS sessions
rtpengine-ctl list sessions

# On RTPEngine — verify DTLS is working
# Look for "DTLS" in the session details
rtpengine-ctl list totals

# Browser — check WebRTC internals
# Chrome: chrome://webrtc-internals/
# Firefox: about:webrtc
Normal operation:
  VIP (YOUR_PUBLIC_VIP) → Kamailio-A (active)
                          Kamailio-B (standby, idle)

After Kamailio-A failure:
  VIP (YOUR_PUBLIC_VIP) → Kamailio-B (now active)
                          Kamailio-A (down)

Failover time: 3-6 seconds (VRRP advertisement interval + detection)
Normal operation:
  VIP (YOUR_PUBLIC_VIP) → Kamailio-A (active)
                          Kamailio-B (standby, idle)

After Kamailio-A failure:
  VIP (YOUR_PUBLIC_VIP) → Kamailio-B (now active)
                          Kamailio-A (down)

Failover time: 3-6 seconds (VRRP advertisement interval + detection)
Normal operation:
  VIP (YOUR_PUBLIC_VIP) → Kamailio-A (active)
                          Kamailio-B (standby, idle)

After Kamailio-A failure:
  VIP (YOUR_PUBLIC_VIP) → Kamailio-B (now active)
                          Kamailio-A (down)

Failover time: 3-6 seconds (VRRP advertisement interval + detection)
# On both kam01 and kam02
apt-get install -y keepalived
# On both kam01 and kam02
apt-get install -y keepalived
# On both kam01 and kam02
apt-get install -y keepalived
#!/bin/bash
#
# Kamailio health check for Keepalived
# Returns 0 (healthy) or 1 (unhealthy)
# Tests actual SIP responsiveness, not just process existence
#

# Check 1: Is the process running?
if ! pgrep -x kamailio > /dev/null 2>&1; then
    echo "FAIL: Kamailio process not running"
    exit 1
fi

# Check 2: Can it respond to SIP OPTIONS?
# Send OPTIONS to localhost and expect a response within 2 seconds
# NOTE(review): the target URI was obscured in the recovered text; the user
# part "keepalive" is an assumption, the host is localhost per the line above.
if ! sipsak -s sip:keepalive@127.0.0.1:5060 -v --timeout 2 > /dev/null 2>&1; then
    echo "FAIL: Kamailio not responding to SIP OPTIONS"
    exit 1
fi

# Check 3: Check that the control socket is responsive
if ! kamcmd core.uptime > /dev/null 2>&1; then
    echo "FAIL: Kamailio RPC not responding"
    exit 1
fi

# Check 4: Verify at least one dispatcher destination is active
ACTIVE=$(kamcmd dispatcher.list 2>/dev/null | grep -c "FLAGS: AP")
ACTIVE=${ACTIVE:-0}
if [ "$ACTIVE" -eq 0 ]; then
    echo "WARN: No active dispatcher destinations (not failing over for this)"
    # Don't fail for this — it might be a temporary condition
    # and failing over won't help if all FS servers are down
fi

echo "OK: Kamailio healthy (${ACTIVE} active dispatchers)"
exit 0
#!/bin/bash
#
# Kamailio health check for Keepalived
# Returns 0 (healthy) or 1 (unhealthy)
# Tests actual SIP responsiveness, not just process existence
#

# Check 1: Is the process running?
if ! pgrep -x kamailio > /dev/null 2>&1; then
    echo "FAIL: Kamailio process not running"
    exit 1
fi

# Check 2: Can it respond to SIP OPTIONS?
# Send OPTIONS to localhost and expect a response within 2 seconds
# NOTE(review): the target URI was obscured in the recovered text; the user
# part "keepalive" is an assumption, the host is localhost per the line above.
if ! sipsak -s sip:keepalive@127.0.0.1:5060 -v --timeout 2 > /dev/null 2>&1; then
    echo "FAIL: Kamailio not responding to SIP OPTIONS"
    exit 1
fi

# Check 3: Check that the control socket is responsive
if ! kamcmd core.uptime > /dev/null 2>&1; then
    echo "FAIL: Kamailio RPC not responding"
    exit 1
fi

# Check 4: Verify at least one dispatcher destination is active
ACTIVE=$(kamcmd dispatcher.list 2>/dev/null | grep -c "FLAGS: AP")
ACTIVE=${ACTIVE:-0}
if [ "$ACTIVE" -eq 0 ]; then
    echo "WARN: No active dispatcher destinations (not failing over for this)"
    # Don't fail for this — it might be a temporary condition
    # and failing over won't help if all FS servers are down
fi

echo "OK: Kamailio healthy (${ACTIVE} active dispatchers)"
exit 0
#!/bin/bash
#
# Kamailio health check for Keepalived
# Returns 0 (healthy) or 1 (unhealthy)
# Tests actual SIP responsiveness, not just process existence
#

# Check 1: Is the process running?
if ! pgrep -x kamailio > /dev/null 2>&1; then
    echo "FAIL: Kamailio process not running"
    exit 1
fi

# Check 2: Can it respond to SIP OPTIONS?
# Send OPTIONS to localhost and expect a response within 2 seconds
# NOTE(review): the target URI was obscured in the recovered text; the user
# part "keepalive" is an assumption, the host is localhost per the line above.
if ! sipsak -s sip:keepalive@127.0.0.1:5060 -v --timeout 2 > /dev/null 2>&1; then
    echo "FAIL: Kamailio not responding to SIP OPTIONS"
    exit 1
fi

# Check 3: Check that the control socket is responsive
if ! kamcmd core.uptime > /dev/null 2>&1; then
    echo "FAIL: Kamailio RPC not responding"
    exit 1
fi

# Check 4: Verify at least one dispatcher destination is active
ACTIVE=$(kamcmd dispatcher.list 2>/dev/null | grep -c "FLAGS: AP")
ACTIVE=${ACTIVE:-0}
if [ "$ACTIVE" -eq 0 ]; then
    echo "WARN: No active dispatcher destinations (not failing over for this)"
    # Don't fail for this — it might be a temporary condition
    # and failing over won't help if all FS servers are down
fi

echo "OK: Kamailio healthy (${ACTIVE} active dispatchers)"
exit 0
# Make the health check executable and install the SIP probe it relies on
chmod +x /etc/keepalived/check_kamailio.sh
apt-get install -y sipsak    # Needed for the health check
# Make the health check executable and install the SIP probe it relies on
chmod +x /etc/keepalived/check_kamailio.sh
apt-get install -y sipsak    # Needed for the health check
# Make the health check executable and install the SIP probe it relies on
chmod +x /etc/keepalived/check_kamailio.sh
apt-get install -y sipsak    # Needed for the health check
# /etc/keepalived/keepalived.conf — Kamailio-A (MASTER)

global_defs {
    router_id KAM01
    script_user root
    enable_script_security

    # Notification emails (optional)
    # notification_email {
    #     admin@YOUR_DOMAIN
    # }
    # notification_email_from keepalived@kam01
    # smtp_server localhost
}

# Health check script
vrrp_script check_kamailio {
    script "/etc/keepalived/check_kamailio.sh"
    interval 3      # Check every 3 seconds
    weight -20      # Subtract 20 from priority on failure
    fall 2          # 2 consecutive failures = unhealthy
    rise 2          # 2 consecutive successes = healthy
}

# VRRP instance for SIP VIP
vrrp_instance VI_SIP {
    state MASTER
    interface eth0              # Change to your network interface
    virtual_router_id 51        # Must be same on both nodes
    priority 100                # Higher = preferred (kam01 is preferred)
    advert_int 1                # VRRP advertisement every 1 second

    authentication {
        auth_type PASS
        auth_pass YOUR_VRRP_PASSWORD    # Same on both nodes
    }

    virtual_ipaddress {
        YOUR_PUBLIC_VIP/32 dev eth0     # The floating VIP
    }

    track_script {
        check_kamailio
    }

    # Notify scripts (optional — for logging/alerting)
    notify_master "/bin/bash -c 'logger -t keepalived MASTER — VIP acquired on kam01'"
    notify_backup "/bin/bash -c 'logger -t keepalived BACKUP — VIP released on kam01'"
    notify_fault "/bin/bash -c 'logger -t keepalived FAULT — health check failing on kam01'"
}
# /etc/keepalived/keepalived.conf — Kamailio-A (MASTER)

global_defs {
    router_id KAM01
    script_user root
    enable_script_security

    # Notification emails (optional)
    # notification_email {
    #     admin@YOUR_DOMAIN
    # }
    # notification_email_from keepalived@kam01
    # smtp_server localhost
}

# Health check script
vrrp_script check_kamailio {
    script "/etc/keepalived/check_kamailio.sh"
    interval 3      # Check every 3 seconds
    weight -20      # Subtract 20 from priority on failure
    fall 2          # 2 consecutive failures = unhealthy
    rise 2          # 2 consecutive successes = healthy
}

# VRRP instance for SIP VIP
vrrp_instance VI_SIP {
    state MASTER
    interface eth0              # Change to your network interface
    virtual_router_id 51        # Must be same on both nodes
    priority 100                # Higher = preferred (kam01 is preferred)
    advert_int 1                # VRRP advertisement every 1 second

    authentication {
        auth_type PASS
        auth_pass YOUR_VRRP_PASSWORD    # Same on both nodes
    }

    virtual_ipaddress {
        YOUR_PUBLIC_VIP/32 dev eth0     # The floating VIP
    }

    track_script {
        check_kamailio
    }

    # Notify scripts (optional — for logging/alerting)
    notify_master "/bin/bash -c 'logger -t keepalived MASTER — VIP acquired on kam01'"
    notify_backup "/bin/bash -c 'logger -t keepalived BACKUP — VIP released on kam01'"
    notify_fault "/bin/bash -c 'logger -t keepalived FAULT — health check failing on kam01'"
}
# /etc/keepalived/keepalived.conf — Kamailio-A (MASTER)

global_defs {
    router_id KAM01
    script_user root
    enable_script_security

    # Notification emails (optional)
    # notification_email {
    #     admin@YOUR_DOMAIN
    # }
    # notification_email_from keepalived@kam01
    # smtp_server localhost
}

# Health check script
vrrp_script check_kamailio {
    script "/etc/keepalived/check_kamailio.sh"
    interval 3      # Check every 3 seconds
    weight -20      # Subtract 20 from priority on failure
    fall 2          # 2 consecutive failures = unhealthy
    rise 2          # 2 consecutive successes = healthy
}

# VRRP instance for SIP VIP
vrrp_instance VI_SIP {
    state MASTER
    interface eth0              # Change to your network interface
    virtual_router_id 51        # Must be same on both nodes
    priority 100                # Higher = preferred (kam01 is preferred)
    advert_int 1                # VRRP advertisement every 1 second

    authentication {
        auth_type PASS
        auth_pass YOUR_VRRP_PASSWORD    # Same on both nodes
    }

    virtual_ipaddress {
        YOUR_PUBLIC_VIP/32 dev eth0     # The floating VIP
    }

    track_script {
        check_kamailio
    }

    # Notify scripts (optional — for logging/alerting)
    notify_master "/bin/bash -c 'logger -t keepalived MASTER — VIP acquired on kam01'"
    notify_backup "/bin/bash -c 'logger -t keepalived BACKUP — VIP released on kam01'"
    notify_fault "/bin/bash -c 'logger -t keepalived FAULT — health check failing on kam01'"
}
# /etc/keepalived/keepalived.conf — Kamailio-B (BACKUP)

global_defs {
    router_id KAM02
    script_user root
    enable_script_security
}

# Health check script (same script and thresholds as on kam01)
vrrp_script check_kamailio {
    script "/etc/keepalived/check_kamailio.sh"
    interval 3
    weight -20
    fall 2
    rise 2
}

vrrp_instance VI_SIP {
    state BACKUP                # <-- BACKUP (not MASTER)
    interface eth0
    virtual_router_id 51        # Must match kam01
    priority 90                 # <-- Lower priority (kam01 preferred)
    advert_int 1

    authentication {
        auth_type PASS
        auth_pass YOUR_VRRP_PASSWORD    # Must match kam01
    }

    virtual_ipaddress {
        YOUR_PUBLIC_VIP/32 dev eth0
    }

    track_script {
        check_kamailio
    }

    notify_master "/bin/bash -c 'logger -t keepalived MASTER — VIP acquired on kam02'"
    notify_backup "/bin/bash -c 'logger -t keepalived BACKUP — VIP released on kam02'"
    notify_fault "/bin/bash -c 'logger -t keepalived FAULT — health check failing on kam02'"
}
# /etc/keepalived/keepalived.conf — Kamailio-B (BACKUP)

global_defs {
    router_id KAM02
    script_user root
    enable_script_security
}

# Health check script (same script and thresholds as on kam01)
vrrp_script check_kamailio {
    script "/etc/keepalived/check_kamailio.sh"
    interval 3
    weight -20
    fall 2
    rise 2
}

vrrp_instance VI_SIP {
    state BACKUP                # <-- BACKUP (not MASTER)
    interface eth0
    virtual_router_id 51        # Must match kam01
    priority 90                 # <-- Lower priority (kam01 preferred)
    advert_int 1

    authentication {
        auth_type PASS
        auth_pass YOUR_VRRP_PASSWORD    # Must match kam01
    }

    virtual_ipaddress {
        YOUR_PUBLIC_VIP/32 dev eth0
    }

    track_script {
        check_kamailio
    }

    notify_master "/bin/bash -c 'logger -t keepalived MASTER — VIP acquired on kam02'"
    notify_backup "/bin/bash -c 'logger -t keepalived BACKUP — VIP released on kam02'"
    notify_fault "/bin/bash -c 'logger -t keepalived FAULT — health check failing on kam02'"
}
# /etc/keepalived/keepalived.conf — Kamailio-B (BACKUP)

global_defs {
    router_id KAM02
    script_user root
    enable_script_security
}

# Health check script (same script and thresholds as on kam01)
vrrp_script check_kamailio {
    script "/etc/keepalived/check_kamailio.sh"
    interval 3
    weight -20
    fall 2
    rise 2
}

vrrp_instance VI_SIP {
    state BACKUP                # <-- BACKUP (not MASTER)
    interface eth0
    virtual_router_id 51        # Must match kam01
    priority 90                 # <-- Lower priority (kam01 preferred)
    advert_int 1

    authentication {
        auth_type PASS
        auth_pass YOUR_VRRP_PASSWORD    # Must match kam01
    }

    virtual_ipaddress {
        YOUR_PUBLIC_VIP/32 dev eth0
    }

    track_script {
        check_kamailio
    }

    notify_master "/bin/bash -c 'logger -t keepalived MASTER — VIP acquired on kam02'"
    notify_backup "/bin/bash -c 'logger -t keepalived BACKUP — VIP released on kam02'"
    notify_fault "/bin/bash -c 'logger -t keepalived FAULT — health check failing on kam02'"
}
# On both nodes
systemctl enable --now keepalived

# Verify VIP is on kam01 (the master)
ip addr show eth0 | grep YOUR_PUBLIC_VIP

# Check keepalived status
systemctl status keepalived
journalctl -u keepalived -f

# Test failover: stop Kamailio on kam01
systemctl stop kamailio
# Within 3-6 seconds, VIP should move to kam02:
# On kam02: ip addr show eth0 | grep YOUR_PUBLIC_VIP

# Restore kam01
systemctl start kamailio
# VIP moves back to kam01 (higher priority, preemption)
# On both nodes
systemctl enable --now keepalived

# Verify VIP is on kam01 (the master)
ip addr show eth0 | grep YOUR_PUBLIC_VIP

# Check keepalived status
systemctl status keepalived
journalctl -u keepalived -f

# Test failover: stop Kamailio on kam01
systemctl stop kamailio
# Within 3-6 seconds, VIP should move to kam02:
# On kam02: ip addr show eth0 | grep YOUR_PUBLIC_VIP

# Restore kam01
systemctl start kamailio
# VIP moves back to kam01 (higher priority, preemption)
# On both nodes
systemctl enable --now keepalived

# Verify VIP is on kam01 (the master)
ip addr show eth0 | grep YOUR_PUBLIC_VIP

# Check keepalived status
systemctl status keepalived
journalctl -u keepalived -f

# Test failover: stop Kamailio on kam01
systemctl stop kamailio
# Within 3-6 seconds, VIP should move to kam02:
# On kam02: ip addr show eth0 | grep YOUR_PUBLIC_VIP

# Restore kam01
systemctl start kamailio
# VIP moves back to kam01 (higher priority, preemption)
# Persist registrations to the shared database.
modparam("usrloc", "db_url", DBURL)
# usrloc db_mode values: 1 = write-through, 2 = write-back (timer-based flush).
# The intent here is an immediate write, which is mode 1.
# NOTE(review): modes 1 and 2 load contacts from the DB at startup only; if the
# standby must see live registrations without a restart, evaluate mode 3 (DB-only).
modparam("usrloc", "db_mode", 1)  # Write-through: every registration written to DB immediately
# Persist registrations to the shared database.
modparam("usrloc", "db_url", DBURL)
# usrloc db_mode values: 1 = write-through, 2 = write-back (timer-based flush).
# The intent here is an immediate write, which is mode 1.
# NOTE(review): modes 1 and 2 load contacts from the DB at startup only; if the
# standby must see live registrations without a restart, evaluate mode 3 (DB-only).
modparam("usrloc", "db_mode", 1)  # Write-through: every registration written to DB immediately
# Persist registrations to the shared database.
modparam("usrloc", "db_url", DBURL)
# usrloc db_mode values: 1 = write-through, 2 = write-back (timer-based flush).
# The intent here is an immediate write, which is mode 1.
# NOTE(review): modes 1 and 2 load contacts from the DB at startup only; if the
# standby must see live registrations without a restart, evaluate mode 3 (DB-only).
modparam("usrloc", "db_mode", 1)  # Write-through: every registration written to DB immediately
# Load DMQ module
loadmodule "dmq.so"

# DMQ parameters
modparam("dmq", "server_address", "sip:MY_PRIVATE_IP:5062")
modparam("dmq", "notification_address", "sip:10.0.1.10:5062") # Use kam01 as notification peer
modparam("dmq", "multi_notify", 1)
modparam("dmq", "num_workers", 4)
modparam("dmq", "ping_interval", 15)

# Add DMQ listener
listen=udp:MY_PRIVATE_IP:5062

# Enable dialog replication via DMQ
modparam("dialog", "enable_dmq", 1)
# Load DMQ module
loadmodule "dmq.so"

# DMQ parameters
modparam("dmq", "server_address", "sip:MY_PRIVATE_IP:5062")
modparam("dmq", "notification_address", "sip:10.0.1.10:5062") # Use kam01 as notification peer
modparam("dmq", "multi_notify", 1)
modparam("dmq", "num_workers", 4)
modparam("dmq", "ping_interval", 15)

# Add DMQ listener
listen=udp:MY_PRIVATE_IP:5062

# Enable dialog replication via DMQ
modparam("dialog", "enable_dmq", 1)
# Load DMQ module
loadmodule "dmq.so"

# DMQ parameters
modparam("dmq", "server_address", "sip:MY_PRIVATE_IP:5062")
modparam("dmq", "notification_address", "sip:10.0.1.10:5062") # Use kam01 as notification peer
modparam("dmq", "multi_notify", 1)
modparam("dmq", "num_workers", 4)
modparam("dmq", "ping_interval", 15)

# Add DMQ listener
listen=udp:MY_PRIVATE_IP:5062

# Enable dialog replication via DMQ
modparam("dialog", "enable_dmq", 1)
# DMQ traffic — handle before anything else
# NOTE(review): $sp is the packet's source port; if the intent is "arrived on
# the local DMQ listener", the received-port variable may be the better match — confirm.
if ($rm == "KDMQ" && $rP == "udp" && $sp == 5062) {
    dmq_handle_message();
    exit;
}
# DMQ traffic — handle before anything else
# NOTE(review): $sp is the packet's source port; if the intent is "arrived on
# the local DMQ listener", the received-port variable may be the better match — confirm.
if ($rm == "KDMQ" && $rP == "udp" && $sp == 5062) {
    dmq_handle_message();
    exit;
}
# DMQ traffic — handle before anything else
# NOTE(review): $sp is the packet's source port; if the intent is "arrived on
# the local DMQ listener", the received-port variable may be the better match — confirm.
if ($rm == "KDMQ" && $rP == "udp" && $sp == 5062) {
    dmq_handle_message();
    exit;
}
#!/bin/bash
# drain-freeswitch.sh — Gracefully drain a FreeSWITCH instance
# Usage: ./drain-freeswitch.sh fs01 YOUR_FS1_IP
#
# Marks the instance inactive in the Kamailio dispatcher (set 1), then polls
# its active call count every 10 seconds until it reaches zero.

FS_NAME=$1
FS_IP=$2
KAM_HOST="YOUR_KAM1_PRIVATE"

# Both arguments are required; without them the ssh calls below cannot work.
if [ -z "$FS_NAME" ] || [ -z "$FS_IP" ]; then
    echo "Usage: $0 <fs-name> <fs-ip>" >&2
    exit 1
fi

echo "=== Draining FreeSWITCH: $FS_NAME ($FS_IP) ==="

# Step 1: Mark as inactive in Kamailio dispatcher (no new calls)
echo "Step 1: Removing from dispatcher..."
if ! ssh "$KAM_HOST" "kamcmd dispatcher.set_state i 1 sip:${FS_IP}:5060"; then
    echo "ERROR: could not mark $FS_NAME inactive in the dispatcher" >&2
    exit 1
fi
echo " Done. No new calls will be sent to $FS_NAME."

# Step 2: Wait for existing calls to finish
echo "Step 2: Waiting for active calls to finish..."
while true; do
    CALLS=$(ssh "$FS_IP" "fs_cli -x 'show calls count' 2>/dev/null" | grep -oP '\d+(?= total)')
    if [ -z "$CALLS" ]; then
        # No number came back (ssh or fs_cli failed): the count is unknown,
        # so do not report the node as drained.
        echo " WARNING: could not read call count from $FS_NAME, retrying..." >&2
    else
        echo " Active calls: $CALLS"
        if [ "$CALLS" -eq 0 ]; then
            break
        fi
    fi
    sleep 10
done
echo " All calls finished."

# Step 3: Now safe to perform maintenance
echo "Step 3: $FS_NAME is fully drained. Safe to stop/upgrade."
echo ""
echo " When done, re-enable with:"
echo " ssh $KAM_HOST 'kamcmd dispatcher.set_state a 1 sip:${FS_IP}:5060'"
#!/bin/bash
# drain-freeswitch.sh — Gracefully drain a FreeSWITCH instance
# Usage: ./drain-freeswitch.sh fs01 YOUR_FS1_IP
#
# Marks the instance inactive in the Kamailio dispatcher (set 1), then polls
# its active call count every 10 seconds until it reaches zero.

FS_NAME=$1
FS_IP=$2
KAM_HOST="YOUR_KAM1_PRIVATE"

# Both arguments are required; without them the ssh calls below cannot work.
if [ -z "$FS_NAME" ] || [ -z "$FS_IP" ]; then
    echo "Usage: $0 <fs-name> <fs-ip>" >&2
    exit 1
fi

echo "=== Draining FreeSWITCH: $FS_NAME ($FS_IP) ==="

# Step 1: Mark as inactive in Kamailio dispatcher (no new calls)
echo "Step 1: Removing from dispatcher..."
if ! ssh "$KAM_HOST" "kamcmd dispatcher.set_state i 1 sip:${FS_IP}:5060"; then
    echo "ERROR: could not mark $FS_NAME inactive in the dispatcher" >&2
    exit 1
fi
echo " Done. No new calls will be sent to $FS_NAME."

# Step 2: Wait for existing calls to finish
echo "Step 2: Waiting for active calls to finish..."
while true; do
    CALLS=$(ssh "$FS_IP" "fs_cli -x 'show calls count' 2>/dev/null" | grep -oP '\d+(?= total)')
    if [ -z "$CALLS" ]; then
        # No number came back (ssh or fs_cli failed): the count is unknown,
        # so do not report the node as drained.
        echo " WARNING: could not read call count from $FS_NAME, retrying..." >&2
    else
        echo " Active calls: $CALLS"
        if [ "$CALLS" -eq 0 ]; then
            break
        fi
    fi
    sleep 10
done
echo " All calls finished."

# Step 3: Now safe to perform maintenance
echo "Step 3: $FS_NAME is fully drained. Safe to stop/upgrade."
echo ""
echo " When done, re-enable with:"
echo " ssh $KAM_HOST 'kamcmd dispatcher.set_state a 1 sip:${FS_IP}:5060'"
#!/bin/bash
# drain-freeswitch.sh — Gracefully drain a FreeSWITCH instance
# Usage: ./drain-freeswitch.sh fs01 YOUR_FS1_IP
#
# Marks the instance inactive in the Kamailio dispatcher (set 1), then polls
# its active call count every 10 seconds until it reaches zero.

FS_NAME=$1
FS_IP=$2
KAM_HOST="YOUR_KAM1_PRIVATE"

# Both arguments are required; without them the ssh calls below cannot work.
if [ -z "$FS_NAME" ] || [ -z "$FS_IP" ]; then
    echo "Usage: $0 <fs-name> <fs-ip>" >&2
    exit 1
fi

echo "=== Draining FreeSWITCH: $FS_NAME ($FS_IP) ==="

# Step 1: Mark as inactive in Kamailio dispatcher (no new calls)
echo "Step 1: Removing from dispatcher..."
if ! ssh "$KAM_HOST" "kamcmd dispatcher.set_state i 1 sip:${FS_IP}:5060"; then
    echo "ERROR: could not mark $FS_NAME inactive in the dispatcher" >&2
    exit 1
fi
echo " Done. No new calls will be sent to $FS_NAME."

# Step 2: Wait for existing calls to finish
echo "Step 2: Waiting for active calls to finish..."
while true; do
    CALLS=$(ssh "$FS_IP" "fs_cli -x 'show calls count' 2>/dev/null" | grep -oP '\d+(?= total)')
    if [ -z "$CALLS" ]; then
        # No number came back (ssh or fs_cli failed): the count is unknown,
        # so do not report the node as drained.
        echo " WARNING: could not read call count from $FS_NAME, retrying..." >&2
    else
        echo " Active calls: $CALLS"
        if [ "$CALLS" -eq 0 ]; then
            break
        fi
    fi
    sleep 10
done
echo " All calls finished."

# Step 3: Now safe to perform maintenance
echo "Step 3: $FS_NAME is fully drained. Safe to stop/upgrade."
echo ""
echo " When done, re-enable with:"
echo " ssh $KAM_HOST 'kamcmd dispatcher.set_state a 1 sip:${FS_IP}:5060'"
#!/bin/bash
# upgrade-freeswitch.sh — Zero-downtime FreeSWITCH upgrade
# Upgrades one instance at a time (rolling upgrade)
#
# For each "name:ip" entry: drain via the Kamailio dispatcher (set 1), stop
# FreeSWITCH, upgrade its packages, start it, verify, then re-enable it.

INSTANCES=("fs01:YOUR_FS1_IP" "fs02:YOUR_FS2_IP" "fs03:YOUR_FS3_IP")
KAM_HOST="YOUR_KAM1_PRIVATE"

for instance in "${INSTANCES[@]}"; do
    IFS=':' read -r name ip <<< "$instance"

    echo "============================================"
    echo "Upgrading $name ($ip)"
    echo "============================================"

    # 1. Drain
    echo " Draining..."
    ssh "$KAM_HOST" "kamcmd dispatcher.set_state i 1 sip:${ip}:5060"

    # Wait for calls to finish (max 30 minutes)
    TIMEOUT=1800
    ELAPSED=0
    CALLS=""
    while [ "$ELAPSED" -lt "$TIMEOUT" ]; do
        CALLS=$(ssh "$ip" "fs_cli -x 'show calls count' 2>/dev/null" | grep -oP '\d+(?= total)')
        if [ -z "$CALLS" ]; then
            # ssh or fs_cli failed: the count is unknown, so keep waiting
            # instead of assuming the instance is idle.
            echo " WARNING: could not read call count from $name (${ELAPSED}s elapsed)..." >&2
        elif [ "$CALLS" -eq 0 ]; then
            break
        else
            echo " $CALLS calls remaining (${ELAPSED}s elapsed)..."
        fi
        sleep 15
        ELAPSED=$((ELAPSED + 15))
    done
    if [ "$CALLS" != "0" ]; then
        echo " WARNING: drain timeout reached; stopping $name with calls possibly still active" >&2
    fi

    # 2. Stop FreeSWITCH
    echo " Stopping FreeSWITCH..."
    ssh "$ip" "systemctl stop freeswitch"

    # 3. Upgrade
    # --only-upgrade limits the operation to installed packages matching the
    # pattern; a plain "apt-get upgrade" would upgrade every package on the host.
    echo " Upgrading..."
    ssh "$ip" "apt-get update && apt-get install --only-upgrade -y 'freeswitch*'"

    # 4. Start FreeSWITCH
    echo " Starting FreeSWITCH..."
    ssh "$ip" "systemctl start freeswitch"
    sleep 5  # Wait for SIP profile to register

    # 5. Verify it responds
    echo " Verifying..."
    ssh "$ip" "fs_cli -x 'sofia status'" || { echo "FAILED to start $name!"; exit 1; }

    # 6. Re-enable in dispatcher
    echo " Re-enabling in dispatcher..."
    ssh "$KAM_HOST" "kamcmd dispatcher.set_state a 1 sip:${ip}:5060"

    echo " $name upgraded successfully."
    echo ""

    # Wait before upgrading next instance (let it stabilize)
    sleep 30
done

echo "All instances upgraded. Verifying dispatcher state..."
ssh "$KAM_HOST" "kamcmd dispatcher.list"
#!/bin/bash
# upgrade-freeswitch.sh — Zero-downtime FreeSWITCH upgrade
# Upgrades one instance at a time (rolling upgrade)
#
# For each "name:ip" entry: drain via the Kamailio dispatcher (set 1), stop
# FreeSWITCH, upgrade its packages, start it, verify, then re-enable it.

INSTANCES=("fs01:YOUR_FS1_IP" "fs02:YOUR_FS2_IP" "fs03:YOUR_FS3_IP")
KAM_HOST="YOUR_KAM1_PRIVATE"

for instance in "${INSTANCES[@]}"; do
    IFS=':' read -r name ip <<< "$instance"

    echo "============================================"
    echo "Upgrading $name ($ip)"
    echo "============================================"

    # 1. Drain
    echo " Draining..."
    ssh "$KAM_HOST" "kamcmd dispatcher.set_state i 1 sip:${ip}:5060"

    # Wait for calls to finish (max 30 minutes)
    TIMEOUT=1800
    ELAPSED=0
    CALLS=""
    while [ "$ELAPSED" -lt "$TIMEOUT" ]; do
        CALLS=$(ssh "$ip" "fs_cli -x 'show calls count' 2>/dev/null" | grep -oP '\d+(?= total)')
        if [ -z "$CALLS" ]; then
            # ssh or fs_cli failed: the count is unknown, so keep waiting
            # instead of assuming the instance is idle.
            echo " WARNING: could not read call count from $name (${ELAPSED}s elapsed)..." >&2
        elif [ "$CALLS" -eq 0 ]; then
            break
        else
            echo " $CALLS calls remaining (${ELAPSED}s elapsed)..."
        fi
        sleep 15
        ELAPSED=$((ELAPSED + 15))
    done
    if [ "$CALLS" != "0" ]; then
        echo " WARNING: drain timeout reached; stopping $name with calls possibly still active" >&2
    fi

    # 2. Stop FreeSWITCH
    echo " Stopping FreeSWITCH..."
    ssh "$ip" "systemctl stop freeswitch"

    # 3. Upgrade
    # --only-upgrade limits the operation to installed packages matching the
    # pattern; a plain "apt-get upgrade" would upgrade every package on the host.
    echo " Upgrading..."
    ssh "$ip" "apt-get update && apt-get install --only-upgrade -y 'freeswitch*'"

    # 4. Start FreeSWITCH
    echo " Starting FreeSWITCH..."
    ssh "$ip" "systemctl start freeswitch"
    sleep 5  # Wait for SIP profile to register

    # 5. Verify it responds
    echo " Verifying..."
    ssh "$ip" "fs_cli -x 'sofia status'" || { echo "FAILED to start $name!"; exit 1; }

    # 6. Re-enable in dispatcher
    echo " Re-enabling in dispatcher..."
    ssh "$KAM_HOST" "kamcmd dispatcher.set_state a 1 sip:${ip}:5060"

    echo " $name upgraded successfully."
    echo ""

    # Wait before upgrading next instance (let it stabilize)
    sleep 30
done

echo "All instances upgraded. Verifying dispatcher state..."
ssh "$KAM_HOST" "kamcmd dispatcher.list"
#!/bin/bash
# upgrade-freeswitch.sh — Zero-downtime FreeSWITCH upgrade
# Upgrades one instance at a time (rolling upgrade)
#
# For each "name:ip" entry: drain via the Kamailio dispatcher (set 1), stop
# FreeSWITCH, upgrade its packages, start it, verify, then re-enable it.

INSTANCES=("fs01:YOUR_FS1_IP" "fs02:YOUR_FS2_IP" "fs03:YOUR_FS3_IP")
KAM_HOST="YOUR_KAM1_PRIVATE"

for instance in "${INSTANCES[@]}"; do
    IFS=':' read -r name ip <<< "$instance"

    echo "============================================"
    echo "Upgrading $name ($ip)"
    echo "============================================"

    # 1. Drain
    echo " Draining..."
    ssh "$KAM_HOST" "kamcmd dispatcher.set_state i 1 sip:${ip}:5060"

    # Wait for calls to finish (max 30 minutes)
    TIMEOUT=1800
    ELAPSED=0
    CALLS=""
    while [ "$ELAPSED" -lt "$TIMEOUT" ]; do
        CALLS=$(ssh "$ip" "fs_cli -x 'show calls count' 2>/dev/null" | grep -oP '\d+(?= total)')
        if [ -z "$CALLS" ]; then
            # ssh or fs_cli failed: the count is unknown, so keep waiting
            # instead of assuming the instance is idle.
            echo " WARNING: could not read call count from $name (${ELAPSED}s elapsed)..." >&2
        elif [ "$CALLS" -eq 0 ]; then
            break
        else
            echo " $CALLS calls remaining (${ELAPSED}s elapsed)..."
        fi
        sleep 15
        ELAPSED=$((ELAPSED + 15))
    done
    if [ "$CALLS" != "0" ]; then
        echo " WARNING: drain timeout reached; stopping $name with calls possibly still active" >&2
    fi

    # 2. Stop FreeSWITCH
    echo " Stopping FreeSWITCH..."
    ssh "$ip" "systemctl stop freeswitch"

    # 3. Upgrade
    # --only-upgrade limits the operation to installed packages matching the
    # pattern; a plain "apt-get upgrade" would upgrade every package on the host.
    echo " Upgrading..."
    ssh "$ip" "apt-get update && apt-get install --only-upgrade -y 'freeswitch*'"

    # 4. Start FreeSWITCH
    echo " Starting FreeSWITCH..."
    ssh "$ip" "systemctl start freeswitch"
    sleep 5  # Wait for SIP profile to register

    # 5. Verify it responds
    echo " Verifying..."
    ssh "$ip" "fs_cli -x 'sofia status'" || { echo "FAILED to start $name!"; exit 1; }

    # 6. Re-enable in dispatcher
    echo " Re-enabling in dispatcher..."
    ssh "$KAM_HOST" "kamcmd dispatcher.set_state a 1 sip:${ip}:5060"

    echo " $name upgraded successfully."
    echo ""

    # Wait before upgrading next instance (let it stabilize)
    sleep 30
done

echo "All instances upgraded. Verifying dispatcher state..."
ssh "$KAM_HOST" "kamcmd dispatcher.list"
# On NFS server (db01 or dedicated storage)
apt-get install -y nfs-kernel-server
mkdir -p /srv/recordings
chown freeswitch:freeswitch /srv/recordings
echo "/srv/recordings 10.0.1.0/24(rw,sync,no_subtree_check,no_root_squash)" >> /etc/exports
exportfs -ra

# On each FreeSWITCH server
apt-get install -y nfs-common
mkdir -p /var/lib/freeswitch/recordings
echo "YOUR_DB1_IP:/srv/recordings /var/lib/freeswitch/recordings nfs defaults,soft,timeo=50 0 0" >> /etc/fstab
mount -a
# On NFS server (db01 or dedicated storage)
apt-get install -y nfs-kernel-server
mkdir -p /srv/recordings
chown freeswitch:freeswitch /srv/recordings
echo "/srv/recordings 10.0.1.0/24(rw,sync,no_subtree_check,no_root_squash)" >> /etc/exports
exportfs -ra

# On each FreeSWITCH server
apt-get install -y nfs-common
mkdir -p /var/lib/freeswitch/recordings
echo "YOUR_DB1_IP:/srv/recordings /var/lib/freeswitch/recordings nfs defaults,soft,timeo=50 0 0" >> /etc/fstab
mount -a
# --- On the NFS server (db01 or dedicated storage) ---
# Installs the NFS server and exports /srv/recordings to the 10.0.1.0/24 network.
apt-get install -y nfs-kernel-server
mkdir -p /srv/recordings
chown freeswitch:freeswitch /srv/recordings
# Append the export only once so re-running these steps does not duplicate it.
# NOTE(review): no_root_squash lets root on any client in 10.0.1.0/24 act as
# root on the export — confirm this is really required.
grep -qF "/srv/recordings " /etc/exports || \
    echo "/srv/recordings 10.0.1.0/24(rw,sync,no_subtree_check,no_root_squash)" >> /etc/exports
exportfs -ra

# --- On each FreeSWITCH server ---
# Installs the NFS client and mounts the export at the FreeSWITCH recordings path.
apt-get install -y nfs-common
mkdir -p /var/lib/freeswitch/recordings
# Same idempotency guard for the fstab entry.
grep -qF " /var/lib/freeswitch/recordings " /etc/fstab || \
    echo "YOUR_DB1_IP:/srv/recordings /var/lib/freeswitch/recordings nfs defaults,soft,timeo=50 0 0" >> /etc/fstab
mount -a
#!/bin/bash
# /usr/local/bin/upload-recording.sh
# Called by FreeSWITCH after each recording completes.
#
# Usage: upload-recording.sh <path-to-recording>
# Copies the recording to S3 under a YYYY/MM/DD prefix using the STANDARD_IA
# storage class. Does nothing when the argument is missing or not a regular file.

# The assignment must be on its own line; fused onto the comment above it is
# never executed and FILE stays empty.
FILE=$1
BUCKET="s3://your-recordings-bucket"

if [ -f "$FILE" ]; then
    # Quote "$FILE" inside basename so paths containing spaces survive.
    aws s3 cp "$FILE" "$BUCKET/$(date +%Y/%m/%d)/$(basename "$FILE")" \
        --storage-class STANDARD_IA
    # Optionally delete local file after upload
    # rm -f "$FILE"
fi
#!/bin/bash
# /usr/local/bin/upload-recording.sh
# Called by FreeSWITCH after each recording completes.
#
# Usage: upload-recording.sh <path-to-recording>
# Copies the recording to S3 under a YYYY/MM/DD prefix using the STANDARD_IA
# storage class. Does nothing when the argument is missing or not a regular file.

# The assignment must be on its own line; fused onto the comment above it is
# never executed and FILE stays empty.
FILE=$1
BUCKET="s3://your-recordings-bucket"

if [ -f "$FILE" ]; then
    # Quote "$FILE" inside basename so paths containing spaces survive.
    aws s3 cp "$FILE" "$BUCKET/$(date +%Y/%m/%d)/$(basename "$FILE")" \
        --storage-class STANDARD_IA
    # Optionally delete local file after upload
    # rm -f "$FILE"
fi
#!/bin/bash
# /usr/local/bin/upload-recording.sh
# Called by FreeSWITCH after each recording completes.
#
# Usage: upload-recording.sh <path-to-recording>
# Copies the recording to S3 under a YYYY/MM/DD prefix using the STANDARD_IA
# storage class. Does nothing when the argument is missing or not a regular file.

# The assignment must be on its own line; fused onto the comment above it is
# never executed and FILE stays empty.
FILE=$1
BUCKET="s3://your-recordings-bucket"

if [ -f "$FILE" ]; then
    # Quote "$FILE" inside basename so paths containing spaces survive.
    aws s3 cp "$FILE" "$BUCKET/$(date +%Y/%m/%d)/$(basename "$FILE")" \
        --storage-class STANDARD_IA
    # Optionally delete local file after upload
    # rm -f "$FILE"
fi
                ┌─────────────────────┐
                │  Global DNS (SRV)   │
                │   sip.YOUR_DOMAIN   │
                └──────────┬──────────┘
                           │
          ┌────────────────┼────────────────┐
          │                │                │
┌─────────▼──────┐  ┌──────▼──────┐  ┌──────▼────────┐
│ DC Europe      │  │ DC US-East  │  │ DC US-West    │
│ (London)       │  │ (Virginia)  │  │ (Oregon)      │
│                │  │             │  │               │
│ Kam+FS+RTP     │  │ Kam+FS+RTP  │  │ Kam+FS+RTP    │
│ Galera node    │  │ Galera node │  │ Galera node   │
└────────────────┘  └─────────────┘  └───────────────┘
          │                │                │
          └────────────────┼────────────────┘
                           │
                ┌──────────▼──────────┐
                │ Galera WAN Cluster  │
                │ (async replication) │
                └─────────────────────┘
                ┌─────────────────────┐
                │  Global DNS (SRV)   │
                │   sip.YOUR_DOMAIN   │
                └──────────┬──────────┘
                           │
          ┌────────────────┼────────────────┐
          │                │                │
┌─────────▼──────┐  ┌──────▼──────┐  ┌──────▼────────┐
│ DC Europe      │  │ DC US-East  │  │ DC US-West    │
│ (London)       │  │ (Virginia)  │  │ (Oregon)      │
│                │  │             │  │               │
│ Kam+FS+RTP     │  │ Kam+FS+RTP  │  │ Kam+FS+RTP    │
│ Galera node    │  │ Galera node │  │ Galera node   │
└────────────────┘  └─────────────┘  └───────────────┘
          │                │                │
          └────────────────┼────────────────┘
                           │
                ┌──────────▼──────────┐
                │ Galera WAN Cluster  │
                │ (async replication) │
                └─────────────────────┘
                ┌─────────────────────┐
                │  Global DNS (SRV)   │
                │   sip.YOUR_DOMAIN   │
                └──────────┬──────────┘
                           │
          ┌────────────────┼────────────────┐
          │                │                │
┌─────────▼──────┐  ┌──────▼──────┐  ┌──────▼────────┐
│ DC Europe      │  │ DC US-East  │  │ DC US-West    │
│ (London)       │  │ (Virginia)  │  │ (Oregon)      │
│                │  │             │  │               │
│ Kam+FS+RTP     │  │ Kam+FS+RTP  │  │ Kam+FS+RTP    │
│ Galera node    │  │ Galera node │  │ Galera node   │
└────────────────┘  └─────────────┘  └───────────────┘
          │                │                │
          └────────────────┼────────────────┘
                           │
                ┌──────────▼──────────┐
                │ Galera WAN Cluster  │
                │ (async replication) │
                └─────────────────────┘
; ------------------------------------------------------------------
; NAPTR records — tell SIP clients which transports are available
; ------------------------------------------------------------------
YOUR_DOMAIN.              IN NAPTR 10 10 "S" "SIP+D2U"  "" _sip._udp.YOUR_DOMAIN.
YOUR_DOMAIN.              IN NAPTR 20 10 "S" "SIP+D2T"  "" _sip._tcp.YOUR_DOMAIN.
YOUR_DOMAIN.              IN NAPTR 30 10 "S" "SIPS+D2T" "" _sips._tcp.YOUR_DOMAIN.

; ------------------------------------------------------------------
; SRV records — specify servers and priorities per transport
; Lower priority number = preferred. Same priority = load balance by weight.
; Fields: priority weight port target
; ------------------------------------------------------------------

; UDP SIP
_sip._udp.YOUR_DOMAIN.    IN SRV 10 60 5060 sip-eu.YOUR_DOMAIN.   ; EU primary
_sip._udp.YOUR_DOMAIN.    IN SRV 10 40 5060 sip-us.YOUR_DOMAIN.   ; US secondary
_sip._udp.YOUR_DOMAIN.    IN SRV 20 50 5060 sip-eu2.YOUR_DOMAIN.  ; EU backup
_sip._udp.YOUR_DOMAIN.    IN SRV 20 50 5060 sip-us2.YOUR_DOMAIN.  ; US backup

; TCP SIP
_sip._tcp.YOUR_DOMAIN.    IN SRV 10 60 5060 sip-eu.YOUR_DOMAIN.
_sip._tcp.YOUR_DOMAIN.    IN SRV 10 40 5060 sip-us.YOUR_DOMAIN.

; TLS SIP
_sips._tcp.YOUR_DOMAIN.   IN SRV 10 60 5061 sip-eu.YOUR_DOMAIN.
_sips._tcp.YOUR_DOMAIN.   IN SRV 10 40 5061 sip-us.YOUR_DOMAIN.

; ------------------------------------------------------------------
; A records for each SIP edge
; ------------------------------------------------------------------
sip-eu.YOUR_DOMAIN.       IN A YOUR_EU_VIP
sip-us.YOUR_DOMAIN.       IN A YOUR_US_VIP
sip-eu2.YOUR_DOMAIN.      IN A YOUR_EU2_VIP
sip-us2.YOUR_DOMAIN.      IN A YOUR_US2_VIP
; ------------------------------------------------------------------
; NAPTR records — tell SIP clients which transports are available
; ------------------------------------------------------------------
YOUR_DOMAIN.              IN NAPTR 10 10 "S" "SIP+D2U"  "" _sip._udp.YOUR_DOMAIN.
YOUR_DOMAIN.              IN NAPTR 20 10 "S" "SIP+D2T"  "" _sip._tcp.YOUR_DOMAIN.
YOUR_DOMAIN.              IN NAPTR 30 10 "S" "SIPS+D2T" "" _sips._tcp.YOUR_DOMAIN.

; ------------------------------------------------------------------
; SRV records — specify servers and priorities per transport
; Lower priority number = preferred. Same priority = load balance by weight.
; Fields: priority weight port target
; ------------------------------------------------------------------

; UDP SIP
_sip._udp.YOUR_DOMAIN.    IN SRV 10 60 5060 sip-eu.YOUR_DOMAIN.   ; EU primary
_sip._udp.YOUR_DOMAIN.    IN SRV 10 40 5060 sip-us.YOUR_DOMAIN.   ; US secondary
_sip._udp.YOUR_DOMAIN.    IN SRV 20 50 5060 sip-eu2.YOUR_DOMAIN.  ; EU backup
_sip._udp.YOUR_DOMAIN.    IN SRV 20 50 5060 sip-us2.YOUR_DOMAIN.  ; US backup

; TCP SIP
_sip._tcp.YOUR_DOMAIN.    IN SRV 10 60 5060 sip-eu.YOUR_DOMAIN.
_sip._tcp.YOUR_DOMAIN.    IN SRV 10 40 5060 sip-us.YOUR_DOMAIN.

; TLS SIP
_sips._tcp.YOUR_DOMAIN.   IN SRV 10 60 5061 sip-eu.YOUR_DOMAIN.
_sips._tcp.YOUR_DOMAIN.   IN SRV 10 40 5061 sip-us.YOUR_DOMAIN.

; ------------------------------------------------------------------
; A records for each SIP edge
; ------------------------------------------------------------------
sip-eu.YOUR_DOMAIN.       IN A YOUR_EU_VIP
sip-us.YOUR_DOMAIN.       IN A YOUR_US_VIP
sip-eu2.YOUR_DOMAIN.      IN A YOUR_EU2_VIP
sip-us2.YOUR_DOMAIN.      IN A YOUR_US2_VIP
; ------------------------------------------------------------------
; NAPTR records — tell SIP clients which transports are available
; ------------------------------------------------------------------
YOUR_DOMAIN.              IN NAPTR 10 10 "S" "SIP+D2U"  "" _sip._udp.YOUR_DOMAIN.
YOUR_DOMAIN.              IN NAPTR 20 10 "S" "SIP+D2T"  "" _sip._tcp.YOUR_DOMAIN.
YOUR_DOMAIN.              IN NAPTR 30 10 "S" "SIPS+D2T" "" _sips._tcp.YOUR_DOMAIN.

; ------------------------------------------------------------------
; SRV records — specify servers and priorities per transport
; Lower priority number = preferred. Same priority = load balance by weight.
; Fields: priority weight port target
; ------------------------------------------------------------------

; UDP SIP
_sip._udp.YOUR_DOMAIN.    IN SRV 10 60 5060 sip-eu.YOUR_DOMAIN.   ; EU primary
_sip._udp.YOUR_DOMAIN.    IN SRV 10 40 5060 sip-us.YOUR_DOMAIN.   ; US secondary
_sip._udp.YOUR_DOMAIN.    IN SRV 20 50 5060 sip-eu2.YOUR_DOMAIN.  ; EU backup
_sip._udp.YOUR_DOMAIN.    IN SRV 20 50 5060 sip-us2.YOUR_DOMAIN.  ; US backup

; TCP SIP
_sip._tcp.YOUR_DOMAIN.    IN SRV 10 60 5060 sip-eu.YOUR_DOMAIN.
_sip._tcp.YOUR_DOMAIN.    IN SRV 10 40 5060 sip-us.YOUR_DOMAIN.

; TLS SIP
_sips._tcp.YOUR_DOMAIN.   IN SRV 10 60 5061 sip-eu.YOUR_DOMAIN.
_sips._tcp.YOUR_DOMAIN.   IN SRV 10 40 5061 sip-us.YOUR_DOMAIN.

; ------------------------------------------------------------------
; A records for each SIP edge
; ------------------------------------------------------------------
sip-eu.YOUR_DOMAIN.       IN A YOUR_EU_VIP
sip-us.YOUR_DOMAIN.       IN A YOUR_US_VIP
sip-eu2.YOUR_DOMAIN.      IN A YOUR_EU2_VIP
sip-us2.YOUR_DOMAIN.      IN A YOUR_US2_VIP
# Load GeoIP2 module
loadmodule "geoip2.so"
modparam("geoip2", "path", "/usr/share/GeoIP/GeoLite2-City.mmdb")

# Geographic routing route.
# Picks a dispatcher set from the continent of the source IP ($si):
#   set 10 = EU FreeSWITCH pool, set 20 = US pool, set 1 = default pool (all DCs).
# Sets $var(country) and $var(continent) when the GeoIP lookup succeeds.
# Logs an error and returns -1 when no destination could be selected, so a
# caller may check the result; callers that ignore it behave as before.
route[GEO_ROUTE] {
    # Look up caller's country
    if (geoip2_match("$si", "src")) {
        $var(country) = $gip2(src=>cc);
        $var(continent) = $gip2(src=>cont);
        xlog("L_INFO", "GEO: Caller from $si — country=$var(country), continent=$var(continent)\n");

        # Route to closest DC based on continent
        switch ($var(continent)) {
            case "EU":
                # European callers → EU FreeSWITCH pool (set 10)
                if (!ds_select_dst("10", "0", "6")) {
                    # Fallback to US pool
                    if (!ds_select_dst("20", "0", "6")) {
                        xlog("L_ERR", "GEO: no destination available in sets 10/20 for $si\n");
                        return(-1);
                    }
                }
                break;
            case "NA":
                # North American callers → US-East pool (set 20)
                if (!ds_select_dst("20", "0", "6")) {
                    # Fallback to EU pool
                    if (!ds_select_dst("10", "0", "6")) {
                        xlog("L_ERR", "GEO: no destination available in sets 20/10 for $si\n");
                        return(-1);
                    }
                }
                break;
            default:
                # Everyone else → round-robin across all DCs
                if (!ds_select_dst("1", "4", "6")) {
                    xlog("L_ERR", "GEO: no destination available in set 1 for $si\n");
                    return(-1);
                }
                break;
        }
    } else {
        # GeoIP lookup failed — use default pool
        if (!ds_select_dst("1", "0", "6")) {
            xlog("L_ERR", "GEO: no destination available in set 1 for $si\n");
            return(-1);
        }
    }
}
# Load GeoIP2 module
loadmodule "geoip2.so"
modparam("geoip2", "path", "/usr/share/GeoIP/GeoLite2-City.mmdb")

# Geographic routing route.
# Picks a dispatcher set from the continent of the source IP ($si):
#   set 10 = EU FreeSWITCH pool, set 20 = US pool, set 1 = default pool (all DCs).
# Sets $var(country) and $var(continent) when the GeoIP lookup succeeds.
# Logs an error and returns -1 when no destination could be selected, so a
# caller may check the result; callers that ignore it behave as before.
route[GEO_ROUTE] {
    # Look up caller's country
    if (geoip2_match("$si", "src")) {
        $var(country) = $gip2(src=>cc);
        $var(continent) = $gip2(src=>cont);
        xlog("L_INFO", "GEO: Caller from $si — country=$var(country), continent=$var(continent)\n");

        # Route to closest DC based on continent
        switch ($var(continent)) {
            case "EU":
                # European callers → EU FreeSWITCH pool (set 10)
                if (!ds_select_dst("10", "0", "6")) {
                    # Fallback to US pool
                    if (!ds_select_dst("20", "0", "6")) {
                        xlog("L_ERR", "GEO: no destination available in sets 10/20 for $si\n");
                        return(-1);
                    }
                }
                break;
            case "NA":
                # North American callers → US-East pool (set 20)
                if (!ds_select_dst("20", "0", "6")) {
                    # Fallback to EU pool
                    if (!ds_select_dst("10", "0", "6")) {
                        xlog("L_ERR", "GEO: no destination available in sets 20/10 for $si\n");
                        return(-1);
                    }
                }
                break;
            default:
                # Everyone else → round-robin across all DCs
                if (!ds_select_dst("1", "4", "6")) {
                    xlog("L_ERR", "GEO: no destination available in set 1 for $si\n");
                    return(-1);
                }
                break;
        }
    } else {
        # GeoIP lookup failed — use default pool
        if (!ds_select_dst("1", "0", "6")) {
            xlog("L_ERR", "GEO: no destination available in set 1 for $si\n");
            return(-1);
        }
    }
}
# Load GeoIP2 module
loadmodule "geoip2.so"
modparam("geoip2", "path", "/usr/share/GeoIP/GeoLite2-City.mmdb")

# Geographic routing route.
# Picks a dispatcher set from the continent of the source IP ($si):
#   set 10 = EU FreeSWITCH pool, set 20 = US pool, set 1 = default pool (all DCs).
# Sets $var(country) and $var(continent) when the GeoIP lookup succeeds.
# Logs an error and returns -1 when no destination could be selected, so a
# caller may check the result; callers that ignore it behave as before.
route[GEO_ROUTE] {
    # Look up caller's country
    if (geoip2_match("$si", "src")) {
        $var(country) = $gip2(src=>cc);
        $var(continent) = $gip2(src=>cont);
        xlog("L_INFO", "GEO: Caller from $si — country=$var(country), continent=$var(continent)\n");

        # Route to closest DC based on continent
        switch ($var(continent)) {
            case "EU":
                # European callers → EU FreeSWITCH pool (set 10)
                if (!ds_select_dst("10", "0", "6")) {
                    # Fallback to US pool
                    if (!ds_select_dst("20", "0", "6")) {
                        xlog("L_ERR", "GEO: no destination available in sets 10/20 for $si\n");
                        return(-1);
                    }
                }
                break;
            case "NA":
                # North American callers → US-East pool (set 20)
                if (!ds_select_dst("20", "0", "6")) {
                    # Fallback to EU pool
                    if (!ds_select_dst("10", "0", "6")) {
                        xlog("L_ERR", "GEO: no destination available in sets 20/10 for $si\n");
                        return(-1);
                    }
                }
                break;
            default:
                # Everyone else → round-robin across all DCs
                if (!ds_select_dst("1", "4", "6")) {
                    xlog("L_ERR", "GEO: no destination available in set 1 for $si\n");
                    return(-1);
                }
                break;
        }
    } else {
        # GeoIP lookup failed — use default pool
        if (!ds_select_dst("1", "0", "6")) {
            xlog("L_ERR", "GEO: no destination available in set 1 for $si\n");
            return(-1);
        }
    }
}
# On each Galera node, add WAN-specific settings:
[galera]
wsrep_cluster_address = "gcomm://EU_DB_IP,US_EAST_DB_IP,US_WEST_DB_IP"

# WAN optimizations and segment-aware replication (reduces cross-DC traffic).
#
# wsrep_provider_options must be set exactly once: a second assignment
# replaces the first, which would silently drop the evs.* / gcache tuning.
# All provider options therefore live in one semicolon-separated string.
#
# Change gmcast.segment per DC:
#   EU nodes:      gmcast.segment=0
#   US-East nodes: gmcast.segment=1
#   US-West nodes: gmcast.segment=2
wsrep_provider_options = "gmcast.segment=0; evs.send_window=256; evs.user_send_window=128; evs.keepalive_period=PT3S; evs.suspect_timeout=PT30S; evs.inactive_timeout=PT1M; gcache.size=1G"
# On each Galera node, add WAN-specific settings:
[galera]
wsrep_cluster_address = "gcomm://EU_DB_IP,US_EAST_DB_IP,US_WEST_DB_IP"

# WAN optimizations and segment-aware replication (reduces cross-DC traffic).
#
# wsrep_provider_options must be set exactly once: a second assignment
# replaces the first, which would silently drop the evs.* / gcache tuning.
# All provider options therefore live in one semicolon-separated string.
#
# Change gmcast.segment per DC:
#   EU nodes:      gmcast.segment=0
#   US-East nodes: gmcast.segment=1
#   US-West nodes: gmcast.segment=2
wsrep_provider_options = "gmcast.segment=0; evs.send_window=256; evs.user_send_window=128; evs.keepalive_period=PT3S; evs.suspect_timeout=PT30S; evs.inactive_timeout=PT1M; gcache.size=1G"
# On each Galera node, add WAN-specific settings:
[galera]
wsrep_cluster_address = "gcomm://EU_DB_IP,US_EAST_DB_IP,US_WEST_DB_IP"

# WAN optimizations and segment-aware replication (reduces cross-DC traffic).
#
# wsrep_provider_options must be set exactly once: a second assignment
# replaces the first, which would silently drop the evs.* / gcache tuning.
# All provider options therefore live in one semicolon-separated string.
#
# Change gmcast.segment per DC:
#   EU nodes:      gmcast.segment=0
#   US-East nodes: gmcast.segment=1
#   US-West nodes: gmcast.segment=2
wsrep_provider_options = "gmcast.segment=0; evs.send_window=256; evs.user_send_window=128; evs.keepalive_period=PT3S; evs.suspect_timeout=PT30S; evs.inactive_timeout=PT1M; gcache.size=1G"
# RTPEngine sets.
# modparam() is a startup-time directive and cannot be called from inside a
# route block, so both engines are declared up front as numbered sets and the
# route below only picks the active set for the current message.
#   set 1 = EU RTPEngine, set 2 = US RTPEngine
# These lines belong with the other module parameters, after
# loadmodule "rtpengine.so".
# NOTE(review): if the config already declares an rtpengine_sock elsewhere,
# reconcile it with these sets — confirm against the full kamailio.cfg.
modparam("rtpengine", "rtpengine_sock", "1 == udp:EU_RTP_IP:2223")
modparam("rtpengine", "rtpengine_sock", "2 == udp:US_RTP_IP:2223")

# Select RTPEngine based on caller geography.
# Reads $var(continent); any value other than "EU" (including unset) selects
# the US set, matching the original else-branch.
route[SELECT_RTPENGINE] {
    if ($var(continent) == "EU") {
        # Use EU RTPEngine
        set_rtpengine_set("1");
    } else {
        # Use US RTPEngine
        set_rtpengine_set("2");
    }
}
# RTPEngine sets.
# modparam() is a startup-time directive and cannot be called from inside a
# route block, so both engines are declared up front as numbered sets and the
# route below only picks the active set for the current message.
#   set 1 = EU RTPEngine, set 2 = US RTPEngine
# These lines belong with the other module parameters, after
# loadmodule "rtpengine.so".
# NOTE(review): if the config already declares an rtpengine_sock elsewhere,
# reconcile it with these sets — confirm against the full kamailio.cfg.
modparam("rtpengine", "rtpengine_sock", "1 == udp:EU_RTP_IP:2223")
modparam("rtpengine", "rtpengine_sock", "2 == udp:US_RTP_IP:2223")

# Select RTPEngine based on caller geography.
# Reads $var(continent); any value other than "EU" (including unset) selects
# the US set, matching the original else-branch.
route[SELECT_RTPENGINE] {
    if ($var(continent) == "EU") {
        # Use EU RTPEngine
        set_rtpengine_set("1");
    } else {
        # Use US RTPEngine
        set_rtpengine_set("2");
    }
}
# RTPEngine sets.
# modparam() is a startup-time directive and cannot be called from inside a
# route block, so both engines are declared up front as numbered sets and the
# route below only picks the active set for the current message.
#   set 1 = EU RTPEngine, set 2 = US RTPEngine
# These lines belong with the other module parameters, after
# loadmodule "rtpengine.so".
# NOTE(review): if the config already declares an rtpengine_sock elsewhere,
# reconcile it with these sets — confirm against the full kamailio.cfg.
modparam("rtpengine", "rtpengine_sock", "1 == udp:EU_RTP_IP:2223")
modparam("rtpengine", "rtpengine_sock", "2 == udp:US_RTP_IP:2223")

# Select RTPEngine based on caller geography.
# Reads $var(continent); any value other than "EU" (including unset) selects
# the US set, matching the original else-branch.
route[SELECT_RTPENGINE] {
    if ($var(continent) == "EU") {
        # Use EU RTPEngine
        set_rtpengine_set("1");
    } else {
        # Use US RTPEngine
        set_rtpengine_set("2");
    }
}
# Install kamailio-exporter
# Option 1: Pre-built binary
# NOTE(review): confirm the release asset name and the exporter's command-line
# flag names (`kamailio_exporter --help`) for the version you download.
wget https://github.com/florentchauveau/kamailio_exporter/releases/latest/download/kamailio_exporter_linux_amd64 \
    -O /usr/local/bin/kamailio_exporter
chmod +x /usr/local/bin/kamailio_exporter

# Create systemd service
# The heredoc terminator (EOF) must stand alone on its line, otherwise the
# shell keeps reading the following commands into the unit file.
cat > /etc/systemd/system/kamailio-exporter.service << 'EOF'
[Unit]
Description=Kamailio Prometheus Exporter
After=kamailio.service

[Service]
ExecStart=/usr/local/bin/kamailio_exporter \
    --kamailio.address=unix:/var/run/kamailio/kamailio_ctl \
    --web.listen-address=:9494
Restart=on-failure

[Install]
WantedBy=multi-user.target
EOF

# Pick up the new unit, then enable it at boot and start it immediately.
systemctl daemon-reload
systemctl enable --now kamailio-exporter
# Install kamailio-exporter
# Option 1: Pre-built binary
# NOTE(review): confirm the release asset name and the exporter's command-line
# flag names (`kamailio_exporter --help`) for the version you download.
wget https://github.com/florentchauveau/kamailio_exporter/releases/latest/download/kamailio_exporter_linux_amd64 \
    -O /usr/local/bin/kamailio_exporter
chmod +x /usr/local/bin/kamailio_exporter

# Create systemd service
# The heredoc terminator (EOF) must stand alone on its line, otherwise the
# shell keeps reading the following commands into the unit file.
cat > /etc/systemd/system/kamailio-exporter.service << 'EOF'
[Unit]
Description=Kamailio Prometheus Exporter
After=kamailio.service

[Service]
ExecStart=/usr/local/bin/kamailio_exporter \
    --kamailio.address=unix:/var/run/kamailio/kamailio_ctl \
    --web.listen-address=:9494
Restart=on-failure

[Install]
WantedBy=multi-user.target
EOF

# Pick up the new unit, then enable it at boot and start it immediately.
systemctl daemon-reload
systemctl enable --now kamailio-exporter
# Install kamailio-exporter
# Option 1: Pre-built binary
# NOTE(review): confirm the release asset name and the exporter's command-line
# flag names (`kamailio_exporter --help`) for the version you download.
wget https://github.com/florentchauveau/kamailio_exporter/releases/latest/download/kamailio_exporter_linux_amd64 \
    -O /usr/local/bin/kamailio_exporter
chmod +x /usr/local/bin/kamailio_exporter

# Create systemd service
# The heredoc terminator (EOF) must stand alone on its line, otherwise the
# shell keeps reading the following commands into the unit file.
cat > /etc/systemd/system/kamailio-exporter.service << 'EOF'
[Unit]
Description=Kamailio Prometheus Exporter
After=kamailio.service

[Service]
ExecStart=/usr/local/bin/kamailio_exporter \
    --kamailio.address=unix:/var/run/kamailio/kamailio_ctl \
    --web.listen-address=:9494
Restart=on-failure

[Install]
WantedBy=multi-user.target
EOF

# Pick up the new unit, then enable it at boot and start it immediately.
systemctl daemon-reload
systemctl enable --now kamailio-exporter
# Install freeswitch-exporter
pip3 install freeswitch-exporter

# Or use a custom script that polls fs_cli.
# The script imports prometheus_client, so make sure it is installed.
pip3 install prometheus-client
cat > /usr/local/bin/freeswitch_exporter.py << 'PYEOF'
#!/usr/bin/env python3
"""FreeSWITCH Prometheus exporter.

Polls `fs_cli -x <command>` every 15 seconds and publishes the parsed values
as Prometheus gauges on port 9282.
"""
import re
import subprocess
import time

from prometheus_client import start_http_server, Gauge

# Upper bound for one fs_cli call so a hung CLI cannot stall the exporter.
FS_CLI_TIMEOUT_SECONDS = 5

# Metrics
calls_active = Gauge('freeswitch_calls_active', 'Active calls')
channels_active = Gauge('freeswitch_channels_active', 'Active channels')
registrations = Gauge('freeswitch_registrations_active', 'Active registrations')
cpu_idle = Gauge('freeswitch_cpu_idle_percent', 'CPU idle percentage')
sessions_peak = Gauge('freeswitch_sessions_peak', 'Peak sessions since start')
sessions_per_sec = Gauge('freeswitch_sessions_per_second', 'Current sessions per second')
uptime = Gauge('freeswitch_uptime_seconds', 'Uptime in seconds')

# Seconds per unit in the "UP 0 years, 0 days, 1 hour, ..." status line.
# A year is counted as 365 days.
UPTIME_UNIT_SECONDS = {
    'year': 365 * 86400,
    'day': 86400,
    'hour': 3600,
    'minute': 60,
    'second': 1,
}
# Matches singular and plural units; "milliseconds"/"microseconds" are skipped
# because the unit must directly follow the number.
UPTIME_RE = re.compile(r'(\d+) (year|day|hour|minute|second)s?\b')
PEAK_RE = re.compile(r'peak (\d+)')
IDLE_RE = re.compile(r'idle cpu [\d.]+/([\d.]+)')


def run_fs_cli(command):
    """Run one fs_cli API command and return its text output."""
    return subprocess.check_output(
        ["fs_cli", "-x", command],
        text=True,
        timeout=FS_CLI_TIMEOUT_SECONDS,
    )


def leading_int(output):
    """Return the first whitespace-separated token of *output* as an int."""
    return int(output.strip().split()[0])


def parse_status(output):
    """Update the status-derived gauges from `fs_cli -x status` output."""
    for raw_line in output.split('\n'):
        line = raw_line.strip()
        if line.startswith('UP '):
            uptime.set(sum(
                int(count) * UPTIME_UNIT_SECONDS[unit]
                for count, unit in UPTIME_RE.findall(line)
            ))
        elif 'session(s) - peak' in line:
            # The first number on this line is the current session count;
            # the peak is the number after the word "peak".
            match = PEAK_RE.search(line)
            if match:
                sessions_peak.set(int(match.group(1)))
        elif 'session(s) per Sec' in line:
            sessions_per_sec.set(float(line.split()[0]))
        elif 'idle cpu' in line:
            # NOTE(review): assumes the format "min idle cpu <min>/<current>"
            # — confirm against your FreeSWITCH version.
            match = IDLE_RE.search(line)
            if match:
                cpu_idle.set(float(match.group(1)))


def collect():
    """Poll FreeSWITCH once; on any failure log it and keep the old values."""
    try:
        # Active calls
        calls_active.set(leading_int(run_fs_cli("show calls count")))
        # Channels
        channels_active.set(leading_int(run_fs_cli("show channels count")))
        # Registrations
        registrations.set(leading_int(run_fs_cli("show registrations count")))
        # Status
        parse_status(run_fs_cli("status"))
    except Exception as e:
        # Top-level boundary of the polling loop: never let one bad poll
        # terminate the exporter.
        print(f"Collection error: {e}")


if __name__ == '__main__':
    start_http_server(9282)
    while True:
        collect()
        time.sleep(15)
PYEOF
chmod +x /usr/local/bin/freeswitch_exporter.py

# Create systemd service
# The heredoc terminator (EOF) must stand alone on its line.
cat > /etc/systemd/system/freeswitch-exporter.service << 'EOF'
[Unit]
Description=FreeSWITCH Prometheus Exporter
After=freeswitch.service

[Service]
ExecStart=/usr/local/bin/freeswitch_exporter.py
Restart=on-failure

[Install]
WantedBy=multi-user.target
EOF

systemctl daemon-reload
systemctl enable --now freeswitch-exporter
# Install freeswitch-exporter
pip3 install freeswitch-exporter

# Or use a custom script that polls fs_cli.
# The script imports prometheus_client, so make sure it is installed.
pip3 install prometheus-client
cat > /usr/local/bin/freeswitch_exporter.py << 'PYEOF'
#!/usr/bin/env python3
"""FreeSWITCH Prometheus exporter.

Polls `fs_cli -x <command>` every 15 seconds and publishes the parsed values
as Prometheus gauges on port 9282.
"""
import re
import subprocess
import time

from prometheus_client import start_http_server, Gauge

# Upper bound for one fs_cli call so a hung CLI cannot stall the exporter.
FS_CLI_TIMEOUT_SECONDS = 5

# Metrics
calls_active = Gauge('freeswitch_calls_active', 'Active calls')
channels_active = Gauge('freeswitch_channels_active', 'Active channels')
registrations = Gauge('freeswitch_registrations_active', 'Active registrations')
cpu_idle = Gauge('freeswitch_cpu_idle_percent', 'CPU idle percentage')
sessions_peak = Gauge('freeswitch_sessions_peak', 'Peak sessions since start')
sessions_per_sec = Gauge('freeswitch_sessions_per_second', 'Current sessions per second')
uptime = Gauge('freeswitch_uptime_seconds', 'Uptime in seconds')

# Seconds per unit in the "UP 0 years, 0 days, 1 hour, ..." status line.
# A year is counted as 365 days.
UPTIME_UNIT_SECONDS = {
    'year': 365 * 86400,
    'day': 86400,
    'hour': 3600,
    'minute': 60,
    'second': 1,
}
# Matches singular and plural units; "milliseconds"/"microseconds" are skipped
# because the unit must directly follow the number.
UPTIME_RE = re.compile(r'(\d+) (year|day|hour|minute|second)s?\b')
PEAK_RE = re.compile(r'peak (\d+)')
IDLE_RE = re.compile(r'idle cpu [\d.]+/([\d.]+)')


def run_fs_cli(command):
    """Run one fs_cli API command and return its text output."""
    return subprocess.check_output(
        ["fs_cli", "-x", command],
        text=True,
        timeout=FS_CLI_TIMEOUT_SECONDS,
    )


def leading_int(output):
    """Return the first whitespace-separated token of *output* as an int."""
    return int(output.strip().split()[0])


def parse_status(output):
    """Update the status-derived gauges from `fs_cli -x status` output."""
    for raw_line in output.split('\n'):
        line = raw_line.strip()
        if line.startswith('UP '):
            uptime.set(sum(
                int(count) * UPTIME_UNIT_SECONDS[unit]
                for count, unit in UPTIME_RE.findall(line)
            ))
        elif 'session(s) - peak' in line:
            # The first number on this line is the current session count;
            # the peak is the number after the word "peak".
            match = PEAK_RE.search(line)
            if match:
                sessions_peak.set(int(match.group(1)))
        elif 'session(s) per Sec' in line:
            sessions_per_sec.set(float(line.split()[0]))
        elif 'idle cpu' in line:
            # NOTE(review): assumes the format "min idle cpu <min>/<current>"
            # — confirm against your FreeSWITCH version.
            match = IDLE_RE.search(line)
            if match:
                cpu_idle.set(float(match.group(1)))


def collect():
    """Poll FreeSWITCH once; on any failure log it and keep the old values."""
    try:
        # Active calls
        calls_active.set(leading_int(run_fs_cli("show calls count")))
        # Channels
        channels_active.set(leading_int(run_fs_cli("show channels count")))
        # Registrations
        registrations.set(leading_int(run_fs_cli("show registrations count")))
        # Status
        parse_status(run_fs_cli("status"))
    except Exception as e:
        # Top-level boundary of the polling loop: never let one bad poll
        # terminate the exporter.
        print(f"Collection error: {e}")


if __name__ == '__main__':
    start_http_server(9282)
    while True:
        collect()
        time.sleep(15)
PYEOF
chmod +x /usr/local/bin/freeswitch_exporter.py

# Create systemd service
# The heredoc terminator (EOF) must stand alone on its line.
cat > /etc/systemd/system/freeswitch-exporter.service << 'EOF'
[Unit]
Description=FreeSWITCH Prometheus Exporter
After=freeswitch.service

[Service]
ExecStart=/usr/local/bin/freeswitch_exporter.py
Restart=on-failure

[Install]
WantedBy=multi-user.target
EOF

systemctl daemon-reload
systemctl enable --now freeswitch-exporter
# Install freeswitch-exporter
pip3 install freeswitch-exporter

# Or use a custom script that polls fs_cli.
# The script imports prometheus_client, so make sure it is installed.
pip3 install prometheus-client
cat > /usr/local/bin/freeswitch_exporter.py << 'PYEOF'
#!/usr/bin/env python3
"""FreeSWITCH Prometheus exporter.

Polls `fs_cli -x <command>` every 15 seconds and publishes the parsed values
as Prometheus gauges on port 9282.
"""
import re
import subprocess
import time

from prometheus_client import start_http_server, Gauge

# Upper bound for one fs_cli call so a hung CLI cannot stall the exporter.
FS_CLI_TIMEOUT_SECONDS = 5

# Metrics
calls_active = Gauge('freeswitch_calls_active', 'Active calls')
channels_active = Gauge('freeswitch_channels_active', 'Active channels')
registrations = Gauge('freeswitch_registrations_active', 'Active registrations')
cpu_idle = Gauge('freeswitch_cpu_idle_percent', 'CPU idle percentage')
sessions_peak = Gauge('freeswitch_sessions_peak', 'Peak sessions since start')
sessions_per_sec = Gauge('freeswitch_sessions_per_second', 'Current sessions per second')
uptime = Gauge('freeswitch_uptime_seconds', 'Uptime in seconds')

# Seconds per unit in the "UP 0 years, 0 days, 1 hour, ..." status line.
# A year is counted as 365 days.
UPTIME_UNIT_SECONDS = {
    'year': 365 * 86400,
    'day': 86400,
    'hour': 3600,
    'minute': 60,
    'second': 1,
}
# Matches singular and plural units; "milliseconds"/"microseconds" are skipped
# because the unit must directly follow the number.
UPTIME_RE = re.compile(r'(\d+) (year|day|hour|minute|second)s?\b')
PEAK_RE = re.compile(r'peak (\d+)')
IDLE_RE = re.compile(r'idle cpu [\d.]+/([\d.]+)')


def run_fs_cli(command):
    """Run one fs_cli API command and return its text output."""
    return subprocess.check_output(
        ["fs_cli", "-x", command],
        text=True,
        timeout=FS_CLI_TIMEOUT_SECONDS,
    )


def leading_int(output):
    """Return the first whitespace-separated token of *output* as an int."""
    return int(output.strip().split()[0])


def parse_status(output):
    """Update the status-derived gauges from `fs_cli -x status` output."""
    for raw_line in output.split('\n'):
        line = raw_line.strip()
        if line.startswith('UP '):
            uptime.set(sum(
                int(count) * UPTIME_UNIT_SECONDS[unit]
                for count, unit in UPTIME_RE.findall(line)
            ))
        elif 'session(s) - peak' in line:
            # The first number on this line is the current session count;
            # the peak is the number after the word "peak".
            match = PEAK_RE.search(line)
            if match:
                sessions_peak.set(int(match.group(1)))
        elif 'session(s) per Sec' in line:
            sessions_per_sec.set(float(line.split()[0]))
        elif 'idle cpu' in line:
            # NOTE(review): assumes the format "min idle cpu <min>/<current>"
            # — confirm against your FreeSWITCH version.
            match = IDLE_RE.search(line)
            if match:
                cpu_idle.set(float(match.group(1)))


def collect():
    """Poll FreeSWITCH once; on any failure log it and keep the old values."""
    try:
        # Active calls
        calls_active.set(leading_int(run_fs_cli("show calls count")))
        # Channels
        channels_active.set(leading_int(run_fs_cli("show channels count")))
        # Registrations
        registrations.set(leading_int(run_fs_cli("show registrations count")))
        # Status
        parse_status(run_fs_cli("status"))
    except Exception as e:
        # Top-level boundary of the polling loop: never let one bad poll
        # terminate the exporter.
        print(f"Collection error: {e}")


if __name__ == '__main__':
    start_http_server(9282)
    while True:
        collect()
        time.sleep(15)
PYEOF
chmod +x /usr/local/bin/freeswitch_exporter.py

# Create systemd service
# The heredoc terminator (EOF) must stand alone on its line.
cat > /etc/systemd/system/freeswitch-exporter.service << 'EOF'
[Unit]
Description=FreeSWITCH Prometheus Exporter
After=freeswitch.service

[Service]
ExecStart=/usr/local/bin/freeswitch_exporter.py
Restart=on-failure

[Install]
WantedBy=multi-user.target
EOF

systemctl daemon-reload
systemctl enable --now freeswitch-exporter
# rtpengine-exporter scrapes RTPEngine's statistics interface
cat > /usr/local/bin/rtpengine_exporter.py << 'PYEOF'
#!/usr/bin/env python3
"""RTPEngine Prometheus exporter via ng control protocol.

Sends one ng command over UDP every 15 seconds and publishes the counters
found in the reply as Prometheus gauges on port 9283.
"""
import socket
import time

import bencodepy
from prometheus_client import start_http_server, Gauge

RTPENGINE_HOST = "127.0.0.1"
RTPENGINE_PORT = 2223
# NOTE(review): "list totals" is the spelling used by the rtpengine-ctl CLI;
# the ng protocol may expect a different command (e.g. "statistics") with a
# different reply layout — confirm against the rtpengine ng documentation.
STATS_COMMAND = "list totals"

# Metrics
# NOTE: errors, packets_relayed and bytes_relayed are registered but never
# updated by collect(); they are kept so the exported metric names stay stable.
sessions = Gauge('rtpengine_sessions_active', 'Active media sessions')
sessions_total = Gauge('rtpengine_sessions_total', 'Total sessions since start')
errors = Gauge('rtpengine_errors_total', 'Total errors')
offer_total = Gauge('rtpengine_offer_total', 'Total offer commands')
answer_total = Gauge('rtpengine_answer_total', 'Total answer commands')
delete_total = Gauge('rtpengine_delete_total', 'Total delete commands')
packets_relayed = Gauge('rtpengine_packets_relayed', 'Packets relayed')
bytes_relayed = Gauge('rtpengine_bytes_relayed', 'Bytes relayed')


def query_rtpengine(command):
    """Send ng protocol command to RTPEngine and return the decoded reply.

    The request is "<cookie> <bencoded dict>"; the reply has the same shape.
    Raises an OSError subclass (e.g. socket.timeout) when no reply arrives
    within 2 seconds, and ValueError when the reply has no cookie separator.
    """
    cookie = "stats_" + str(int(time.time()))
    msg = bencodepy.encode({
        b"command": command.encode()
    })
    full_msg = f"{cookie} ".encode() + msg

    # The context manager closes the socket even when recvfrom() times out,
    # so a dead RTPEngine does not leak one descriptor per poll.
    with socket.socket(socket.AF_INET, socket.SOCK_DGRAM) as sock:
        sock.settimeout(2)
        sock.sendto(full_msg, (RTPENGINE_HOST, RTPENGINE_PORT))
        data, _ = sock.recvfrom(65535)

    # Strip cookie prefix
    space_idx = data.index(b' ')
    return bencodepy.decode(data[space_idx + 1:])


def collect():
    """Poll RTPEngine once; on any failure log it and keep the old values."""
    try:
        result = query_rtpengine(STATS_COMMAND)
        if result.get(b'result') != b'ok':
            # Surface protocol-level errors instead of silently skipping them.
            print(f"Collection error: unexpected reply {result!r}")
            return
        totals = result.get(b'totals', {})
        sessions.set(totals.get(b'current_sessions', 0))
        sessions_total.set(totals.get(b'total_sessions', 0))
        offer_total.set(totals.get(b'offer', 0))
        answer_total.set(totals.get(b'answer', 0))
        delete_total.set(totals.get(b'delete', 0))
    except Exception as e:
        # Top-level boundary of the polling loop: never let one bad poll
        # terminate the exporter.
        print(f"Collection error: {e}")


if __name__ == '__main__':
    start_http_server(9283)
    while True:
        collect()
        time.sleep(15)
PYEOF
chmod +x /usr/local/bin/rtpengine_exporter.py
# rtpengine-exporter scrapes RTPEngine's statistics interface
cat > /usr/local/bin/rtpengine_exporter.py << 'PYEOF'
#!/usr/bin/env python3
"""RTPEngine Prometheus exporter via ng control protocol.

Sends one ng command over UDP every 15 seconds and publishes the counters
found in the reply as Prometheus gauges on port 9283.
"""
import socket
import time

import bencodepy
from prometheus_client import start_http_server, Gauge

RTPENGINE_HOST = "127.0.0.1"
RTPENGINE_PORT = 2223
# NOTE(review): "list totals" is the spelling used by the rtpengine-ctl CLI;
# the ng protocol may expect a different command (e.g. "statistics") with a
# different reply layout — confirm against the rtpengine ng documentation.
STATS_COMMAND = "list totals"

# Metrics
# NOTE: errors, packets_relayed and bytes_relayed are registered but never
# updated by collect(); they are kept so the exported metric names stay stable.
sessions = Gauge('rtpengine_sessions_active', 'Active media sessions')
sessions_total = Gauge('rtpengine_sessions_total', 'Total sessions since start')
errors = Gauge('rtpengine_errors_total', 'Total errors')
offer_total = Gauge('rtpengine_offer_total', 'Total offer commands')
answer_total = Gauge('rtpengine_answer_total', 'Total answer commands')
delete_total = Gauge('rtpengine_delete_total', 'Total delete commands')
packets_relayed = Gauge('rtpengine_packets_relayed', 'Packets relayed')
bytes_relayed = Gauge('rtpengine_bytes_relayed', 'Bytes relayed')


def query_rtpengine(command):
    """Send ng protocol command to RTPEngine and return the decoded reply.

    The request is "<cookie> <bencoded dict>"; the reply has the same shape.
    Raises an OSError subclass (e.g. socket.timeout) when no reply arrives
    within 2 seconds, and ValueError when the reply has no cookie separator.
    """
    cookie = "stats_" + str(int(time.time()))
    msg = bencodepy.encode({
        b"command": command.encode()
    })
    full_msg = f"{cookie} ".encode() + msg

    # The context manager closes the socket even when recvfrom() times out,
    # so a dead RTPEngine does not leak one descriptor per poll.
    with socket.socket(socket.AF_INET, socket.SOCK_DGRAM) as sock:
        sock.settimeout(2)
        sock.sendto(full_msg, (RTPENGINE_HOST, RTPENGINE_PORT))
        data, _ = sock.recvfrom(65535)

    # Strip cookie prefix
    space_idx = data.index(b' ')
    return bencodepy.decode(data[space_idx + 1:])


def collect():
    """Poll RTPEngine once; on any failure log it and keep the old values."""
    try:
        result = query_rtpengine(STATS_COMMAND)
        if result.get(b'result') != b'ok':
            # Surface protocol-level errors instead of silently skipping them.
            print(f"Collection error: unexpected reply {result!r}")
            return
        totals = result.get(b'totals', {})
        sessions.set(totals.get(b'current_sessions', 0))
        sessions_total.set(totals.get(b'total_sessions', 0))
        offer_total.set(totals.get(b'offer', 0))
        answer_total.set(totals.get(b'answer', 0))
        delete_total.set(totals.get(b'delete', 0))
    except Exception as e:
        # Top-level boundary of the polling loop: never let one bad poll
        # terminate the exporter.
        print(f"Collection error: {e}")


if __name__ == '__main__':
    start_http_server(9283)
    while True:
        collect()
        time.sleep(15)
PYEOF
chmod +x /usr/local/bin/rtpengine_exporter.py
# rtpengine-exporter scrapes RTPEngine's statistics interface
cat > /usr/local/bin/rtpengine_exporter.py << 'PYEOF'
#!/usr/bin/env python3
"""RTPEngine Prometheus exporter via ng control protocol.

Sends one ng command over UDP every 15 seconds and publishes the counters
found in the reply as Prometheus gauges on port 9283.
"""
import socket
import time

import bencodepy
from prometheus_client import start_http_server, Gauge

RTPENGINE_HOST = "127.0.0.1"
RTPENGINE_PORT = 2223
# NOTE(review): "list totals" is the spelling used by the rtpengine-ctl CLI;
# the ng protocol may expect a different command (e.g. "statistics") with a
# different reply layout — confirm against the rtpengine ng documentation.
STATS_COMMAND = "list totals"

# Metrics
# NOTE: errors, packets_relayed and bytes_relayed are registered but never
# updated by collect(); they are kept so the exported metric names stay stable.
sessions = Gauge('rtpengine_sessions_active', 'Active media sessions')
sessions_total = Gauge('rtpengine_sessions_total', 'Total sessions since start')
errors = Gauge('rtpengine_errors_total', 'Total errors')
offer_total = Gauge('rtpengine_offer_total', 'Total offer commands')
answer_total = Gauge('rtpengine_answer_total', 'Total answer commands')
delete_total = Gauge('rtpengine_delete_total', 'Total delete commands')
packets_relayed = Gauge('rtpengine_packets_relayed', 'Packets relayed')
bytes_relayed = Gauge('rtpengine_bytes_relayed', 'Bytes relayed')


def query_rtpengine(command):
    """Send ng protocol command to RTPEngine and return the decoded reply.

    The request is "<cookie> <bencoded dict>"; the reply has the same shape.
    Raises an OSError subclass (e.g. socket.timeout) when no reply arrives
    within 2 seconds, and ValueError when the reply has no cookie separator.
    """
    cookie = "stats_" + str(int(time.time()))
    msg = bencodepy.encode({
        b"command": command.encode()
    })
    full_msg = f"{cookie} ".encode() + msg

    # The context manager closes the socket even when recvfrom() times out,
    # so a dead RTPEngine does not leak one descriptor per poll.
    with socket.socket(socket.AF_INET, socket.SOCK_DGRAM) as sock:
        sock.settimeout(2)
        sock.sendto(full_msg, (RTPENGINE_HOST, RTPENGINE_PORT))
        data, _ = sock.recvfrom(65535)

    # Strip cookie prefix
    space_idx = data.index(b' ')
    return bencodepy.decode(data[space_idx + 1:])


def collect():
    """Poll RTPEngine once; on any failure log it and keep the old values."""
    try:
        result = query_rtpengine(STATS_COMMAND)
        if result.get(b'result') != b'ok':
            # Surface protocol-level errors instead of silently skipping them.
            print(f"Collection error: unexpected reply {result!r}")
            return
        totals = result.get(b'totals', {})
        sessions.set(totals.get(b'current_sessions', 0))
        sessions_total.set(totals.get(b'total_sessions', 0))
        offer_total.set(totals.get(b'offer', 0))
        answer_total.set(totals.get(b'answer', 0))
        delete_total.set(totals.get(b'delete', 0))
    except Exception as e:
        # Top-level boundary of the polling loop: never let one bad poll
        # terminate the exporter.
        print(f"Collection error: {e}")


if __name__ == '__main__':
    start_http_server(9283)
    while True:
        collect()
        time.sleep(15)
PYEOF
chmod +x /usr/local/bin/rtpengine_exporter.py
scrape_configs: # Kamailio - job_name: 'kamailio' static_configs: - targets: - 'YOUR_KAM1_PRIVATE:9494' - 'YOUR_KAM2_PRIVATE:9494' labels: component: 'kamailio' # FreeSWITCH - job_name: 'freeswitch' static_configs: - targets: - 'YOUR_FS1_IP:9282' - 'YOUR_FS2_IP:9282' - 'YOUR_FS3_IP:9282' labels: component: 'freeswitch' # RTPEngine - job_name: 'rtpengine' static_configs: - targets: - 'YOUR_RTP1_PRIVATE:9283' - 'YOUR_RTP2_PRIVATE:9283' labels: component: 'rtpengine' # MariaDB (via mysqld_exporter) - job_name: 'mariadb' static_configs: - targets: - 'YOUR_DB1_IP:9104' - 'YOUR_DB2_IP:9104' - 'YOUR_DB3_IP:9104' labels: component: 'database'
scrape_configs: # Kamailio - job_name: 'kamailio' static_configs: - targets: - 'YOUR_KAM1_PRIVATE:9494' - 'YOUR_KAM2_PRIVATE:9494' labels: component: 'kamailio' # FreeSWITCH - job_name: 'freeswitch' static_configs: - targets: - 'YOUR_FS1_IP:9282' - 'YOUR_FS2_IP:9282' - 'YOUR_FS3_IP:9282' labels: component: 'freeswitch' # RTPEngine - job_name: 'rtpengine' static_configs: - targets: - 'YOUR_RTP1_PRIVATE:9283' - 'YOUR_RTP2_PRIVATE:9283' labels: component: 'rtpengine' # MariaDB (via mysqld_exporter) - job_name: 'mariadb' static_configs: - targets: - 'YOUR_DB1_IP:9104' - 'YOUR_DB2_IP:9104' - 'YOUR_DB3_IP:9104' labels: component: 'database'
scrape_configs: # Kamailio - job_name: 'kamailio' static_configs: - targets: - 'YOUR_KAM1_PRIVATE:9494' - 'YOUR_KAM2_PRIVATE:9494' labels: component: 'kamailio' # FreeSWITCH - job_name: 'freeswitch' static_configs: - targets: - 'YOUR_FS1_IP:9282' - 'YOUR_FS2_IP:9282' - 'YOUR_FS3_IP:9282' labels: component: 'freeswitch' # RTPEngine - job_name: 'rtpengine' static_configs: - targets: - 'YOUR_RTP1_PRIVATE:9283' - 'YOUR_RTP2_PRIVATE:9283' labels: component: 'rtpengine' # MariaDB (via mysqld_exporter) - job_name: 'mariadb' static_configs: - targets: - 'YOUR_DB1_IP:9104' - 'YOUR_DB2_IP:9104' - 'YOUR_DB3_IP:9104' labels: component: 'database'
Row 1: Platform Overview - Total active calls (sum of all FS instances) - Active registrations - Calls per second (rate) - Platform uptime Row 2: Kamailio - Active dialogs (gauge) - SIP response codes (stacked bar: 2xx, 3xx, 4xx, 5xx) - Dispatcher backend status (table: name, state, latency) - Shared memory usage (%) Row 3: FreeSWITCH - Active calls per instance (stacked area) - Channels per instance (line) - CPU usage per instance (line) - Sessions per second (rate) Row 4: RTPEngine - Active media sessions (gauge) - Packets relayed per second (rate) - Media errors (rate) - Session duration histogram Row 5: Database - Queries per second - Replication lag (Galera) - Connection count - Slow queries
Row 1: Platform Overview - Total active calls (sum of all FS instances) - Active registrations - Calls per second (rate) - Platform uptime Row 2: Kamailio - Active dialogs (gauge) - SIP response codes (stacked bar: 2xx, 3xx, 4xx, 5xx) - Dispatcher backend status (table: name, state, latency) - Shared memory usage (%) Row 3: FreeSWITCH - Active calls per instance (stacked area) - Channels per instance (line) - CPU usage per instance (line) - Sessions per second (rate) Row 4: RTPEngine - Active media sessions (gauge) - Packets relayed per second (rate) - Media errors (rate) - Session duration histogram Row 5: Database - Queries per second - Replication lag (Galera) - Connection count - Slow queries
Row 1: Platform Overview - Total active calls (sum of all FS instances) - Active registrations - Calls per second (rate) - Platform uptime Row 2: Kamailio - Active dialogs (gauge) - SIP response codes (stacked bar: 2xx, 3xx, 4xx, 5xx) - Dispatcher backend status (table: name, state, latency) - Shared memory usage (%) Row 3: FreeSWITCH - Active calls per instance (stacked area) - Channels per instance (line) - CPU usage per instance (line) - Sessions per second (rate) Row 4: RTPEngine - Active media sessions (gauge) - Packets relayed per second (rate) - Media errors (rate) - Session duration histogram Row 5: Database - Queries per second - Replication lag (Galera) - Connection count - Slow queries
# Install heplify agent on each SIP component (Kamailio, FreeSWITCH)
wget https://github.com/sipcapture/heplify/releases/latest/download/heplify -O /usr/local/bin/heplify
chmod +x /usr/local/bin/heplify

# Run heplify on Kamailio servers
cat > /etc/systemd/system/heplify.service << 'EOF'
[Unit]
Description=HEPlify SIP Capture Agent
After=network.target

[Service]
ExecStart=/usr/local/bin/heplify \
    -i eth0 \
    -hs YOUR_HOMER_IP:9060 \
    -m SIP \
    -dim REGISTER \
    -pr 5060-5061
Restart=on-failure

[Install]
WantedBy=multi-user.target
EOF

systemctl daemon-reload
systemctl enable --now heplify
# Install heplify agent on each SIP component (Kamailio, FreeSWITCH)
wget https://github.com/sipcapture/heplify/releases/latest/download/heplify -O /usr/local/bin/heplify
chmod +x /usr/local/bin/heplify

# Run heplify on Kamailio servers
cat > /etc/systemd/system/heplify.service << 'EOF'
[Unit]
Description=HEPlify SIP Capture Agent
After=network.target

[Service]
ExecStart=/usr/local/bin/heplify \
    -i eth0 \
    -hs YOUR_HOMER_IP:9060 \
    -m SIP \
    -dim REGISTER \
    -pr 5060-5061
Restart=on-failure

[Install]
WantedBy=multi-user.target
EOF

systemctl daemon-reload
systemctl enable --now heplify
# Install heplify agent on each SIP component (Kamailio, FreeSWITCH)
wget https://github.com/sipcapture/heplify/releases/latest/download/heplify -O /usr/local/bin/heplify
chmod +x /usr/local/bin/heplify

# Run heplify on Kamailio servers
cat > /etc/systemd/system/heplify.service << 'EOF'
[Unit]
Description=HEPlify SIP Capture Agent
After=network.target

[Service]
ExecStart=/usr/local/bin/heplify \
    -i eth0 \
    -hs YOUR_HOMER_IP:9060 \
    -m SIP \
    -dim REGISTER \
    -pr 5060-5061
Restart=on-failure

[Install]
WantedBy=multi-user.target
EOF

systemctl daemon-reload
systemctl enable --now heplify
# prometheus/alerts/voip-platform.yml
groups:
  - name: voip_platform
    rules:
      # All FreeSWITCH servers down.
      # Uses sum(up) rather than count(freeswitch_calls_active): when every
      # target is down the metric has no series, count() returns an empty
      # result, and "== 0" would never fire.
      - alert: AllMediaServersDown
        expr: sum(up{job="freeswitch"}) == 0
        for: 1m
        labels:
          severity: critical
        annotations:
          summary: "All FreeSWITCH media servers are down"

      # Single FreeSWITCH down
      - alert: MediaServerDown
        expr: up{job="freeswitch"} == 0
        for: 2m
        labels:
          severity: warning
        annotations:
          summary: "FreeSWITCH {{ $labels.instance }} is down"

      # Kamailio high error rate
      - alert: KamailioHighErrorRate
        expr: rate(kamailio_tmx_code_total{code=~"5.."}[5m]) > 0.5
        for: 5m
        labels:
          severity: warning
        annotations:
          summary: "Kamailio 5xx error rate > 0.5/sec"

      # Dispatcher all backends down.
      # Aggregated per Kamailio instance so the alert fires only when every
      # backend is down, not when a single one is.
      # NOTE(review): metric name depends on the Kamailio exporter in use - confirm.
      - alert: DispatcherAllBackendsDown
        expr: sum by (instance) (kamailio_dispatcher_target_up) == 0
        for: 30s
        labels:
          severity: critical
        annotations:
          summary: "All dispatcher backends are down"

      # RTPEngine down
      - alert: RTPEngineDown
        expr: up{job="rtpengine"} == 0
        for: 1m
        labels:
          severity: critical
        annotations:
          summary: "RTPEngine {{ $labels.instance }} is down"

      # High call volume (capacity planning)
      - alert: HighCallVolume
        expr: sum(freeswitch_calls_active) > 2000
        for: 5m
        labels:
          severity: warning
        annotations:
          summary: "Platform handling {{ $value }} concurrent calls (threshold: 2000)"

      # Database replication lag.
      # NOTE(review): assumes mysqld_exporter's global-status naming
      # (mysql_global_status_<variable>) - confirm against the exporter's /metrics.
      - alert: GaleraReplicationLag
        expr: mysql_global_status_wsrep_local_recv_queue_avg > 10
        for: 5m
        labels:
          severity: warning
        annotations:
          summary: "Galera replication queue building up"
# prometheus/alerts/voip-platform.yml
groups:
  - name: voip_platform
    rules:
      # All FreeSWITCH servers down.
      # Uses sum(up) rather than count(freeswitch_calls_active): when every
      # target is down the metric has no series, count() returns an empty
      # result, and "== 0" would never fire.
      - alert: AllMediaServersDown
        expr: sum(up{job="freeswitch"}) == 0
        for: 1m
        labels:
          severity: critical
        annotations:
          summary: "All FreeSWITCH media servers are down"

      # Single FreeSWITCH down
      - alert: MediaServerDown
        expr: up{job="freeswitch"} == 0
        for: 2m
        labels:
          severity: warning
        annotations:
          summary: "FreeSWITCH {{ $labels.instance }} is down"

      # Kamailio high error rate
      - alert: KamailioHighErrorRate
        expr: rate(kamailio_tmx_code_total{code=~"5.."}[5m]) > 0.5
        for: 5m
        labels:
          severity: warning
        annotations:
          summary: "Kamailio 5xx error rate > 0.5/sec"

      # Dispatcher all backends down.
      # Aggregated per Kamailio instance so the alert fires only when every
      # backend is down, not when a single one is.
      # NOTE(review): metric name depends on the Kamailio exporter in use - confirm.
      - alert: DispatcherAllBackendsDown
        expr: sum by (instance) (kamailio_dispatcher_target_up) == 0
        for: 30s
        labels:
          severity: critical
        annotations:
          summary: "All dispatcher backends are down"

      # RTPEngine down
      - alert: RTPEngineDown
        expr: up{job="rtpengine"} == 0
        for: 1m
        labels:
          severity: critical
        annotations:
          summary: "RTPEngine {{ $labels.instance }} is down"

      # High call volume (capacity planning)
      - alert: HighCallVolume
        expr: sum(freeswitch_calls_active) > 2000
        for: 5m
        labels:
          severity: warning
        annotations:
          summary: "Platform handling {{ $value }} concurrent calls (threshold: 2000)"

      # Database replication lag.
      # NOTE(review): assumes mysqld_exporter's global-status naming
      # (mysql_global_status_<variable>) - confirm against the exporter's /metrics.
      - alert: GaleraReplicationLag
        expr: mysql_global_status_wsrep_local_recv_queue_avg > 10
        for: 5m
        labels:
          severity: warning
        annotations:
          summary: "Galera replication queue building up"
# prometheus/alerts/voip-platform.yml
groups:
  - name: voip_platform
    rules:
      # All FreeSWITCH servers down.
      # Uses sum(up) rather than count(freeswitch_calls_active): when every
      # target is down the metric has no series, count() returns an empty
      # result, and "== 0" would never fire.
      - alert: AllMediaServersDown
        expr: sum(up{job="freeswitch"}) == 0
        for: 1m
        labels:
          severity: critical
        annotations:
          summary: "All FreeSWITCH media servers are down"

      # Single FreeSWITCH down
      - alert: MediaServerDown
        expr: up{job="freeswitch"} == 0
        for: 2m
        labels:
          severity: warning
        annotations:
          summary: "FreeSWITCH {{ $labels.instance }} is down"

      # Kamailio high error rate
      - alert: KamailioHighErrorRate
        expr: rate(kamailio_tmx_code_total{code=~"5.."}[5m]) > 0.5
        for: 5m
        labels:
          severity: warning
        annotations:
          summary: "Kamailio 5xx error rate > 0.5/sec"

      # Dispatcher all backends down.
      # Aggregated per Kamailio instance so the alert fires only when every
      # backend is down, not when a single one is.
      # NOTE(review): metric name depends on the Kamailio exporter in use - confirm.
      - alert: DispatcherAllBackendsDown
        expr: sum by (instance) (kamailio_dispatcher_target_up) == 0
        for: 30s
        labels:
          severity: critical
        annotations:
          summary: "All dispatcher backends are down"

      # RTPEngine down
      - alert: RTPEngineDown
        expr: up{job="rtpengine"} == 0
        for: 1m
        labels:
          severity: critical
        annotations:
          summary: "RTPEngine {{ $labels.instance }} is down"

      # High call volume (capacity planning)
      - alert: HighCallVolume
        expr: sum(freeswitch_calls_active) > 2000
        for: 5m
        labels:
          severity: warning
        annotations:
          summary: "Platform handling {{ $value }} concurrent calls (threshold: 2000)"

      # Database replication lag.
      # NOTE(review): assumes mysqld_exporter's global-status naming
      # (mysql_global_status_<variable>) - confirm against the exporter's /metrics.
      - alert: GaleraReplicationLag
        expr: mysql_global_status_wsrep_local_recv_queue_avg > 10
        for: 5m
        labels:
          severity: warning
        annotations:
          summary: "Galera replication queue building up"
# 1. Set up the new server (base OS + FreeSWITCH install from Section 7)
# 2. Configure SIP profile, ACL, dialplan (copy from existing FS node)
# 3. Test locally: fs_cli -x "sofia status profile kamailio"

# 4. Add to dispatcher database
mysql -u kamailio -pYOUR_DB_PASSWORD kamailio -e \
  "INSERT INTO dispatcher (setid, destination, flags, priority, attrs, description) \
   VALUES (1, 'sip:NEW_FS_IP:5060', 0, 0, 'weight=50;duid=fs04', 'FreeSWITCH-4 Media');"

# 5. Reload dispatcher on Kamailio
kamcmd dispatcher.reload

# 6. Verify the new node appears
kamcmd dispatcher.list

# 7. Monitor — should start receiving calls within seconds
# 1. Set up the new server (base OS + FreeSWITCH install from Section 7)
# 2. Configure SIP profile, ACL, dialplan (copy from existing FS node)
# 3. Test locally: fs_cli -x "sofia status profile kamailio"

# 4. Add to dispatcher database
mysql -u kamailio -pYOUR_DB_PASSWORD kamailio -e \
  "INSERT INTO dispatcher (setid, destination, flags, priority, attrs, description) \
   VALUES (1, 'sip:NEW_FS_IP:5060', 0, 0, 'weight=50;duid=fs04', 'FreeSWITCH-4 Media');"

# 5. Reload dispatcher on Kamailio
kamcmd dispatcher.reload

# 6. Verify the new node appears
kamcmd dispatcher.list

# 7. Monitor — should start receiving calls within seconds
# 1. Set up the new server (base OS + FreeSWITCH install from Section 7)
# 2. Configure SIP profile, ACL, dialplan (copy from existing FS node)
# 3. Test locally: fs_cli -x "sofia status profile kamailio"

# 4. Add to dispatcher database
mysql -u kamailio -pYOUR_DB_PASSWORD kamailio -e \
  "INSERT INTO dispatcher (setid, destination, flags, priority, attrs, description) \
   VALUES (1, 'sip:NEW_FS_IP:5060', 0, 0, 'weight=50;duid=fs04', 'FreeSWITCH-4 Media');"

# 5. Reload dispatcher on Kamailio
kamcmd dispatcher.reload

# 6. Verify the new node appears
kamcmd dispatcher.list

# 7. Monitor — should start receiving calls within seconds
# 1. Drain the node (Section 11)
./drain-freeswitch.sh fs03 YOUR_FS3_IP

# 2. Stop FreeSWITCH
ssh YOUR_FS3_IP "systemctl stop freeswitch"

# 3. Remove from dispatcher database
mysql -u kamailio -pYOUR_DB_PASSWORD kamailio -e \
  "DELETE FROM dispatcher WHERE destination='sip:YOUR_FS3_IP:5060';"

# 4. Reload dispatcher
kamcmd dispatcher.reload
# 1. Drain the node (Section 11)
./drain-freeswitch.sh fs03 YOUR_FS3_IP

# 2. Stop FreeSWITCH
ssh YOUR_FS3_IP "systemctl stop freeswitch"

# 3. Remove from dispatcher database
mysql -u kamailio -pYOUR_DB_PASSWORD kamailio -e \
  "DELETE FROM dispatcher WHERE destination='sip:YOUR_FS3_IP:5060';"

# 4. Reload dispatcher
kamcmd dispatcher.reload
# 1. Drain the node (Section 11)
./drain-freeswitch.sh fs03 YOUR_FS3_IP

# 2. Stop FreeSWITCH
ssh YOUR_FS3_IP "systemctl stop freeswitch"

# 3. Remove from dispatcher database
mysql -u kamailio -pYOUR_DB_PASSWORD kamailio -e \
  "DELETE FROM dispatcher WHERE destination='sip:YOUR_FS3_IP:5060';"

# 4. Reload dispatcher
kamcmd dispatcher.reload
# 1. Renew certificate (certbot handles this automatically)
certbot renew

# 2. Reload services (handled by deploy hook, but manual if needed)
systemctl reload kamailio
systemctl restart rtpengine

# 3. Verify TLS
openssl s_client -connect YOUR_PUBLIC_VIP:5061 -brief
openssl s_client -connect YOUR_PUBLIC_VIP:8443 -brief
# 1. Renew certificate (certbot handles this automatically)
certbot renew

# 2. Reload services (handled by deploy hook, but manual if needed)
systemctl reload kamailio
systemctl restart rtpengine

# 3. Verify TLS
openssl s_client -connect YOUR_PUBLIC_VIP:5061 -brief
openssl s_client -connect YOUR_PUBLIC_VIP:8443 -brief
# 1. Renew certificate (certbot handles this automatically)
certbot renew

# 2. Reload services (handled by deploy hook, but manual if needed)
systemctl reload kamailio
systemctl restart rtpengine

# 3. Verify TLS
openssl s_client -connect YOUR_PUBLIC_VIP:5061 -brief
openssl s_client -connect YOUR_PUBLIC_VIP:8443 -brief
Step 1: Find the Call-ID - From the SIP phone/trunk: check the INVITE headers - From Kamailio logs: grep for the caller/callee number - From Homer: search by phone number or time range Step 2: Trace through Kamailio grep "CALL-ID-HERE" /var/log/kamailio.log Step 3: Check which FreeSWITCH received it - Look for "DISPATCH:" log line with the Call-ID - Note the destination IP Step 4: Trace on FreeSWITCH ssh fs01 "grep 'CALL-ID-HERE' /var/log/freeswitch/freeswitch.log" Step 5: Check RTPEngine - RTPEngine logs show SDP manipulation per Call-ID journalctl -u rtpengine | grep "CALL-ID-HERE"
Step 1: Find the Call-ID - From the SIP phone/trunk: check the INVITE headers - From Kamailio logs: grep for the caller/callee number - From Homer: search by phone number or time range Step 2: Trace through Kamailio grep "CALL-ID-HERE" /var/log/kamailio.log Step 3: Check which FreeSWITCH received it - Look for "DISPATCH:" log line with the Call-ID - Note the destination IP Step 4: Trace on FreeSWITCH ssh fs01 "grep 'CALL-ID-HERE' /var/log/freeswitch/freeswitch.log" Step 5: Check RTPEngine - RTPEngine logs show SDP manipulation per Call-ID journalctl -u rtpengine | grep "CALL-ID-HERE"
Step 1: Find the Call-ID - From the SIP phone/trunk: check the INVITE headers - From Kamailio logs: grep for the caller/callee number - From Homer: search by phone number or time range Step 2: Trace through Kamailio grep "CALL-ID-HERE" /var/log/kamailio.log Step 3: Check which FreeSWITCH received it - Look for "DISPATCH:" log line with the Call-ID - Note the destination IP Step 4: Trace on FreeSWITCH ssh fs01 "grep 'CALL-ID-HERE' /var/log/freeswitch/freeswitch.log" Step 5: Check RTPEngine - RTPEngine logs show SDP manipulation per Call-ID journalctl -u rtpengine | grep "CALL-ID-HERE"
# ---- Kamailio ----

# Watch SIP traffic in real-time
sngrep -d eth0 port 5060

# Enable debug logging temporarily
kamcmd cfg.seti core debug 4
# ... reproduce the issue ...
kamcmd cfg.seti core debug 2  # Restore normal level

# Check active dialogs
kamcmd dlg.list

# Check dispatcher status
kamcmd dispatcher.list

# Memory usage
kamcmd core.shmmem

# ---- FreeSWITCH ----

# Show active calls
fs_cli -x "show calls"

# Show active channels with details
fs_cli -x "show channels"

# Trace a specific call (enable sofia debug)
fs_cli -x "sofia loglevel all 9"
# ... reproduce the issue ...
fs_cli -x "sofia loglevel all 0"  # Restore

# SIP trace on the kamailio profile
fs_cli -x "sofia profile kamailio siptrace on"
# ... reproduce ...
fs_cli -x "sofia profile kamailio siptrace off"

# Check codec negotiation
fs_cli -x "show channels" | grep -E "codec|read_codec|write_codec"

# ---- RTPEngine ----

# List all active sessions
rtpengine-ctl list sessions

# Show detailed stats
rtpengine-ctl list totals

# Show per-session details (requires Call-ID)
rtpengine-ctl list sessions CALL-ID-HERE
# ---- Kamailio ----

# Watch SIP traffic in real-time
sngrep -d eth0 port 5060

# Enable debug logging temporarily
kamcmd cfg.seti core debug 4
# ... reproduce the issue ...
kamcmd cfg.seti core debug 2  # Restore normal level

# Check active dialogs
kamcmd dlg.list

# Check dispatcher status
kamcmd dispatcher.list

# Memory usage
kamcmd core.shmmem

# ---- FreeSWITCH ----

# Show active calls
fs_cli -x "show calls"

# Show active channels with details
fs_cli -x "show channels"

# Trace a specific call (enable sofia debug)
fs_cli -x "sofia loglevel all 9"
# ... reproduce the issue ...
fs_cli -x "sofia loglevel all 0"  # Restore

# SIP trace on the kamailio profile
fs_cli -x "sofia profile kamailio siptrace on"
# ... reproduce ...
fs_cli -x "sofia profile kamailio siptrace off"

# Check codec negotiation
fs_cli -x "show channels" | grep -E "codec|read_codec|write_codec"

# ---- RTPEngine ----

# List all active sessions
rtpengine-ctl list sessions

# Show detailed stats
rtpengine-ctl list totals

# Show per-session details (requires Call-ID)
rtpengine-ctl list sessions CALL-ID-HERE
# ---- Kamailio ----

# Watch SIP traffic in real-time
sngrep -d eth0 port 5060

# Enable debug logging temporarily
kamcmd cfg.seti core debug 4
# ... reproduce the issue ...
kamcmd cfg.seti core debug 2  # Restore normal level

# Check active dialogs
kamcmd dlg.list

# Check dispatcher status
kamcmd dispatcher.list

# Memory usage
kamcmd core.shmmem

# ---- FreeSWITCH ----

# Show active calls
fs_cli -x "show calls"

# Show active channels with details
fs_cli -x "show channels"

# Trace a specific call (enable sofia debug)
fs_cli -x "sofia loglevel all 9"
# ... reproduce the issue ...
fs_cli -x "sofia loglevel all 0"  # Restore

# SIP trace on the kamailio profile
fs_cli -x "sofia profile kamailio siptrace on"
# ... reproduce ...
fs_cli -x "sofia profile kamailio siptrace off"

# Check codec negotiation
fs_cli -x "show channels" | grep -E "codec|read_codec|write_codec"

# ---- RTPEngine ----

# List all active sessions
rtpengine-ctl list sessions

# Show detailed stats
rtpengine-ctl list totals

# Show per-session details (requires Call-ID)
rtpengine-ctl list sessions CALL-ID-HERE
Symptom: Kamailio returns 503 "Service Unavailable" Check 1: Are FreeSWITCH servers marked as active? kamcmd dispatcher.list Look for "FLAGS: AP" (Active + Probing) If "FLAGS: IP" or "FLAGS: DX" — server is detected as down Check 2: Can Kamailio reach FreeSWITCH on port 5060? # From Kamailio server nc -u -z YOUR_FS1_IP 5060 && echo OK || echo FAIL sipsak -s sip:test@YOUR_FS1_IP:5060 Check 3: Is FreeSWITCH actually listening? ssh YOUR_FS1_IP "ss -ulnp | grep 5060" ssh YOUR_FS1_IP "fs_cli -x 'sofia status profile kamailio'" Check 4: ACL blocking? ssh YOUR_FS1_IP "fs_cli -x 'reloadacl'" Check /var/log/freeswitch/freeswitch.log for "ACL reject" Fix: If FS is running but dispatcher shows inactive, manually reset: kamcmd dispatcher.set_state a 1 sip:YOUR_FS1_IP:5060
Symptom: Kamailio returns 503 "Service Unavailable" Check 1: Are FreeSWITCH servers marked as active? kamcmd dispatcher.list Look for "FLAGS: AP" (Active + Probing) If "FLAGS: IP" or "FLAGS: DX" — server is detected as down Check 2: Can Kamailio reach FreeSWITCH on port 5060? # From Kamailio server nc -u -z YOUR_FS1_IP 5060 && echo OK || echo FAIL sipsak -s sip:test@YOUR_FS1_IP:5060 Check 3: Is FreeSWITCH actually listening? ssh YOUR_FS1_IP "ss -ulnp | grep 5060" ssh YOUR_FS1_IP "fs_cli -x 'sofia status profile kamailio'" Check 4: ACL blocking? ssh YOUR_FS1_IP "fs_cli -x 'reloadacl'" Check /var/log/freeswitch/freeswitch.log for "ACL reject" Fix: If FS is running but dispatcher shows inactive, manually reset: kamcmd dispatcher.set_state a 1 sip:YOUR_FS1_IP:5060
Symptom: Kamailio returns 503 "Service Unavailable" Check 1: Are FreeSWITCH servers marked as active? kamcmd dispatcher.list Look for "FLAGS: AP" (Active + Probing) If "FLAGS: IP" or "FLAGS: DX" — server is detected as down Check 2: Can Kamailio reach FreeSWITCH on port 5060? # From Kamailio server nc -u -z YOUR_FS1_IP 5060 && echo OK || echo FAIL sipsak -s sip:test@YOUR_FS1_IP:5060 Check 3: Is FreeSWITCH actually listening? ssh YOUR_FS1_IP "ss -ulnp | grep 5060" ssh YOUR_FS1_IP "fs_cli -x 'sofia status profile kamailio'" Check 4: ACL blocking? ssh YOUR_FS1_IP "fs_cli -x 'reloadacl'" Check /var/log/freeswitch/freeswitch.log for "ACL reject" Fix: If FS is running but dispatcher shows inactive, manually reset: kamcmd dispatcher.set_state a 1 sip:YOUR_FS1_IP:5060
Symptom: Call connects but audio only flows in one direction (or no audio) Check 1: Is RTPEngine running and reachable? echo 'd7:command4:pinge' | nc -u YOUR_RTP1_PRIVATE 2223 Expected: 'd6:result4:ponge' Check 2: Are the RTPEngine interfaces correct? rtpengine-ctl list sessions Verify the session shows correct internal and external IPs Check 3: SDP analysis — is RTPEngine rewriting SDPs correctly? sngrep on Kamailio — compare SDP in INVITE before and after rtpengine_offer() The c= line should change from external IP to internal IP (towards FS) The c= line in 200 OK should change from FS IP to external IP (towards trunk) Check 4: Firewall — are RTP ports open? On RTPEngine server: ufw status | grep 20000 Must allow 20000-40000/udp from anywhere (external endpoints) Check 5: Are there asymmetric routes? RTP must flow: External ↔ RTPEngine ↔ FreeSWITCH If any hop has incorrect routing, media breaks Common fix: Verify interface= lines in rtpengine.conf interface = internal/PRIVATE_IP ← Must be reachable from FreeSWITCH interface = external/PRIVATE_IP!PUBLIC_IP ← PUBLIC_IP must be routable from internet
Symptom: Call connects but audio only flows in one direction (or no audio) Check 1: Is RTPEngine running and reachable? echo 'd7:command4:pinge' | nc -u YOUR_RTP1_PRIVATE 2223 Expected: 'd6:result4:ponge' Check 2: Are the RTPEngine interfaces correct? rtpengine-ctl list sessions Verify the session shows correct internal and external IPs Check 3: SDP analysis — is RTPEngine rewriting SDPs correctly? sngrep on Kamailio — compare SDP in INVITE before and after rtpengine_offer() The c= line should change from external IP to internal IP (towards FS) The c= line in 200 OK should change from FS IP to external IP (towards trunk) Check 4: Firewall — are RTP ports open? On RTPEngine server: ufw status | grep 20000 Must allow 20000-40000/udp from anywhere (external endpoints) Check 5: Are there asymmetric routes? RTP must flow: External ↔ RTPEngine ↔ FreeSWITCH If any hop has incorrect routing, media breaks Common fix: Verify interface= lines in rtpengine.conf interface = internal/PRIVATE_IP ← Must be reachable from FreeSWITCH interface = external/PRIVATE_IP!PUBLIC_IP ← PUBLIC_IP must be routable from internet
Symptom: Call connects but audio only flows in one direction (or no audio) Check 1: Is RTPEngine running and reachable? echo 'd7:command4:pinge' | nc -u YOUR_RTP1_PRIVATE 2223 Expected: 'd6:result4:ponge' Check 2: Are the RTPEngine interfaces correct? rtpengine-ctl list sessions Verify the session shows correct internal and external IPs Check 3: SDP analysis — is RTPEngine rewriting SDPs correctly? sngrep on Kamailio — compare SDP in INVITE before and after rtpengine_offer() The c= line should change from external IP to internal IP (towards FS) The c= line in 200 OK should change from FS IP to external IP (towards trunk) Check 4: Firewall — are RTP ports open? On RTPEngine server: ufw status | grep 20000 Must allow 20000-40000/udp from anywhere (external endpoints) Check 5: Are there asymmetric routes? RTP must flow: External ↔ RTPEngine ↔ FreeSWITCH If any hop has incorrect routing, media breaks Common fix: Verify interface= lines in rtpengine.conf interface = internal/PRIVATE_IP ← Must be reachable from FreeSWITCH interface = external/PRIVATE_IP!PUBLIC_IP ← PUBLIC_IP must be routable from internet
Symptom: Registrations fail or loop infinitely Check: Kamailio is trying to proxy REGISTER to FreeSWITCH, FreeSWITCH is sending it back to Kamailio Fix: Ensure Kamailio handles registrations locally (save to location table) OR ensure FreeSWITCH does not relay registrations back In kamailio.cfg, the REGISTER handler should either: save("location") — store locally OR forward to FS and NOT relay back In FreeSWITCH, ensure the kamailio profile does NOT have: <param name="accept-blind-reg" value="true"/>
Symptom: Registrations fail or loop infinitely Check: Kamailio is trying to proxy REGISTER to FreeSWITCH, FreeSWITCH is sending it back to Kamailio Fix: Ensure Kamailio handles registrations locally (save to location table) OR ensure FreeSWITCH does not relay registrations back In kamailio.cfg, the REGISTER handler should either: save("location") — store locally OR forward to FS and NOT relay back In FreeSWITCH, ensure the kamailio profile does NOT have: <param name="accept-blind-reg" value="true"/>
Symptom: Registrations fail or loop infinitely Check: Kamailio is trying to proxy REGISTER to FreeSWITCH, FreeSWITCH is sending it back to Kamailio Fix: Ensure Kamailio handles registrations locally (save to location table) OR ensure FreeSWITCH does not relay registrations back In kamailio.cfg, the REGISTER handler should either: save("location") — store locally OR forward to FS and NOT relay back In FreeSWITCH, ensure the kamailio profile does NOT have: <param name="accept-blind-reg" value="true"/>
Symptom: VIP stays on failed node or does not move to standby Check 1: Is Keepalived running on both nodes? systemctl status keepalived Check 2: VRRP communication tcpdump -i eth0 vrrp Both nodes should be sending VRRP advertisements Check 3: Virtual router ID conflict? Ensure virtual_router_id is the same on both nodes Ensure no other Keepalived instance on the network uses the same ID Check 4: Check health script /etc/keepalived/check_kamailio.sh echo $? # Should be 0 (healthy) or 1 (unhealthy) Check 5: Non-local bind sysctl net.ipv4.ip_nonlocal_bind # Must be 1 for the backup node to send SIP from the VIP echo "net.ipv4.ip_nonlocal_bind = 1" >> /etc/sysctl.d/90-voip.conf sysctl -p /etc/sysctl.d/90-voip.conf
Symptom: VIP stays on failed node or does not move to standby Check 1: Is Keepalived running on both nodes? systemctl status keepalived Check 2: VRRP communication tcpdump -i eth0 vrrp Both nodes should be sending VRRP advertisements Check 3: Virtual router ID conflict? Ensure virtual_router_id is the same on both nodes Ensure no other Keepalived instance on the network uses the same ID Check 4: Check health script /etc/keepalived/check_kamailio.sh echo $? # Should be 0 (healthy) or 1 (unhealthy) Check 5: Non-local bind sysctl net.ipv4.ip_nonlocal_bind # Must be 1 for the backup node to send SIP from the VIP echo "net.ipv4.ip_nonlocal_bind = 1" >> /etc/sysctl.d/90-voip.conf sysctl -p /etc/sysctl.d/90-voip.conf
Symptom: VIP stays on failed node or does not move to standby Check 1: Is Keepalived running on both nodes? systemctl status keepalived Check 2: VRRP communication tcpdump -i eth0 vrrp Both nodes should be sending VRRP advertisements Check 3: Virtual router ID conflict? Ensure virtual_router_id is the same on both nodes Ensure no other Keepalived instance on the network uses the same ID Check 4: Check health script /etc/keepalived/check_kamailio.sh echo $? # Should be 0 (healthy) or 1 (unhealthy) Check 5: Non-local bind sysctl net.ipv4.ip_nonlocal_bind # Must be 1 for the backup node to send SIP from the VIP echo "net.ipv4.ip_nonlocal_bind = 1" >> /etc/sysctl.d/90-voip.conf sysctl -p /etc/sysctl.d/90-voip.conf
Kamailio (signaling only): Max CPS = CPU_cores × 1000 (approximately) 4-core = ~4,000 calls/sec setup rate Memory: ~1 KB per active dialog + ~0.5 KB per registration RTPEngine (media relay): Max streams = CPU_cores × 500 (G.711, no transcoding) 8-core = ~4,000 RTP streams = ~2,000 concurrent calls With transcoding: divide by 3-5x Bandwidth: 87 kbps × concurrent_calls × 2 (bidirectional) FreeSWITCH (media processing): G.711 (no transcoding): CPU_cores × 300 With recording: CPU_cores × 200 With transcoding: CPU_cores × 100 With conferencing: CPU_cores × 50 (mixing is expensive) Memory: ~2 MB per active call (+ recording buffer) Disk I/O: ~100 KB/s per recorded call (G.711) Database: 1 registration = 1 write + periodic refreshes 1 call = ~5-10 queries (setup + routing + CDR) 10,000 concurrent calls ≈ 500-1,000 queries/sec
Kamailio (signaling only): Max CPS = CPU_cores × 1000 (approximately) 4-core = ~4,000 calls/sec setup rate Memory: ~1 KB per active dialog + ~0.5 KB per registration RTPEngine (media relay): Max streams = CPU_cores × 500 (G.711, no transcoding) 8-core = ~4,000 RTP streams = ~2,000 concurrent calls With transcoding: divide by 3-5x Bandwidth: 87 kbps × concurrent_calls × 2 (bidirectional) FreeSWITCH (media processing): G.711 (no transcoding): CPU_cores × 300 With recording: CPU_cores × 200 With transcoding: CPU_cores × 100 With conferencing: CPU_cores × 50 (mixing is expensive) Memory: ~2 MB per active call (+ recording buffer) Disk I/O: ~100 KB/s per recorded call (G.711) Database: 1 registration = 1 write + periodic refreshes 1 call = ~5-10 queries (setup + routing + CDR) 10,000 concurrent calls ≈ 500-1,000 queries/sec
Kamailio (signaling only): Max CPS = CPU_cores × 1000 (approximately) 4-core = ~4,000 calls/sec setup rate Memory: ~1 KB per active dialog + ~0.5 KB per registration RTPEngine (media relay): Max streams = CPU_cores × 500 (G.711, no transcoding) 8-core = ~4,000 RTP streams = ~2,000 concurrent calls With transcoding: divide by 3-5x Bandwidth: 87 kbps × concurrent_calls × 2 (bidirectional) FreeSWITCH (media processing): G.711 (no transcoding): CPU_cores × 300 With recording: CPU_cores × 200 With transcoding: CPU_cores × 100 With conferencing: CPU_cores × 50 (mixing is expensive) Memory: ~2 MB per active call (+ recording buffer) Disk I/O: ~100 KB/s per recorded call (G.711) Database: 1 registration = 1 write + periodic refreshes 1 call = ~5-10 queries (setup + routing + CDR) 10,000 concurrent calls ≈ 500-1,000 queries/sec - Introduction — Why Combine Kamailio + FreeSWITCH
- Architecture Overview
- Prerequisites & Server Planning
- Kamailio SBC Configuration
- Dispatcher — Load Balancing FreeSWITCH
- RTPEngine — Media Relay
- FreeSWITCH Media Server Configuration
- Database-Driven Routing
- WebRTC Gateway
- High Availability — Kamailio
- High Availability — FreeSWITCH
- Geographic Distribution
- Monitoring & Operations
- Troubleshooting - Kamailio handles everything at the signaling layer: SIP routing, load balancing, authentication, rate limiting, topology hiding, NAT fixing, and DDoS protection.
- FreeSWITCH handles everything at the media layer: IVR menus, call queues, conference bridges, voicemail, recording, codec transcoding, and call control logic.
- RTPEngine sits between them handling media relay: NAT traversal for RTP, SRTP/DTLS bridging for WebRTC, and codec transcoding when needed. - 10,000+ concurrent calls — scale FreeSWITCH horizontally (add more servers)
- Zero-downtime upgrades — drain a FreeSWITCH node, upgrade, re-add to the pool
- Geographic distribution — Kamailio clusters in multiple data centers
- No single point of failure — every component is redundant
- WebRTC support — Kamailio terminates WSS, RTPEngine bridges DTLS↔RTP
- DDoS resilience — Kamailio's pike module and rate limiting protect backend servers
- Topology hiding — external parties never see your internal FreeSWITCH IPs - Twilio — Kamailio for SIP routing, custom media servers
- Vonage/Nexmo — Kamailio + FreeSWITCH at scale
- Plivo — Kamailio + FreeSWITCH (open about their stack)
- Large call centers — 500+ agents typically need this architecture
- Wholesale VoIP carriers — millions of minutes per month - Internal network: All components on same VLAN or low-latency private network (<1ms RTT)
- External network: Kamailio and RTPEngine need public IPs (or 1:1 NAT)
- Firewall: Only Kamailio and RTPEngine exposed externally; FreeSWITCH and DB are internal only
- Bandwidth: RTP uses ~87 kbps per call (G.711), so 1 Gbps supports ~10,000 concurrent call streams - Existing calls on that instance lose media (unavoidable — RTP state is local)
- New calls are automatically routed to the surviving instance
- Kamailio detects the failure via the control socket timeout - Dynamic routing — change DID→destination mapping without restarting anything
- Multi-tenant — domain-based isolation of users and routes
- Shared user directory — FreeSWITCH instances share the same user/extension database
- Centralized CDR — all call records in one place regardless of which FreeSWITCH handled the call
- Runtime changes — add/remove servers, DIDs, routes via database without restarts - Open https://webrtc.YOUR_DOMAIN/ in Chrome or Firefox
- Enter your SIP credentials and click Register
- Status should change to "Registered"
- Enter a number (e.g., 9196 for echo test) and click Call
- Verify audio flows both directions - Multiple FreeSWITCH instances run simultaneously (not standby — all active)
- Kamailio's dispatcher distributes calls across the pool
- If one FreeSWITCH fails, only its active calls are lost (not the entire platform)
- New calls are automatically routed to surviving instances
- The more instances in the pool, the smaller the blast radius of any single failure - Active calls on a failed node are lost. The media streams are in that instance's memory and cannot be transferred.
- Conference bridges on a failed node are terminated. All participants must rejoin.
- Voicemail sessions in progress are lost. The caller must call back. - Client resolves _sip._udp.YOUR_DOMAIN and gets 2 records with priority 10
- Client distributes requests based on weight: 60% to EU, 40% to US
- If the priority-10 servers fail, client falls back to priority-20 servers
- SIP INVITE includes a Route header for the selected server - Galera writes are synchronous — a write in EU must be acknowledged by US nodes before committing
- Cross-Atlantic latency is typically 80-120ms RTT
- This adds ~100ms to every database write (registration, CDR insert)
- For very high write volumes, consider: Asynchronous replication (standard MySQL replication) for CDRs Local caching in Kamailio htables for frequently-read data Read/write splitting: reads from local node, writes to any node
- Asynchronous replication (standard MySQL replication) for CDRs
- Local caching in Kamailio htables for frequently-read data
- Read/write splitting: reads from local node, writes to any node - Asynchronous replication (standard MySQL replication) for CDRs
- Local caching in Kamailio htables for frequently-read data
- Read/write splitting: reads from local node, writes to any node - RTPEngine should be in the same DC as the caller (or as close as possible)
- FreeSWITCH should be in the same DC as the RTPEngine it works with
- Cross-DC media relay adds 80-120ms of latency each way — noticeable in voice calls
- For calls between users in different DCs, the media should anchor at one DC (caller's preferred) - Separation of concerns is the fundamental design principle: Kamailio for signaling, RTPEngine for media relay, FreeSWITCH for call logic
- Dispatcher is the heart of the load balancing: understand algorithms, probing, and failover
- RTPEngine solves NAT, WebRTC bridging, and topology hiding for media — it is essential in any production deployment
- HA comes from pool architecture for FreeSWITCH (not active/standby) and VIP failover for Kamailio
- Monitor everything with Prometheus + Grafana + Homer — you cannot fix what you cannot see
- Practice draining and failover before you need it in production