git clone https://github.com/kelsey-aai/voice-agent-agora
cd voice-agent-agora
python -m venv .venv && source .venv/bin/activate
pip install -r requirements.txt
git clone https://github.com/kelsey-aai/voice-agent-agora
cd voice-agent-agora
python -m venv .venv && source .venv/bin/activate
pip install -r requirements.txt
git clone https://github.com/kelsey-aai/voice-agent-agora
cd voice-agent-agora
python -m venv .venv && source .venv/bin/activate
pip install -r requirements.txt
cp .env.example .env
cp .env.example .env
cp .env.example .env
ASSEMBLYAI_API_KEY=your_assemblyai_key
AGORA_APP_ID=your_agora_app_id
AGORA_APP_CERTIFICATE=your_agora_app_certificate
AGORA_CHANNEL=voice-agent-demo
AGORA_BOT_UID=9999
ASSEMBLYAI_API_KEY=your_assemblyai_key
AGORA_APP_ID=your_agora_app_id
AGORA_APP_CERTIFICATE=your_agora_app_certificate
AGORA_CHANNEL=voice-agent-demo
AGORA_BOT_UID=9999
ASSEMBLYAI_API_KEY=your_assemblyai_key
AGORA_APP_ID=your_agora_app_id
AGORA_APP_CERTIFICATE=your_agora_app_certificate
AGORA_CHANNEL=voice-agent-demo
AGORA_BOT_UID=9999
python bot.py --channel voice-agent-demo
python bot.py --channel voice-agent-demo
python bot.py --channel voice-agent-demo
URL = "wss://agents.assemblyai.com/v1/ws"
headers = {"Authorization": f"Bearer {API_KEY}"}

async with websockets.connect(URL, additional_headers=headers) as ws:
    await ws.send(json.dumps({
        "type": "session.update",
        "session": {
            "system_prompt": "You are a friendly voice assistant.",
            "greeting": "Hi — I just joined the call.",
            "input": {"format": {"encoding": "audio/pcm"}},
            "output": {"voice": "ivy", "format": {"encoding": "audio/pcm"}},
        },
    }))
URL = "wss://agents.assemblyai.com/v1/ws"
headers = {"Authorization": f"Bearer {API_KEY}"}

async with websockets.connect(URL, additional_headers=headers) as ws:
    await ws.send(json.dumps({
        "type": "session.update",
        "session": {
            "system_prompt": "You are a friendly voice assistant.",
            "greeting": "Hi — I just joined the call.",
            "input": {"format": {"encoding": "audio/pcm"}},
            "output": {"voice": "ivy", "format": {"encoding": "audio/pcm"}},
        },
    }))
URL = "wss://agents.assemblyai.com/v1/ws"
headers = {"Authorization": f"Bearer {API_KEY}"}

async with websockets.connect(URL, additional_headers=headers) as ws:
    await ws.send(json.dumps({
        "type": "session.update",
        "session": {
            "system_prompt": "You are a friendly voice assistant.",
            "greeting": "Hi — I just joined the call.",
            "input": {"format": {"encoding": "audio/pcm"}},
            "output": {"voice": "ivy", "format": {"encoding": "audio/pcm"}},
        },
    }))
def on_playback_audio_frame_before_mixing(self, channel_id, uid, frame):
    pcm16 = bytes(frame.buffer)  # 16 kHz PCM16
    pcm24 = resample_pcm16(pcm16, 16_000, 24_000)
    loop.call_soon_threadsafe(agent.inbound_audio.put_nowait, pcm24)
    return 0
def on_playback_audio_frame_before_mixing(self, channel_id, uid, frame):
    pcm16 = bytes(frame.buffer)  # 16 kHz PCM16
    pcm24 = resample_pcm16(pcm16, 16_000, 24_000)
    loop.call_soon_threadsafe(agent.inbound_audio.put_nowait, pcm24)
    return 0
def on_playback_audio_frame_before_mixing(self, channel_id, uid, frame):
    pcm16 = bytes(frame.buffer)  # 16 kHz PCM16
    pcm24 = resample_pcm16(pcm16, 16_000, 24_000)
    loop.call_soon_threadsafe(agent.inbound_audio.put_nowait, pcm24)
    return 0
chunk = await mic_queue.get()
await ws.send(json.dumps({
    "type": "input.audio",
    "audio": base64.b64encode(chunk).decode(),
}))
chunk = await mic_queue.get()
await ws.send(json.dumps({
    "type": "input.audio",
    "audio": base64.b64encode(chunk).decode(),
}))
chunk = await mic_queue.get()
await ws.send(json.dumps({
    "type": "input.audio",
    "audio": base64.b64encode(chunk).decode(),
}))
elif t == "reply.audio":
    pcm24 = base64.b64decode(event["data"])  # 24 kHz PCM16 from the agent
    await self.outbound_audio.put(pcm24)
    pcm16 = resample_pcm16(pcm24, 24_000, 16_000)
    self.pcm_sender.send_audio_pcm_data(
        pcm16, 0, len(pcm16)//2, 2, 1, 16_000,
    )
elif t == "reply.audio":
    pcm24 = base64.b64decode(event["data"])  # 24 kHz PCM16 from the agent
    await self.outbound_audio.put(pcm24)
    pcm16 = resample_pcm16(pcm24, 24_000, 16_000)
    self.pcm_sender.send_audio_pcm_data(
        pcm16, 0, len(pcm16)//2, 2, 1, 16_000,
    )
elif t == "reply.audio":
    pcm24 = base64.b64decode(event["data"])  # 24 kHz PCM16 from the agent
    await self.outbound_audio.put(pcm24)
    pcm16 = resample_pcm16(pcm24, 24_000, 16_000)
    self.pcm_sender.send_audio_pcm_data(
        pcm16, 0, len(pcm16)//2, 2, 1, 16_000,
    )
elif t == "reply.done" and event.get("status") == "interrupted":
    while not outbound_audio.empty():
        outbound_audio.get_nowait()
elif t == "reply.done" and event.get("status") == "interrupted":
    while not outbound_audio.empty():
        outbound_audio.get_nowait()
elif t == "reply.done" and event.get("status") == "interrupted":
    while not outbound_audio.empty():
        outbound_audio.get_nowait()
"output": {"voice": "james"} # conversational US male
"output": {"voice": "sophie"} # clear UK female
"output": {"voice": "diego"} # Latin American Spanish
"output": {"voice": "arjun"} # Hindi/Hinglish
"output": {"voice": "james"} # conversational US male
"output": {"voice": "sophie"} # clear UK female
"output": {"voice": "diego"} # Latin American Spanish
"output": {"voice": "arjun"} # Hindi/Hinglish
"output": {"voice": "james"} # conversational US male
"output": {"voice": "sophie"} # clear UK female
"output": {"voice": "diego"} # Latin American Spanish
"output": {"voice": "arjun"} # Hindi/Hinglish
"input": {
    "turn_detection": {
        "vad_threshold": 0.5,
        "min_silence": 600,
        "max_silence": 1500,
        "interrupt_response": True,
    }
}
"input": {
    "turn_detection": {
        "vad_threshold": 0.5,
        "min_silence": 600,
        "max_silence": 1500,
        "interrupt_response": True,
    }
}
"input": {
    "turn_detection": {
        "vad_threshold": 0.5,
        "min_silence": 600,
        "max_silence": 1500,
        "interrupt_response": True,
    }
}
"input": {"keyterms": ["AssemblyAI", "Agora", "Universal-3"]}
"input": {"keyterms": ["AssemblyAI", "Agora", "Universal-3"]}
"input": {"keyterms": ["AssemblyAI", "Agora", "Universal-3"]}
- Python 3.10+
- An Agora project with an App ID (and App Certificate if enabled)
- An AssemblyAI API key — free tier available
- Linux or macOS (the Agora native server SDK does not officially ship Windows wheels; use WSL2 or a Linux container on Windows)
- agora-python-server-sdk is a beta wrapper around Agora's native C++ SDK. Class layouts have moved between minor versions. We pin 2.2.4 and document the exact API surface the bot uses.
- Agora's recommended path for new voice-agent projects is the Conversational AI Engine — a hosted REST service. Use this tutorial when you want the full AI pipeline on AssemblyAI's Voice Agent API.