βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ β FRONTEND / API GATEWAY β βββββββββββββ¬βββββββββββββββββββ¬ββββββββββββββββββββ¬ββββββββββββββ β β β βββββββββΌβββββββ ββββββββββΌββββββββ βββββββββΌβββββββββββ β PERSONALI- β β MULTI-AGENT β β AGENTIC RAG β β ZATION β β ORCHESTRATOR β β (Customer Q&A) β β ENGINE β β (Gemini 1.5) β β (Gemini + β β (Cloud Run) β β (Vertex AI β β Vertex Search) β βββββββββ¬βββββββ β Reasoning) β βββββββββ¬βββββββββββ β ββββββββββ¬ββββββββ β β β β ββββββββββββββββββββΌβββββββββββββββββββ β ββββββββββββββββββΌβββββββββββββββββ β GOOGLE CLOUD PUB/SUB β β (Shared Event Spine) β βββββ¬βββββββββββ¬βββββββββββ¬βββββββββ β β β βββββββββΌβββ βββββββΌβββββ ββββΌβββββββββββββ β Dataflow β βSpecialistβ β Vertex AI β β Streaming β β Agents β β Search Index β βββββββββ¬βββ βββββββ¬βββββ ββββ¬βββββββββββββ β β β βββββΌβββββββββββΌβββββββββββΌββββ β BIGQUERY β β (Shared Operational Store) β βββββββββββββββββββββββββββββββ CODE_BLOCK: βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ β FRONTEND / API GATEWAY β βββββββββββββ¬βββββββββββββββββββ¬ββββββββββββββββββββ¬ββββββββββββββ β β β βββββββββΌβββββββ ββββββββββΌββββββββ βββββββββΌβββββββββββ β PERSONALI- β β MULTI-AGENT β β AGENTIC RAG β β ZATION β β ORCHESTRATOR β β (Customer Q&A) β β ENGINE β β (Gemini 1.5) β β (Gemini + β β (Cloud Run) β β (Vertex AI β β Vertex Search) β βββββββββ¬βββββββ β Reasoning) β βββββββββ¬βββββββββββ β ββββββββββ¬ββββββββ β β β β ββββββββββββββββββββΌβββββββββββββββββββ β ββββββββββββββββββΌβββββββββββββββββ β GOOGLE CLOUD PUB/SUB β β (Shared Event Spine) β βββββ¬βββββββββββ¬βββββββββββ¬βββββββββ β β β βββββββββΌβββ βββββββΌβββββ ββββΌβββββββββββββ β Dataflow β βSpecialistβ β Vertex AI β β Streaming β β Agents β β Search Index β βββββββββ¬βββ βββββββ¬βββββ ββββ¬βββββββββββββ β β β βββββΌβββββββββββΌβββββββββββΌββββ β BIGQUERY β β (Shared Operational Store) β βββββββββββββββββββββββββββββββ CODE_BLOCK: 
βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ β FRONTEND / API GATEWAY β βββββββββββββ¬βββββββββββββββββββ¬ββββββββββββββββββββ¬ββββββββββββββ β β β βββββββββΌβββββββ ββββββββββΌββββββββ βββββββββΌβββββββββββ β PERSONALI- β β MULTI-AGENT β β AGENTIC RAG β β ZATION β β ORCHESTRATOR β β (Customer Q&A) β β ENGINE β β (Gemini 1.5) β β (Gemini + β β (Cloud Run) β β (Vertex AI β β Vertex Search) β βββββββββ¬βββββββ β Reasoning) β βββββββββ¬βββββββββββ β ββββββββββ¬ββββββββ β β β β ββββββββββββββββββββΌβββββββββββββββββββ β ββββββββββββββββββΌβββββββββββββββββ β GOOGLE CLOUD PUB/SUB β β (Shared Event Spine) β βββββ¬βββββββββββ¬βββββββββββ¬βββββββββ β β β βββββββββΌβββ βββββββΌβββββ ββββΌβββββββββββββ β Dataflow β βSpecialistβ β Vertex AI β β Streaming β β Agents β β Search Index β βββββββββ¬βββ βββββββ¬βββββ ββββ¬βββββββββββββ β β β βββββΌβββββββββββΌβββββββββββΌββββ β BIGQUERY β β (Shared Operational Store) β βββββββββββββββββββββββββββββββ CODE_BLOCK: { "event_type": "CART_ADD", "user_id": "u_8821", "sku_id": "SKU-4471", "session_id": "s_992abc", "ts": "2026-03-22T14:03:11Z", "context": { "device": "mobile", "location": "Atlanta, GA" } } CODE_BLOCK: { "event_type": "CART_ADD", "user_id": "u_8821", "sku_id": "SKU-4471", "session_id": "s_992abc", "ts": "2026-03-22T14:03:11Z", "context": { "device": "mobile", "location": "Atlanta, GA" } } CODE_BLOCK: { "event_type": "CART_ADD", "user_id": "u_8821", "sku_id": "SKU-4471", "session_id": "s_992abc", "ts": "2026-03-22T14:03:11Z", "context": { "device": "mobile", "location": "Atlanta, GA" } } CODE_BLOCK: feature_store_client.read_feature_values( entity_type="user", entity_ids=[user_id], feature_selector={ "id_matcher": { "ids": ["purchase_history", "session_clicks", "device_type", "location"] } } ) CODE_BLOCK: feature_store_client.read_feature_values( entity_type="user", entity_ids=[user_id], feature_selector={ "id_matcher": { "ids": ["purchase_history", "session_clicks", "device_type", "location"] } } ) 
CODE_BLOCK: feature_store_client.read_feature_values( entity_type="user", entity_ids=[user_id], feature_selector={ "id_matcher": { "ids": ["purchase_history", "session_clicks", "device_type", "location"] } } ) CODE_BLOCK: response = index_endpoint.find_neighbors( deployed_index_id="retail_item_embeddings", queries=[user_context_vector], num_neighbors=50 ) CODE_BLOCK: response = index_endpoint.find_neighbors( deployed_index_id="retail_item_embeddings", queries=[user_context_vector], num_neighbors=50 ) CODE_BLOCK: response = index_endpoint.find_neighbors( deployed_index_id="retail_item_embeddings", queries=[user_context_vector], num_neighbors=50 ) CODE_BLOCK: Operator / System Trigger β βΌ βββββββββββββββββββββββββββββββββββ β ORCHESTRATOR AGENT β β Gemini 1.5 Pro β β Vertex AI Reasoning Engine β β
- Decomposes tasks β β
- Routes to specialists β β
- Synthesizes final response β ββββββ¬βββββββββββ¬βββββββββββ¬βββββββ β Pub/Sub β β βΌ βΌ βΌ βββββββββββ ββββββββββ ββββββββββββ ββββββββββββ βInventoryβ βPricing β βSupplier β βCustomer β βAgent β βAgent β βAgent β βAgent β βBigQuery β βBQ ML β βVertex AI β βAgentic β β β β β βSearch β βRAG βββββββ Layer 3 βββββββββββ ββββββββββ ββββββββββββ ββββββββββββ CODE_BLOCK: Operator / System Trigger β βΌ βββββββββββββββββββββββββββββββββββ β ORCHESTRATOR AGENT β β Gemini 1.5 Pro β β Vertex AI Reasoning Engine β β
- Decomposes tasks β β
- Routes to specialists β β
- Synthesizes final response β ββββββ¬βββββββββββ¬βββββββββββ¬βββββββ β Pub/Sub β β βΌ βΌ βΌ βββββββββββ ββββββββββ ββββββββββββ ββββββββββββ βInventoryβ βPricing β βSupplier β βCustomer β βAgent β βAgent β βAgent β βAgent β βBigQuery β βBQ ML β βVertex AI β βAgentic β β β β β βSearch β βRAG βββββββ Layer 3 βββββββββββ ββββββββββ ββββββββββββ ββββββββββββ CODE_BLOCK: Operator / System Trigger β βΌ βββββββββββββββββββββββββββββββββββ β ORCHESTRATOR AGENT β β Gemini 1.5 Pro β β Vertex AI Reasoning Engine β β
- Decomposes tasks β β
- Routes to specialists β β
- Synthesizes final response β ββββββ¬βββββββββββ¬βββββββββββ¬βββββββ β Pub/Sub β β βΌ βΌ βΌ βββββββββββ ββββββββββ ββββββββββββ ββββββββββββ βInventoryβ βPricing β βSupplier β βCustomer β βAgent β βAgent β βAgent β βAgent β βBigQuery β βBQ ML β βVertex AI β βAgentic β β β β β βSearch β βRAG βββββββ Layer 3 βββββββββββ ββββββββββ ββββββββββββ ββββββββββββ COMMAND_BLOCK: tasks = orchestrator.decompose(query) # β [ # {"agent": "inventory", "task": "get_stock_level", "sku": "SKU-991"}, # {"agent": "supplier", "task": "get_eta_and_cost", "sku": "SKU-991"}, # {"agent": "pricing", "task": "get_reorder_cost", "sku": "SKU-991"} # ] COMMAND_BLOCK: tasks = orchestrator.decompose(query) # β [ # {"agent": "inventory", "task": "get_stock_level", "sku": "SKU-991"}, # {"agent": "supplier", "task": "get_eta_and_cost", "sku": "SKU-991"}, # {"agent": "pricing", "task": "get_reorder_cost", "sku": "SKU-991"} # ] COMMAND_BLOCK: tasks = orchestrator.decompose(query) # β [ # {"agent": "inventory", "task": "get_stock_level", "sku": "SKU-991"}, # {"agent": "supplier", "task": "get_eta_and_cost", "sku": "SKU-991"}, # {"agent": "pricing", "task": "get_reorder_cost", "sku": "SKU-991"} # ] COMMAND_BLOCK: # Inventory Agent stock = bq_client.query(""" SELECT units_available FROM inventory_snapshot WHERE sku_id = 'SKU-991' AND store_id = 'DC-ATL' """).result() # Pricing Agent (BigQuery ML) reorder_cost = bq_client.query(""" SELECT ML.PREDICT(MODEL `retail.pricing_model`, (SELECT * FROM pricing_signals WHERE sku_id = 'SKU-991')) """).result() COMMAND_BLOCK: # Inventory Agent stock = bq_client.query(""" SELECT units_available FROM inventory_snapshot WHERE sku_id = 'SKU-991' AND store_id = 'DC-ATL' """).result() # Pricing Agent (BigQuery ML) reorder_cost = bq_client.query(""" SELECT ML.PREDICT(MODEL `retail.pricing_model`, (SELECT * FROM pricing_signals WHERE sku_id = 'SKU-991')) """).result() COMMAND_BLOCK: # Inventory Agent stock = bq_client.query(""" SELECT units_available FROM inventory_snapshot 
WHERE sku_id = 'SKU-991' AND store_id = 'DC-ATL' """).result() # Pricing Agent (BigQuery ML) reorder_cost = bq_client.query(""" SELECT ML.PREDICT(MODEL `retail.pricing_model`, (SELECT * FROM pricing_signals WHERE sku_id = 'SKU-991')) """).result() CODE_BLOCK: Orchestrator β "Reorder 50 units from Vendor A at $4.20/unit, ETA 3 days. Current stock: 8 units (below reorder threshold of 15)." β
CODE_BLOCK: Orchestrator β "Reorder 50 units from Vendor A at $4.20/unit, ETA 3 days. Current stock: 8 units (below reorder threshold of 15)." β
CODE_BLOCK: Orchestrator β "Reorder 50 units from Vendor A at $4.20/unit, ETA 3 days. Current stock: 8 units (below reorder threshold of 15)." β
CODE_BLOCK: CREATE TABLE retail.agent_decision_log ( request_id STRING, ts TIMESTAMP, agent_called STRING, tools_used ARRAY<STRING>, input_payload JSON, output_payload JSON, latency_ms INT64, confidence FLOAT64 ); CODE_BLOCK: CREATE TABLE retail.agent_decision_log ( request_id STRING, ts TIMESTAMP, agent_called STRING, tools_used ARRAY<STRING>, input_payload JSON, output_payload JSON, latency_ms INT64, confidence FLOAT64 ); CODE_BLOCK: CREATE TABLE retail.agent_decision_log ( request_id STRING, ts TIMESTAMP, agent_called STRING, tools_used ARRAY<STRING>, input_payload JSON, output_payload JSON, latency_ms INT64, confidence FLOAT64 ); COMMAND_BLOCK: tools = [ VertexAISearchTool(index="retail_policy_index"), VertexAISearchTool(index="retail_product_index"), BigQueryTool(query_template=INVENTORY_QUERY) # live call, not indexed ] COMMAND_BLOCK: tools = [ VertexAISearchTool(index="retail_policy_index"), VertexAISearchTool(index="retail_product_index"), BigQueryTool(query_template=INVENTORY_QUERY) # live call, not indexed ] COMMAND_BLOCK: tools = [ VertexAISearchTool(index="retail_policy_index"), VertexAISearchTool(index="retail_product_index"), BigQueryTool(query_template=INVENTORY_QUERY) # live call, not indexed ] CODE_BLOCK: Agent Plan: Sub-query A β Policy Index: "online purchase battery return policy in-store" Sub-query B β BigQuery Tool: SELECT units_available FROM inventory_snapshot WHERE sku_id='SKU-4471' AND store='GA-CUMMING' CODE_BLOCK: Agent Plan: Sub-query A β Policy Index: "online purchase battery return policy in-store" Sub-query B β BigQuery Tool: SELECT units_available FROM inventory_snapshot WHERE sku_id='SKU-4471' AND store='GA-CUMMING' CODE_BLOCK: Agent Plan: Sub-query A β Policy Index: "online purchase battery return policy in-store" Sub-query B β BigQuery Tool: SELECT units_available FROM inventory_snapshot WHERE sku_id='SKU-4471' AND store='GA-CUMMING' CODE_BLOCK: "Yes β online purchases can be returned in-store within 90 days (Policy Β§3.2). 
The 40V battery (SKU-4471) shows 3 units in stock at Cumming, GA as of 14:07 EST today." CODE_BLOCK: "Yes β online purchases can be returned in-store within 90 days (Policy Β§3.2). The 40V battery (SKU-4471) shows 3 units in stock at Cumming, GA as of 14:07 EST today." CODE_BLOCK: "Yes β online purchases can be returned in-store within 90 days (Policy Β§3.2). The 40V battery (SKU-4471) shows 3 units in stock at Cumming, GA as of 14:07 EST today." COMMAND_BLOCK: MAX_RETRIES = 3 for attempt in range(MAX_RETRIES): result = vertex_search.retrieve(query, index=index_id) if result.confidence_score >= THRESHOLD: return result # Reformulate: broaden scope, try synonyms, switch retrieval mode query = agent.reformulate(query, attempt) # After max retries: escalate to human agent queue escalate_to_human(original_query) COMMAND_BLOCK: MAX_RETRIES = 3 for attempt in range(MAX_RETRIES): result = vertex_search.retrieve(query, index=index_id) if result.confidence_score >= THRESHOLD: return result # Reformulate: broaden scope, try synonyms, switch retrieval mode query = agent.reformulate(query, attempt) # After max retries: escalate to human agent queue escalate_to_human(original_query) COMMAND_BLOCK: MAX_RETRIES = 3 for attempt in range(MAX_RETRIES): result = vertex_search.retrieve(query, index=index_id) if result.confidence_score >= THRESHOLD: return result # Reformulate: broaden scope, try synonyms, switch retrieval mode query = agent.reformulate(query, attempt) # After max retries: escalate to human agent queue escalate_to_human(original_query) CODE_BLOCK: 1. Customer browses β Pub/Sub event β Personalization Engine surfaces relevant products (Layer 1) 2. Inventory drops below threshold β Pub/Sub alert β Orchestrator Agent dispatches reorder across 3 specialist agents in parallel (Layer 2) 3. Customer asks: "Is this in stock?" β Customer Agent (Layer 2) β Agentic RAG (Layer 3) queries BigQuery live + policy index β grounded, cited answer in < 2s 4. 
All events → BigQuery agent_decision_log + interaction_log → weekly eval reports + model retraining for Layers 1 & 3 CODE_BLOCK: 1. Customer browses → Pub/Sub event → Personalization Engine surfaces relevant products (Layer 1) 2. Inventory drops below threshold → Pub/Sub alert → Orchestrator Agent dispatches reorder across 3 specialist agents in parallel (Layer 2) 3. Customer asks: "Is this in stock?" → Customer Agent (Layer 2) → Agentic RAG (Layer 3) queries BigQuery live + policy index → grounded, cited answer in < 2s 4. All events → BigQuery agent_decision_log + interaction_log → weekly eval reports + model retraining for Layers 1 & 3 CODE_BLOCK: 1. Customer browses → Pub/Sub event → Personalization Engine surfaces relevant products (Layer 1) 2. Inventory drops below threshold → Pub/Sub alert → Orchestrator Agent dispatches reorder across 3 specialist agents in parallel (Layer 2) 3. Customer asks: "Is this in stock?" → Customer Agent (Layer 2) → Agentic RAG (Layer 3) queries BigQuery live + policy index → grounded, cited answer in < 2s 4. All events → BigQuery agent_decision_log + interaction_log → weekly eval reports + model retraining for Layers 1 & 3
- Session feature update → Vertex AI Feature Store (< 5s latency)
- Interaction log → BigQuery (for offline model training)
- Current inventory level
- Promotional pricing flag
- User's price sensitivity segment
- Real-time trend score
- Loose coupling: agents have no direct dependency on each other, only on topic names
- Fault tolerance: if an agent crashes, the message is retained and redelivered on recovery
- Independent scaling: each Cloud Run agent scales on its own Pub/Sub queue depth
- A single customer question often spans multiple knowledge domains (policy + inventory + product specs)
- Inventory data goes stale in minutes — you cannot index it as static documents
- Retrieval confidence varies — a system that cannot detect low-confidence answers will hallucinate
- Set up BigQuery tables: inventory_snapshot, interaction_log, agent_decision_log
- Stand up Pub/Sub topics and Dataflow streaming job
- This infrastructure is shared by all three layers — do it once, use it everywhere
- Train a two-tower model on BigQuery interaction history
- Index item embeddings into Vertex AI Matching Engine
- Wire up Cloud Run serving API
- Measure: recommendation CTR vs. batch baseline
- Start with two agents: Inventory + Pricing
- Orchestrator on Vertex AI Reasoning Engine
- Add Supplier Agent once the first two are stable
- Index return policy + product catalog into Vertex AI Search
- Wire the BigQuery inventory tool into the agent
- Deploy as the Customer Agent inside your multi-agent system
- Share infrastructure, not code. BigQuery and Pub/Sub serve all three layers. Build them once.
- The Customer Agent IS Agentic RAG. Don't build these as separate projects.
- The agent_decision_log is your most valuable table. It is your audit trail, your eval dataset, and your retraining signal.
- Personalization cold start is solved by context, not history. Device + time + location gets you 80% of the way there for new users.
- Hybrid retrieval beats pure vector search for retail. BM25 handles part numbers and model codes that semantic search misses.