Agents & Orchestration Advanced
Pipeline Orchestration
Build an HTTP API → Queue → Worker pipeline with Claude CLI as the agent runtime
Command
# Architecture:
# Client → HTTP API → Queue → Worker → claude -p → Parse → Return
#                               ↓
#                         Session Store

# Simplest backend call:
$ claude -p "Summarize: $(cat input.txt)" \
    --output-format json \
    --max-budget-usd 0.50 \
    --no-session-persistence \
    --permission-mode bypassPermissions
Response
{
"result": "The document describes...",
"total_cost_usd": 0.018,
"session_id": "a1b2c3d4-..."
}
Parsing Code
// SaaS pattern: store session per user

/**
 * Runs a prompt for a user, resuming that user's active session when one
 * exists, then records the resulting session id and cost.
 *
 * @param {*} userId - User identifier passed through to the `db` helpers.
 * @param {string} prompt - Prompt forwarded to `runClaude`.
 * @returns {Promise<object>} Whatever `runClaude` produced; this function
 *   reads its `session_id` and `total_cost_usd` fields.
 */
async function getOrCreateSession(userId, prompt) {
  const session = await db.getActiveSession(userId);

  // Resume the stored session if there is one; otherwise start a fresh one.
  const args = session ? ['--resume', session.sessionId] : [];

  // Awaiting is harmless if runClaude is synchronous, and required if it
  // returns a promise (otherwise session_id / total_cost_usd are undefined).
  const result = await runClaude(prompt, args);

  await db.storeSession(userId, result.session_id);
  await db.incrementCost(userId, result.total_cost_usd);
  return result;
}
Gotchas
! Each claude subprocess uses ~200MB memory — limit concurrent workers
! Init overhead: ~50K tokens with MCP servers active (~$0.016/call for Opus minimum)