{"@context":"https://schema.org","@graph":[{"@type":"Organization","@id":"https://inquir.org/#organization","name":"Inquir","url":"https://inquir.org","logo":{"@type":"ImageObject","url":"https://inquir.org/favicon.png","width":256,"height":256}},{"@type":"WebSite","@id":"https://inquir.org/#website","name":"Inquir Compute","url":"https://inquir.org","inLanguage":["en","ru"],"publisher":{"@id":"https://inquir.org/#organization"}},{"@type":"WebPage","@id":"https://inquir.org/use-cases/llm-pipelines#webpage","url":"https://inquir.org/use-cases/llm-pipelines","name":"LLM pipelines and serverless AI workflows","headline":"LLM pipelines and serverless AI workflows","description":"LLM pipelines with retrieval, moderation, tool calls, summarization, retries, and cost control—each stage is a serverless function with traces instead of one giant prompt.","inLanguage":"en-US","isPartOf":{"@id":"https://inquir.org/#website"},"breadcrumb":{"@id":"https://inquir.org/use-cases/llm-pipelines#breadcrumb"},"author":{"@id":"https://inquir.org/#organization"},"datePublished":"2025-11-01T00:00:00.000Z","dateModified":"2026-06-23T00:00:00.000Z","citation":{"@type":"CreativeWork","url":"https://inquir.org/docs"}},{"@type":"BreadcrumbList","@id":"https://inquir.org/use-cases/llm-pipelines#breadcrumb","itemListElement":[{"@type":"ListItem","position":1,"name":"Home","item":"https://inquir.org/"},{"@type":"ListItem","position":2,"name":"Use cases","item":"https://inquir.org/use-cases"},{"@type":"ListItem","position":3,"name":"LLM pipelines & AI workflows","item":"https://inquir.org/use-cases/llm-pipelines"}]},{"@type":"HowTo","@id":"https://inquir.org/use-cases/llm-pipelines#howto","name":"LLM pipelines and serverless AI workflows","description":"LLM pipelines with retrieval, moderation, tool calls, summarization, retries, and cost control—each stage is a serverless function with traces instead of one giant prompt.","inLanguage":"en-US","author":{"@id":"https://inquir.org/#organization"},"step":[{"@type":"HowToStep","position":1,"name":"Draw dataflow","text":"Name inputs/outputs per box."},{"@type":"HowToStep","position":2,"name":"Codify","text":"Implement each box as a function or pipeline step."},{"@type":"HowToStep","position":3,"name":"Measure cost","text":"Track tokens and wall time per stage."}],"isPartOf":{"@id":"https://inquir.org/use-cases/llm-pipelines#webpage"}},{"@type":"FAQPage","@id":"https://inquir.org/use-cases/llm-pipelines#faq","url":"https://inquir.org/use-cases/llm-pipelines","isPartOf":{"@id":"https://inquir.org/use-cases/llm-pipelines#webpage"},"mainEntity":[{"@type":"Question","name":"Why split an LLM workflow into stages?","acceptedAnswer":{"@type":"Answer","text":"Retries, cost attribution, and debugging improve when retrieval, moderation, tool calls, and summarization are separate steps with their own logs."}},{"@type":"Question","name":"Streaming tokens to end users?","acceptedAnswer":{"@type":"Answer","text":"Keep user-visible streaming at the boundary; internal stages can use request/response for simpler failure handling and replays."}},{"@type":"Question","name":"How do I control cost across stages?","acceptedAnswer":{"@type":"Answer","text":"Measure tokens and wall time per stage in observability; cap expensive steps with budgets and short-circuit when moderation fails."}}]}]}