AI · Next.js · API
Building a Streaming LLM API with Server-Sent Events
March 10, 2026
11 min read
Building a Streaming LLM API with Server-Sent Events
Real-time token streaming transforms LLM UX from "wait for the whole response" to "watch it think." Here's the implementation.
Server Side (Next.js Route Handler)
/**
 * Extracts the assistant text carried by one line of the upstream event
 * stream. Returns '' for anything that is not a `data:` line with a string
 * `choices[0].delta.content` (comments, blank lines, the `[DONE]` sentinel,
 * role-only deltas, malformed JSON).
 */
function readDeltaText(line: string): string {
  if (!line.startsWith('data:')) return '';
  const payload = line.slice('data:'.length).trim();
  if (payload === '' || payload === '[DONE]') return '';
  try {
    const parsed = JSON.parse(payload) as {
      choices?: Array<{ delta?: { content?: unknown } }>;
    } | null;
    const content = parsed?.choices?.[0]?.delta?.content;
    return typeof content === 'string' ? content : '';
  } catch {
    // A single unparseable event should not abort the stream; skip it.
    return '';
  }
}

/**
 * Frames text as one Server-Sent Event. Embedded line breaks become extra
 * `data:` lines, which the receiver joins back with '\n'; writing them raw
 * would end the event early.
 */
function toSseEvent(text: string): string {
  const body = text
    .split(/\r\n|\r|\n/)
    .map((part) => `data: ${part}`)
    .join('\n');
  return `${body}\n\n`;
}

/**
 * Streams a chat completion to the caller as Server-Sent Events.
 *
 * Expects a JSON body `{ "prompt": string }`. Each event's data is a fragment
 * of assistant text; the stream closing marks the end of the completion.
 *
 * @param req Incoming request; its abort signal is forwarded upstream so a
 *   client disconnect stops the completion request.
 * @returns 200 `text/event-stream` on success, 400 for a missing or invalid
 *   prompt, 500 when `OPENAI_KEY` is unset, 502 when the upstream call fails.
 */
export async function POST(req: Request): Promise<Response> {
  const payload: unknown = await req.json().catch(() => null);
  const prompt =
    typeof payload === 'object' && payload !== null
      ? (payload as { prompt?: unknown }).prompt
      : undefined;
  if (typeof prompt !== 'string' || prompt.length === 0) {
    return new Response('Request body must be JSON of the form { "prompt": string }', {
      status: 400,
    });
  }

  const apiKey = process.env.OPENAI_KEY;
  if (!apiKey) {
    return new Response('OPENAI_KEY is not configured', { status: 500 });
  }

  // Call upstream before building the stream so failures map to a real status
  // code instead of a 200 response whose body errors part-way through.
  const upstream = await fetch('https://api.openai.com/v1/chat/completions', {
    method: 'POST',
    headers: {
      Authorization: `Bearer ${apiKey}`,
      'Content-Type': 'application/json',
    },
    body: JSON.stringify({
      model: 'gpt-4o',
      messages: [{ role: 'user', content: prompt }],
      stream: true,
    }),
    signal: req.signal,
  });
  if (!upstream.ok || upstream.body === null) {
    return new Response(`Upstream completion request failed with status ${upstream.status}`, {
      status: 502,
    });
  }

  const reader = upstream.body.getReader();
  // One decoder for the whole stream: with { stream: true } it carries a
  // multi-byte character that is split across two network chunks.
  const decoder = new TextDecoder();
  const encoder = new TextEncoder();
  let buffered = '';

  const stream = new ReadableStream<Uint8Array>({
    async pull(controller) {
      const emit = (line: string): boolean => {
        const text = readDeltaText(line);
        if (text === '') return false;
        controller.enqueue(encoder.encode(toSseEvent(text)));
        return true;
      };

      // Keep reading until something is enqueued or the stream ends; a pull
      // that resolves without enqueueing would leave the consumer's read pending.
      for (;;) {
        const { done, value } = await reader.read();
        if (done) {
          buffered += decoder.decode();
          if (buffered !== '') emit(buffered);
          controller.close();
          return;
        }

        buffered += decoder.decode(value, { stream: true });
        // Upstream chunks do not align with event boundaries: process only
        // complete lines and keep the trailing partial line for the next read.
        const lines = buffered.split(/\r?\n/);
        buffered = lines.pop() ?? '';

        let emitted = false;
        for (const line of lines) {
          if (emit(line)) emitted = true;
        }
        if (emitted) return;
      }
    },
    cancel(reason) {
      // The consumer went away: release the upstream connection too.
      return reader.cancel(reason);
    },
  });

  return new Response(stream, {
    headers: { 'Content-Type': 'text/event-stream', 'Cache-Control': 'no-cache' },
  });
}
// Client Side
// Request a completion and append each streamed fragment to the UI as it arrives.
const response = await fetch('/api/chat', {
  method: 'POST',
  headers: { 'Content-Type': 'application/json' },
  body: JSON.stringify({ prompt }),
});
if (!response.ok || response.body === null) {
  // Surface failures instead of rendering an error body as if it were model output.
  throw new Error(`Chat request failed with status ${response.status}`);
}
const reader = response.body.getReader();
// One decoder for the whole stream: with { stream: true } it carries a
// multi-byte character that is split across two network chunks.
const decoder = new TextDecoder();
let pending = ''; // trailing partial line awaiting the rest of its bytes
let dataLines: string[] = []; // data fields of the event currently being read
while (true) {
  const { done, value } = await reader.read();
  pending += done ? decoder.decode() : decoder.decode(value, { stream: true });
  // Network chunks do not align with event boundaries, so only complete
  // lines are interpreted; the remainder waits for the next read.
  const lines = pending.split(/\r?\n/);
  pending = lines.pop() ?? '';
  for (const line of lines) {
    if (line === '') {
      // A blank line ends the event: its data lines are joined with '\n'.
      if (dataLines.length > 0) {
        const text = dataLines.join('\n');
        dataLines = [];
        setOutput(prev => prev + text);
      }
    } else if (line.startsWith('data:')) {
      // Strip the field name and at most one following space; any further
      // leading spaces belong to the payload (tokens often start with one).
      const raw = line.slice('data:'.length);
      dataLines.push(raw.startsWith(' ') ? raw.slice(1) : raw);
    }
  }
  if (done) break;
}