AI · Next.js · API

Building a Streaming LLM API with Server-Sent Events

March 10, 2026

11 min read

Building a Streaming LLM API with Server-Sent Events

Real-time token streaming transforms LLM UX from "wait for the whole response" to "watch it think." Here's the implementation.

Server Side (Next.js Route Handler)

/**
 * Streams a chat completion to the caller as Server-Sent Events.
 *
 * The upstream API already answers with an SSE stream whose `data:` lines each
 * carry one JSON chunk. Network reads can split that stream at arbitrary byte
 * offsets, so this handler re-assembles complete lines, extracts the generated
 * text from every chunk, and re-emits it as its own well-formed SSE event whose
 * data is the plain token text.
 *
 * @param req - Request whose JSON body is `{ prompt: string }`.
 * @returns A `text/event-stream` response on success; `400` for an invalid
 *   body, `500` when `OPENAI_KEY` is not configured, `502` when the upstream
 *   call fails.
 */
export async function POST(req: Request): Promise<Response> {
  let body: unknown;
  try {
    body = await req.json();
  } catch {
    return new Response('Request body must be valid JSON', { status: 400 });
  }

  const prompt =
    typeof body === 'object' && body !== null
      ? (body as { prompt?: unknown }).prompt
      : undefined;
  if (typeof prompt !== 'string' || prompt.length === 0) {
    return new Response('"prompt" must be a non-empty string', { status: 400 });
  }

  const apiKey = process.env.OPENAI_KEY;
  if (!apiKey) {
    // Fail fast instead of sending "Bearer undefined" upstream.
    return new Response('OPENAI_KEY is not configured', { status: 500 });
  }

  const upstream = await fetch('https://api.openai.com/v1/chat/completions', {
    method: 'POST',
    headers: {
      Authorization: `Bearer ${apiKey}`,
      // The body is JSON, so the upstream API must be told so.
      'Content-Type': 'application/json',
    },
    body: JSON.stringify({
      model: 'gpt-4o',
      messages: [{ role: 'user', content: prompt }],
      stream: true,
    }),
  });

  if (!upstream.ok || upstream.body === null) {
    return new Response('Upstream LLM request failed', { status: 502 });
  }

  const reader = upstream.body.getReader();
  const encoder = new TextEncoder();
  // One decoder for the whole stream: with `stream: true` it holds back the
  // leading bytes of a multi-byte character that is split across two chunks.
  const decoder = new TextDecoder();
  // Text received so far that does not yet end in a newline.
  let buffer = '';

  const stream = new ReadableStream<Uint8Array>({
    // `pull` is only invoked when the consumer wants more data, so upstream
    // reads are paced by the client instead of being buffered without bound.
    async pull(controller) {
      // Keep reading until at least one event was emitted (or the stream
      // ended); resolving `pull` without enqueuing anything can leave a
      // pending read waiting.
      for (;;) {
        const { done, value } = await reader.read();
        buffer += done ? decoder.decode() : decoder.decode(value, { stream: true });

        const lines = buffer.split('\n');
        // Until EOF the last element is an incomplete line; keep it for later.
        buffer = done ? '' : lines.pop() ?? '';

        let emitted = false;
        for (const line of lines) {
          const trimmed = line.trim();
          if (!trimmed.startsWith('data:')) continue;

          const payload = trimmed.slice('data:'.length).trim();
          if (payload === '') continue;
          if (payload === '[DONE]') {
            // Upstream end-of-stream sentinel.
            controller.close();
            await reader.cancel();
            return;
          }

          const text = readDeltaContent(payload);
          if (text !== '') {
            controller.enqueue(encoder.encode(toSseEvent(text)));
            emitted = true;
          }
        }

        if (done) {
          controller.close();
          return;
        }
        if (emitted) return;
      }
    },
    // Client went away: stop reading from (and paying for) the upstream call.
    cancel(reason) {
      return reader.cancel(reason);
    },
  });

  return new Response(stream, {
    headers: { 'Content-Type': 'text/event-stream', 'Cache-Control': 'no-cache' },
  });
}

/**
 * Extracts `choices[0].delta.content` from one upstream JSON chunk.
 *
 * @returns The text fragment, or `''` when the chunk carries no text.
 * @throws SyntaxError when `payload` is not valid JSON.
 */
function readDeltaContent(payload: string): string {
  const parsed: unknown = JSON.parse(payload);
  const content = (
    parsed as { choices?: { delta?: { content?: unknown } }[] } | null
  )?.choices?.[0]?.delta?.content;
  return typeof content === 'string' ? content : '';
}

/**
 * Formats text as one SSE event. A newline inside the text would terminate the
 * `data:` field, so every line of the text gets its own `data:` line; an SSE
 * parser joins them back together with `\n`.
 */
function toSseEvent(text: string): string {
  const dataLines = text.split(/\r\n|\r|\n/).map((line) => `data: ${line}`);
  return `${dataLines.join('\n')}\n\n`;
}

Client Side

// Request a completion and render it incrementally as SSE events arrive.
const response = await fetch('/api/chat', {
  method: 'POST',
  headers: { 'Content-Type': 'application/json' },
  body: JSON.stringify({ prompt }),
});
if (!response.ok || response.body === null) {
  throw new Error(`Chat request failed with status ${response.status}`);
}

const reader = response.body.getReader();
// One decoder for the whole stream: with `stream: true` it holds back the
// leading bytes of a multi-byte character that is split across two chunks.
const decoder = new TextDecoder();
// Received text that does not yet form a complete event.
let pending = '';

while (true) {
  const { done, value } = await reader.read();
  // An event that was not terminated by a blank line before EOF is dropped.
  if (done) break;

  pending += decoder.decode(value, { stream: true });

  // Events are separated by a blank line (assumes LF line endings); the last
  // element is an incomplete event, kept until more data arrives.
  const events = pending.split('\n\n');
  pending = events.pop() ?? '';

  for (const event of events) {
    // Keep only `data:` fields, dropping the field name and the single
    // optional space after the colon; multiple data lines join with "\n".
    const dataLines = event
      .split('\n')
      .filter((line) => line.startsWith('data:'))
      .map((line) => line.slice('data:'.length).replace(/^ /, ''));
    if (dataLines.length === 0) continue;

    const text = dataLines.join('\n');
    setOutput(prev => prev + text);
  }
}