Next.js

API route handlers, React Server Components, and client hooks for the Next.js 13+ App Router.

Server-side integration. These handlers run the Gerbil class on the Node.js runtime. To run inference in the browser on the GPU instead, use the native WebGPU engine directly in a client component.

Installation

Terminal
npm install @tryhamster/gerbil

API Route Handler

Create a simple API route that handles chat requests:

app/api/chat/route.ts
01// app/api/chat/route.ts
02import { gerbil } from "@tryhamster/gerbil/next";
03
04export const POST = gerbil.handler({
05 model: "qwen3.5-0.8b",
06});
07
08// This creates an endpoint that accepts:
09// POST /api/chat
10// Body: { prompt: string, stream?: boolean, options?: {...} }
11// Returns: { text: string, ... } or streams text

Handler Options

app/api/ai/route.ts
01// app/api/ai/route.ts
02import { gerbil } from "@tryhamster/gerbil/next";
03
04export const POST = gerbil.handler({
05 // Model configuration (GerbilConfig)
06 model: "qwen3.5-0.8b",
07 device: "auto", // "auto" | "webgpu"
08 dtype: "q4", // "q4" | "q8" | "fp16" | "fp32"
09
10 // Response caching
11 cache: { enabled: true, ttl: 3600 },
12
13 // Preload the model on startup so the first request is fast
14 preload: true,
15});
16
17// Per-request generation options (maxTokens, temperature, system,
18// thinking) come from the request body, not the handler config.

Multiple Endpoints

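Create multiple AI endpoints with a catch-all route. On Next.js 15 and later, `params` is passed to route handlers as a Promise, so type it as `Promise<{ path: string[] }>` and `await` it before reading `path`: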

app/api/ai/[...path]/route.ts
01// app/api/ai/[...path]/route.ts
02import { gerbil } from "@tryhamster/gerbil/next";
03
04const handlers = gerbil.createHandlers({
05 model: "qwen3.5-0.8b",
06 maxTokens: 500,
07});
08
09export async function POST(
10 req: Request,
11 { params }: { params: { path: string[] } }
12) {
13 const path = params.path.join("/");
14
15 switch (path) {
16 case "generate":
17 return handlers.generate(req);
18 case "stream":
19 return handlers.stream(req);
20 case "json":
21 return handlers.json(req);
22 case "embed":
23 return handlers.embed(req);
24 case "chat":
25 return handlers.chat(req);
26 default:
27 return new Response("Not found", { status: 404 });
28 }
29}
30
31export async function GET(
32 req: Request,
33 { params }: { params: { path: string[] } }
34) {
35 const path = params.path.join("/");
36
37 switch (path) {
38 case "info":
39 return handlers.info(req);
40 case "models":
41 return handlers.models(req);
42 default:
43 return new Response("Not found", { status: 404 });
44 }
45}
46
47// Endpoints created:
48// POST /api/ai/generate - Generate text
49// POST /api/ai/stream - Stream text
50// POST /api/ai/json - Structured JSON output
51// POST /api/ai/embed - Generate embeddings
52// POST /api/ai/chat - Multi-turn chat
53// GET /api/ai/info - Model info
54// GET /api/ai/models - List available models

Streaming Responses

Stream responses for real-time chat UIs:

app/api/stream/route.ts
01// app/api/stream/route.ts
02import { gerbil } from "@tryhamster/gerbil/next";
03
04export async function POST(req: Request) {
05 const { prompt, system } = await req.json();
06
07 const stream = await gerbil.stream(prompt, {
08 model: "qwen3.5-0.8b",
09 system,
10 });
11
12 return new Response(stream, {
13 headers: {
14 "Content-Type": "text/event-stream",
15 "Cache-Control": "no-cache",
16 "Connection": "keep-alive",
17 },
18 });
19}

Structured JSON Output

app/api/extract/route.ts
01// app/api/extract/route.ts
02import { gerbil } from "@tryhamster/gerbil/next";
03import { z } from "zod";
04
05const PersonSchema = z.object({
06 name: z.string(),
07 age: z.number(),
08 email: z.string().email().optional(),
09});
10
11export async function POST(req: Request) {
12 const { text } = await req.json();
13
14 const data = await gerbil.json(text, {
15 model: "qwen3.5-0.8b",
16 schema: PersonSchema,
17 retries: 3,
18 });
19
20 return Response.json(data);
21}

React Server Components

Use Gerbil directly in Server Components:

app/summary/page.tsx
01// app/summary/page.tsx
02import gerbil from "@tryhamster/gerbil";
03
04// This runs on the server
05export default async function SummaryPage() {
06 await gerbil.loadModel("qwen3.5-0.8b");
07
08 const summary = await gerbil.generate(
09 "Summarize: The quick brown fox jumps over the lazy dog.",
10 { maxTokens: 100 }
11 );
12
13 return (
14 <div>
15 <h1>Summary</h1>
16 <p>{summary.text}</p>
17 </div>
18 );
19}

Client Components

Use the React hooks in client components:

app/chat/page.tsx
01// app/chat/page.tsx
02"use client";
03
04import { useGerbil } from "@tryhamster/gerbil/react";
05import { useState } from "react";
06
07export default function ChatPage() {
08 const [input, setInput] = useState("");
09 const [response, setResponse] = useState("");
10 const { generate, stream, isLoading } = useGerbil({
11 endpoint: "/api/ai",
12 });
13
14 const handleGenerate = async () => {
15 const result = await generate(input);
16 setResponse(result.text);
17 };
18
19 const handleStream = async () => {
20 setResponse("");
21 for await (const chunk of stream(input)) {
22 setResponse((prev) => prev + chunk);
23 }
24 };
25
26 return (
27 <div className="p-4">
28 <textarea
29 value={input}
30 onChange={(e) => setInput(e.target.value)}
31 placeholder="Enter your prompt..."
32 className="w-full p-2 border rounded"
33 />
34 <div className="flex gap-2 mt-2">
35 <button onClick={handleGenerate} disabled={isLoading}>
36 Generate
37 </button>
38 <button onClick={handleStream} disabled={isLoading}>
39 Stream
40 </button>
41 </div>
42 {response && (
43 <div className="mt-4 p-4 bg-gray-100 rounded">
44 {response}
45 </div>
46 )}
47 </div>
48 );
49}

Middleware

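Add authentication or rate limiting inside the route handler, before the request reaches Gerbil. The example assumes a `ratelimit` instance (for example, one created with `@upstash/ratelimit`) is defined or imported elsewhere in the module: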

app/api/ai/route.ts
01// app/api/ai/route.ts
02import { gerbil } from "@tryhamster/gerbil/next";
03import { getServerSession } from "next-auth";
04
05export async function POST(req: Request) {
06 // Check authentication
07 const session = await getServerSession();
08 if (!session) {
09 return new Response("Unauthorized", { status: 401 });
10 }
11
12 // Rate limiting (example with upstash)
13 const ip = req.headers.get("x-forwarded-for") || "anonymous";
14 const { success } = await ratelimit.limit(ip);
15 if (!success) {
16 return new Response("Too many requests", { status: 429 });
17 }
18
19 // Process request
20 const { prompt } = await req.json();
21 const result = await gerbil.generate(prompt, {
22 model: "qwen3.5-0.8b",
23 });
24
25 return Response.json(result);
26}

Edge Runtime

Note: Gerbil requires the Node.js runtime for model loading, so it cannot run on the Edge runtime. For edge deployments, call a separate API server or serverless function that runs on Node.js.

app/api/ai/route.ts
// app/api/ai/route.ts
// Explicitly pin this route to the Node.js runtime (which is also the default)
export const runtime = "nodejs";
import { gerbil } from "@tryhamster/gerbil/next";
export const POST = gerbil.handler({
model: "qwen3.5-0.8b",
});

Environment Variables

.env.local
# .env.local
# Model cache directory (optional)
GERBIL_CACHE_DIR=./models
# Default model (optional)
GERBIL_DEFAULT_MODEL=qwen3.5-0.8b
# Device preference (optional)
GERBIL_DEVICE=auto

Full Chat Application

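Complete chat app with streaming, history, and thinking mode. The route reads `thinking` from the request body, so the client has to send it with each request; the page below only keeps the checkbox value in local state, so forward it to the request for the toggle to take effect: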

app/api/chat/route.ts
01// app/api/chat/route.ts
02import { gerbil } from "@tryhamster/gerbil/next";
03
04export async function POST(req: Request) {
05 const { messages, thinking } = await req.json();
06
07 const stream = await gerbil.chat(messages, {
08 model: "qwen3.5-0.8b",
09 thinking,
10 stream: true,
11 });
12
13 return new Response(stream, {
14 headers: { "Content-Type": "text/event-stream" },
15 });
16}
app/chat/page.tsx
01// app/chat/page.tsx
02"use client";
03
04import { useState } from "react";
05import { useChat } from "@tryhamster/gerbil/react";
06
07export default function ChatApp() {
08 const [thinking, setThinking] = useState(false);
09 const {
10 messages,
11 input,
12 setInput,
13 handleSubmit,
14 isLoading,
15 } = useChat({
16 endpoint: "/api/chat",
17 });
18
19 return (
20 <div className="flex flex-col h-screen">
21 {/* Messages */}
22 <div className="flex-1 overflow-auto p-4 space-y-4">
23 {messages.map((m, i) => (
24 <div
25 key={i}
26 className={`p-3 rounded ${
27 m.role === "user" ? "bg-blue-100 ml-auto" : "bg-gray-100"
28 } max-w-[80%]`}
29 >
30 {m.content}
31 </div>
32 ))}
33 </div>
34
35 {/* Input */}
36 <form onSubmit={handleSubmit} className="p-4 border-t">
37 <div className="flex gap-2">
38 <input
39 value={input}
40 onChange={(e) => setInput(e.target.value)}
41 placeholder="Type a message..."
42 className="flex-1 p-2 border rounded"
43 disabled={isLoading}
44 />
45 <label className="flex items-center gap-1">
46 <input
47 type="checkbox"
48 checked={thinking}
49 onChange={(e) => setThinking(e.target.checked)}
50 />
51 Think
52 </label>
53 <button
54 type="submit"
55 disabled={isLoading}
56 className="px-4 py-2 bg-blue-500 text-white rounded"
57 >
58 {isLoading ? "..." : "Send"}
59 </button>
60 </div>
61 </form>
62 </div>
63 );
64}