# Express
Middleware and route handlers for Express.js applications.
## Installation

Terminal

```bash
npm install @tryhamster/gerbil express
```

## Quick Start
Mount all Gerbil routes with a single line:
server.ts
```ts
import express from "express";
import { gerbil } from "@tryhamster/gerbil/express";

const app = express();
app.use(express.json());

// Mount all AI routes under /ai
app.use("/ai", gerbil({ model: "qwen3-0.6b" })());

app.listen(3000, () => {
  console.log("Server running on http://localhost:3000");
});
```

## Available Endpoints
The middleware creates these endpoints (a quick client check of the two GET routes follows the table):
| Method | Endpoint | Description |
|---|---|---|
| POST | /generate | Generate text |
| POST | /stream | Stream text (SSE) |
| POST | /json | Structured JSON output |
| POST | /embed | Generate embeddings |
| POST | /chat | Multi-turn chat |
| GET | /info | Model info |
| GET | /stats | Session statistics |
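Neither GET endpoint appears in the request examples below, so here is a minimal client sketch for both. It assumes the Quick Start server is running on `http://localhost:3000` and only that each endpoint returns JSON; the logged shapes are whatever the middleware provides.

```ts
// Quick check of the two GET endpoints from the table above.
// Assumes the Quick Start server is running on http://localhost:3000.
const base = "http://localhost:3000/ai";

const info = await fetch(`${base}/info`).then((r) => r.json());
console.log("model info:", info);

const stats = await fetch(`${base}/stats`).then((r) => r.json());
console.log("session stats:", stats);
```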
## Configuration
config.ts
```ts
import { gerbil } from "@tryhamster/gerbil/express";

const aiMiddleware = gerbil({
  // Model configuration
  model: "qwen3-0.6b",
  device: "auto", // "auto" | "gpu" | "cpu"
  dtype: "q4", // "q4" | "q8" | "fp16" | "fp32"
  cacheDir: "./models", // Model cache directory

  // Default generation options
  maxTokens: 500,
  temperature: 0.7,
  topP: 0.9,

  // System prompt
  system: "You are a helpful assistant.",

  // CORS (if needed)
  cors: true,
  corsOptions: {
    origin: "https://myapp.com",
    credentials: true,
  },
});

app.use("/ai", aiMiddleware());
```

## POST /generate
Generate text from a prompt:
generate.json
```json
// Request
POST /ai/generate
Content-Type: application/json

{
  "prompt": "Write a haiku about coding",
  "options": {
    "maxTokens": 100,
    "temperature": 0.8,
    "thinking": false
  }
}

// Response
{
  "text": "Silent keystrokes fall\nBugs emerge from tangled code\nCoffee saves the day",
  "tokensGenerated": 23,
  "tokensPerSecond": 45.2,
  "totalTime": 0.51
}
```
## POST /stream

Stream text using Server-Sent Events:
stream.txt
```txt
// Request
POST /ai/stream
Content-Type: application/json

{
  "prompt": "Tell me a story",
  "options": { "maxTokens": 500 }
}

// Response (SSE stream)
data: {"token": "Once"}
data: {"token": " upon"}
data: {"token": " a"}
data: {"token": " time"}
...
data: {"done": true, "tokensGenerated": 150}
```

Client-side consumption:
client.ts
```ts
const response = await fetch("/ai/stream", {
  method: "POST",
  headers: { "Content-Type": "application/json" },
  body: JSON.stringify({ prompt: "Tell me a story" }),
});

if (!response.body) throw new Error("No response body");

const reader = response.body.getReader();
const decoder = new TextDecoder();

while (true) {
  const { done, value } = await reader.read();
  if (done) break;

  const chunk = decoder.decode(value);
  const lines = chunk.split("\n").filter(Boolean);

  for (const line of lines) {
    if (line.startsWith("data: ")) {
      const data = JSON.parse(line.slice(6));
      if (data.token) {
        process.stdout.write(data.token);
      }
    }
  }
}
```
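One caveat with the loop above: a network chunk can end mid-line, so a `data: {...}` event may be split across two reads and break `JSON.parse`. A small sketch of a buffered drop-in replacement for the read loop, using the same `reader`, `decoder`, and event shape as above:

```ts
// Buffered variant: carry any partial line over to the next chunk so
// JSON.parse never sees half an event.
let buffer = "";

while (true) {
  const { done, value } = await reader.read();
  if (done) break;

  buffer += decoder.decode(value, { stream: true });
  const lines = buffer.split("\n");
  buffer = lines.pop() ?? ""; // keep the trailing partial line

  for (const line of lines) {
    if (line.startsWith("data: ")) {
      const data = JSON.parse(line.slice(6));
      if (data.token) process.stdout.write(data.token);
    }
  }
}
```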
## POST /json

Generate structured JSON with schema validation:
json.json
```json
// Request
POST /ai/json
Content-Type: application/json

{
  "prompt": "Extract: John is 32 and lives in NYC",
  "schema": {
    "type": "object",
    "properties": {
      "name": { "type": "string" },
      "age": { "type": "number" },
      "city": { "type": "string" }
    },
    "required": ["name", "age", "city"]
  },
  "options": { "retries": 3 }
}

// Response
{
  "data": {
    "name": "John",
    "age": 32,
    "city": "NYC"
  }
}
```
## POST /embed

Generate embeddings for text:
embed.json
```json
// Single text
POST /ai/embed
{
  "text": "Hello world"
}

// Response
{
  "vector": [0.123, -0.456, ...], // 384 dimensions
  "dimensions": 384
}

// Batch
POST /ai/embed
{
  "texts": ["Hello", "World", "Foo"]
}

// Response
{
  "vectors": [
    [0.123, ...],
    [0.456, ...],
    [0.789, ...]
  ],
  "dimensions": 384
}
```
## POST /chat

Multi-turn conversation:
chat.json
```json
// Request
POST /ai/chat
{
  "messages": [
    { "role": "user", "content": "My name is Alice" },
    { "role": "assistant", "content": "Hello Alice!" },
    { "role": "user", "content": "What's my name?" }
  ],
  "options": { "stream": false, "thinking": false }
}

// Response
{
  "message": {
    "role": "assistant",
    "content": "Your name is Alice!"
  },
  "tokensGenerated": 8
}
```
## Custom Routes

Create custom endpoints with the Gerbil instance:
custom-routes.ts
```ts
import express from "express";
import gerbil from "@tryhamster/gerbil";

const app = express();
app.use(express.json());

// Load model on startup
await gerbil.loadModel("qwen3-0.6b");

// Custom summarize endpoint
app.post("/summarize", async (req, res) => {
  const { content, length = "short" } = req.body;

  const result = await gerbil.generate(
    `Summarize the following in a ${length} paragraph:\n\n${content}`,
    { maxTokens: length === "short" ? 100 : 300 }
  );

  res.json({ summary: result.text });
});

// Custom code review endpoint
app.post("/review", async (req, res) => {
  const { code, language } = req.body;

  const result = await gerbil.generate(
    `Review this ${language} code for bugs and improvements:\n\n${code}`,
    { maxTokens: 500, temperature: 0.3 }
  );

  res.json({ review: result.text });
});

app.listen(3000);
```
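One caveat with async handlers like these: on Express 4, a rejected promise is not forwarded to the error middleware automatically (Express 5 changes this), so an unhandled generation error can hang the request. A small sketch of a wrapper, assuming Express 4 and reusing the `app` and `gerbil` from above:

```ts
import type { NextFunction, Request, RequestHandler, Response } from "express";

// Forward rejected promises to Express's error middleware. Express 4
// does not do this automatically for async handlers; Express 5 does.
const asyncHandler =
  (fn: (req: Request, res: Response, next: NextFunction) => Promise<unknown>): RequestHandler =>
  (req, res, next) => {
    fn(req, res, next).catch(next);
  };

// Same /summarize route as above, now wrapped.
app.post("/summarize", asyncHandler(async (req, res) => {
  const { content, length = "short" } = req.body;
  const result = await gerbil.generate(
    `Summarize the following in a ${length} paragraph:\n\n${content}`,
    { maxTokens: length === "short" ? 100 : 300 }
  );
  res.json({ summary: result.text });
}));
```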
## Middleware Chain

Add authentication, rate limiting, and logging:
middleware-chain.ts
```ts
import express from "express";
import type { NextFunction, Request, Response } from "express";
import { gerbil } from "@tryhamster/gerbil/express";
import rateLimit from "express-rate-limit";

const app = express();
app.use(express.json());

// Rate limiting
const limiter = rateLimit({
  windowMs: 60 * 1000, // 1 minute
  max: 20, // 20 requests per minute
  message: { error: "Too many requests" },
});

// Authentication middleware (isValidToken is app-specific; see the sketch below)
const auth = (req: Request, res: Response, next: NextFunction) => {
  const token = req.headers.authorization?.split(" ")[1];
  if (!token || !isValidToken(token)) {
    return res.status(401).json({ error: "Unauthorized" });
  }
  next();
};

// Logging middleware
const logger = (req: Request, res: Response, next: NextFunction) => {
  console.log(`[${new Date().toISOString()}] ${req.method} ${req.path}`);
  next();
};

// Apply middleware chain
app.use("/ai",
  limiter,
  auth,
  logger,
  gerbil({ model: "qwen3-0.6b" })()
);

app.listen(3000);
```
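`isValidToken` above is a placeholder for whatever check your app uses. One hypothetical shape, assuming a static API key in an environment variable; any real deployment would substitute its own scheme:

```ts
// Hypothetical token check: compares against an API key from the
// environment. Purely illustrative; not part of Gerbil.
function isValidToken(token: string): boolean {
  return token === process.env.API_KEY;
}
```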
## Error Handling

error-handling.ts
```ts
import { gerbil, GerbilError } from "@tryhamster/gerbil/express";

// Custom error handler
app.use("/ai", gerbil({
  model: "qwen3-0.6b",
  onError: (error, req, res) => {
    console.error("AI Error:", error);

    if (error instanceof GerbilError) {
      return res.status(400).json({
        error: error.message,
        code: error.code,
      });
    }

    res.status(500).json({
      error: "Internal server error",
    });
  },
})());

// Global error handler
app.use((err, req, res, next) => {
  console.error(err);
  res.status(500).json({ error: "Something went wrong" });
});
```
## TypeScript Types

types.ts
```ts
import {
  gerbil,
  GerbilOptions,
  GenerateRequest,
  GenerateResponse,
} from "@tryhamster/gerbil/express";
import { Request, Response } from "express";

const options: GerbilOptions = {
  model: "qwen3-0.6b",
  maxTokens: 500,
};

// Typed request handler
app.post(
  "/custom",
  async (req: Request<{}, {}, GenerateRequest>, res: Response<GenerateResponse>) => {
    const { prompt, options } = req.body;
    // ...
  }
);
```
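A hypothetical completion of the handler above, for illustration only: it aliases the core instance from the Custom Routes section (renamed to avoid clashing with the middleware factory import) and assumes `GenerateResponse` includes at least the `text` field shown in the `/generate` example.

```ts
// Hypothetical completion of the typed handler. The core instance is
// aliased to `ai` to avoid shadowing the `gerbil` middleware factory;
// the cast assumes GenerateResponse includes a `text` field.
import ai from "@tryhamster/gerbil";

app.post(
  "/custom",
  async (req: Request<{}, {}, GenerateRequest>, res: Response<GenerateResponse>) => {
    const { prompt, options } = req.body;
    const result = await ai.generate(prompt, options);
    res.json({ text: result.text } as GenerateResponse);
  }
);
```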