API Reference

Complete API documentation for Gerbil.

Gerbil Class

The main class for local LLM inference.

Constructor

constructor.ts
import { Gerbil } from "@tryhamster/gerbil";
const g = new Gerbil(config?: GerbilConfig);

loadModel()

loadModel.ts
await g.loadModel(
modelId: string,
options?: LoadOptions
): Promise<void>
// Example
await g.loadModel("qwen3-0.6b", {
device: "gpu",
dtype: "q4",
onProgress: (info) => console.log(info.status),
});

generate()

generate.ts
await g.generate(
prompt: string,
options?: GenerateOptions
): Promise<GenerateResult>
// Example
const result = await g.generate("Hello", {
maxTokens: 256,
temperature: 0.7,
topP: 0.9,
topK: 50,
system: "You are helpful.",
thinking: true,
stopSequences: ["\n\n"],
});

stream()

stream.ts
g.stream(
prompt: string,
options?: GenerateOptions
): AsyncGenerator<string, GenerateResult, unknown>
// Example
for await (const chunk of g.stream("Tell me a story")) {
process.stdout.write(chunk);
}

json()

json.ts
await g.json<T>(
prompt: string,
options: JsonOptions<T>
): Promise<T>
// Example
const person = await g.json("Extract: John, 32, NYC", {
schema: z.object({
name: z.string(),
age: z.number(),
city: z.string(),
}),
retries: 3,
temperature: 0.3,
});

embed()

embed.ts
await g.embed(
text: string,
options?: EmbedOptions
): Promise<EmbedResult>
// Example
const result = await g.embed("Hello world");
console.log(result.vector); // number[]

Response Cache Methods

cache-methods.ts
// Get response cache statistics
g.getResponseCacheStats(): ResponseCacheStats
// Returns: { hits, misses, size, hitRate }
// Clear all cached responses
g.clearResponseCache(): void

Other Methods

other-methods.ts
// Check if model is loaded
g.isLoaded(): boolean
// Get current model info
g.getModelInfo(): ModelConfig | null
// Check if loaded model supports vision
g.supportsVision(): boolean
// Get current device mode
g.getDeviceMode(): "webgpu" | "cpu"
// Get session stats
g.getStats(): SessionStats
// Get system info
g.getInfo(): SystemInfo
// Clear KV cache (attention states)
g.clearCache(): void
// Clean up
await g.dispose(): Promise<void>
// Static: List built-in models
Gerbil.listModels(): ModelConfig[]
// Static: Get model by ID
Gerbil.getModel(id: string): ModelConfig | undefined

Types

GerbilConfig

GerbilConfig.ts
interface GerbilConfig {
model?: string;
device?: "auto" | "gpu" | "cpu";
dtype?: "q4" | "q8" | "fp16" | "fp32";
cache?: CacheConfig;
fallback?: FallbackConfig;
}

GenerateOptions

GenerateOptions.ts
interface GenerateOptions {
maxTokens?: number; // default: 256
temperature?: number; // default: 0.7
topP?: number; // default: 0.9
topK?: number; // default: 50
stopSequences?: string[];
system?: string;
thinking?: boolean;
onToken?: (token: string) => void;
images?: ImageInput[]; // Vision: images to include
cache?: boolean; // Enable response caching (default: false)
cacheTtl?: number; // Cache TTL in milliseconds (default: 5 minutes)
}

GenerateResult

GenerateResult.ts
interface GenerateResult {
text: string;
thinking?: string;
tokensGenerated: number;
tokensPerSecond: number;
totalTime: number;
finishReason: "stop" | "length" | "error";
provider?: "local" | "openai" | "anthropic";
cached?: boolean;
}

LoadOptions

LoadOptions.ts
interface LoadOptions {
onProgress?: (info: ProgressInfo) => void;
device?: "auto" | "gpu" | "cpu" | "webgpu";
dtype?: "q4" | "q8" | "fp16" | "fp32";
contextLength?: number;
}

ModelConfig

ModelConfig.ts
interface ModelConfig {
id: string;
repo: string;
description: string;
size: string;
contextLength: number;
supportsThinking: boolean;
supportsJson: boolean;
supportsVision?: boolean; // Vision: model understands images
visionEncoderSize?: string; // Vision: encoder params (e.g. "0.4B")
family: "qwen" | "smollm" | "phi" | "mistral" | "llama" | "other";
}

ImageInput

ImageInput.ts
interface ImageInput {
/** Image source: URL, base64 data URI, or local file path */
source: string;
/** Optional alt text for context */
alt?: string;
}

ResponseCacheStats

ResponseCacheStats.ts
interface ResponseCacheStats {
hits: number; // Number of cache hits
misses: number; // Number of cache misses
size: number; // Number of cached entries
hitRate: number; // Hit rate percentage (0-100)
}

Functional API

One-liner functions that auto-load the model:

functional.ts
import gerbil, { generate, stream, json, embed } from "@tryhamster/gerbil";
// Default export - simplest usage
const text = await gerbil("Hello");
// Named exports
const result = await generate("Hello");
const data = await json("Extract...", { schema });
const vec = await embed("Hello");
for await (const chunk of stream("Story")) {
console.log(chunk);
}

Skills API

skills-api.ts
import {
// Skill system
defineSkill,
useSkill,
listSkills,
loadSkills,
// Built-in skills
commit,
summarize,
explain,
review,
test,
translate,
extract,
title,
// Vision skills (require ministral-3b or another vision-capable model)
describeImage,
analyzeScreenshot,
extractFromImage,
compareImages,
captionImage,
} from "@tryhamster/gerbil/skills";
// All skills accept an input object; most return Promise<string>, while extract() returns Promise<T>
const msg = await commit({ type: "conventional" });
const summary = await summarize({ content, length: "short" });
const explanation = await explain({ content, level: "beginner" });
const feedback = await review({ code, focus: ["security"] });
const tests = await test({ code, framework: "vitest" });
const translated = await translate({ text, to: "es" });
const headline = await title({ content, style: "professional" });
// Vision skills
const description = await describeImage({ image: url, focus: "details" });
const analysis = await analyzeScreenshot({ image: dataUri, type: "qa" });
const text = await extractFromImage({ image, extract: "text" });
const diff = await compareImages({ image1, image2, focus: "differences" });
const alt = await captionImage({ image, style: "descriptive" });
// extract() returns Promise<T>
const data = await extract({ content, schema: myZodSchema });
// Custom skills
const mySkill = defineSkill({
name: "my-skill",
input: z.object({ text: z.string() }),
run: async ({ input, gerbil }) => gerbil.generate(input.text),
});