API Reference

Name: Gerbil
Author: Gerbil

Complete API documentation for Gerbil.

Gerbil Class

The main class for local LLM inference.

Constructor

constructor.ts

import { Gerbil } from "@tryhamster/gerbil";

const g = new Gerbil(config?: GerbilConfig);

loadModel()

loadModel.ts

await g.loadModel(
  modelId: string,
  options?: LoadOptions
): Promise<void>

// Example
await g.loadModel("qwen3-0.6b", {
  device: "gpu",
  dtype: "q4",
  onProgress: (info) => console.log(info.status),
});

generate()

generate.ts

await g.generate(
  prompt: string,
  options?: GenerateOptions
): Promise<GenerateResult>

// Example
const result = await g.generate("Hello", {
  maxTokens: 256,
  temperature: 0.7,
  topP: 0.9,
  topK: 50,
  system: "You are helpful.",
  thinking: true,
  stopSequences: ["\n\n"],
});

stream()

stream.ts

g.stream(
  prompt: string,
  options?: GenerateOptions
): AsyncGenerator<string, GenerateResult, unknown>

// Example
for await (const chunk of g.stream("Tell me a story")) {
  process.stdout.write(chunk);
}

json()

json.ts

await g.json<T>(
  prompt: string,
  options: JsonOptions<T>
): Promise<T>

// Example (`z` is Zod: import { z } from "zod")
const person = await g.json("Extract: John, 32, NYC", {
  schema: z.object({
    name: z.string(),
    age: z.number(),
    city: z.string(),
  }),
  retries: 3,
  temperature: 0.3,
});

embed()

embed.ts

await g.embed(
  text: string,
  options?: EmbedOptions
): Promise<EmbedResult>

// Example
const result = await g.embed("Hello world");
console.log(result.vector); // number[]

Response Cache Methods

cache-methods.ts

// Get response cache statistics
g.getResponseCacheStats(): ResponseCacheStats
// Returns: { hits, misses, size, hitRate }

// Clear all cached responses
g.clearResponseCache(): void

Other Methods

other-methods.ts

// Check if model is loaded
g.isLoaded(): boolean

// Get current model info
g.getModelInfo(): ModelConfig | null

// Check if loaded model supports vision
g.supportsVision(): boolean

// Get current device mode
g.getDeviceMode(): "webgpu" | "cpu"

// Get session stats
g.getStats(): SessionStats

// Get system info
g.getInfo(): SystemInfo

// Clear KV cache (attention states)
g.clearCache(): void

// Clean up
await g.dispose(): Promise<void>

// Static: List built-in models
Gerbil.listModels(): ModelConfig[]

// Static: Get model by ID
Gerbil.getModel(id: string): ModelConfig | undefined

Types

GerbilConfig

GerbilConfig.ts

interface GerbilConfig {
  model?: string;
  device?: "auto" | "gpu" | "cpu";
  dtype?: "q4" | "q8" | "fp16" | "fp32";
  cache?: CacheConfig;
  fallback?: FallbackConfig;
}

GenerateOptions

GenerateOptions.ts

interface GenerateOptions {
  maxTokens?: number;      // default: 256
  temperature?: number;    // default: 0.7
  topP?: number;           // default: 0.9
  topK?: number;           // default: 50
  stopSequences?: string[];
  system?: string;
  thinking?: boolean;
  onToken?: (token: string) => void;
  images?: ImageInput[];   // Vision: images to include
  cache?: boolean;         // Enable response caching (default: false)
  cacheTtl?: number;       // Cache TTL in ms (default: 300000, i.e. 5 min)
}

GenerateResult

GenerateResult.ts

interface GenerateResult {
  text: string;
  thinking?: string;
  tokensGenerated: number;
  tokensPerSecond: number;
  totalTime: number;
  finishReason: "stop" | "length" | "error";
  provider?: "local" | "openai" | "anthropic";
  cached?: boolean;
}

LoadOptions

LoadOptions.ts

interface LoadOptions {
  onProgress?: (info: ProgressInfo) => void;
  device?: "auto" | "gpu" | "cpu" | "webgpu";
  dtype?: "q4" | "q8" | "fp16" | "fp32";
  contextLength?: number;
}

ModelConfig

ModelConfig.ts

interface ModelConfig {
  id: string;
  repo: string;
  description: string;
  size: string;
  contextLength: number;
  supportsThinking: boolean;
  supportsJson: boolean;
  supportsVision?: boolean;      // Vision: model understands images
  visionEncoderSize?: string;    // Vision: encoder params (e.g. "0.4B")
  family: "qwen" | "smollm" | "phi" | "mistral" | "llama" | "other";
}

ImageInput

ImageInput.ts

interface ImageInput {
  /** Image source: URL, base64 data URI, or local file path */
  source: string;
  /** Optional alt text for context */
  alt?: string;
}

ResponseCacheStats

ResponseCacheStats.ts

interface ResponseCacheStats {
  hits: number;      // Number of cache hits
  misses: number;    // Number of cache misses
  size: number;      // Number of cached entries
  hitRate: number;   // Hit rate percentage (0-100)
}

Functional API

One-liner functions that auto-load the model:

functional.ts

import gerbil, { generate, stream, json, embed } from "@tryhamster/gerbil";

// Default export - simplest usage
const text = await gerbil("Hello");

// Named exports
const result = await generate("Hello");
const data = await json("Extract...", { schema });
const vec = await embed("Hello");

for await (const chunk of stream("Story")) {
  console.log(chunk);
}

Skills API

skills-api.ts

import {
  // Skill system
  defineSkill,
  useSkill,
  listSkills,
  loadSkills,
  
  // Built-in skills
  commit,
  summarize,
  explain,
  review,
  test,
  translate,
  extract,
  title,
  
  // Vision skills (require ministral-3b or another vision-capable model)
  describeImage,
  analyzeScreenshot,
  extractFromImage,
  compareImages,
  captionImage,
} from "@tryhamster/gerbil/skills";

// All skills accept an input object and return Promise<string | T>
const msg = await commit({ type: "conventional" });
const summary = await summarize({ content, length: "short" });
const explanation = await explain({ content, level: "beginner" });
const feedback = await review({ code, focus: ["security"] });
const tests = await test({ code, framework: "vitest" });
const translated = await translate({ text, to: "es" });
const headline = await title({ content, style: "professional" });

// Vision skills
const description = await describeImage({ image: url, focus: "details" });
const analysis = await analyzeScreenshot({ image: dataUri, type: "qa" });
const extractedText = await extractFromImage({ image, extract: "text" });
const diff = await compareImages({ image1, image2, focus: "differences" });
const alt = await captionImage({ image, style: "descriptive" });

// extract() returns Promise<T>
const data = await extract({ content, schema: myZodSchema });

// Custom skills
const mySkill = defineSkill({
  name: "my-skill",
  input: z.object({ text: z.string() }),
  run: async ({ input, gerbil }) => gerbil.generate(input.text),
});