LlamaIndex

LLM and embedding integration for LlamaIndex. Build RAG applications with local models.

Installation

Terminal
npm install @tryhamster/gerbil llamaindex

Quick Start

quick-start.ts
import { GerbilLLM, GerbilEmbedding } from "@tryhamster/gerbil/llamaindex";
import { Document, VectorStoreIndex } from "llamaindex";

// Create LLM and embedding model
const llm = new GerbilLLM({ model: "qwen3-0.6b" });
const embedModel = new GerbilEmbedding();

// Create documents
const documents = [
  new Document({ text: "Gerbil runs LLMs locally" }),
  new Document({ text: "It supports WebGPU acceleration" }),
];

// Create index
const index = await VectorStoreIndex.fromDocuments(documents, {
  llm,
  embedModel,
});

// Query
const queryEngine = index.asQueryEngine();
const response = await queryEngine.query("What is Gerbil?");

console.log(response.toString());

GerbilLLM

llm.ts
import { GerbilLLM } from "@tryhamster/gerbil/llamaindex";

const llm = new GerbilLLM({
  // Model configuration
  model: "qwen3-0.6b",
  device: "auto",
  dtype: "q4",

  // Generation options
  maxTokens: 500,
  temperature: 0.7,
  topP: 0.9,

  // Thinking mode
  thinking: false,
});

// Direct completion
const response = await llm.complete("Explain recursion");
console.log(response.text);

// Chat completion
const chatResponse = await llm.chat({
  messages: [
    { role: "user", content: "Hello!" },
  ],
});
console.log(chatResponse.message.content);

Streaming

streaming.ts
import { GerbilLLM } from "@tryhamster/gerbil/llamaindex";

const llm = new GerbilLLM({ model: "qwen3-0.6b" });

// Stream completion
const stream = await llm.stream("Tell me a story");

for await (const chunk of stream) {
  process.stdout.write(chunk.delta);
}

// Stream chat
const chatStream = await llm.streamChat({
  messages: [
    { role: "user", content: "Explain hooks" },
  ],
});

for await (const chunk of chatStream) {
  process.stdout.write(chunk.delta);
}

GerbilEmbedding

embeddings.ts
import { GerbilEmbedding } from "@tryhamster/gerbil/llamaindex";

const embedModel = new GerbilEmbedding({
  model: "all-MiniLM-L6-v2", // Optional
});

// Single query embedding
const queryEmbedding = await embedModel.getQueryEmbedding("What is Gerbil?");
// Returns: number[]

// Multiple text embeddings
const textEmbeddings = await embedModel.getTextEmbeddings([
  "First document",
  "Second document",
]);
// Returns: number[][]
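
The returned arrays can also be used directly, without building an index. As a minimal sketch in plain TypeScript (relying only on the textEmbeddings array returned above), here is the cosine similarity between the two document embeddings:

function cosineSimilarity(a: number[], b: number[]): number {
  let dot = 0;
  let normA = 0;
  let normB = 0;
  for (let i = 0; i < a.length; i++) {
    dot += a[i] * b[i];
    normA += a[i] * a[i];
    normB += b[i] * b[i];
  }
  return dot / (Math.sqrt(normA) * Math.sqrt(normB));
}

// Higher values indicate greater semantic similarity
const similarity = cosineSimilarity(textEmbeddings[0], textEmbeddings[1]);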

Vector Store Index

vector-index.ts
import { GerbilLLM, GerbilEmbedding } from "@tryhamster/gerbil/llamaindex";
import { Document, VectorStoreIndex, Settings } from "llamaindex";

// Set global defaults
Settings.llm = new GerbilLLM({ model: "qwen3-0.6b" });
Settings.embedModel = new GerbilEmbedding();

// Create documents
const documents = [
  new Document({
    text: "Gerbil is a local LLM library for Node.js",
    metadata: { source: "readme" },
  }),
  new Document({
    text: "It supports GPU acceleration via WebGPU",
    metadata: { source: "features" },
  }),
  new Document({
    text: "Models are downloaded and cached locally",
    metadata: { source: "features" },
  }),
];

// Create index
const index = await VectorStoreIndex.fromDocuments(documents);

// Query
const queryEngine = index.asQueryEngine();
const response = await queryEngine.query("How does Gerbil handle models?");

console.log(response.toString());
console.log("Sources:", response.sourceNodes);

Chat Engine

chat-engine.ts
import { GerbilLLM, GerbilEmbedding } from "@tryhamster/gerbil/llamaindex";
import { Document, VectorStoreIndex, Settings } from "llamaindex";

Settings.llm = new GerbilLLM({ model: "qwen3-0.6b" });
Settings.embedModel = new GerbilEmbedding();

// Create index from documents (a Document[] built as in the previous examples)
const index = await VectorStoreIndex.fromDocuments(documents);

// Create chat engine
const chatEngine = index.asChatEngine();

// Chat with context
const response1 = await chatEngine.chat({
  message: "What is Gerbil?",
});
console.log(response1.toString());

// Follow-up (remembers context)
const response2 = await chatEngine.chat({
  message: "Does it work offline?",
});
console.log(response2.toString());

// Reset conversation
chatEngine.reset();

Document Loaders

document-loaders.ts
import { GerbilLLM, GerbilEmbedding } from "@tryhamster/gerbil/llamaindex";
import {
  SimpleDirectoryReader,
  VectorStoreIndex,
  Settings,
} from "llamaindex";

Settings.llm = new GerbilLLM({ model: "qwen3-0.6b" });
Settings.embedModel = new GerbilEmbedding();

// Load all documents from a directory
const reader = new SimpleDirectoryReader();
const documents = await reader.loadData("./docs");

// Create index
const index = await VectorStoreIndex.fromDocuments(documents);

// Query
const queryEngine = index.asQueryEngine();
const response = await queryEngine.query("Summarize the documentation");

Custom Retriever

custom-retriever.ts
import { GerbilLLM, GerbilEmbedding } from "@tryhamster/gerbil/llamaindex";
import {
  VectorStoreIndex,
  VectorIndexRetriever,
  ResponseSynthesizer,
  RetrieverQueryEngine,
  Settings,
} from "llamaindex";

Settings.llm = new GerbilLLM({ model: "qwen3-0.6b" });
Settings.embedModel = new GerbilEmbedding();

// Create index (documents is a Document[] built as in the previous examples)
const index = await VectorStoreIndex.fromDocuments(documents);

// Custom retriever with more results
const retriever = new VectorIndexRetriever({
  index,
  similarityTopK: 5, // Retrieve the top 5 most similar chunks
});

// Custom response synthesizer
const responseSynthesizer = new ResponseSynthesizer();

// Create query engine with custom components
const queryEngine = new RetrieverQueryEngine(retriever, responseSynthesizer);

const response = await queryEngine.query("What are the main features?");

Thinking Mode

thinking.ts
import { GerbilLLM } from "@tryhamster/gerbil/llamaindex";

const llm = new GerbilLLM({
  model: "qwen3-0.6b",
  thinking: true, // Enable chain-of-thought
});

const response = await llm.complete("What is 127 × 43? Show your work.");

// Access thinking process
console.log("Thinking:", response.raw?.thinking);
console.log("Answer:", response.text);

Persistence

persistence.ts
import { GerbilLLM, GerbilEmbedding } from "@tryhamster/gerbil/llamaindex";
import {
  VectorStoreIndex,
  storageContextFromDefaults,
  Settings,
} from "llamaindex";

Settings.llm = new GerbilLLM({ model: "qwen3-0.6b" });
Settings.embedModel = new GerbilEmbedding();

// Create storage context
const storageContext = await storageContextFromDefaults({
  persistDir: "./storage",
});

// Create and persist index (documents is a Document[] built as in the previous examples)
const index = await VectorStoreIndex.fromDocuments(documents, {
  storageContext,
});

// Later: load the index from storage
const loadedIndex = await VectorStoreIndex.init({
  storageContext: await storageContextFromDefaults({
    persistDir: "./storage",
  }),
});

const queryEngine = loadedIndex.asQueryEngine();
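
The reloaded index behaves like the original. A minimal follow-up, reusing the queryEngine created above:

const response = await queryEngine.query("What is Gerbil?");
console.log(response.toString());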

Metadata Filtering

metadata.ts
import { GerbilLLM, GerbilEmbedding } from "@tryhamster/gerbil/llamaindex";
import { Document, VectorStoreIndex, MetadataFilters, Settings } from "llamaindex";

Settings.llm = new GerbilLLM({ model: "qwen3-0.6b" });
Settings.embedModel = new GerbilEmbedding();

// Documents with metadata
const documents = [
  new Document({
    text: "Installation guide for Linux",
    metadata: { category: "installation", os: "linux" },
  }),
  new Document({
    text: "Installation guide for macOS",
    metadata: { category: "installation", os: "macos" },
  }),
  new Document({
    text: "API reference documentation",
    metadata: { category: "api" },
  }),
];

const index = await VectorStoreIndex.fromDocuments(documents);

// Query with metadata filter
const queryEngine = index.asQueryEngine({
  preFilters: new MetadataFilters({
    filters: [
      { key: "category", value: "installation" },
    ],
  }),
});

const response = await queryEngine.query("How do I install?");

Sub-Question Query Engine

sub-question.ts
import { GerbilLLM, GerbilEmbedding } from "@tryhamster/gerbil/llamaindex";
import {
  VectorStoreIndex,
  SubQuestionQueryEngine,
  QueryEngineTool,
  Settings,
} from "llamaindex";

Settings.llm = new GerbilLLM({ model: "qwen3-0.6b" });
Settings.embedModel = new GerbilEmbedding();

// Create multiple indexes for different topics
// (docsDocs and apiDocs are Document[] arrays built as in the previous examples)
const docsIndex = await VectorStoreIndex.fromDocuments(docsDocs);
const apiIndex = await VectorStoreIndex.fromDocuments(apiDocs);

// Create query engine tools
const queryEngineTools = [
  new QueryEngineTool({
    queryEngine: docsIndex.asQueryEngine(),
    metadata: {
      name: "docs",
      description: "Documentation and guides",
    },
  }),
  new QueryEngineTool({
    queryEngine: apiIndex.asQueryEngine(),
    metadata: {
      name: "api",
      description: "API reference",
    },
  }),
];

// Create sub-question engine
const queryEngine = SubQuestionQueryEngine.fromDefaults({
  queryEngineTools,
});

// Complex query that spans multiple sources
const response = await queryEngine.query(
  "How do I install Gerbil and what API methods are available?"
);