LlamaIndex

LLM and embedding integration for LlamaIndex. Build RAG applications with local models.

Installation

Terminal
npm install @tryhamster/gerbil llamaindex

Quick Start

quick-start.ts
import { GerbilLLM, GerbilEmbedding } from "@tryhamster/gerbil/llamaindex";
import { Document, VectorStoreIndex } from "llamaindex";

// Create LLM and embedding model
const llm = new GerbilLLM({ model: "qwen3-0.6b" });
const embedModel = new GerbilEmbedding();

// Create documents
const documents = [
  new Document({ text: "Gerbil runs LLMs locally" }),
  new Document({ text: "It supports WebGPU acceleration" }),
];

// Create index
const index = await VectorStoreIndex.fromDocuments(documents, {
  llm,
  embedModel,
});

// Query
const queryEngine = index.asQueryEngine();
const response = await queryEngine.query("What is Gerbil?");

console.log(response.toString());

GerbilLLM

llm.ts
import { GerbilLLM } from "@tryhamster/gerbil/llamaindex";

const llm = new GerbilLLM({
  // Model configuration
  model: "qwen3-0.6b",
  device: "auto",
  dtype: "q4",

  // Generation options
  maxTokens: 500,
  temperature: 0.7,
  topP: 0.9,

  // Thinking mode
  thinking: false,
});

// Direct completion
const response = await llm.complete("Explain recursion");
console.log(response.text);

// Chat completion
const chatResponse = await llm.chat({
  messages: [
    { role: "user", content: "Hello!" },
  ],
});
console.log(chatResponse.message.content);

Streaming

streaming.ts
import { GerbilLLM } from "@tryhamster/gerbil/llamaindex";

const llm = new GerbilLLM({ model: "qwen3-0.6b" });

// Stream completion
const stream = await llm.stream("Tell me a story");

for await (const chunk of stream) {
  process.stdout.write(chunk.delta);
}

// Stream chat
const chatStream = await llm.streamChat({
  messages: [
    { role: "user", content: "Explain hooks" },
  ],
});

for await (const chunk of chatStream) {
  process.stdout.write(chunk.delta);
}

GerbilEmbedding

embeddings.ts
import { GerbilEmbedding } from "@tryhamster/gerbil/llamaindex";

const embedModel = new GerbilEmbedding({
  model: "all-MiniLM-L6-v2", // Optional
});

// Single query embedding
const queryEmbedding = await embedModel.getQueryEmbedding("What is Gerbil?");
// Returns: number[]

// Multiple text embeddings
const textEmbeddings = await embedModel.getTextEmbeddings([
  "First document",
  "Second document",
]);
// Returns: number[][]
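
The returned arrays can also be used directly, without building an index. As a minimal sketch in plain TypeScript (relying only on the textEmbeddings array returned above), here is the cosine similarity between the two document embeddings:

function cosineSimilarity(a: number[], b: number[]): number {
  let dot = 0;
  let normA = 0;
  let normB = 0;
  for (let i = 0; i < a.length; i++) {
    dot += a[i] * b[i];
    normA += a[i] * a[i];
    normB += b[i] * b[i];
  }
  return dot / (Math.sqrt(normA) * Math.sqrt(normB));
}

// Higher values indicate greater semantic similarity
const similarity = cosineSimilarity(textEmbeddings[0], textEmbeddings[1]);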

Vector Store Index

vector-index.ts
import { GerbilLLM, GerbilEmbedding } from "@tryhamster/gerbil/llamaindex";
import { Document, VectorStoreIndex, Settings } from "llamaindex";

// Set global defaults
Settings.llm = new GerbilLLM({ model: "qwen3-0.6b" });
Settings.embedModel = new GerbilEmbedding();

// Create documents
const documents = [
  new Document({
    text: "Gerbil is a local LLM library for Node.js",
    metadata: { source: "readme" },
  }),
  new Document({
    text: "It supports GPU acceleration via WebGPU",
    metadata: { source: "features" },
  }),
  new Document({
    text: "Models are downloaded and cached locally",
    metadata: { source: "features" },
  }),
];

// Create index
const index = await VectorStoreIndex.fromDocuments(documents);

// Query
const queryEngine = index.asQueryEngine();
const response = await queryEngine.query("How does Gerbil handle models?");

console.log(response.toString());
console.log("Sources:", response.sourceNodes);

Chat Engine

chat-engine.ts
import { GerbilLLM, GerbilEmbedding } from "@tryhamster/gerbil/llamaindex";
import { Document, VectorStoreIndex, Settings } from "llamaindex";

Settings.llm = new GerbilLLM({ model: "qwen3-0.6b" });
Settings.embedModel = new GerbilEmbedding();

// Create index from documents (a Document[] built as in the previous examples)
const index = await VectorStoreIndex.fromDocuments(documents);

// Create chat engine
const chatEngine = index.asChatEngine();

// Chat with context
const response1 = await chatEngine.chat({
  message: "What is Gerbil?",
});
console.log(response1.toString());

// Follow-up (remembers context)
const response2 = await chatEngine.chat({
  message: "Does it work offline?",
});
console.log(response2.toString());

// Reset conversation
chatEngine.reset();

Document Loaders

document-loaders.ts
import { GerbilLLM, GerbilEmbedding } from "@tryhamster/gerbil/llamaindex";
import {
  SimpleDirectoryReader,
  VectorStoreIndex,
  Settings,
} from "llamaindex";

Settings.llm = new GerbilLLM({ model: "qwen3-0.6b" });
Settings.embedModel = new GerbilEmbedding();

// Load all documents from a directory
const reader = new SimpleDirectoryReader();
const documents = await reader.loadData("./docs");

// Create index
const index = await VectorStoreIndex.fromDocuments(documents);

// Query
const queryEngine = index.asQueryEngine();
const response = await queryEngine.query("Summarize the documentation");

Custom Retriever

custom-retriever.ts
import { GerbilLLM, GerbilEmbedding } from "@tryhamster/gerbil/llamaindex";
import {
  VectorStoreIndex,
  VectorIndexRetriever,
  ResponseSynthesizer,
  RetrieverQueryEngine,
  Settings,
} from "llamaindex";

Settings.llm = new GerbilLLM({ model: "qwen3-0.6b" });
Settings.embedModel = new GerbilEmbedding();

// Create index (documents is a Document[] built as in the previous examples)
const index = await VectorStoreIndex.fromDocuments(documents);

// Custom retriever with more results
const retriever = new VectorIndexRetriever({
  index,
  similarityTopK: 5, // Retrieve the top 5 most similar chunks
});

// Custom response synthesizer
const responseSynthesizer = new ResponseSynthesizer();

// Create query engine with custom components
const queryEngine = new RetrieverQueryEngine(retriever, responseSynthesizer);

const response = await queryEngine.query("What are the main features?");

Thinking Mode

thinking.ts
import { GerbilLLM } from "@tryhamster/gerbil/llamaindex";

const llm = new GerbilLLM({
  model: "qwen3-0.6b",
  thinking: true, // Enable chain-of-thought
});

const response = await llm.complete("What is 127 × 43? Show your work.");

// Access thinking process
console.log("Thinking:", response.raw?.thinking);
console.log("Answer:", response.text);

Persistence

persistence.ts
import { GerbilLLM, GerbilEmbedding } from "@tryhamster/gerbil/llamaindex";
import {
  VectorStoreIndex,
  storageContextFromDefaults,
  Settings,
} from "llamaindex";

Settings.llm = new GerbilLLM({ model: "qwen3-0.6b" });
Settings.embedModel = new GerbilEmbedding();

// Create storage context
const storageContext = await storageContextFromDefaults({
  persistDir: "./storage",
});

// Create and persist index (documents is a Document[] built as in the previous examples)
const index = await VectorStoreIndex.fromDocuments(documents, {
  storageContext,
});

// Later: load the index from storage
const loadedIndex = await VectorStoreIndex.init({
  storageContext: await storageContextFromDefaults({
    persistDir: "./storage",
  }),
});

const queryEngine = loadedIndex.asQueryEngine();
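
The reloaded index behaves like the original. A minimal follow-up, reusing the queryEngine created above:

const response = await queryEngine.query("What is Gerbil?");
console.log(response.toString());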

Metadata Filtering

metadata.ts
import { GerbilLLM, GerbilEmbedding } from "@tryhamster/gerbil/llamaindex";
import { Document, VectorStoreIndex, MetadataFilters, Settings } from "llamaindex";

Settings.llm = new GerbilLLM({ model: "qwen3-0.6b" });
Settings.embedModel = new GerbilEmbedding();

// Documents with metadata
const documents = [
  new Document({
    text: "Installation guide for Linux",
    metadata: { category: "installation", os: "linux" },
  }),
  new Document({
    text: "Installation guide for macOS",
    metadata: { category: "installation", os: "macos" },
  }),
  new Document({
    text: "API reference documentation",
    metadata: { category: "api" },
  }),
];

const index = await VectorStoreIndex.fromDocuments(documents);

// Query with metadata filter
const queryEngine = index.asQueryEngine({
  preFilters: new MetadataFilters({
    filters: [
      { key: "category", value: "installation" },
    ],
  }),
});

const response = await queryEngine.query("How do I install?");

Sub-Question Query Engine

sub-question.ts
import { GerbilLLM, GerbilEmbedding } from "@tryhamster/gerbil/llamaindex";
import {
  VectorStoreIndex,
  SubQuestionQueryEngine,
  QueryEngineTool,
  Settings,
} from "llamaindex";

Settings.llm = new GerbilLLM({ model: "qwen3-0.6b" });
Settings.embedModel = new GerbilEmbedding();

// Create multiple indexes for different topics
// (docsDocs and apiDocs are Document[] arrays built as in the previous examples)
const docsIndex = await VectorStoreIndex.fromDocuments(docsDocs);
const apiIndex = await VectorStoreIndex.fromDocuments(apiDocs);

// Create query engine tools
const queryEngineTools = [
  new QueryEngineTool({
    queryEngine: docsIndex.asQueryEngine(),
    metadata: {
      name: "docs",
      description: "Documentation and guides",
    },
  }),
  new QueryEngineTool({
    queryEngine: apiIndex.asQueryEngine(),
    metadata: {
      name: "api",
      description: "API reference",
    },
  }),
];

// Create sub-question engine
const queryEngine = SubQuestionQueryEngine.fromDefaults({
  queryEngineTools,
});

// Complex query that spans multiple sources
const response = await queryEngine.query(
  "How do I install Gerbil and what API methods are available?"
);