# LlamaIndex
LLM and embedding integration for LlamaIndex. Build RAG applications with local models.
## Installation

```bash
npm install @tryhamster/gerbil llamaindex
```

## Quick Start
quick-start.ts
```ts
import { GerbilLLM, GerbilEmbedding } from "@tryhamster/gerbil/llamaindex";
import { Document, VectorStoreIndex } from "llamaindex";

// Create LLM and embedding model
const llm = new GerbilLLM({ model: "qwen3-0.6b" });
const embedModel = new GerbilEmbedding();

// Create documents
const documents = [
  new Document({ text: "Gerbil runs LLMs locally" }),
  new Document({ text: "It supports WebGPU acceleration" }),
];

// Create index
const index = await VectorStoreIndex.fromDocuments(documents, {
  llm,
  embedModel,
});

// Query
const queryEngine = index.asQueryEngine();
const response = await queryEngine.query("What is Gerbil?");

console.log(response.toString());
```
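Passing `llm` and `embedModel` per index works for one-off scripts; for larger apps you can register them once as global defaults on LlamaIndex's `Settings` object instead, as the Vector Store Index example below does.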
## GerbilLLM

llm.ts
```ts
import { GerbilLLM } from "@tryhamster/gerbil/llamaindex";

const llm = new GerbilLLM({
  // Model configuration
  model: "qwen3-0.6b",
  device: "auto",
  dtype: "q4",

  // Generation options
  maxTokens: 500,
  temperature: 0.7,
  topP: 0.9,

  // Thinking mode
  thinking: false,
});

// Direct completion
const response = await llm.complete("Explain recursion");
console.log(response.text);

// Chat completion
const chatResponse = await llm.chat({
  messages: [
    { role: "user", content: "Hello!" },
  ],
});
console.log(chatResponse.message.content);
```
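A system message can steer tone and format across turns. A minimal sketch, assuming the integration forwards LlamaIndex's standard `system` role to the underlying model:

```ts
// Hypothetical usage: steer the assistant with a system prompt
// (assumes the standard LlamaIndex "system" role is supported)
const terse = await llm.chat({
  messages: [
    { role: "system", content: "You are a terse assistant. Answer in one sentence." },
    { role: "user", content: "What is recursion?" },
  ],
});
console.log(terse.message.content);
```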
## Streaming

streaming.ts
```ts
import { GerbilLLM } from "@tryhamster/gerbil/llamaindex";

const llm = new GerbilLLM({ model: "qwen3-0.6b" });

// Stream completion
const stream = await llm.stream("Tell me a story");

for await (const chunk of stream) {
  process.stdout.write(chunk.delta);
}

// Stream chat
const chatStream = await llm.streamChat({
  messages: [
    { role: "user", content: "Explain hooks" },
  ],
});

for await (const chunk of chatStream) {
  process.stdout.write(chunk.delta);
}
```
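If you need the complete text rather than incremental console output, accumulate the deltas. This sketch uses only the `stream()` and `chunk.delta` surface shown above:

```ts
// Collect a streamed completion into a single string
let full = "";
for await (const chunk of await llm.stream("Summarize WebGPU in one sentence")) {
  full += chunk.delta;
}
console.log(full);
```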
## GerbilEmbedding

embeddings.ts
```ts
import { GerbilEmbedding } from "@tryhamster/gerbil/llamaindex";

const embedModel = new GerbilEmbedding({
  model: "all-MiniLM-L6-v2", // Optional
});

// Single query embedding
const queryEmbedding = await embedModel.getQueryEmbedding("What is Gerbil?");
// Returns: number[]

// Multiple text embeddings
const textEmbeddings = await embedModel.getTextEmbeddings([
  "First document",
  "Second document",
]);
// Returns: number[][]
```
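Because the results are plain `number[]` vectors, you can score similarity directly. A minimal sketch ranking the texts above against the query; `cosineSimilarity` is an illustrative helper, not part of the package:

```ts
// Cosine similarity between two embedding vectors (illustrative helper)
function cosineSimilarity(a: number[], b: number[]): number {
  let dot = 0, normA = 0, normB = 0;
  for (let i = 0; i < a.length; i++) {
    dot += a[i] * b[i];
    normA += a[i] * a[i];
    normB += b[i] * b[i];
  }
  return dot / (Math.sqrt(normA) * Math.sqrt(normB));
}

// Higher score = more similar to the query
const scores = textEmbeddings.map((e) => cosineSimilarity(queryEmbedding, e));
console.log(scores);
```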
## Vector Store Index

vector-index.ts
```ts
import { GerbilLLM, GerbilEmbedding } from "@tryhamster/gerbil/llamaindex";
import { Document, VectorStoreIndex, Settings } from "llamaindex";

// Set global defaults
Settings.llm = new GerbilLLM({ model: "qwen3-0.6b" });
Settings.embedModel = new GerbilEmbedding();

// Create documents
const documents = [
  new Document({
    text: "Gerbil is a local LLM library for Node.js",
    metadata: { source: "readme" },
  }),
  new Document({
    text: "It supports GPU acceleration via WebGPU",
    metadata: { source: "features" },
  }),
  new Document({
    text: "Models are downloaded and cached locally",
    metadata: { source: "features" },
  }),
];

// Create index
const index = await VectorStoreIndex.fromDocuments(documents);

// Query
const queryEngine = index.asQueryEngine();
const response = await queryEngine.query("How does Gerbil handle models?");

console.log(response.toString());
console.log("Sources:", response.sourceNodes);
```
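Each entry in `sourceNodes` pairs a retrieved chunk with its similarity score. Assuming the standard LlamaIndex `NodeWithScore` shape, you can surface scores and the metadata attached above:

```ts
// Inspect retrieved chunks (assumes the standard NodeWithScore shape)
for (const source of response.sourceNodes ?? []) {
  console.log(source.score, source.node.metadata.source);
}
```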
## Chat Engine

chat-engine.ts
```ts
import { GerbilLLM, GerbilEmbedding } from "@tryhamster/gerbil/llamaindex";
import { Document, VectorStoreIndex, Settings } from "llamaindex";

Settings.llm = new GerbilLLM({ model: "qwen3-0.6b" });
Settings.embedModel = new GerbilEmbedding();

// Create index from documents
const documents = [
  new Document({ text: "Gerbil runs LLMs locally, so it works offline" }),
];
const index = await VectorStoreIndex.fromDocuments(documents);

// Create chat engine
const chatEngine = index.asChatEngine();

// Chat with context
const response1 = await chatEngine.chat({
  message: "What is Gerbil?",
});
console.log(response1.toString());

// Follow-up (remembers context)
const response2 = await chatEngine.chat({
  message: "Does it work offline?",
});
console.log(response2.toString());

// Reset conversation
chatEngine.reset();
```

## Document Loaders
document-loaders.ts
```ts
import { GerbilLLM, GerbilEmbedding } from "@tryhamster/gerbil/llamaindex";
import {
  SimpleDirectoryReader,
  VectorStoreIndex,
  Settings,
} from "llamaindex";

Settings.llm = new GerbilLLM({ model: "qwen3-0.6b" });
Settings.embedModel = new GerbilEmbedding();

// Load all documents from a directory
const reader = new SimpleDirectoryReader();
const documents = await reader.loadData("./docs");

// Create index
const index = await VectorStoreIndex.fromDocuments(documents);

// Query
const queryEngine = index.asQueryEngine();
const response = await queryEngine.query("Summarize the documentation");
console.log(response.toString());
```

## Custom Retriever
custom-retriever.ts
```ts
import { GerbilLLM, GerbilEmbedding } from "@tryhamster/gerbil/llamaindex";
import {
  Document,
  VectorStoreIndex,
  VectorIndexRetriever,
  ResponseSynthesizer,
  RetrieverQueryEngine,
  Settings,
} from "llamaindex";

Settings.llm = new GerbilLLM({ model: "qwen3-0.6b" });
Settings.embedModel = new GerbilEmbedding();

// Create index
const documents = [
  new Document({ text: "Gerbil supports WebGPU acceleration and local model caching" }),
];
const index = await VectorStoreIndex.fromDocuments(documents);

// Custom retriever with more results
const retriever = new VectorIndexRetriever({
  index,
  similarityTopK: 5, // Retrieve top 5 similar chunks
});

// Custom response synthesizer
const responseSynthesizer = new ResponseSynthesizer();

// Create query engine with custom components
const queryEngine = new RetrieverQueryEngine(retriever, responseSynthesizer);

const response = await queryEngine.query("What are the main features?");
```

## Thinking Mode
thinking.ts
```ts
import { GerbilLLM } from "@tryhamster/gerbil/llamaindex";

const llm = new GerbilLLM({
  model: "qwen3-0.6b",
  thinking: true, // Enable chain-of-thought
});

const response = await llm.complete("What is 127 × 43? Show your work.");

// Access thinking process
console.log("Thinking:", response.raw?.thinking);
console.log("Answer:", response.text);
```

## Persistence
persistence.ts
```ts
import { GerbilLLM, GerbilEmbedding } from "@tryhamster/gerbil/llamaindex";
import {
  Document,
  VectorStoreIndex,
  storageContextFromDefaults,
  Settings,
} from "llamaindex";

Settings.llm = new GerbilLLM({ model: "qwen3-0.6b" });
Settings.embedModel = new GerbilEmbedding();

// Create storage context
const storageContext = await storageContextFromDefaults({
  persistDir: "./storage",
});

// Create and persist index
const documents = [
  new Document({ text: "Models are downloaded and cached locally" }),
];
const index = await VectorStoreIndex.fromDocuments(documents, {
  storageContext,
});

// Later: Load from storage
const loadedIndex = await VectorStoreIndex.init({
  storageContext: await storageContextFromDefaults({
    persistDir: "./storage",
  }),
});

const queryEngine = loadedIndex.asQueryEngine();
```
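In a script that runs repeatedly, you usually want to rebuild the index only when nothing has been persisted yet. A minimal sketch using Node's `fs` and the `./storage` layout above; the existence check runs before the storage context is created, in case initialization creates the directory:

```ts
import { existsSync } from "node:fs";

// Check before creating the storage context, in case
// initialization creates the directory
const persistDir = "./storage";
const hasPersistedIndex = existsSync(persistDir);

const storageContext = await storageContextFromDefaults({ persistDir });
const index = hasPersistedIndex
  ? await VectorStoreIndex.init({ storageContext })
  : await VectorStoreIndex.fromDocuments(documents, { storageContext });
```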
## Metadata Filtering

metadata.ts
```ts
import { GerbilLLM, GerbilEmbedding } from "@tryhamster/gerbil/llamaindex";
import { Document, VectorStoreIndex, MetadataFilters, Settings } from "llamaindex";

Settings.llm = new GerbilLLM({ model: "qwen3-0.6b" });
Settings.embedModel = new GerbilEmbedding();

// Documents with metadata
const documents = [
  new Document({
    text: "Installation guide for Linux",
    metadata: { category: "installation", os: "linux" },
  }),
  new Document({
    text: "Installation guide for macOS",
    metadata: { category: "installation", os: "macos" },
  }),
  new Document({
    text: "API reference documentation",
    metadata: { category: "api" },
  }),
];

const index = await VectorStoreIndex.fromDocuments(documents);

// Query with metadata filter
const queryEngine = index.asQueryEngine({
  preFilters: new MetadataFilters({
    filters: [
      { key: "category", value: "installation" },
    ],
  }),
});

const response = await queryEngine.query("How do I install?");
```

## Sub-Question Query Engine
sub-question.ts
```ts
import { GerbilLLM, GerbilEmbedding } from "@tryhamster/gerbil/llamaindex";
import {
  Document,
  VectorStoreIndex,
  SubQuestionQueryEngine,
  QueryEngineTool,
  Settings,
} from "llamaindex";

Settings.llm = new GerbilLLM({ model: "qwen3-0.6b" });
Settings.embedModel = new GerbilEmbedding();

// Create multiple indexes for different topics
const docsDocs = [
  new Document({ text: "Install Gerbil with: npm install @tryhamster/gerbil" }),
];
const apiDocs = [
  new Document({ text: "GerbilLLM exposes complete(), chat(), and stream()" }),
];
const docsIndex = await VectorStoreIndex.fromDocuments(docsDocs);
const apiIndex = await VectorStoreIndex.fromDocuments(apiDocs);

// Create query engine tools
const queryEngineTools = [
  new QueryEngineTool({
    queryEngine: docsIndex.asQueryEngine(),
    metadata: {
      name: "docs",
      description: "Documentation and guides",
    },
  }),
  new QueryEngineTool({
    queryEngine: apiIndex.asQueryEngine(),
    metadata: {
      name: "api",
      description: "API reference",
    },
  }),
];

// Create sub-question engine
const queryEngine = SubQuestionQueryEngine.fromDefaults({
  queryEngineTools,
});

// Complex query that spans multiple sources
const response = await queryEngine.query(
  "How do I install Gerbil and what API methods are available?"
);
console.log(response.toString());
```
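Under the hood, the sub-question engine asks the LLM to decompose the query into per-tool sub-questions, routes each one using the tool's `description`, answers it against that index, and synthesizes a final response. With a small model like `qwen3-0.6b`, short and clearly distinct tool descriptions help keep the decomposition step reliable.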