npm install cactus-react-native react-native-nitro-modules

Get started with Cactus in just a few lines of code:
import { CactusLM, type Message } from 'cactus-react-native';
// Create a new instance
const cactusLM = new CactusLM();
// Download the model
await cactusLM.download({
onProgress: (progress) => console.log(`Download: ${Math.round(progress * 100)}%`)
});
// Generate a completion
const messages: Message[] = [
{ role: 'user', content: 'What is the capital of France?' }
];
const result = await cactusLM.complete({ messages });
console.log(result.response); // "The capital of France is Paris."
// Clean up resources
await cactusLM.destroy();

Using the React Hook:
import { useCactusLM } from 'cactus-react-native';
const App = () => {
const cactusLM = useCactusLM();
useEffect(() => {
// Download the model if not already available
if (!cactusLM.isDownloaded) {
cactusLM.download();
}
}, []);
const handleGenerate = () => {
// Generate a completion
cactusLM.complete({
messages: [{ role: 'user', content: 'Hello!' }],
});
};
if (cactusLM.isDownloading) {
return (
<Text>
Downloading model: {Math.round(cactusLM.downloadProgress * 100)}%
</Text>
);
}
return (
<>
<Button onPress={handleGenerate} title="Generate" />
<Text>{cactusLM.completion}</Text>
</>
);
};

Choose model quantization and NPU acceleration with Pro models.
import { CactusLM } from 'cactus-react-native';
// Use int4 for faster performance and smaller file size
const cactusLM = new CactusLM({
model: 'lfm2-vl-450m',
options: {
quantization: 'int4', // 'int4' or 'int8'
pro: false
}
});
// Use pro models for NPU acceleration
const cactusPro = new CactusLM({
model: 'lfm2-vl-450m',
options: {
quantization: 'int4',
pro: true
}
});

Generate text responses from the model by providing a conversation history.
import { CactusLM, type Message } from 'cactus-react-native';
const cactusLM = new CactusLM();
const messages: Message[] = [{ role: 'user', content: 'Hello, World!' }];
const onToken = (token: string) => { console.log('Token:', token) };
const result = await cactusLM.complete({ messages, onToken });
console.log('Completion result:', result);

import { useCactusLM, type Message } from 'cactus-react-native';
const App = () => {
const cactusLM = useCactusLM();
const handleComplete = async () => {
const messages: Message[] = [{ role: 'user', content: 'Hello, World!' }];
const result = await cactusLM.complete({ messages });
console.log('Completion result:', result);
};
return (
<>
<Button title="Complete" onPress={handleComplete} />
<Text>{cactusLM.completion}</Text>
</>
);
};

Vision allows you to pass images along with text prompts, enabling the model to analyze and understand visual content.
import { CactusLM, type Message } from 'cactus-react-native';
// Vision-capable model
const cactusLM = new CactusLM({ model: 'lfm2-vl-450m' });
const messages: Message[] = [
{
role: 'user',
content: "What's in the image?",
images: ['path/to/your/image'],
},
];
const result = await cactusLM.complete({ messages });
console.log('Response:', result.response);

import { useCactusLM, type Message } from 'cactus-react-native';
const App = () => {
// Vision-capable model
const cactusLM = useCactusLM({ model: 'lfm2-vl-450m' });
const handleAnalyze = async () => {
const messages: Message[] = [
{
role: 'user',
content: "What's in the image?",
images: ['path/to/your/image'],
},
];
await cactusLM.complete({ messages });
};
return (
<>
<Button title="Analyze Image" onPress={handleAnalyze} />
<Text>{cactusLM.completion}</Text>
</>
);
};

Enable the model to generate function calls by defining available tools and their parameters.
import { CactusLM, type Message, type Tool } from 'cactus-react-native';
const tools: Tool[] = [
{
name: 'get_weather',
description: 'Get current weather for a location',
parameters: {
type: 'object',
properties: {
location: {
type: 'string',
description: 'City name',
},
},
required: ['location'],
},
},
];
const cactusLM = new CactusLM();
const messages: Message[] = [
{ role: 'user', content: "What's the weather in San Francisco?" },
];
const result = await cactusLM.complete({ messages, tools });
console.log('Response:', result.response);
console.log('Function calls:', result.functionCalls);

import { useCactusLM, type Message, type Tool } from 'cactus-react-native';
const tools: Tool[] = [
{
name: 'get_weather',
description: 'Get current weather for a location',
parameters: {
type: 'object',
properties: {
location: {
type: 'string',
description: 'City name',
},
},
required: ['location'],
},
},
];
const App = () => {
const cactusLM = useCactusLM();
const handleComplete = async () => {
const messages: Message[] = [
{ role: 'user', content: "What's the weather in San Francisco?" },
];
const result = await cactusLM.complete({ messages, tools });
console.log('Response:', result.response);
console.log('Function calls:', result.functionCalls);
};
return <Button title="Complete" onPress={handleComplete} />;
};

RAG allows you to provide a corpus of documents that the model can reference during generation, enabling it to answer questions based on your data.
import { CactusLM, type Message } from 'cactus-react-native';
const cactusLM = new CactusLM({
corpusDir: 'path/to/your/corpus', // Directory containing .txt files
});
const messages: Message[] = [
{ role: 'user', content: 'What information is in the documents?' },
];
const result = await cactusLM.complete({ messages });
console.log(result.response);

import { useCactusLM, type Message } from 'cactus-react-native';
const App = () => {
const cactusLM = useCactusLM({
corpusDir: 'path/to/your/corpus', // Directory containing .txt files
});
const handleAsk = async () => {
const messages: Message[] = [
{ role: 'user', content: 'What information is in the documents?' },
];
await cactusLM.complete({ messages });
};
return (
<>
<Button title="Ask Question" onPress={handleAsk} />
<Text>{cactusLM.completion}</Text>
</>
);
};

Convert text into tokens using the model's tokenizer.
import { CactusLM } from 'cactus-react-native';
const cactusLM = new CactusLM();
const result = await cactusLM.tokenize({ text: 'Hello, World!' });
console.log('Token IDs:', result.tokens);

import { useCactusLM } from 'cactus-react-native';
const App = () => {
const cactusLM = useCactusLM();
const handleTokenize = async () => {
const result = await cactusLM.tokenize({ text: 'Hello, World!' });
console.log('Token IDs:', result.tokens);
};
return <Button title="Tokenize" onPress={handleTokenize} />;
};

Calculate perplexity scores for a window of tokens within a sequence.
import { CactusLM } from 'cactus-react-native';
const cactusLM = new CactusLM();
const tokens = [123, 456, 789, 101, 112];
const result = await cactusLM.scoreWindow({
tokens,
start: 1,
end: 3,
context: 2
});
console.log('Score:', result.score);

import { useCactusLM } from 'cactus-react-native';
const App = () => {
const cactusLM = useCactusLM();
const handleScoreWindow = async () => {
const tokens = [123, 456, 789, 101, 112];
const result = await cactusLM.scoreWindow({
tokens,
start: 1,
end: 3,
context: 2
});
console.log('Score:', result.score);
};
return <Button title="Score Window" onPress={handleScoreWindow} />;
};

Convert text and images into numerical vector representations that capture semantic meaning, useful for similarity search and semantic understanding.
import { CactusLM } from 'cactus-react-native';
const cactusLM = new CactusLM();
const result = await cactusLM.embed({ text: 'Hello, World!' });
console.log('Embedding vector:', result.embedding);
console.log('Embedding vector length:', result.embedding.length);

import { useCactusLM } from 'cactus-react-native';
const App = () => {
const cactusLM = useCactusLM();
const handleEmbed = async () => {
const result = await cactusLM.embed({ text: 'Hello, World!' });
console.log('Embedding vector:', result.embedding);
console.log('Embedding vector length:', result.embedding.length);
};
return <Button title="Embed" onPress={handleEmbed} />;
};

import { CactusLM } from 'cactus-react-native';
const cactusLM = new CactusLM({ model: 'lfm2-vl-450m' });
const result = await cactusLM.imageEmbed({ imagePath: 'path/to/your/image.jpg' });
console.log('Image embedding vector:', result.embedding);
console.log('Embedding vector length:', result.embedding.length);

import { useCactusLM } from 'cactus-react-native';
const App = () => {
const cactusLM = useCactusLM({ model: 'lfm2-vl-450m' });
const handleImageEmbed = async () => {
const result = await cactusLM.imageEmbed({ imagePath: 'path/to/your/image.jpg' });
console.log('Image embedding vector:', result.embedding);
console.log('Embedding vector length:', result.embedding.length);
};
return <Button title="Embed Image" onPress={handleImageEmbed} />;
};

The CactusLM supports a hybrid completion mode that falls back to a cloud-based LLM provider, OpenRouter, if local inference fails.
import { CactusLM, type Message } from 'cactus-react-native';
const cactusLM = new CactusLM();
const messages: Message[] = [
{ role: 'user', content: 'Hello, World!' }
];
// Falls back to remote if local fails
const result = await cactusLM.complete({
messages,
mode: 'hybrid'
});

import { useCactusLM, type Message } from 'cactus-react-native';
const App = () => {
const cactusLM = useCactusLM();
const handleComplete = async () => {
const messages: Message[] = [
{ role: 'user', content: 'Hello, World!' }
];
// Falls back to remote if local fails
await cactusLM.complete({
messages,
mode: 'hybrid'
});
};
return (
<>
<Button title="Complete" onPress={handleComplete} />
<Text>{cactusLM.completion}</Text>
</>
);
};

The CactusSTT class provides audio transcription and audio embedding capabilities using Whisper models.
Transcribe audio to text with streaming support. Accepts either a file path or raw PCM audio samples.
import { CactusSTT } from 'cactus-react-native';
const cactusSTT = new CactusSTT({ model: 'whisper-small' });
await cactusSTT.init();
// Transcribe from file path
const result = await cactusSTT.transcribe({
audio: 'path/to/audio.wav',
onToken: (token) => console.log('Token:', token)
});
console.log('Transcription:', result.response);
// Or transcribe from raw PCM samples
const pcmSamples: number[] = [/* ... */];
const result2 = await cactusSTT.transcribe({
audio: pcmSamples,
onToken: (token) => console.log('Token:', token)
});
console.log('Transcription:', result2.response);

import { useCactusSTT } from 'cactus-react-native';
const App = () => {
const cactusSTT = useCactusSTT({ model: 'whisper-small' });
const handleTranscribe = async () => {
// Transcribe from file path
const result = await cactusSTT.transcribe({
audio: 'path/to/audio.wav',
});
console.log('Transcription:', result.response);
const pcmSamples: number[] = [/* ... */];
const result2 = await cactusSTT.transcribe({
audio: pcmSamples,
});
console.log('Transcription:', result2.response);
};
return (
<>
<Button onPress={handleTranscribe} title="Transcribe" />
<Text>{cactusSTT.transcription}</Text>
</>
);
};

Transcribe audio in real-time with incremental results.
import { CactusSTT } from 'cactus-react-native';
const cactusSTT = new CactusSTT({ model: 'whisper-small' });
await cactusSTT.streamTranscribeInit();
const audioChunk: number[] = [/* PCM samples */];
await cactusSTT.streamTranscribeInsert({ audio: audioChunk });
const result = await cactusSTT.streamTranscribeProcess({
options: { confirmationThreshold: 0.95 }
});
console.log('Confirmed:', result.confirmed);
console.log('Pending:', result.pending);
const final = await cactusSTT.streamTranscribeFinalize();
await cactusSTT.streamTranscribeDestroy();

import { useCactusSTT } from 'cactus-react-native';
const App = () => {
const cactusSTT = useCactusSTT({ model: 'whisper-small' });
const handleStream = async () => {
await cactusSTT.streamTranscribeInit();
const audioChunk: number[] = [/* PCM samples */];
await cactusSTT.streamTranscribeInsert({ audio: audioChunk });
await cactusSTT.streamTranscribeProcess();
};
return (
<>
<Button onPress={handleStream} title="Stream" />
<Text>{cactusSTT.streamTranscribeConfirmed}</Text>
<Text>{cactusSTT.streamTranscribePending}</Text>
</>
);
};

Generate embeddings from audio files for audio understanding.
import { CactusSTT } from 'cactus-react-native';
const cactusSTT = new CactusSTT();
await cactusSTT.init();
const result = await cactusSTT.audioEmbed({
audioPath: 'path/to/audio.wav'
});
console.log('Audio embedding vector:', result.embedding);
console.log('Embedding vector length:', result.embedding.length);

import { useCactusSTT } from 'cactus-react-native';
const App = () => {
const cactusSTT = useCactusSTT();
const handleAudioEmbed = async () => {
const result = await cactusSTT.audioEmbed({
audioPath: 'path/to/audio.wav'
});
console.log('Audio embedding vector:', result.embedding);
console.log('Embedding vector length:', result.embedding.length);
};
return <Button title="Embed Audio" onPress={handleAudioEmbed} />;
};

The CactusIndex class provides a vector database for storing and querying embeddings with metadata, enabling similarity search and retrieval.
import { CactusIndex } from 'cactus-react-native';
const cactusIndex = new CactusIndex('my-index', 1024);
await cactusIndex.init();

import { useCactusIndex } from 'cactus-react-native';
const App = () => {
const cactusIndex = useCactusIndex({
name: 'my-index',
embeddingDim: 1024
});
const handleInit = async () => {
await cactusIndex.init();
};
return <Button title="Initialize Index" onPress={handleInit} />
};

Add documents with their embeddings and metadata to the index.
import { CactusIndex } from 'cactus-react-native';
const cactusIndex = new CactusIndex('my-index', 1024);
await cactusIndex.init();
await cactusIndex.add({
ids: [1, 2, 3],
documents: ['First document', 'Second document', 'Third document'],
embeddings: [
[0.1, 0.2, ...],
[0.3, 0.4, ...],
[0.5, 0.6, ...]
],
metadatas: ['metadata1', 'metadata2', 'metadata3']
});

import { useCactusIndex } from 'cactus-react-native';
const App = () => {
const cactusIndex = useCactusIndex({
name: 'my-index',
embeddingDim: 1024
});
const handleAdd = async () => {
await cactusIndex.add({
ids: [1, 2, 3],
documents: ['First document', 'Second document', 'Third document'],
embeddings: [
[0.1, 0.2, ...],
[0.3, 0.4, ...],
[0.5, 0.6, ...]
],
metadatas: ['metadata1', 'metadata2', 'metadata3']
});
};
return <Button title="Add Documents" onPress={handleAdd} />;
};

Search for similar documents using embedding vectors.
import { CactusIndex } from 'cactus-react-native';
const cactusIndex = new CactusIndex('my-index', 1024);
await cactusIndex.init();
const result = await cactusIndex.query({
embeddings: [[0.1, 0.2, ...]],
options: {
topK: 5,
scoreThreshold: 0.7
}
});
console.log('IDs:', result.ids);
console.log('Scores:', result.scores);

import { useCactusIndex } from 'cactus-react-native';
const App = () => {
const cactusIndex = useCactusIndex({
name: 'my-index',
embeddingDim: 1024
});
const handleQuery = async () => {
const result = await cactusIndex.query({
embeddings: [[0.1, 0.2, ...]],
options: {
topK: 5,
scoreThreshold: 0.7
}
});
console.log('IDs:', result.ids);
console.log('Scores:', result.scores);
};
return <Button title="Query Index" onPress={handleQuery} />;
};

Get documents by their IDs.
import { CactusIndex } from 'cactus-react-native';
const cactusIndex = new CactusIndex('my-index', 1024);
await cactusIndex.init();
const result = await cactusIndex.get({ ids: [1, 2, 3] });
console.log('Documents:', result.documents);
console.log('Metadatas:', result.metadatas);
console.log('Embeddings:', result.embeddings);

import { useCactusIndex } from 'cactus-react-native';
const App = () => {
const cactusIndex = useCactusIndex({
name: 'my-index',
embeddingDim: 1024
});
const handleGet = async () => {
const result = await cactusIndex.get({ ids: [1, 2, 3] });
console.log('Documents:', result.documents);
console.log('Metadatas:', result.metadatas);
console.log('Embeddings:', result.embeddings);
};
return <Button title="Get Documents" onPress={handleGet} />;
};

Mark documents as deleted by their IDs.
import { CactusIndex } from 'cactus-react-native';
const cactusIndex = new CactusIndex('my-index', 1024);
await cactusIndex.init();
await cactusIndex.delete({ ids: [1, 2, 3] });

import { useCactusIndex } from 'cactus-react-native';
const App = () => {
const cactusIndex = useCactusIndex({
name: 'my-index',
embeddingDim: 1024
});
const handleDelete = async () => {
await cactusIndex.delete({ ids: [1, 2, 3] });
};
return <Button title="Delete Documents" onPress={handleDelete} />;
};

Optimize the index by removing deleted documents and reorganizing data.
import { CactusIndex } from 'cactus-react-native';
const cactusIndex = new CactusIndex('my-index', 1024);
await cactusIndex.init();
await cactusIndex.compact();

import { useCactusIndex } from 'cactus-react-native';
const App = () => {
const cactusIndex = useCactusIndex({
name: 'my-index',
embeddingDim: 1024
});
const handleCompact = async () => {
await cactusIndex.compact();
};
return <Button title="Compact Index" onPress={handleCompact} />;
};

new CactusLM(params?: CactusLMParams)
Parameters:
- model - Model slug or absolute path to Cactus model (default: 'qwen3-0.6b').
- contextSize - Context window size (default: 2048).
- corpusDir - Directory containing text files for RAG (default: undefined).
- options - Model options for quantization and NPU acceleration:
  - quantization - Quantization type: 'int4' | 'int8' (default: 'int4').
  - pro - Enable NPU-accelerated models (default: false).
download(params?: CactusLMDownloadParams): Promise<void>
Downloads the model. If the model is already downloaded, returns immediately with progress 1. Throws an error if a download is already in progress.
Parameters:
onProgress- Callback for download progress (0-1).
init(): Promise<void>
Initializes the model and prepares it for inference. Safe to call multiple times (idempotent). Throws an error if the model is not downloaded yet.
complete(params: CactusLMCompleteParams): Promise<CactusLMCompleteResult>
Performs text completion with optional streaming and tool support. Automatically calls init() if not already initialized. Throws an error if a generation (completion or embedding) is already in progress.
Parameters:
- messages - Array of Message objects.
- options - Generation options:
  - temperature - Sampling temperature (default: model-optimized).
  - topP - Nucleus sampling threshold (default: model-optimized).
  - topK - Top-K sampling limit (default: model-optimized).
  - maxTokens - Maximum number of tokens to generate (default: 512).
  - stopSequences - Array of strings to stop generation (default: undefined).
  - forceTools - Force the model to call one of the provided tools (default: false).
- tools - Array of Tool objects for function calling (default: undefined).
- onToken - Callback for streaming tokens.
- mode - Completion mode: 'local' | 'hybrid' (default: 'local').
tokenize(params: CactusLMTokenizeParams): Promise<CactusLMTokenizeResult>
Converts text into tokens using the model's tokenizer.
Parameters:
text- Text to tokenize.
scoreWindow(params: CactusLMScoreWindowParams): Promise<CactusLMScoreWindowResult>
Calculates perplexity scores for a window of tokens within a sequence.
Parameters:
- tokens - Array of token IDs.
- start - Start index of the window.
- end - End index of the window.
- context - Number of context tokens before the window.
embed(params: CactusLMEmbedParams): Promise<CactusLMEmbedResult>
Generates embeddings for the given text. Automatically calls init() if not already initialized. Throws an error if a generation (completion or embedding) is already in progress.
Parameters:
- text - Text to embed.
- normalize - Whether to normalize the embedding vector (default: false).
imageEmbed(params: CactusLMImageEmbedParams): Promise<CactusLMImageEmbedResult>
Generates embeddings for the given image. Requires a vision-capable model. Automatically calls init() if not already initialized. Throws an error if a generation (completion or embedding) is already in progress.
Parameters:
imagePath- Path to the image file.
stop(): Promise<void>
Stops ongoing generation.
reset(): Promise<void>
Resets the model's internal state, clearing any cached context. Automatically calls stop() first.
destroy(): Promise<void>
Releases all resources associated with the model. Automatically calls stop() first. Safe to call even if the model is not initialized.
getModels(): CactusModel[]
Returns available models.
The useCactusLM hook manages a CactusLM instance with reactive state. When model parameters (model, contextSize, corpusDir, options) change, the hook creates a new instance and resets all state. The hook automatically cleans up resources when the component unmounts.
- completion: string - Current generated text. Automatically accumulated during streaming. Cleared before each new completion and when calling reset() or destroy().
- isGenerating: boolean - Whether the model is currently generating (completion or embedding). Both operations share this flag.
- isInitializing: boolean - Whether the model is initializing.
- isDownloaded: boolean - Whether the model is downloaded locally. Automatically checked when the hook mounts or the model changes.
- isDownloading: boolean - Whether the model is being downloaded.
- downloadProgress: number - Download progress (0-1). Reset to 0 after the download completes.
- error: string | null - Last error message from any operation, or null if there is no error. Cleared before starting new operations.
- download(params?: CactusLMDownloadParams): Promise<void> - Downloads the model. Updates isDownloading and downloadProgress state during download. Sets isDownloaded to true on success.
- init(): Promise<void> - Initializes the model for inference. Sets isInitializing to true during initialization.
- complete(params: CactusLMCompleteParams): Promise<CactusLMCompleteResult> - Generates text completions. Automatically accumulates tokens in the completion state during streaming. Sets isGenerating to true while generating. Clears completion before starting.
- tokenize(params: CactusLMTokenizeParams): Promise<CactusLMTokenizeResult> - Converts text into tokens. Sets isGenerating to true during the operation.
- scoreWindow(params: CactusLMScoreWindowParams): Promise<CactusLMScoreWindowResult> - Calculates perplexity scores for a window of tokens. Sets isGenerating to true during the operation.
- embed(params: CactusLMEmbedParams): Promise<CactusLMEmbedResult> - Generates embeddings for the given text. Sets isGenerating to true during the operation.
- imageEmbed(params: CactusLMImageEmbedParams): Promise<CactusLMImageEmbedResult> - Generates embeddings for the given image. Sets isGenerating to true while generating.
- stop(): Promise<void> - Stops ongoing generation. Clears any errors.
- reset(): Promise<void> - Resets the model's internal state, clearing cached context. Also clears the completion state.
- destroy(): Promise<void> - Releases all resources associated with the model. Clears the completion state. Automatically called when the component unmounts.
- getModels(): CactusModel[] - Returns available models.
new CactusSTT(params?: CactusSTTParams)
Parameters:
- model - Model slug or absolute path to Cactus model (default: 'whisper-small').
- contextSize - Context window size (default: 2048).
- options - Model options for quantization and NPU acceleration:
  - quantization - Quantization type: 'int4' | 'int8' (default: 'int4').
  - pro - Enable NPU-accelerated models (default: false).
download(params?: CactusSTTDownloadParams): Promise<void>
Downloads the model. If the model is already downloaded, returns immediately with progress 1. Throws an error if a download is already in progress.
Parameters:
onProgress- Callback for download progress (0-1).
init(): Promise<void>
Initializes the model and prepares it for inference. Safe to call multiple times (idempotent). Throws an error if the model is not downloaded yet.
transcribe(params: CactusSTTTranscribeParams): Promise<CactusSTTTranscribeResult>
Transcribes audio to text with optional streaming support. Accepts either a file path or raw PCM audio samples. Automatically calls init() if not already initialized. Throws an error if a generation is already in progress.
Parameters:
- audio - Path to the audio file or raw PCM samples.
- prompt - Optional prompt to guide transcription (default: '<|startoftranscript|><|en|><|transcribe|><|notimestamps|>').
- options - Transcription options:
  - temperature - Sampling temperature (default: model-optimized).
  - topP - Nucleus sampling threshold (default: model-optimized).
  - topK - Top-K sampling limit (default: model-optimized).
  - maxTokens - Maximum number of tokens to generate (default: 512).
  - stopSequences - Array of strings to stop generation (default: undefined).
- onToken - Callback for streaming tokens.
audioEmbed(params: CactusSTTAudioEmbedParams): Promise<CactusSTTAudioEmbedResult>
Generates embeddings for the given audio file. Automatically calls init() if not already initialized. Throws an error if a generation is already in progress.
Parameters:
audioPath- Path to the audio file.
streamTranscribeInit(): Promise<void>
Initializes a streaming transcription session. Automatically calls init() if not already initialized.
streamTranscribeInsert(params: CactusSTTStreamTranscribeInsertParams): Promise<void>
Inserts PCM audio samples into the streaming buffer.
Parameters:
audio- Array of PCM audio samples.
streamTranscribeProcess(params?: CactusSTTStreamTranscribeProcessParams): Promise<CactusSTTStreamTranscribeProcessResult>
Processes accumulated audio and returns incremental transcription results.
Parameters:
- options - Processing options:
  - confirmationThreshold - Confidence threshold for confirming text.
streamTranscribeFinalize(): Promise<CactusSTTStreamTranscribeFinalizeResult>
Finalizes the streaming session and returns remaining transcription text.
streamTranscribeDestroy(): Promise<void>
Destroys the streaming session and releases resources.
stop(): Promise<void>
Stops ongoing transcription or embedding generation.
reset(): Promise<void>
Resets the model's internal state. Automatically calls stop() first.
destroy(): Promise<void>
Releases all resources associated with the model. Automatically calls stop() first. Safe to call even if the model is not initialized.
getModels(): CactusModel[]
Returns available speech-to-text models.
The useCactusSTT hook manages a CactusSTT instance with reactive state. When model parameters (model, contextSize, options) change, the hook creates a new instance and resets all state. The hook automatically cleans up resources when the component unmounts.
- transcription: string - Current transcription text. Automatically accumulated during streaming. Cleared before each new transcription and when calling reset() or destroy().
- streamTranscribeConfirmed: string - Accumulated confirmed text from streaming transcription.
- streamTranscribePending: string - Current pending text from streaming transcription.
- isGenerating: boolean - Whether the model is currently generating (transcription or embedding). Both operations share this flag.
- isStreamTranscribing: boolean - Whether a streaming transcription session is active.
- isInitializing: boolean - Whether the model is initializing.
- isDownloaded: boolean - Whether the model is downloaded locally. Automatically checked when the hook mounts or the model changes.
- isDownloading: boolean - Whether the model is being downloaded.
- downloadProgress: number - Download progress (0-1). Reset to 0 after the download completes.
- error: string | null - Last error message from any operation, or null if there is no error. Cleared before starting new operations.
- download(params?: CactusSTTDownloadParams): Promise<void> - Downloads the model. Updates isDownloading and downloadProgress state during download. Sets isDownloaded to true on success.
- init(): Promise<void> - Initializes the model for inference. Sets isInitializing to true during initialization.
- transcribe(params: CactusSTTTranscribeParams): Promise<CactusSTTTranscribeResult> - Transcribes audio to text. Automatically accumulates tokens in the transcription state during streaming. Sets isGenerating to true while generating. Clears transcription before starting.
- audioEmbed(params: CactusSTTAudioEmbedParams): Promise<CactusSTTAudioEmbedResult> - Generates embeddings for the given audio. Sets isGenerating to true during the operation.
- streamTranscribeInit(): Promise<void> - Initializes a streaming transcription session. Sets isStreamTranscribing to true.
- streamTranscribeInsert(params: CactusSTTStreamTranscribeInsertParams): Promise<void> - Inserts audio chunks into the streaming buffer.
- streamTranscribeProcess(params?: CactusSTTStreamTranscribeProcessParams): Promise<CactusSTTStreamTranscribeProcessResult> - Processes audio and returns results. Automatically accumulates confirmed text in streamTranscribeConfirmed and updates streamTranscribePending.
- streamTranscribeFinalize(): Promise<CactusSTTStreamTranscribeFinalizeResult> - Finalizes streaming and returns remaining text.
- streamTranscribeDestroy(): Promise<void> - Destroys the streaming session. Sets isStreamTranscribing to false.
- stop(): Promise<void> - Stops ongoing generation. Clears any errors.
- reset(): Promise<void> - Resets the model's internal state. Also clears the transcription state.
- destroy(): Promise<void> - Releases all resources associated with the model. Clears the transcription state. Automatically called when the component unmounts.
- getModels(): CactusModel[] - Returns available speech-to-text models.
new CactusIndex(name: string, embeddingDim: number)
Parameters:
- name - Name of the index.
- embeddingDim - Dimension of the embedding vectors.
init(): Promise<void>
Initializes the index and prepares it for operations. Must be called before using any other methods.
add(params: CactusIndexAddParams): Promise<void>
Adds documents with their embeddings and metadata to the index.
Parameters:
- ids - Array of document IDs.
- documents - Array of document texts.
- embeddings - Array of embedding vectors (each vector must match embeddingDim).
- metadatas - Optional array of metadata strings.
query(params: CactusIndexQueryParams): Promise<CactusIndexQueryResult>
Searches for similar documents using embedding vectors.
Parameters:
- embeddings - Array of query embedding vectors.
- options - Query options:
  - topK - Number of top results to return (default: 10).
  - scoreThreshold - Minimum similarity score threshold (default: -1.0).
get(params: CactusIndexGetParams): Promise<CactusIndexGetResult>
Retrieves documents by their IDs.
Parameters:
ids- Array of document IDs to retrieve.
delete(params: CactusIndexDeleteParams): Promise<void>
Deletes documents from the index by their IDs.
Parameters:
ids- Array of document IDs to delete.
compact(): Promise<void>
Optimizes the index by removing deleted documents and reorganizing data for better performance. Call after a series of deletions.
destroy(): Promise<void>
Releases all resources associated with the index from memory.
The useCactusIndex hook manages a CactusIndex instance with reactive state. When index parameters (name or embeddingDim) change, the hook creates a new instance and resets all state. The hook automatically cleans up resources when the component unmounts.
- isInitializing: boolean - Whether the index is initializing.
- isProcessing: boolean - Whether the index is processing an operation (add, query, get, delete, or compact).
- error: string | null - Last error message from any operation, or null if there is no error. Cleared before starting new operations.
- init(): Promise<void> - Initializes the index. Sets isInitializing to true during initialization.
- add(params: CactusIndexAddParams): Promise<void> - Adds documents to the index. Sets isProcessing to true during the operation.
- query(params: CactusIndexQueryParams): Promise<CactusIndexQueryResult> - Searches for similar documents. Sets isProcessing to true during the operation.
- get(params: CactusIndexGetParams): Promise<CactusIndexGetResult> - Retrieves documents by IDs. Sets isProcessing to true during the operation.
- delete(params: CactusIndexDeleteParams): Promise<void> - Deletes documents. Sets isProcessing to true during the operation.
- compact(): Promise<void> - Optimizes the index. Sets isProcessing to true during the operation.
- destroy(): Promise<void> - Releases all resources. Automatically called when the component unmounts.
/** Constructor parameters for CactusLM. */
interface CactusLMParams {
  /** Model identifier (e.g. 'lfm2-vl-450m'); optional. */
  model?: string;
  /** Context window size in tokens. */
  contextSize?: number;
  /** Directory containing a document corpus. */
  corpusDir?: string;
  /** Quantization / Pro (NPU) options. */
  options?: ModelOptions;
}

/** Parameters for CactusLM.download(). */
interface CactusLMDownloadParams {
  /** Progress callback; receives a fraction in [0, 1]. */
  onProgress?: (progress: number) => void;
}

/** One chat message in a conversation history. */
interface Message {
  role: 'user' | 'assistant' | 'system';
  content?: string;
  /** Image paths/URIs attached to the message. */
  images?: string[];
}

/** Sampling and generation options for completions. */
interface CompleteOptions {
  temperature?: number;
  topP?: number;
  topK?: number;
  maxTokens?: number;
  stopSequences?: string[];
  /** Force the model to respond with a tool call. */
  forceTools?: boolean;
}

/** A tool (function) the model may call, described with a JSON-Schema-like shape. */
interface Tool {
  name: string;
  description: string;
  parameters: {
    type: 'object';
    properties: {
      [key: string]: {
        type: string;
        description: string;
      };
    };
    required: string[];
  };
}

/** Parameters for CactusLM.complete(). */
interface CactusLMCompleteParams {
  /** Conversation history to complete. */
  messages: Message[];
  options?: CompleteOptions;
  tools?: Tool[];
  /** Streaming callback invoked once per generated token. */
  onToken?: (token: string) => void;
  /** 'local' runs on-device; 'hybrid' allows cloud fallback. */
  mode?: 'local' | 'hybrid';
}

/** Result of CactusLM.complete(). */
interface CactusLMCompleteResult {
  success: boolean;
  response: string;
  /** Tool calls requested by the model, if any. */
  functionCalls?: {
    name: string;
    arguments: { [key: string]: any };
  }[];
  timeToFirstTokenMs: number;
  totalTimeMs: number;
  tokensPerSecond: number;
  prefillTokens: number;
  decodeTokens: number;
  totalTokens: number;
}

/** Parameters for CactusLM.tokenize(). */
interface CactusLMTokenizeParams {
  text: string;
}

/** Result of CactusLM.tokenize(). */
interface CactusLMTokenizeResult {
  tokens: number[];
}

/** Parameters for scoring a window of tokens. */
interface CactusLMScoreWindowParams {
  tokens: number[];
  start: number;
  end: number;
  context: number;
}

/** Result of a score-window call. */
interface CactusLMScoreWindowResult {
  score: number;
}

/** Parameters for CactusLM.embed(). */
interface CactusLMEmbedParams {
  text: string;
  normalize?: boolean;
}

/** Result of CactusLM.embed(). */
interface CactusLMEmbedResult {
  embedding: number[];
}

/** Parameters for image embedding. */
interface CactusLMImageEmbedParams {
  imagePath: string;
}

/** Result of image embedding. */
interface CactusLMImageEmbedResult {
  embedding: number[];
}

/** Capability flags and per-quantization download metadata for a model. */
interface CactusModel {
  completion: boolean;
  tools: boolean;
  vision: boolean;
  embed: boolean;
  speech: boolean;
  quantization: {
    int4: {
      sizeMb: number;
      url: string;
      /** Pro (NPU-accelerated) variant, when available. */
      pro?: {
        apple: string;
      };
    };
    int8: {
      sizeMb: number;
      url: string;
      pro?: {
        apple: string;
      };
    };
  };
}

/** Model loading options: quantization level and Pro (NPU) toggle. */
interface ModelOptions {
  quantization: 'int4' | 'int8';
  pro: boolean;
}

/** Constructor parameters for CactusSTT. */
interface CactusSTTParams {
  model?: string;
  contextSize?: number;
  options?: ModelOptions;
}

/** Parameters for CactusSTT.download(). */
interface CactusSTTDownloadParams {
  /** Progress callback; receives a fraction in [0, 1]. */
  onProgress?: (progress: number) => void;
}

/** Sampling options for transcription. */
interface TranscribeOptions {
  temperature?: number;
  topP?: number;
  topK?: number;
  maxTokens?: number;
  stopSequences?: string[];
}

/** Parameters for CactusSTT.transcribe(). */
interface CactusSTTTranscribeParams {
  /** Audio file path, or raw samples as a number array. */
  audio: string | number[];
  prompt?: string;
  options?: TranscribeOptions;
  onToken?: (token: string) => void;
}

/** Result of CactusSTT.transcribe(). */
interface CactusSTTTranscribeResult {
  success: boolean;
  response: string;
  timeToFirstTokenMs: number;
  totalTimeMs: number;
  tokensPerSecond: number;
  prefillTokens: number;
  decodeTokens: number;
  totalTokens: number;
}

/** Parameters for audio embedding. */
interface CactusSTTAudioEmbedParams {
  audioPath: string;
}

/** Result of audio embedding. */
interface CactusSTTAudioEmbedResult {
  embedding: number[];
}

/** Parameters for inserting audio samples into a streaming transcription session. */
interface CactusSTTStreamTranscribeInsertParams {
  audio: number[];
}

/** Options for a streaming transcription processing step. */
interface StreamTranscribeProcessOptions {
  confirmationThreshold?: number;
}

/** Parameters for a streaming transcription processing step. */
interface CactusSTTStreamTranscribeProcessParams {
  options?: StreamTranscribeProcessOptions;
}

/** Result of a streaming processing step: confirmed text vs. still-pending text. */
interface CactusSTTStreamTranscribeProcessResult {
  success: boolean;
  confirmed: string;
  pending: string;
}

/** Result of finalizing a streaming transcription session. */
interface CactusSTTStreamTranscribeFinalizeResult {
  success: boolean;
  confirmed: string;
}

/** Constructor parameters for CactusIndex. */
interface CactusIndexParams {
  name: string;
  embeddingDim: number;
}

/** Parameters for CactusIndex.add(). */
interface CactusIndexAddParams {
  ids: number[];
  documents: string[];
  embeddings: number[][];
  metadatas?: string[];
}

/** Parameters for CactusIndex.get(). */
interface CactusIndexGetParams {
  ids: number[];
}

/** Result of CactusIndex.get(). */
interface CactusIndexGetResult {
  documents: string[];
  metadatas: string[];
  embeddings: number[][];
}

/** Query options for CactusIndex.query(). */
interface IndexQueryOptions {
  /** Number of top results to return (default: 10). */
  topK?: number;
  /** Minimum similarity score threshold (default: -1.0). */
  scoreThreshold?: number;
}

/** Parameters for CactusIndex.query(). */
interface CactusIndexQueryParams {
  embeddings: number[][];
  options?: IndexQueryOptions;
}

/** Result of CactusIndex.query(): one ids/scores array per query embedding. */
interface CactusIndexQueryResult {
  ids: number[][];
  scores: number[][];
}

/** Parameters for CactusIndex.delete(). */
interface CactusIndexDeleteParams {
  /** IDs of documents to delete. */
  ids: number[];
}
Cactus offers powerful telemetry for all your projects. Create a token on the Cactus dashboard.
import { CactusConfig } from 'cactus-react-native';
// Enable Telemetry for your project
CactusConfig.telemetryToken = 'your-telemetry-token-here';
// Disable telemetry
CactusConfig.isTelemetryEnabled = false;
Enable cloud fallback.
import { CactusConfig } from 'cactus-react-native';
// Set your Cactus token for hybrid mode
CactusConfig.cactusToken = 'your-cactus-token-here';
Enable NPU-accelerated models for enhanced performance.
import { CactusConfig } from 'cactus-react-native';
// Set your Cactus Pro key
CactusConfig.cactusProKey = 'your-cactus-pro-key-here';
- Model Selection - Choose smaller models for faster inference on mobile devices.
- Context Size - Reduce the context size to lower memory usage.
- Memory Management - Always call `destroy()` when you're done with models to free up resources.
Check out our example app for a complete React Native implementation.
