Skip to content
This repository was archived by the owner on Apr 30, 2026. It is now read-only.

Commit 020928c

Browse files
feat: GeminiEmbedding rate-limit handling (#2237)
Co-authored-by: Marcus Schiesser <mail@marcusschiesser.de>
1 parent 91627dc commit 020928c

3 files changed

Lines changed: 76 additions & 3 deletions

File tree

.changeset/nice-zebras-taste.md

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
---
2+
"@llamaindex/google": minor
3+
"@llamaindex/examples": patch
4+
---
5+
6+
respect Gemini's requests-per-minute rate limit with waits
Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
import { GEMINI_EMBEDDING_MODEL, GeminiEmbedding } from "@llamaindex/google";
2+
3+
const requests_per_minute_limit = 3000; // cf. https://ai.google.dev/gemini-api/docs/rate-limits
4+
5+
/**
 * Example entry point.
 *
 * Verifies that GOOGLE_API_KEY is set, builds a GeminiEmbedding for the
 * EMBEDDING_001 model, generates `requests_per_minute_limit + 1000` short
 * texts and embeds them all through `getTextEmbeddingsBatch`, then prints how
 * many embeddings came back.
 *
 * NOTE(review): the text count is presumably chosen to push past the
 * per-minute quota so the retry path gets exercised — confirm against the
 * batching behaviour of `getTextEmbeddingsBatch`.
 *
 * @throws Error when GOOGLE_API_KEY is not set.
 */
async function main() {
  const apiKey = process.env.GOOGLE_API_KEY;
  if (!apiKey) {
    throw new Error("Please set the GOOGLE_API_KEY environment variable.");
  }

  const embedModel = new GeminiEmbedding({
    model: GEMINI_EMBEDDING_MODEL.EMBEDDING_001,
  });

  // Build "text 0", "text 1", ... one entry per index.
  const textCount = requests_per_minute_limit + 1000;
  const texts: string[] = [];
  for (let idx = 0; idx < textCount; idx++) {
    texts.push(`text ${idx}`);
  }

  const embeddings = await embedModel.getTextEmbeddingsBatch(texts);
  console.log(`\nWe have ${embeddings.length} embeddings`);
}
19+
20+
main().catch(console.error);

packages/providers/google/src/GeminiEmbedding.ts

Lines changed: 50 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -43,12 +43,59 @@ export class GeminiEmbedding extends BaseEmbedding {
4343
this.embedBatchSize = opts?.embedBatchSize ?? DEFAULT_EMBED_BATCH_SIZE;
4444
}
4545

46+
/**
 * Calls `this.ai.models.embedContent(args)` and retries when the call is
 * rejected with a rate-limit error.
 *
 * Up to MAX_TRIES (20) attempts are made in total, with a fixed DELAY_MS
 * (5 s) pause before each retry, i.e. at most 19 pauses (~95 s of waiting).
 * Any error that is not recognised as a rate-limit error is rethrown
 * immediately; a rate-limit error on the final attempt is rethrown as well.
 *
 * @param args - Model name and the text(s) to embed, forwarded unchanged.
 * @returns Whatever `embedContent` resolves to.
 * @throws The original rejection from `embedContent` (never a secondary
 *   error raised while classifying it).
 */
private async embedWithRetry(args: {
  model: string;
  contents: string | string[];
}) {
  const MAX_TRIES = 20;
  const DELAY_MS = 5000;

  // A rejection counts as a rate-limit error when it carries HTTP status 429
  // or when its message embeds the RESOURCE_EXHAUSTED status. The value is
  // narrowed defensively: a rejection without a string `message` must not
  // trigger a TypeError here, because that would hide the real failure.
  const isRateLimitError = (err: unknown): boolean => {
    if (typeof err !== "object" || err === null) return false;
    const { status, message } = err as {
      status?: unknown;
      message?: unknown;
    };
    return (
      status === 429 ||
      (typeof message === "string" &&
        message.includes('"status":"RESOURCE_EXHAUSTED"'))
    );
  };
  /* error looks like this
  {"error":{"code":429,"message":"You exceeded your current quota, please check your plan and billing details. For more information on this error, head to: https://ai.google.dev/gemini-api/docs/rate-limits. To monitor your current usage, head to: https://ai.dev/usage?tab=rate-limit. \n* Quota exceeded for metric: generativelanguage.googleapis.com/embed_content_paid_tier_requests, limit: 0","status":"RESOURCE_EXHAUSTED","details":[{"@type":"type.googleapis.com/google.rpc.Help","links":[{"description":"Learn more about Gemini API quotas","url":"https://ai.google.dev/gemini-api/docs/rate-limits"}]},{"@type":"type.googleapis.com/google.rpc.QuotaFailure","violations":[{"quotaMetric":"generativelanguage.googleapis.com/embed_content_paid_tier_requests","quotaId":"EmbedContentPerMinutePerProjectPerUserPerModel-PaidTier"}]}]}}
  */

  const sleep = (ms: number) =>
    new Promise<void>((resolve) => setTimeout(resolve, ms));

  // The loop has no exit condition of its own: every iteration either
  // returns the result or throws, so control never falls out of it.
  for (let attempt = 1; ; attempt++) {
    try {
      return await this.ai.models.embedContent(args);
    } catch (err) {
      // Give up on the last attempt or on anything that is not a rate limit.
      if (attempt >= MAX_TRIES || !isRateLimitError(err)) {
        throw err;
      }
      await sleep(DELAY_MS);
      console.debug(
        `Gemini Embeddings rate limit error encountered. Retrying attempt ${attempt}...`,
      );
    }
  }
}
88+
4689
getTextEmbeddings = async (texts: string[]) => {
47-
const result = await this.ai.models.embedContent({
90+
const result = await this.embedWithRetry({
4891
model: this.model,
4992
contents: texts,
5093
});
51-
return result.embeddings?.map((embedding) => embedding.values ?? []) ?? [];
94+
95+
return (
96+
// eslint-disable-next-line @typescript-eslint/no-explicit-any
97+
result.embeddings?.map((embedding: any) => embedding.values ?? []) ?? []
98+
);
5299
};
53100

54101
async getTextEmbeddingsBatch(
@@ -64,7 +111,7 @@ export class GeminiEmbedding extends BaseEmbedding {
64111
}
65112

66113
async getTextEmbedding(text: string): Promise<number[]> {
67-
const result = await this.ai.models.embedContent({
114+
const result = await this.embedWithRetry({
68115
model: this.model,
69116
contents: text,
70117
});

0 commit comments

Comments
 (0)