-
Notifications
You must be signed in to change notification settings - Fork 3.4k
Expand file tree
/
Copy pathretry_litellm.py
More file actions
114 lines (98 loc) · 3.69 KB
/
retry_litellm.py
File metadata and controls
114 lines (98 loc) · 3.69 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
import asyncio
import inspect
from agents import (
Agent,
ModelRetrySettings,
ModelSettings,
RetryDecision,
RunConfig,
Runner,
retry_policies,
)
def format_error(error: object) -> str:
    """Return a short human-readable description of *error* for log lines.

    Non-exception values yield the placeholder "Unknown error"; exceptions
    yield their message, falling back to the class name when the message
    is empty (e.g. ``ValueError()``).
    """
    if isinstance(error, BaseException):
        return str(error) or type(error).__name__
    return "Unknown error"
async def main() -> None:
    """Run one agent request with composable retry policies attached.

    Builds a combined retry policy, wires it into ModelRetrySettings with
    exponential backoff, shares it via both RunConfig and the Agent's
    model_settings, then runs a single prompt through Runner and prints
    the final output.
    """
    # Combine several built-in policies: a retry is suggested when ANY of
    # them matches the failure.
    apply_policies = retry_policies.any(
        # On OpenAI-backed models, provider_suggested() follows provider retry advice,
        # including fallback retryable statuses when x-should-retry is absent
        # (for example 408/409/429/5xx).
        retry_policies.provider_suggested(),
        retry_policies.retry_after(),
        retry_policies.network_error(),
        retry_policies.http_status([408, 409, 429, 500, 502, 503, 504]),
    )

    async def policy(context) -> bool | RetryDecision:
        """Wrap the combined policy to log every retry/stop decision.

        Returns either a bool or the underlying RetryDecision unchanged,
        so backoff details (delay/reason) still reach the runner.
        """
        raw_decision = apply_policies(context)
        decision: bool | RetryDecision
        # Individual policies may be sync or async; await only when needed.
        if inspect.isawaitable(raw_decision):
            decision = await raw_decision
        else:
            decision = raw_decision
        if isinstance(decision, RetryDecision):
            if not decision.retry:
                # Structured decision says stop: log and give up.
                print(
                    f"[retry] stop after attempt {context.attempt}/{context.max_retries + 1}: "
                    f"{format_error(context.error)}"
                )
                return False
            # Structured decision says retry: log attempt, wait time,
            # optional reason, and the triggering error on one line.
            print(
                " | ".join(
                    part
                    for part in [
                        f"[retry] retry attempt {context.attempt}/{context.max_retries + 1}",
                        (
                            f"waiting {decision.delay:.2f}s"
                            if decision.delay is not None
                            else "using default backoff"
                        ),
                        f"reason: {decision.reason}" if decision.reason else None,
                        f"error: {format_error(context.error)}",
                    ]
                    if part is not None
                )
            )
            return decision
        # Plain-bool path: only the "stop" case gets a log line.
        if not decision:
            print(
                f"[retry] stop after attempt {context.attempt}/{context.max_retries + 1}: "
                f"{format_error(context.error)}"
            )
        return decision

    # max_retries=4 means up to 5 total attempts (initial + 4 retries),
    # with exponential backoff capped at 5s and jitter enabled.
    retry = ModelRetrySettings(
        max_retries=4,
        backoff={
            "initial_delay": 0.5,
            "max_delay": 5.0,
            "multiplier": 2.0,
            "jitter": True,
        },
        policy=policy,
    )
    # RunConfig-level model_settings are shared defaults for the run.
    # If an Agent also defines model_settings, the Agent wins for overlapping
    # keys, while nested objects like retry/backoff are merged.
    run_config = RunConfig(model_settings=ModelSettings(retry=retry))
    agent = Agent(
        name="Assistant",
        instructions="You are a concise assistant. Answer in 3 short bullet points at most.",
        # Prefix with litellm/ to route this request through the LiteLLM adapter.
        model="litellm/openai/gpt-4o-mini",
        # This Agent repeats the same retry config for clarity. In real code you
        # can keep shared defaults in RunConfig and only put per-agent overrides
        # here when you need different retry behavior.
        model_settings=ModelSettings(retry=retry),
    )
    print(
        "Retry support is configured. You will only see [retry] logs if a transient failure happens."
    )
    result = await Runner.run(
        agent,
        "Explain exponential backoff for API retries in plain English.",
        run_config=run_config,
    )
    print("\nFinal output:\n")
    print(result.final_output)
# Script entry point: run the async demo once in a fresh event loop.
if __name__ == "__main__":
    asyncio.run(main())