Skip to content

Commit 3d88cee

Browse files
authored
[Controller] Introduce config for controller (LMCache#2247)
* Introduce config for controller Signed-off-by: baoloongmao <baoloongmao@tencent.com> * Address comment from jiayi Signed-off-by: baoloongmao <baoloongmao@tencent.com> * Fix test Signed-off-by: baoloongmao <baoloongmao@tencent.com> --------- Signed-off-by: baoloongmao <baoloongmao@tencent.com>
1 parent d046832 commit 3d88cee

6 files changed

Lines changed: 1229 additions & 284 deletions

File tree

lmcache/v1/api_server/__main__.py

Lines changed: 94 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
# SPDX-License-Identifier: Apache-2.0
22
# Standard
33
from contextlib import asynccontextmanager
4-
from typing import Dict, List, Optional, Tuple
4+
from typing import Any, Dict, List, Optional, Tuple
55
import argparse
66
import asyncio
77
import json
@@ -26,6 +26,9 @@
2626

2727
# First Party
2828
from lmcache.logging import init_logger
29+
from lmcache.v1.cache_controller.config import (
30+
load_controller_config_with_overrides,
31+
)
2932
from lmcache.v1.cache_controller.controller_manager import LMCacheControllerManager
3033
from lmcache.v1.cache_controller.message import ( # noqa: E501
3134
CheckFinishMsg,
@@ -51,11 +54,34 @@
5154
QueryWorkerInfoRetMsg,
5255
WorkerInfo,
5356
)
57+
from lmcache.v1.config_base import parse_command_line_extra_params
5458
from lmcache.v1.internal_api_server.api_registry import APIRegistry
5559

5660
logger = init_logger(__name__)
5761

5862

63+
def _coerce_param_value(value: str) -> Any:
    """Convert one raw ``key=value`` value to bool, int or float when it looks like one.

    Anything that is not recognisably a boolean or a plain decimal number is
    returned unchanged as a string.
    """
    lowered = value.lower()
    if lowered in ("true", "false"):
        return lowered == "true"

    # Peel off a single leading sign so that signed numbers such as "-1" are
    # recognised; the digit checks themselves stay exactly as strict as before.
    unsigned = value[1:] if value.startswith(("+", "-")) else value
    try:
        if unsigned.isdigit():
            return int(value)
        if unsigned.replace(".", "", 1).isdigit():
            return float(value)
    except ValueError:
        # str.isdigit() accepts some characters (e.g. superscript digits)
        # that int()/float() reject; such values stay plain strings.
        pass
    return value


def parse_extra_params(extra_args: list) -> Dict[str, Any]:
    """Parse extra parameters in key=value format.

    Args:
        extra_args: Raw command-line tokens, e.g. ``["--foo=1", "bar=true"]``.
            Tokens without an ``=`` are skipped.

    Returns:
        Mapping from key (leading dashes removed) to the coerced value:
        ``true``/``false`` (any case) become ``bool``, optionally signed
        decimal integers become ``int``, optionally signed decimals with one
        dot become ``float``, and everything else stays a ``str``.
    """
    params: Dict[str, Any] = {}
    for arg in extra_args:
        if "=" not in arg:
            continue
        # Split on the first "=" only so the value itself may contain "=".
        key, value = arg.split("=", 1)
        key = key.lstrip("-")
        params[key] = _coerce_param_value(value)
        logger.info(f"Extra parameter: {key} = {params[key]}")
    return params
83+
84+
5985
def create_app(
6086
controller_urls: dict[str, str],
6187
health_check_interval: int,
@@ -391,6 +417,9 @@ async def query_worker_info(req: QueryWorkerInfoRequest):
391417

392418
def main():
393419
parser = argparse.ArgumentParser()
420+
parser.add_argument(
421+
"--config", type=str, help="Path to controller configuration file"
422+
)
394423
parser.add_argument("--host", type=str, default="0.0.0.0")
395424
parser.add_argument("--port", type=int, default=9000)
396425
parser.add_argument(
@@ -418,33 +447,82 @@ def main():
418447
help="The lmcache worker timeout in seconds.",
419448
)
420449

421-
args = parser.parse_args()
450+
# Parse known args first, then handle extra parameters
451+
args, extra = parser.parse_known_args()
452+
extra_params = parse_command_line_extra_params(extra)
422453

423454
try:
424-
if args.monitor_ports is not None:
455+
# Build overrides dictionary from command-line arguments
456+
override_dict = {}
457+
458+
# Map command-line arguments to config keys
459+
arg_mappings = {
460+
"host": "controller_host",
461+
"port": "controller_port",
462+
"health_check_interval": "health_check_interval",
463+
"lmcache_worker_timeout": "lmcache_worker_timeout",
464+
}
465+
466+
for arg_name, config_key in arg_mappings.items():
467+
arg_value = getattr(args, arg_name)
468+
if arg_value is not None:
469+
override_dict[config_key] = arg_value
470+
471+
# Add extra parameters
472+
if extra_params:
473+
override_dict.update(extra_params)
474+
475+
# Load configuration using the generic utility function
476+
# This replaces the previous manual config loading code
477+
config = load_controller_config_with_overrides(
478+
config_file_path=args.config,
479+
overrides=override_dict,
480+
)
481+
482+
# Build controller URLs from config or arguments
483+
if config.controller_monitor_ports is not None:
425484
controller_urls = {
426-
"pull": f"{args.host}:{args.monitor_ports['pull']}",
427-
"reply": f"{args.host}:{args.monitor_ports['reply']}",
485+
"pull": (
486+
f"{config.controller_host}:"
487+
f"{config.controller_monitor_ports['pull']}"
488+
),
489+
"reply": (
490+
f"{config.controller_host}:"
491+
f"{config.controller_monitor_ports['reply']}"
492+
),
428493
}
429494
else:
430-
logger.warning(
431-
"Argument --monitor-port will be deprecated soon. "
432-
"Please use --monitor-ports instead."
433-
)
495+
if args.monitor_port != 9001: # Only warn if explicitly set
496+
logger.warning(
497+
"Argument --monitor-port will be deprecated soon. "
498+
"Please use --monitor-ports instead."
499+
)
434500
controller_urls = {
435-
"pull": f"{args.host}:{args.monitor_port}",
501+
"pull": f"{config.controller_host}:{args.monitor_port}",
436502
"reply": None,
437503
}
438-
app = create_app(
439-
controller_urls, args.health_check_interval, args.lmcache_worker_timeout
440-
)
441504

442-
logger.info(f"Starting LMCache controller at {args.host}:{args.port}")
443-
logger.info(f"Monitoring lmcache workers at ports {args.monitor_ports}")
505+
# Use config values for health check and timeout
506+
health_check_interval = config.health_check_interval
507+
lmcache_worker_timeout = config.lmcache_worker_timeout
508+
509+
app = create_app(controller_urls, health_check_interval, lmcache_worker_timeout)
510+
511+
logger.info(
512+
f"Starting LMCache controller at "
513+
f"{config.controller_host}:{config.controller_port}"
514+
)
515+
ports_message = f"Monitoring lmcache workers at ports {controller_urls}"
516+
logger.info(ports_message)
517+
logger.info(f"Health check interval: {health_check_interval}s")
518+
logger.info(f"Worker timeout: {lmcache_worker_timeout}s")
444519

445-
uvicorn.run(app, host=args.host, port=args.port)
520+
uvicorn.run(app, host=config.controller_host, port=config.controller_port)
446521
except TimeoutError as e:
447522
logger.error(e)
523+
except Exception as e:
524+
logger.error(f"Failed to start controller: {e}", exc_info=True)
525+
sys.exit(1) # Exit with error code
448526

449527

450528
if __name__ == "__main__":
Lines changed: 176 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,176 @@
1+
# SPDX-License-Identifier: Apache-2.0
2+
"""
3+
LMCache Controller Configuration
4+
5+
Configuration system for LMCache Controller that:
6+
- Loads configuration from YAML file or environment variables
7+
- Supports command-line parameter overrides
8+
- Provides thread-safe singleton pattern for global access
9+
"""
10+
11+
# Standard
12+
from typing import Any, Dict, Optional
13+
import json
14+
15+
# First Party
16+
from lmcache.logging import init_logger
17+
from lmcache.v1.config_base import (
18+
create_config_class,
19+
create_singleton_config,
20+
load_config_with_overrides,
21+
)
22+
23+
logger = init_logger(__name__)
24+
25+
26+
# Controller-specific configuration definitions
def _json_dict_or_none(raw: Any) -> Any:
    """Converter shared by the dict-valued settings below.

    A dict is passed through untouched, any other falsy value (``None``,
    ``""``) becomes ``None``, and everything else is decoded with
    ``json.loads``.
    """
    if isinstance(raw, dict):
        return raw
    if not raw:
        return None
    return json.loads(raw)


# Each entry records the field's type, its default, the converter stored
# under "env_converter", and a human-readable description.
_CONTROLLER_CONFIG_DEFINITIONS: dict[str, dict[str, Any]] = {
    # Basic controller configurations
    "controller_monitor_ports": dict(
        type=Optional[dict],
        # NOTE(review): the default is a JSON *string*, not a dict; presumably
        # the config factory runs it through env_converter - confirm.
        default='{"pull": 8300, "reply": 8400}',
        env_converter=_json_dict_or_none,
        description="JSON string of monitor ports",
    ),
    "controller_host": dict(
        type=str,
        default="0.0.0.0",
        env_converter=str,
        description="Controller host address",
    ),
    "controller_port": dict(
        type=int,
        default=9000,
        env_converter=int,
        description="Controller API server port",
    ),
    "health_check_interval": dict(
        type=int,
        default=-1,
        env_converter=int,
        description="Health check interval in seconds (-1 = disabled)",
    ),
    "lmcache_worker_timeout": dict(
        type=int,
        default=300,
        env_converter=int,
        description="LMCache worker timeout in seconds",
    ),
    # Extra configurations
    "extra_config": dict(
        type=Optional[dict],
        default=None,
        env_converter=_json_dict_or_none,
        description="Extra configuration parameters",
    ),
}
71+
72+
73+
# Specialized methods that are unique to ControllerConfig
74+
def _validate_config(self):
    """Check ``health_check_interval`` and return ``self`` when it is acceptable.

    The interval must be either the sentinel ``-1`` or a value that is not
    below 1.

    Raises:
        ValueError: for any other interval (e.g. ``0`` or ``-2``).
    """
    interval = self.health_check_interval
    # Accept the -1 sentinel and anything that is not below one.
    if interval == -1 or not interval < 1:
        return self
    raise ValueError(f"Invalid health_check_interval: {interval}")
80+
81+
82+
def _log_config(self):
    """Log the current value of every defined controller setting; return ``self``."""
    # Only the names listed in _CONTROLLER_CONFIG_DEFINITIONS are reported.
    snapshot = {
        field: getattr(self, field) for field in _CONTROLLER_CONFIG_DEFINITIONS
    }
    logger.info(f"Controller Configuration: {snapshot}")
    return self
91+
92+
93+
def _post_init(self):
    """Post-initialization hook; currently a deliberate no-op."""
    return None
96+
97+
98+
# Create configuration class using the base utility
99+
ControllerConfig = create_config_class(
    config_name="ControllerConfig",
    config_definitions=_CONTROLLER_CONFIG_DEFINITIONS,
    env_prefix="LMCACHE_CONTROLLER_",
    # Module-level functions handed to the factory under these member names.
    namespace_extras=dict(
        validate=_validate_config,
        log_config=_log_config,
        __post_init__=_post_init,
    ),
)
109+
110+
111+
# Create singleton getter using the base utility
112+
controller_get_or_create_config = create_singleton_config(
    config_class=ControllerConfig,
    # Same environment variable name that load_controller_config_with_overrides
    # passes on for locating the config file.
    config_env_var="LMCACHE_CONTROLLER_CONFIG_FILE",
    getter_func_name="controller_get_or_create_config",
)
117+
118+
119+
def override_controller_config_from_dict(
    config: "ControllerConfig",  # type: ignore[valid-type]
    overrides: dict[str, Any],
):
    """Apply ``overrides`` to ``config`` in place.

    For every key that already exists as an attribute on ``config``, the value
    is passed through that key's ``env_converter`` from
    ``_CONTROLLER_CONFIG_DEFINITIONS`` (when one is defined) and then assigned.
    If the converter rejects the value, a warning is logged and the raw,
    unconverted value is assigned instead. Keys that are not attributes of
    ``config`` are skipped with a warning.

    Args:
        config: Configuration object to mutate.
        overrides: Mapping of attribute name to the new value.
    """
    for key, value in overrides.items():
        if not hasattr(config, key):
            logger.warning(f"Unknown controller config key: {key}, ignoring")
            continue

        old_value = getattr(config, key)

        # Attributes without a definition entry or converter are set as-is.
        definition = _CONTROLLER_CONFIG_DEFINITIONS.get(key, {})
        env_converter = definition.get("env_converter")

        candidate = value
        if env_converter:
            try:
                candidate = env_converter(value)
            except (TypeError, ValueError) as e:
                # TypeError covers inputs such as int(None) or json.loads(123);
                # json.JSONDecodeError is already a ValueError subclass.
                # Fall back to the raw override value.
                logger.warning(f"Failed to convert {key}={value!r}: {e}")
        setattr(config, key, candidate)

        new_value = getattr(config, key)
        if old_value != new_value:
            logger.info(
                f"Override controller config: {key} = {new_value} (was {old_value})"
            )
152+
153+
154+
def load_controller_config_with_overrides(
    config_file_path: Optional[str] = None,
    overrides: Optional[Dict[str, Any]] = None,
) -> "ControllerConfig":  # type: ignore[valid-type]
    """Build a controller configuration via the shared loader.

    Thin wrapper around ``load_config_with_overrides`` from ``config_base``
    that fixes the config class to ``ControllerConfig`` and the config-file
    environment variable name to ``LMCACHE_CONTROLLER_CONFIG_FILE``.

    Args:
        config_file_path: Optional direct path to a config file.
        overrides: Optional dictionary of configuration overrides.

    Returns:
        Whatever ``load_config_with_overrides`` returns for these arguments
        (expected to be a ``ControllerConfig`` instance).
    """
    loaded = load_config_with_overrides(
        overrides=overrides,
        config_file_path=config_file_path,
        config_file_env_var="LMCACHE_CONTROLLER_CONFIG_FILE",
        config_class=ControllerConfig,
    )
    return loaded

0 commit comments

Comments
 (0)