Skip to content

Commit 0a5f970

Browse files
ryanaoleary and edoakes authored
Update --labels and add --labels-file options for Label Selector API (#51706)
This PR updates the `--labels` option to `ray start` or `ray init` to accept a string list of key-value pairs mapping label names to label values. Labels follow [Kubernetes label syntax](https://kubernetes.io/docs/concepts/overview/working-with-objects/labels/#syntax-and-character-set). This PR also adds a `--labels-file` argument to support sourcing labels from a file. Files are expected to contain valid YAML that deserializes to a map of string keys to string values. We use `parse_node_labels_from_yaml_file` to parse the file passed to `--labels-file`. --------- Signed-off-by: Ryan O'Leary <ryanaoleary@google.com> Co-authored-by: Edward Oakes <ed.nmi.oakes@gmail.com>
1 parent 387fe53 commit 0a5f970

10 files changed

Lines changed: 280 additions & 58 deletions

File tree

java/test/src/main/java/io/ray/test/NodeLabelSchedulingTest.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ public void testEmptyNodeLabels() {
2424
}
2525

2626
public void testSetNodeLabels() {
27-
System.setProperty("ray.head-args.0", "--labels={\"gpu_type\":\"A100\",\"azone\":\"azone-1\"}");
27+
System.setProperty("ray.head-args.0", "--labels=\"gpu_type=A100,azone=azone-1\"");
2828
try {
2929
Ray.init();
3030
List<NodeInfo> nodeInfos = Ray.getRuntimeContext().getAllNodeInfo();

python/ray/_private/label_utils.py

Lines changed: 100 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,100 @@
1+
import re
2+
import yaml
3+
from typing import Dict
4+
5+
import ray._private.ray_constants as ray_constants
6+
7+
# Regex patterns used to validate that labels conform to Kubernetes label syntax rules.
# https://kubernetes.io/docs/concepts/overview/working-with-objects/labels/#syntax-and-character-set

# Regex for mandatory name (DNS label) or value.
# The pattern is unanchored and does not bound the length: callers must check
# that the whole string was matched and enforce the 63-character limit
# themselves.
# A single optional group is used instead of a bounded repetition of a
# variable-length group, which accepted the same strings but was ambiguous and
# could backtrack excessively on near-miss inputs.
# Examples (when the whole string must match):
#   Valid: "a", "label-name", "a-._b", "123", "this_is_a_valid_label"
#   Invalid: "-abc", "abc-", "my@label"
LABEL_REGEX = re.compile(r"[a-zA-Z0-9]([a-zA-Z0-9_.-]*[a-zA-Z0-9])?")

# A single dot-free segment of a prefix. Dots are excluded here so that they
# can only appear as separators between segments (rejecting "sub..domain").
_LABEL_PREFIX_SEGMENT = r"[a-zA-Z0-9]([a-zA-Z0-9_-]*[a-zA-Z0-9])?"

# Regex for optional prefix (DNS subdomain): one or more segments separated by
# single dots. The 253-character limit is enforced by callers.
# Examples:
#   Valid: "abc", "sub.domain.example", "my-label", "123.456.789"
#   Invalid: "-abc", "prefix_", "sub..domain", "sub.$$.example"
LABEL_PREFIX_REGEX = rf"^({_LABEL_PREFIX_SEGMENT}(\.{_LABEL_PREFIX_SEGMENT})*)$"
21+
22+
23+
def parse_node_labels_string(labels_str: str) -> Dict[str, str]:
    """Parse a comma-separated `key=value` string into a label dictionary.

    One pair of surrounding double quotes is removed before parsing. Keys and
    values are stripped of surrounding whitespace. No label syntax validation
    is performed here.

    Args:
        labels_str: Text such as `key1=val1,key2=val2`; may be empty.

    Returns:
        A dict mapping each label key to its value (empty for empty input).

    Raises:
        ValueError: If any comma-separated entry does not contain exactly one
            `=` sign.
    """
    # Drop one layer of wrapping double quotes, if present.
    is_quoted = (
        len(labels_str) >= 2 and labels_str[0] == '"' and labels_str[-1] == '"'
    )
    if is_quoted:
        labels_str = labels_str[1:-1]

    if not labels_str:
        return {}

    parsed = {}
    for entry in labels_str.split(","):
        # Exactly one `=` must separate the key from the value.
        if entry.count("=") != 1:
            raise ValueError("Label value is not a key-value pair.")
        raw_key, _, raw_value = entry.partition("=")
        parsed[raw_key.strip()] = raw_value.strip()
    return parsed
46+
47+
48+
def parse_node_labels_from_yaml_file(path: str) -> Dict[str, str]:
    """Load node labels from a YAML file.

    Args:
        path: Location of the YAML file. An empty string means no file was
            given and yields an empty dict.

    Returns:
        The deserialized mapping of label keys to label values.

    Raises:
        ValueError: If the document is not a mapping, or if any key or value
            is not a string.
    """
    if path == "":
        return {}

    with open(path, "r") as stream:
        # Expects valid YAML content
        parsed = yaml.safe_load(stream)

    if not isinstance(parsed, dict):
        raise ValueError(
            "The format after deserialization is not a key-value pair map."
        )

    for label_key, label_value in parsed.items():
        if not isinstance(label_key, str):
            raise ValueError("The key is not string type.")
        if not isinstance(label_value, str):
            raise ValueError(f'The value of "{label_key}" is not string type.')

    return parsed
65+
66+
67+
def validate_node_labels(labels: Dict[str, str]):
    """Validate custom node labels against Kubernetes label syntax.

    Every label is checked; an empty or None value is allowed and only skips
    the value check for that label.

    Args:
        labels: Mapping of label key to label value, or None, in which case
            there is nothing to validate.

    Raises:
        ValueError: If a key starts with the prefix reserved for Ray defined
            labels, if a key's prefix or name is malformed or too long, or if
            a non-empty value is malformed or too long.
    """
    if labels is None:
        return

    def _is_valid_name_or_value(text: str) -> bool:
        # LABEL_REGEX is unanchored, so a successful match only proves that a
        # leading portion is valid. Require the match to span the whole string
        # so trailing invalid characters (e.g. "abc-", "A100??") are rejected.
        if len(text) > 63:
            return False
        matched = re.match(LABEL_REGEX, text)
        return matched is not None and matched.end() == len(text)

    for key, value in labels.items():
        if key.startswith(ray_constants.RAY_DEFAULT_LABEL_KEYS_PREFIX):
            raise ValueError(
                f"Custom label keys `{key}` cannot start with the prefix "
                f"`{ray_constants.RAY_DEFAULT_LABEL_KEYS_PREFIX}`. "
                f"This is reserved for Ray defined labels."
            )
        if "/" in key:
            # Split on the last slash only: any additional slash stays in the
            # prefix, which then fails the prefix check with a clear message
            # instead of an unpacking error.
            prefix, name = key.rsplit("/", 1)
            if len(prefix) > 253 or not re.match(LABEL_PREFIX_REGEX, prefix):
                raise ValueError(
                    f"Invalid label key prefix `{prefix}`. Prefix must be a series of DNS labels "
                    f"separated by dots (.),not longer than 253 characters in total."
                )
        else:
            name = key
        if not _is_valid_name_or_value(name):
            raise ValueError(
                f"Invalid label key name `{name}`. Name must be 63 chars or less beginning and ending "
                f"with an alphanumeric character ([a-z0-9A-Z]) with dashes (-), underscores (_),"
                f"dots (.), and alphanumerics between."
            )
        if value is None or value == "":
            # Empty values are valid; keep validating the remaining labels.
            continue
        if not _is_valid_name_or_value(value):
            raise ValueError(
                f"Invalid label key value `{value}`. Value must be 63 chars or less beginning and ending "
                f"with an alphanumeric character ([a-z0-9A-Z]) with dashes (-), underscores (_),"
                f"dots (.), and alphanumerics between."
            )

python/ray/_private/parameter.py

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3,10 +3,8 @@
33
from typing import Dict, List, Optional
44

55
import ray._private.ray_constants as ray_constants
6-
from ray._private.utils import (
7-
validate_node_labels,
8-
check_ray_client_dependencies_installed,
9-
)
6+
from ray._private.label_utils import validate_node_labels
7+
from ray._private.utils import check_ray_client_dependencies_installed
108

119

1210
logger = logging.getLogger(__name__)

python/ray/_private/utils.py

Lines changed: 0 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -1803,43 +1803,6 @@ def update_envs(env_vars: Dict[str, str]):
18031803
os.environ[key] = result
18041804

18051805

1806-
def parse_node_labels_json(
1807-
labels_json: str, cli_logger, cf, command_arg="--labels"
1808-
) -> Dict[str, str]:
1809-
try:
1810-
labels = json.loads(labels_json)
1811-
if not isinstance(labels, dict):
1812-
raise ValueError(
1813-
"The format after deserialization is not a key-value pair map"
1814-
)
1815-
for key, value in labels.items():
1816-
if not isinstance(key, str):
1817-
raise ValueError("The key is not string type.")
1818-
if not isinstance(value, str):
1819-
raise ValueError(f'The value of the "{key}" is not string type')
1820-
except Exception as e:
1821-
cli_logger.abort(
1822-
"`{}` is not a valid JSON string, detail error:{}"
1823-
"Valid values look like this: `{}`",
1824-
cf.bold(f"{command_arg}={labels_json}"),
1825-
str(e),
1826-
cf.bold(f'{command_arg}=\'{{"gpu_type": "A100", "region": "us"}}\''),
1827-
)
1828-
return labels
1829-
1830-
1831-
def validate_node_labels(labels: Dict[str, str]):
1832-
if labels is None:
1833-
return
1834-
for key in labels.keys():
1835-
if key.startswith(ray_constants.RAY_DEFAULT_LABEL_KEYS_PREFIX):
1836-
raise ValueError(
1837-
f"Custom label keys `{key}` cannot start with the prefix "
1838-
f"`{ray_constants.RAY_DEFAULT_LABEL_KEYS_PREFIX}`. "
1839-
f"This is reserved for Ray defined labels."
1840-
)
1841-
1842-
18431806
def parse_pg_formatted_resources_to_original(
18441807
pg_formatted_resources: Dict[str, float]
18451808
) -> Dict[str, float]:

python/ray/scripts/scripts.py

Lines changed: 45 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -25,11 +25,14 @@
2525
import ray
2626
import ray._private.ray_constants as ray_constants
2727
import ray._private.services as services
28+
from ray._private.label_utils import (
29+
parse_node_labels_from_yaml_file,
30+
parse_node_labels_string,
31+
)
2832
from ray._private.utils import (
2933
check_ray_client_dependencies_installed,
3034
load_class,
3135
parse_resources_json,
32-
parse_node_labels_json,
3336
)
3437
from ray._private.internal_api import memory_summary
3538
from ray._private.usage import usage_lib
@@ -635,9 +638,19 @@ def debug(address: str, verbose: bool):
635638
"--labels",
636639
required=False,
637640
hidden=True,
638-
default="{}",
641+
default="",
639642
type=str,
640-
help="a JSON serialized dictionary mapping label name to label value.",
643+
help="a string list of key-value pairs mapping label name to label value."
644+
"These values take precedence over conflicting keys passed in from --labels-file."
645+
'Ex: --labels "key1=val1,key2=val2"',
646+
)
647+
@click.option(
648+
"--labels-file",
649+
required=False,
650+
hidden=True,
651+
default="",
652+
type=str,
653+
help="a path to a YAML file containing a dictionary mapping of label keys to values.",
641654
)
642655
@click.option(
643656
"--include-log-monitor",
@@ -695,6 +708,7 @@ def start(
695708
ray_debugger_external,
696709
disable_usage_stats,
697710
labels,
711+
labels_file,
698712
include_log_monitor,
699713
):
700714
"""Start Ray processes manually on the local machine."""
@@ -715,7 +729,34 @@ def start(
715729
node_ip_address = services.resolve_ip_for_localhost(node_ip_address)
716730

717731
resources = parse_resources_json(resources, cli_logger, cf)
718-
labels_dict = parse_node_labels_json(labels, cli_logger, cf)
732+
733+
# Compose labels passed in with `--labels` and `--labels-file`.
734+
# The label value from `--labels` will overwrite the value of any duplicate keys.
735+
try:
736+
labels_from_file_dict = parse_node_labels_from_yaml_file(labels_file)
737+
except Exception as e:
738+
cli_logger.abort(
739+
"The file at `{}` is not a valid YAML file, detailed error:{}"
740+
"Valid values look like this: `{}`",
741+
cf.bold(f"--labels-file={labels_file}"),
742+
str(e),
743+
cf.bold("--labels-file='gpu_type: A100\nregion: us'"),
744+
)
745+
try:
746+
labels_from_string = parse_node_labels_string(labels)
747+
except Exception as e:
748+
cli_logger.abort(
749+
"`{}` is not a valid string of key-value pairs, detail error:{}"
750+
"Valid values look like this: `{}`",
751+
cf.bold(f"--labels={labels}"),
752+
str(e),
753+
cf.bold('--labels="key1=val1,key2=val2"'),
754+
)
755+
labels_dict = (
756+
{**labels_from_file_dict, **labels_from_string}
757+
if labels_from_file_dict
758+
else labels_from_string
759+
)
719760

720761
if plasma_store_socket_name is not None:
721762
warnings.warn(

python/ray/tests/BUILD

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -349,6 +349,7 @@ py_test_module_list(
349349
py_test_module_list(
350350
size = "medium",
351351
files = [
352+
"test_label_utils.py",
352353
"test_minimal_install.py",
353354
"test_runtime_env_ray_minimal.py",
354355
"test_utils.py",
Lines changed: 121 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,121 @@
1+
import pytest
2+
from ray._private.label_utils import (
3+
parse_node_labels_string,
4+
parse_node_labels_from_yaml_file,
5+
validate_node_labels,
6+
)
7+
import sys
8+
import tempfile
9+
10+
11+
def test_parse_node_labels_from_string():
    """Check parsing of `key=value` label strings, including the error path."""
    # Empty label argument passed
    labels_string = ""
    labels_dict = parse_node_labels_string(labels_string)
    assert labels_dict == {}

    # Valid label key with empty value
    labels_string = "region="
    labels_dict = parse_node_labels_string(labels_string)
    assert labels_dict == {"region": ""}

    # Multiple valid label keys and values
    labels_string = "ray.io/accelerator-type=A100,region=us-west4"
    labels_dict = parse_node_labels_string(labels_string)
    assert labels_dict == {"ray.io/accelerator-type": "A100", "region": "us-west4"}

    # Invalid label
    labels_string = "ray.io/accelerator-type=type=A100"
    with pytest.raises(ValueError) as e:
        parse_node_labels_string(labels_string)
    # Inspect the raised exception itself rather than the ExceptionInfo wrapper.
    assert "Label value is not a key-value pair" in str(e.value)
32+
33+
34+
def test_parse_node_labels_from_yaml_file():
    """Check loading labels from YAML files, including malformed content."""
    # Empty/invalid yaml
    with tempfile.NamedTemporaryFile(mode="w+", delete=True) as test_file:
        test_file.write("")
        test_file.flush()  # Ensure data is written
        with pytest.raises(ValueError) as e:
            parse_node_labels_from_yaml_file(test_file.name)
        # Inspect the raised exception itself rather than the ExceptionInfo wrapper.
        assert "The format after deserialization is not a key-value pair map" in str(
            e.value
        )

    # With non-existent yaml file
    with pytest.raises(FileNotFoundError):
        parse_node_labels_from_yaml_file("missing-file.yaml")

    # Valid label key with empty value
    with tempfile.NamedTemporaryFile(mode="w+", delete=True) as test_file:
        test_file.write('"ray.io/accelerator-type": ""')
        test_file.flush()  # Ensure data is written
        labels_dict = parse_node_labels_from_yaml_file(test_file.name)
    assert labels_dict == {"ray.io/accelerator-type": ""}

    # Multiple valid label keys and values
    with tempfile.NamedTemporaryFile(mode="w+", delete=True) as test_file:
        test_file.write(
            '"ray.io/accelerator-type": "A100"\n"region": "us"\n"market-type": "spot"'
        )
        test_file.flush()  # Ensure data is written
        labels_dict = parse_node_labels_from_yaml_file(test_file.name)
    assert labels_dict == {
        "ray.io/accelerator-type": "A100",
        "region": "us",
        "market-type": "spot",
    }

    # Non-string label key
    with tempfile.NamedTemporaryFile(mode="w+", delete=True) as test_file:
        test_file.write('{100: "A100"}')
        test_file.flush()  # Ensure data is written
        with pytest.raises(ValueError) as e:
            parse_node_labels_from_yaml_file(test_file.name)
        assert "The key is not string type." in str(e.value)

    # Non-string label value
    with tempfile.NamedTemporaryFile(mode="w+", delete=True) as test_file:
        test_file.write('{"gpu": 100}')
        test_file.flush()  # Ensure data is written
        with pytest.raises(ValueError) as e:
            parse_node_labels_from_yaml_file(test_file.name)
        assert 'The value of "gpu" is not string type' in str(e.value)
82+
83+
84+
def test_validate_node_labels():
    """Check that each validation failure is reported and valid labels pass."""
    # Custom label starts with ray.io prefix
    labels_dict = {"ray.io/accelerator-type": "A100"}
    with pytest.raises(ValueError) as e:
        validate_node_labels(labels_dict)
    # Inspect the raised exception itself rather than the ExceptionInfo wrapper.
    assert "This is reserved for Ray defined labels." in str(e.value)

    # Invalid key prefix syntax
    labels_dict = {"!invalidPrefix/accelerator-type": "A100"}
    with pytest.raises(ValueError) as e:
        validate_node_labels(labels_dict)
    assert "Invalid label key prefix" in str(e.value)

    # Invalid key name syntax
    labels_dict = {"!!accelerator-type?": "A100"}
    with pytest.raises(ValueError) as e:
        validate_node_labels(labels_dict)
    assert "Invalid label key name" in str(e.value)

    # Invalid key value syntax
    labels_dict = {"accelerator-type": "??"}
    with pytest.raises(ValueError) as e:
        validate_node_labels(labels_dict)
    assert "Invalid label key value" in str(e.value)

    # Valid node label
    labels_dict = {"accelerator-type": "A100"}
    validate_node_labels(labels_dict)
112+
113+
114+
if __name__ == "__main__":
    import os

    # Run this file under pytest; when PARALLEL_CI is set, distribute the
    # tests across workers and run each one boxed.
    if os.environ.get("PARALLEL_CI"):
        pytest_args = ["-n", "auto", "--boxed", "-vs", __file__]
    else:
        pytest_args = ["-sv", __file__]
    sys.exit(pytest.main(pytest_args))

python/ray/tests/test_node_label_scheduling_strategy.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ def get_node_id():
3131

3232
@pytest.mark.parametrize(
3333
"call_ray_start",
34-
['ray start --head --labels={"gpu_type":"A100","region":"us"}'],
34+
["ray start --head --labels gpu_type=A100,region=us"],
3535
indirect=True,
3636
)
3737
def test_node_label_scheduling_basic(call_ray_start):

0 commit comments

Comments
 (0)