Skip to content

Commit 6af3f8c

Browse files
authored
[doc][build/02] remove local site-package files from global cache (#46781)
The generated doc build artifacts may reference local files, such as paths inside the local Python site-packages directory. Remove these references, since they are specific to the build machine and cannot be used as part of a global cache. Test: CI. Signed-off-by: can <can@anyscale.com>
1 parent 6b81634 commit 6af3f8c

2 files changed

Lines changed: 54 additions & 0 deletions

File tree

ci/ray_ci/doc/build_cache.py

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
import tempfile
22
import subprocess
33
import os
4+
import pickle
45
from typing import Set
56

67
import boto3
@@ -10,6 +11,7 @@
1011

1112

1213
AWS_CACHE_KEY = "doc_build"
14+
ENVIRONMENT_PICKLE = "_build/doctrees/environment.pickle"
1315

1416

1517
class BuildCache:
@@ -29,6 +31,9 @@ def upload(self) -> None:
2931
"""
3032
Upload the build artifacts to S3
3133
"""
34+
logger.info("Massage the build artifacts to be used as a cache.")
35+
self._massage_cache(ENVIRONMENT_PICKLE)
36+
3237
logger.info("Obtaining the list of cache files.")
3338
cache_files = self._get_cache()
3439

@@ -40,6 +45,26 @@ def upload(self) -> None:
4045

4146
logger.info(f"Successfully uploaded {doc_tarball} to S3.")
4247

48+
def _massage_cache(self, environment_cache_file: str) -> None:
49+
"""
50+
Massage the build artifacts, remove the unnecessary files so that they can
51+
be used as a global cache
52+
"""
53+
environment_cache_path = os.path.join(self._cache_dir, environment_cache_file)
54+
environment_cache = None
55+
56+
with open(environment_cache_path, "rb") as f:
57+
environment_cache = pickle.load(f)
58+
for doc, dependencies in environment_cache.dependencies.items():
59+
# Remove the site-packages dependencies because they are local to the
60+
# build environment and cannot be used as a global cache
61+
local_dependencies = [d for d in dependencies if "site-packages" in d]
62+
for dependency in local_dependencies:
63+
environment_cache.dependencies[doc].remove(dependency)
64+
65+
with open(environment_cache_path, "wb+") as f:
66+
pickle.dump(environment_cache, f, pickle.HIGHEST_PROTOCOL)
67+
4368
def _get_cache(self) -> Set[str]:
4469
"""
4570
Get the list of cache files

ci/ray_ci/doc/test_build_cache.py

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,18 @@
11
import sys
22
import os
3+
import pickle
34
import pytest
45
import tempfile
56
from unittest import mock
67

78
from ci.ray_ci.doc.build_cache import BuildCache
89

910

11+
class FakeCache:
    """
    Minimal picklable stand-in for the environment object stored in the doc
    build cache; it only carries the ``dependencies`` attribute.
    """

    def __init__(self, dependencies) -> None:
        # Stored as given (no copy), so the test controls the exact contents.
        self.dependencies = dependencies
14+
15+
1016
@mock.patch("subprocess.check_output")
1117
def test_get_cache(mock_check_output):
1218
mock_check_output.return_value = b"file1\nfile2\nfile3"
@@ -26,5 +32,28 @@ def test_zip_cache():
2632
assert BuildCache(temp_dir)._zip_cache(files) == "12345.tgz"
2733

2834

35+
def test_massage_cache():
    """
    _massage_cache should strip every "site-packages" dependency from the
    pickled environment file and leave the other dependencies untouched.
    """
    pickle_name = "env_cache.pkl"
    original = FakeCache(
        {
            "doc1": ["site-packages/dep1", "dep2"],
            "doc2": ["dep3", "site-packages/dep4"],
        }
    )
    expected_dependencies = {
        "doc1": ["dep2"],
        "doc2": ["dep3"],
    }

    with tempfile.TemporaryDirectory() as temp_dir:
        pickle_path = os.path.join(temp_dir, pickle_name)
        with open(pickle_path, "wb") as sink:
            pickle.dump(original, sink)

        # The method rewrites the pickle in place, relative to the cache dir.
        BuildCache(temp_dir)._massage_cache(pickle_name)

        with open(pickle_path, "rb") as source:
            massaged = pickle.load(source)

    assert massaged.dependencies == expected_dependencies
56+
57+
2958
if __name__ == "__main__":
    # Run this file's tests verbosely and propagate pytest's exit status.
    exit_code = pytest.main(["-vv", __file__])
    sys.exit(exit_code)

0 commit comments

Comments
 (0)