buildcache: Tell servers not to cache index or hash #40339
buildcache: Tell servers not to cache index or hash #40339 — scottwittenburg merged 1 commit into spack:develop from
Conversation
|
Since it's hard to know whether this would work, I tested with the following script once I had set up cloudfront for the test script:

import codecs
import json
import os
import spack.util.web as web_util
import spack.util.url as url_util
# Manual staleness check: push two versions of a single object to the S3
# bucket with "CacheControl: no-cache", fetching it back through the
# cloudfront url after each push, so the fetched copies can be diffed
# against the local ones.
fetch_base_url = "https://binaries-prs.spack.io"
push_base_url = "s3://spack-binaries-prs"
object_prefix = "test/staletesting/myobject.json"

# Same object, addressed for pushing (s3://) and for fetching (https://)
object_push_url = url_util.join(push_base_url, object_prefix)
object_fetch_url = url_util.join(fetch_base_url, object_prefix)

# Write both local versions up front; they differ only in "version"
object_version_1 = {"version": 5}
local_object_path_1 = os.path.join(os.getcwd(), "myobject_local_v1.json")
with open(local_object_path_1, "w", encoding="utf-8") as fd:
    json.dump(object_version_1, fd)

object_version_2 = {"version": 6}
local_object_path_2 = os.path.join(os.getcwd(), "myobject_local_v2.json")
with open(local_object_path_2, "w", encoding="utf-8") as fd:
    json.dump(object_version_2, fd)

# Push version 1 to the single object prefix
web_util.push_to_url(
    local_object_path_1,
    object_push_url,
    keep_original=True,
    extra_args={
        "ContentType": "application/json",
        "CacheControl": "no-cache",
    },
)

# Read version 1 from the single prefix using the cloudfront url
_, _, remote_file_obj = web_util.read_from_url(object_fetch_url)
remote_contents_v1 = codecs.getreader("utf-8")(remote_file_obj).read()
local_fetch_path_v1 = os.path.join(os.getcwd(), "myobject_fetched_v1.json")
# The contents were decoded as UTF-8, so write them back as UTF-8 rather
# than with the platform-default encoding
with open(local_fetch_path_v1, "w", encoding="utf-8") as fd:
    fd.write(remote_contents_v1)

# Push version 2 to the single object prefix
web_util.push_to_url(
    local_object_path_2,
    object_push_url,
    keep_original=True,
    extra_args={
        "ContentType": "application/json",
        "CacheControl": "no-cache",
    },
)

# Read version 2 (let's see about that though) from the single object
# using the cloudfront url
_, _, remote_file_obj = web_util.read_from_url(object_fetch_url)
remote_contents_v2 = codecs.getreader("utf-8")(remote_file_obj).read()
local_fetch_path_v2 = os.path.join(os.getcwd(), "myobject_fetched_v2.json")
with open(local_fetch_path_v2, "w", encoding="utf-8") as fd:
    fd.write(remote_contents_v2)

# Now we can:
#     diff myobject_local_v1.json myobject_fetched_v1.json
# diff myobject_local_v2.json myobject_fetched_v2.json

And that's how I learned that setting the |
kwryankrattiger
left a comment
There was a problem hiding this comment.
This looks good to me and matches the docs as well. The testing looks sufficiently thorough; I can't think of anything else that needs to be tried.
If the remote url is S3, this extra information is associated with the object so that a cloudfront distribution (which has appropriate cache policy attached) will never cache the mirror index or index hash. The goal is to avoid generating pipelines from a stale index.