Skip to content

Commit 6f02fc3

Browse files
malfet authored and facebook-github-bot committed
Revert D21511048: [pytorch][PR] .circleci: Improve docker image build workflow
Test Plan: revert-hammer Differential Revision: D21511048 Original commit changeset: e4b153a6078e fbshipit-source-id: 09ad9ad9b108479cba44070c82182dd91fd4f099
1 parent a4b427e commit 6f02fc3

15 files changed

Lines changed: 228 additions & 338 deletions

.circleci/cimodel/data/pytorch_build_definitions.py

Lines changed: 7 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,11 @@
1111

1212
DOCKER_IMAGE_PATH_BASE = "308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/"
1313

14+
# ARE YOU EDITING THIS NUMBER? MAKE SURE YOU READ THE GUIDANCE AT THE
15+
# TOP OF .circleci/config.yml
16+
DOCKER_IMAGE_VERSION = "9a3986fa-7ce7-4a36-a001-3c9bef9892e2"
17+
18+
1419
@dataclass
1520
class Conf:
1621
distro: str
@@ -55,14 +60,11 @@ def get_parms(self, for_docker):
5560
return result
5661

5762
def gen_docker_image_path(self):
58-
return miniutils.quote(
59-
DOCKER_IMAGE_PATH_BASE + self.gen_docker_image_name()
60-
)
6163

62-
def gen_docker_image_name(self):
6364
parms_source = self.parent_build or self
6465
base_build_env_name = "-".join(parms_source.get_parms(True))
65-
return base_build_env_name
66+
67+
return miniutils.quote(DOCKER_IMAGE_PATH_BASE + base_build_env_name + ":" + str(DOCKER_IMAGE_VERSION))
6668

6769
def get_build_job_name_pieces(self, build_or_test):
6870
return self.get_parms(False) + [build_or_test]
@@ -95,9 +97,6 @@ def gen_workflow_job(self, phase):
9597
job_def["name"] = self.gen_build_name(phase)
9698
job_def["requires"] = ["setup"]
9799

98-
if phase == "build":
99-
job_def["requires"].append(miniutils.quote("docker-" + self.gen_docker_image_name()))
100-
101100
if phase == "test":
102101

103102
# TODO When merging the caffe2 and pytorch jobs, it might be convenient for a while to make a

.circleci/config.yml

Lines changed: 99 additions & 188 deletions
Large diffs are not rendered by default.

.circleci/docker/build.sh

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -188,7 +188,6 @@ tmp_tag="tmp-$(cat /dev/urandom | tr -dc 'a-z' | fold -w 32 | head -n 1)"
188188
# it's no longer needed.
189189
docker build \
190190
--no-cache \
191-
--progress=plain \
192191
--build-arg "TRAVIS_DL_URL_PREFIX=${TRAVIS_DL_URL_PREFIX}" \
193192
--build-arg "BUILD_ENVIRONMENT=${image}" \
194193
--build-arg "PROTOBUF=${PROTOBUF:-}" \

.circleci/docker/build_docker.sh

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,8 +6,14 @@ retry () {
66
$* || (sleep 1 && $*) || (sleep 2 && $*)
77
}
88

9-
# Setup from previous step, typically located in .circleci/artifacts/env_file
10-
tag="${DOCKER_TAG}"
9+
# If UPSTREAM_BUILD_ID is set (see trigger job), then we can
10+
# use it to tag this build with the same ID used to tag all other
11+
# base image builds. Also, we can try and pull the previous
12+
# image first, to avoid rebuilding layers that haven't changed.
13+
14+
#until we find a way to reliably reuse previous build, this last_tag is not in use
15+
# last_tag="$(( CIRCLE_BUILD_NUM - 1 ))"
16+
tag="${CIRCLE_WORKFLOW_ID}"
1117

1218

1319
registry="308535385114.dkr.ecr.us-east-1.amazonaws.com"
Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
#!/usr/bin/env python3
2+
import cimodel.data.caffe2_build_definitions as caffe2_build_definitions
3+
import cimodel.data.pytorch_build_definitions as pytorch_build_definitions
4+
from yaml import load
5+
6+
try:
7+
from yaml import CLoader as Loader
8+
except ImportError:
9+
from yaml import Loader
10+
11+
12+
def load_config(filename=".circleci/config.yml"):
    """Parse the YAML file at ``filename`` and return the loaded document.

    The default path is the CircleCI config, resolved relative to the
    current working directory.
    """
    with open(filename, "r") as config_file:
        raw_yaml = config_file.read()
    return load(raw_yaml, Loader)
15+
16+
17+
def load_tags_for_projects(workflow_config):
    """Map each ECR garbage-collection project to its ``tags_to_keep`` value.

    Walks the job list of the ``ecr_gc`` workflow and picks out the dict
    entries keyed by ``ecr_gc_job``; every other job entry is ignored.
    """
    tags_by_project = {}
    for job_entry in workflow_config["workflows"]["ecr_gc"]["jobs"]:
        # Job entries that are not dicts carry no parameters to read.
        if not isinstance(job_entry, dict):
            continue
        if "ecr_gc_job" not in job_entry:
            continue
        gc_job = job_entry["ecr_gc_job"]
        project = gc_job["project"]
        tags_by_project[project] = gc_job["tags_to_keep"]
    return tags_by_project
23+
24+
25+
def check_version(job, tags, expected_version):
    """Fail unless ``expected_version`` is among the tags kept for ``job``.

    Args:
        job: Project name used as the key into ``tags``.
        tags: Mapping of project name to a comma-separated string of tags.
        expected_version: Docker image version ``job`` is configured to use.

    Raises:
        KeyError: If ``job`` has no entry in ``tags``.
        RuntimeError: If ``expected_version`` is not one of the listed tags.
    """
    # Strip each entry so a list written as "a, b" still matches "b";
    # a plain split(",") would leave the leading space and reject it.
    valid_versions = [version.strip() for version in tags[job].split(",")]
    if expected_version not in valid_versions:
        raise RuntimeError(
            "We configured {} to use Docker version {}; but this "
            "version is not configured in job ecr_gc_job_for_{}. Non-deployed versions will be "
            "garbage collected two weeks after they are created. DO NOT LAND "
            "THIS TO MASTER without also updating ossci-job-dsl with this version."
            "\n\nDeployed versions: {}".format(job, expected_version, job, tags[job])
        )
35+
36+
37+
def validate_docker_version():
    """Check the pytorch and caffe2 Docker image versions against the ECR GC tags.

    Loads the default CircleCI config, extracts the per-project tags to keep,
    and runs ``check_version`` for each project; the first mismatch raises.
    """
    circleci_config = load_config()
    kept_tags = load_tags_for_projects(circleci_config)
    check_version(
        "pytorch", kept_tags, pytorch_build_definitions.DOCKER_IMAGE_VERSION
    )
    check_version(
        "caffe2", kept_tags, caffe2_build_definitions.DOCKER_IMAGE_VERSION
    )
41+
42+
43+
# Run the validation only when executed as a script, not when imported.
if __name__ == "__main__":
    validate_docker_version()

.circleci/verbatim-sources/commands.yml

Lines changed: 0 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -10,21 +10,6 @@ commands:
1010
name: Attaching workspace
1111
at: ~/workspace
1212

13-
# Must be run after attaching workspace from previous steps
14-
load_shared_env:
15-
description: "Loads .circleci/shared/env_file into ${BASH_ENV}"
16-
parameters:
17-
# For some weird reason we decide to reattach our workspace to ~/workspace so
18-
# in the vein of making it simple let's assume our share env_file is here
19-
root:
20-
type: string
21-
default: "~/workspace"
22-
steps:
23-
- run:
24-
name: "Load .circleci/shared/env_file into ${BASH_ENV}"
25-
command: |
26-
cat << parameters.root >>/.circleci/shared/env_file >> ${BASH_ENV}
27-
2813
# This system setup script is meant to run before the CI-related scripts, e.g.,
2914
# installing Git client, checking out code, setting up CI env, and
3015
# building/testing.

.circleci/verbatim-sources/docker_jobs.yml

Lines changed: 2 additions & 52 deletions
Original file line numberDiff line numberDiff line change
@@ -8,45 +8,8 @@
88
resource_class: large
99
environment:
1010
IMAGE_NAME: << parameters.image_name >>
11-
# Enable 'docker manifest'
12-
DOCKER_CLI_EXPERIMENTAL: "enabled"
13-
DOCKER_BUILDKIT: 1
1411
steps:
1512
- checkout
16-
- run:
17-
name: Calculate docker tag
18-
command: |
19-
set -x
20-
mkdir .circleci/shared
21-
# git keeps a hash of all sub trees
22-
echo "export DOCKER_TAG=$(git rev-parse HEAD:.circleci/docker)" >> .circleci/shared/env_file
23-
# Saves our calculated docker tag to our workpace for later use
24-
- persist_to_workspace:
25-
root: .
26-
paths:
27-
- .circleci/shared/
28-
- load_shared_env:
29-
root: .
30-
- run:
31-
name: Check if image should be built
32-
command: |
33-
set +x
34-
export AWS_ACCESS_KEY_ID=${CIRCLECI_AWS_ACCESS_KEY_FOR_DOCKER_BUILDER_V1}
35-
export AWS_SECRET_ACCESS_KEY=${CIRCLECI_AWS_SECRET_KEY_FOR_DOCKER_BUILDER_V1}
36-
eval $(aws ecr get-login --no-include-email --region us-east-1)
37-
set -x
38-
# Check if image already exists, if it does then skip building it
39-
if docker manifest inspect "308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/${IMAGE_NAME}:${DOCKER_TAG}"; then
40-
circleci-agent step halt
41-
fi
42-
PREVIOUS_DOCKER_TAG=$(git rev-parse "$(git merge-base HEAD << pipeline.git.base_revision >>):.circleci/docker")
43-
# If no image exists but the hash is the same as the previous hash then we should error out here
44-
# no stampeding herd effect plz.
45-
if [[ ${PREVIOUS_DOCKER_TAG} = ${DOCKER_TAG} ]]; then
46-
echo "ERROR: Something has gone wrong and the previous image isn't available for the merge-base of your branch"
47-
echo " contact the PyTorch team to restore the original images"
48-
exit 1
49-
fi
5013
- run:
5114
name: build_docker_image_<< parameters.image_name >>
5215
no_output_timeout: "1h"
@@ -55,9 +18,7 @@
5518
export AWS_ACCESS_KEY_ID=${CIRCLECI_AWS_ACCESS_KEY_FOR_DOCKER_BUILDER_V1}
5619
export AWS_SECRET_ACCESS_KEY=${CIRCLECI_AWS_SECRET_KEY_FOR_DOCKER_BUILDER_V1}
5720
set -x
58-
source .circleci/shared/env_file
5921
cd .circleci/docker && ./build_docker.sh
60-
6122
docker_for_ecr_gc_build_job:
6223
machine:
6324
image: ubuntu-1604:201903-01
@@ -84,25 +45,14 @@
8445
type: string
8546
environment:
8647
PROJECT: << parameters.project >>
87-
# TODO: Remove legacy image tags once we feel comfortable with new docker image tags
8848
IMAGE_TAG: << parameters.tags_to_keep >>
8949
docker:
9050
- image: 308535385114.dkr.ecr.us-east-1.amazonaws.com/gc/ecr
9151
aws_auth:
9252
aws_access_key_id: ${CIRCLECI_AWS_ACCESS_KEY_FOR_DOCKER_BUILDER_V1}
9353
aws_secret_access_key: ${CIRCLECI_AWS_SECRET_KEY_FOR_DOCKER_BUILDER_V1}
54+
9455
steps:
95-
- checkout
96-
- run:
97-
# NOTE: see 'docker_build_job' for how these tags actually get built
98-
name: dynamically generate tags to keep
99-
no_output_timeout: "1h"
100-
command: |
101-
GENERATED_IMAGE_TAG=$(\
102-
git log --oneline --pretty='%H' .circleci/docker \
103-
| xargs -I '{}' git rev-parse '{}:.circleci/docker' \
104-
| paste -sd "," -)
105-
echo "export GENERATED_IMAGE_TAG='${GENERATED_IMAGE_TAG}'" >> ${BASH_ENV}
10656
- run:
10757
name: garbage collecting for ecr images
10858
no_output_timeout: "1h"
@@ -111,7 +61,7 @@
11161
export AWS_ACCESS_KEY_ID=${CIRCLECI_AWS_ACCESS_KEY_FOR_DOCKER_BUILDER_V1}
11262
export AWS_SECRET_ACCESS_KEY=${CIRCLECI_AWS_SECRET_KEY_FOR_DOCKER_BUILDER_V1}
11363
set -x
114-
/usr/bin/gc.py --filter-prefix ${PROJECT} --ignore-tags "${IMAGE_TAG},${GENERATED_IMAGE_TAG}"
64+
/usr/bin/gc.py --filter-prefix ${PROJECT} --ignore-tags ${IMAGE_TAG}
11565
11666
docker_hub_index_job:
11767
docker:

0 commit comments

Comments
 (0)